7#include "../utility/utility.hpp"
8#include "../concurrency/concurrency.hpp"
9#include "../numeric/module.hpp"
10#include "../macros.hpp"
15#if HI_OPERATING_SYSTEM == HI_OS_WINDOWS
17#elif HI_OPERATING_SYSTEM == HI_OS_LINUX
39 constexpr time_stamp_count(uint64_t count, uint32_t aux) noexcept : _count(count), _aux(aux), _thread_id(0) {}
45#if HI_PROCESSOR == HI_CPU_X64
47 _count = __rdtscp(&tmp);
49#error "Not Implemented"
57#if HI_PROCESSOR == HI_CPU_X64
58 _count = __rdtscp(&_aux);
60#error "Not Implemented"
68#if HI_PROCESSOR == HI_CPU_X64 and HI_OPERATING_SYSTEM == HI_OS_WINDOWS
69 constexpr uint64_t NT_TIB_CurrentThreadID = 0x48;
71 _count = __rdtscp(&_aux);
72 _thread_id = __readgsdword(NT_TIB_CurrentThreadID);
74#error "Not Implemented"
92 [[nodiscard]] ssize_t
cpu_id() const noexcept
94 if (_aux_is_cpu_id.load(std::memory_order::relaxed)) {
98 return cpu_id_fallback();
105 [[nodiscard]]
constexpr uint32_t
thread_id() const noexcept
114 [[nodiscard]]
constexpr uint64_t
count() const noexcept
126 using namespace std::chrono_literals;
128 hilet[lo,
hi] = mul_carry(count, _period.load(std::memory_order::relaxed));
129 return 1ns *
static_cast<int64_t
>((
hi << 32) | (lo >> 32));
138 return duration_from_count(_count);
147 [[nodiscard]]
constexpr time_stamp_count operator+(uint64_t rhs)
const noexcept
164 utc_nanoseconds shortest_tp;
168 for (
auto i = 0; i != 10; ++i) {
169 hilet tmp_tsc1 = time_stamp_count::now();
170 hilet tmp_tp = std::chrono::utc_clock::now();
171 hilet tmp_tsc2 = time_stamp_count::now();
173 if (tmp_tsc1.cpu_id() != tmp_tsc2.cpu_id()) {
174 throw os_error(
"CPU Switch detected during get_sample(), which should never happen");
177 if (tmp_tsc1.count() > tmp_tsc2.count()) {
184 hilet diff = tmp_tsc2.count() - tmp_tsc1.count();
186 if (diff < shortest_diff) {
187 shortest_diff = diff;
188 shortest_tp = tmp_tp;
189 shortest_tsc = tmp_tsc1 + (diff / 2);
194 throw os_error(
"Unable to get TSC sample.");
197 return {shortest_tp, shortest_tsc};
210 using namespace std::chrono_literals;
213 hilet prev_mask = set_thread_affinity(current_cpu_id());
215 hilet [tp1, tsc1] = time_stamp_utc_sample();
217 hilet [tp2, tsc2] = time_stamp_utc_sample();
220 set_thread_affinity_mask(prev_mask);
222 if (tsc1._aux != tsc2._aux) {
225 throw os_error(
"CPU Switch detected when measuring the TSC frequency.");
228 if (tsc1.count() >= tsc2.count()) {
233 throw os_error(
"TSC Did not advance during measuring its frequency.");
244 hilet[delta_tsc_lo, delta_tsc_hi] = mul_carry(tsc2.count() - tsc1.count(), uint64_t{1'000'000'000});
245 auto duration = narrow_cast<uint64_t>((tp2 - tp1) / 1ns);
246 return wide_div(delta_tsc_lo, delta_tsc_hi, duration);
249 static void set_frequency(uint64_t frequency)
noexcept
251 hilet period = (uint64_t{1'000'000'000} << 32) / frequency;
252 _period.store(period, std::memory_order_relaxed);
263 hilet frequency = configure_frequency();
264 hilet aux_is_cpu_id = populate_aux_values();
265 return {frequency, aux_is_cpu_id};
309 [[nodiscard]] ssize_t cpu_id_fallback() const noexcept
311 auto aux_value_ = _mm_set1_epi32(_aux);
313 hilet num_aux_values = _num_aux_values.
load(std::memory_order_acquire);
314 hi_assert(_aux_values.
size() == _cpu_ids.
size());
315 hi_assert_bounds(num_aux_values, _aux_values);
317 for (
std::size_t i = 0; i < num_aux_values; i += 4) {
318 hilet row = _mm_loadu_si128(
reinterpret_cast<__m128i
const *
>(_aux_values.
data() + i));
319 hilet row_result = _mm_cmpeq_epi32(row, aux_value_);
320 hilet row_result_ = _mm_castsi128_ps(row_result);
321 hilet row_result_mask = _mm_movemask_ps(row_result_);
322 if (to_bool(row_result_mask)) {
323 hilet j = i + std::countr_zero(narrow_cast<unsigned int>(row_result_mask));
324 if (j < num_aux_values) {
335 static bool populate_aux_values()
343 bool aux_is_cpu_id =
true;
347 auto i = _num_aux_values.
load(std::memory_order::acquire);
348 auto tsc = time_stamp_count::now();
349 _aux_values[i] = tsc._aux;
350 _cpu_ids[i] = current_cpu;
351 _num_aux_values.
store(i + 1, std::memory_order::release);
353 if ((tsc._aux & 0xfff) != current_cpu) {
354 aux_is_cpu_id =
false;
357 }
while (next_cpu > current_cpu);
359 _aux_is_cpu_id.
store(aux_is_cpu_id, std::memory_order_relaxed);
363 return aux_is_cpu_id;
365 static uint64_t configure_frequency()
367 using namespace std::chrono_literals;
374 uint64_t frequency = 0;
375 uint64_t num_samples = 0;
376 for (
int i = 0; i != 4; ++i) {
377 hilet f = time_stamp_count::measure_frequency(25ms);
383 if (num_samples == 0) {
384 throw os_error(
"Unable the measure the frequency of the TSC. The UTC time did not advance.");
386 frequency /= num_samples;
388 time_stamp_count::set_frequency(frequency);
std::vector< bool > set_thread_affinity_mask(std::vector< bool > const &mask)
Set the current thread CPU affinity mask.
std::size_t current_cpu_id() noexcept
Get the current CPU id.
std::size_t advance_thread_affinity(std::size_t &cpu) noexcept
Advance thread affinity to the next CPU.
Definition thread_intf.hpp:122
std::vector< bool > set_thread_affinity(std::size_t cpu_id)
Set the current thread CPU affinity to a single CPU.
Definition thread_intf.hpp:104
DOXYGEN BUG.
Definition algorithm.hpp:16
geometry/margins.hpp
Definition lookahead_iterator.hpp:5
constexpr Out narrow_cast(In const &rhs) noexcept
Cast numeric values without loss of precision.
Definition cast.hpp:377
Since Windows 10, QueryPerformanceCounter() counts at only 10 MHz, which is too low to measure performance...
Definition time_stamp_count.hpp:31
constexpr uint32_t thread_id() const noexcept
Get the thread id.
Definition time_stamp_count.hpp:105
time_stamp_count(time_stamp_count::inplace_with_cpu_id) noexcept
Use a constructor to in-place create the timestamp.
Definition time_stamp_count.hpp:55
static time_stamp_count now() noexcept
Get the current count from the CPU's time stamp count.
Definition time_stamp_count.hpp:81
time_stamp_count(time_stamp_count::inplace) noexcept
Use a constructor to in-place create the timestamp.
Definition time_stamp_count.hpp:43
static std::pair< uint64_t, bool > start_subsystem()
Start the time_stamp_count subsystem.
Definition time_stamp_count.hpp:261
static std::pair< utc_nanoseconds, time_stamp_count > time_stamp_utc_sample()
Get a good quality time sample.
Definition time_stamp_count.hpp:160
ssize_t cpu_id() const noexcept
Get the logical CPU index.
Definition time_stamp_count.hpp:92
time_stamp_count(time_stamp_count::inplace_with_thread_id) noexcept
Use a constructor to in-place create the timestamp.
Definition time_stamp_count.hpp:66
static uint64_t measure_frequency(std::chrono::milliseconds sample_duration)
Measure the frequency of the time_stamp_count.
Definition time_stamp_count.hpp:208
static std::chrono::nanoseconds duration_from_count(uint64_t count) noexcept
Convert a time-stamp count to a duration.
Definition time_stamp_count.hpp:124
std::chrono::nanoseconds time_since_epoch() const noexcept
Convert to nanoseconds since epoch.
Definition time_stamp_count.hpp:136
constexpr uint64_t count() const noexcept
Get the count since epoch.
Definition time_stamp_count.hpp:114
Definition time_stamp_count.hpp:33
Definition time_stamp_count.hpp:34
Definition time_stamp_count.hpp:35