8#include "../utility/utility.hpp"
9#include "../concurrency/concurrency.hpp"
10#include "../numeric/numeric.hpp"
11#include "../macros.hpp"
19#if HI_OPERATING_SYSTEM == HI_OS_WINDOWS
21#elif HI_OPERATING_SYSTEM == HI_OS_LINUX
25hi_export_module(hikogui.time.time_stamp_count);
28hi_export
namespace hi::inline
v1 {
44 constexpr time_stamp_count(uint64_t count, uint32_t aux) noexcept : _count(count), _aux(aux), _thread_id(0) {}
50#if HI_PROCESSOR == HI_CPU_X86_64
52 _count = __rdtscp(&tmp);
62#if HI_PROCESSOR == HI_CPU_X86_64
63 _count = __rdtscp(&_aux);
74#if HI_PROCESSOR == HI_CPU_X86_64 and HI_OPERATING_SYSTEM == HI_OS_WINDOWS
75 constexpr uint64_t NT_TIB_CurrentThreadID = 0x48;
77 _count = __rdtscp(&_aux);
78 _thread_id = __readgsdword(NT_TIB_CurrentThreadID);
100 [[nodiscard]] ssize_t
cpu_id() const noexcept
102 if (_aux_is_cpu_id.load(std::memory_order::relaxed)) {
106 return cpu_id_fallback();
113 [[nodiscard]]
constexpr uint32_t
thread_id() const noexcept
122 [[nodiscard]]
constexpr uint64_t
count() const noexcept
134 using namespace std::chrono_literals;
136 auto const[lo,
hi] = mul_carry(count, _period.load(std::memory_order::relaxed));
137 return 1ns *
static_cast<int64_t
>((
hi << 32) | (lo >> 32));
146 return duration_from_count(_count);
155 [[nodiscard]]
constexpr time_stamp_count operator+(uint64_t rhs)
const noexcept
172 utc_nanoseconds shortest_tp;
176 for (
auto i = 0; i != 10; ++i) {
177 auto const tmp_tsc1 = time_stamp_count::now();
178 auto const tmp_tp = std::chrono::utc_clock::now();
179 auto const tmp_tsc2 = time_stamp_count::now();
181 if (tmp_tsc1.cpu_id() != tmp_tsc2.cpu_id()) {
182 throw os_error(
"CPU Switch detected during get_sample(), which should never happen");
185 if (tmp_tsc1.count() > tmp_tsc2.count()) {
192 auto const diff = tmp_tsc2.count() - tmp_tsc1.count();
194 if (diff < shortest_diff) {
195 shortest_diff = diff;
196 shortest_tp = tmp_tp;
197 shortest_tsc = tmp_tsc1 + (diff / 2);
202 throw os_error(
"Unable to get TSC sample.");
205 return {shortest_tp, shortest_tsc};
218 using namespace std::chrono_literals;
221 auto const prev_mask = set_thread_affinity(current_cpu_id());
223 auto const [tp1, tsc1] = time_stamp_utc_sample();
225 auto const [tp2, tsc2] = time_stamp_utc_sample();
228 set_thread_affinity_mask(prev_mask);
230 if (tsc1._aux != tsc2._aux) {
233 throw os_error(
"CPU Switch detected when measuring the TSC frequency.");
236 if (tsc1.count() >= tsc2.count()) {
241 throw os_error(
"TSC Did not advance during measuring its frequency.");
252 auto const[delta_tsc_lo, delta_tsc_hi] = mul_carry(tsc2.count() - tsc1.count(), uint64_t{1'000'000'000});
253 auto duration = narrow_cast<uint64_t>((tp2 - tp1) / 1ns);
254 return wide_div(delta_tsc_lo, delta_tsc_hi, duration);
257 static void set_frequency(uint64_t frequency)
noexcept
259 auto const period = (uint64_t{1'000'000'000} << 32) / frequency;
260 _period.store(period, std::memory_order_relaxed);
271 auto const frequency = configure_frequency();
272 auto const aux_is_cpu_id = populate_aux_values();
273 return {frequency, aux_is_cpu_id};
317 [[nodiscard]] ssize_t cpu_id_fallback() const noexcept
319 auto aux_value_ = _mm_set1_epi32(_aux);
321 auto const num_aux_values = _num_aux_values.
load(std::memory_order_acquire);
322 hi_assert(_aux_values.
size() == _cpu_ids.
size());
323 hi_assert_bounds(num_aux_values, _aux_values);
325 for (
std::size_t i = 0; i < num_aux_values; i += 4) {
326 auto const row = _mm_loadu_si128(
reinterpret_cast<__m128i
const *
>(_aux_values.
data() + i));
327 auto const row_result = _mm_cmpeq_epi32(row, aux_value_);
328 auto const row_result_ = _mm_castsi128_ps(row_result);
329 auto const row_result_mask = _mm_movemask_ps(row_result_);
330 if (to_bool(row_result_mask)) {
331 auto const j = i + std::countr_zero(narrow_cast<unsigned int>(row_result_mask));
332 if (j < num_aux_values) {
343 static bool populate_aux_values()
351 bool aux_is_cpu_id =
true;
355 auto i = _num_aux_values.
load(std::memory_order::acquire);
356 auto tsc = time_stamp_count::now();
357 _aux_values[i] = tsc._aux;
358 _cpu_ids[i] = current_cpu;
359 _num_aux_values.
store(i + 1, std::memory_order::release);
361 if ((tsc._aux & 0xfff) != current_cpu) {
362 aux_is_cpu_id =
false;
365 }
while (next_cpu > current_cpu);
367 _aux_is_cpu_id.
store(aux_is_cpu_id, std::memory_order_relaxed);
371 return aux_is_cpu_id;
373 static uint64_t configure_frequency()
375 using namespace std::chrono_literals;
382 uint64_t frequency = 0;
383 uint64_t num_samples = 0;
384 for (
int i = 0; i != 4; ++i) {
385 auto const f = time_stamp_count::measure_frequency(25ms);
391 if (num_samples == 0) {
392 throw os_error(
"Unable the measure the frequency of the TSC. The UTC time did not advance.");
394 frequency /= num_samples;
396 time_stamp_count::set_frequency(frequency);
std::vector< bool > set_thread_affinity_mask(std::vector< bool > const &mask)
Set the current thread CPU affinity mask.
std::size_t current_cpu_id() noexcept
Get the current CPU id.
std::size_t advance_thread_affinity(std::size_t &cpu) noexcept
Advance thread affinity to the next CPU.
Definition thread_intf.hpp:121
std::vector< bool > set_thread_affinity(std::size_t cpu_id)
Set the current thread CPU affinity to a single CPU.
Definition thread_intf.hpp:103
The HikoGUI namespace.
Definition array_generic.hpp:20
DOXYGEN BUG.
Definition algorithm_misc.hpp:20
Since Windows 10, QueryPerformanceCounter() counts at only 10MHz, which is too low to measure performa...
Definition time_stamp_count.hpp:36
constexpr uint32_t thread_id() const noexcept
Get the thread id.
Definition time_stamp_count.hpp:113
time_stamp_count(time_stamp_count::inplace_with_cpu_id) noexcept
Use a constructor to create the timestamp in place.
Definition time_stamp_count.hpp:60
static time_stamp_count now() noexcept
Get the current count from the CPU's time stamp counter.
Definition time_stamp_count.hpp:89
time_stamp_count(time_stamp_count::inplace) noexcept
Use a constructor to create the timestamp in place.
Definition time_stamp_count.hpp:48
static std::pair< uint64_t, bool > start_subsystem()
Start the time_stamp_count subsystem.
Definition time_stamp_count.hpp:269
static std::pair< utc_nanoseconds, time_stamp_count > time_stamp_utc_sample()
Get a good quality time sample.
Definition time_stamp_count.hpp:168
ssize_t cpu_id() const noexcept
Get the logical CPU index.
Definition time_stamp_count.hpp:100
time_stamp_count(time_stamp_count::inplace_with_thread_id) noexcept
Use a constructor to create the timestamp in place.
Definition time_stamp_count.hpp:72
static uint64_t measure_frequency(std::chrono::milliseconds sample_duration)
Measure the frequency of the time_stamp_count.
Definition time_stamp_count.hpp:216
static std::chrono::nanoseconds duration_from_count(uint64_t count) noexcept
Convert a time-stamp count to a duration.
Definition time_stamp_count.hpp:132
std::chrono::nanoseconds time_since_epoch() const noexcept
Convert to nanoseconds since epoch.
Definition time_stamp_count.hpp:144
constexpr uint64_t count() const noexcept
Get the count since epoch.
Definition time_stamp_count.hpp:122
Definition time_stamp_count.hpp:38
Definition time_stamp_count.hpp:39
Definition time_stamp_count.hpp:40