32hi_warning_ignore_msvc(26472)
34hi_export namespace
hi {
43template<std::
unsigned_
integral T>
44[[nodiscard]] hi_force_inline
constexpr T get_bit(T
const *lhs,
std::size_t index)
noexcept
46 constexpr std::size_t bits_per_digit =
sizeof(T) * CHAR_BIT;
48 auto const digit_count = index / bits_per_digit;
49 auto const bit_count = index % bits_per_digit;
51 return (lhs[digit_count] >> bit_count) & 1;
61template<std::
unsigned_
integral T>
62constexpr void set_bit(T *r,
std::size_t index, T value = T{1})
noexcept
66 constexpr std::size_t bits_per_digit =
sizeof(T) * CHAR_BIT;
68 auto const digit_count = index / bits_per_digit;
69 auto const bit_count = index % bits_per_digit;
72 auto const mask = ~(T{1} << bit_count);
73 r[digit_count] = (r[digit_count] & mask) | value;
82template<std::
unsigned_
integral T>
85 constexpr auto num_bits =
sizeof(T) * CHAR_BIT;
86 auto const reverse_count = num_bits - rhs;
88 return {(lhs << rhs) | carry, lhs >> reverse_count};
97template<std::
unsigned_
integral T>
100 constexpr auto num_bits =
sizeof(T) * CHAR_BIT;
101 auto const reverse_count = num_bits - rhs;
103 return {(lhs >> rhs) | carry, lhs << reverse_count};
111template<std::
unsigned_
integral T>
114 using S = std::make_signed_t<T>;
116 constexpr auto num_bits =
sizeof(T) * CHAR_BIT;
117 auto const reverse_count = num_bits - rhs;
119 return {(
static_cast<S
>(lhs) >> rhs), lhs << reverse_count};
128template<std::
unsigned_
integral T>
129hi_force_inline
constexpr std::pair<T, T> add_carry(T lhs, T rhs, T carry = T{0})
noexcept
131 hi_axiom(carry <= 1);
133 constexpr std::size_t num_bits =
sizeof(T) * CHAR_BIT;
135 if constexpr (has_native_uintxx_v<num_bits * 2>) {
137 using U = make_uintxx_t<num_bits * 2>;
139 auto const r =
static_cast<U
>(lhs) +
static_cast<U
>(rhs) +
static_cast<U
>(carry);
140 return {
static_cast<T
>(r),
static_cast<T
>(r >> num_bits)};
142 }
else if (not std::is_constant_evaluated()) {
143#if HI_COMPILER == HI_CC_MSVC
145 auto const c = _addcarry_u64(
static_cast<unsigned char>(carry), lhs, rhs, &r);
146 return {r,
static_cast<T
>(c)};
151 auto const r =
static_cast<T
>(lhs + rhs + carry);
152 auto const c =
static_cast<T
>(r < lhs or r < rhs);
167template<std::
unsigned_
integral T>
168hi_force_inline
constexpr std::pair<T, T> mul_carry(T lhs, T rhs, T carry = T{0}, T accumulator = T{0})
noexcept
170 constexpr std::size_t num_bits =
sizeof(T) * CHAR_BIT;
172 if constexpr (has_native_uintxx_v<num_bits * 2>) {
174 using U = make_uintxx_t<num_bits * 2>;
176 auto const r =
static_cast<U
>(lhs) *
static_cast<U
>(rhs) +
static_cast<U
>(carry) +
static_cast<U
>(accumulator);
177 return {
static_cast<T
>(r),
static_cast<T
>(r >> num_bits)};
179 }
else if (not std::is_constant_evaluated()) {
180#if HI_COMPILER == HI_CC_MSVC
181 if constexpr (
sizeof(T) == 8) {
183 uint64_t lo = _umul128(lhs, rhs, &
hi);
185 std::tie(lo, c) = add_carry(lo, carry, uint64_t{0});
187 std::tie(lo, c) = add_carry(lo, accumulator, uint64_t{0});
194 constexpr std::size_t num_half_bits = num_bits / 2;
195 constexpr T half_mask = (T{1} << num_half_bits) - T{1};
197 auto const A = lhs >> num_half_bits;
198 auto const B = lhs & half_mask;
199 auto const C = rhs >> num_half_bits;
200 auto const D = rhs & half_mask;
201 auto const AC = A * C;
202 auto const AD = A * D;
203 auto const BC = B * C;
204 auto const BD = B * D;
212 auto const AD_lo = AD << num_half_bits;
213 auto const AD_hi = AD >> num_half_bits;
214 auto const BC_lo = BC << num_half_bits;
215 auto const BC_hi = BC >> num_half_bits;
217 std::tie(lo, c) = add_carry(lo, AD_lo, T{0});
219 std::tie(lo, c) = add_carry(lo, BC_lo, T{0});
223 std::tie(lo, c) = add_carry(lo, carry, T{0});
225 std::tie(lo, c) = add_carry(lo, accumulator, T{0});
239template<std::
unsigned_
integral T>
240hi_force_inline
constexpr T wide_div(T lhs_lo, T lhs_hi, T rhs)
noexcept
242 if constexpr (
sizeof(T) == 1) {
243 auto const lhs =
static_cast<uint16_t
>(lhs_hi) << 8 |
static_cast<uint16_t
>(lhs_lo);
244 return narrow_cast<uint8_t>(lhs / rhs);
246 }
else if constexpr (
sizeof(T) == 2) {
247 auto const lhs =
static_cast<uint32_t
>(lhs_hi) << 16 |
static_cast<uint32_t
>(lhs_lo);
248 return narrow_cast<uint16_t>(lhs / rhs);
250 }
else if constexpr (
sizeof(T) == 4) {
251 auto const lhs =
static_cast<uint64_t
>(lhs_hi) << 32 |
static_cast<uint64_t
>(lhs_lo);
252 return narrow_cast<uint32_t>(lhs / rhs);
254 }
else if constexpr (
sizeof(T) == 8) {
255#if HI_COMPILER == HI_CC_MSVC
257 return _udiv128(lhs_hi, lhs_lo, rhs, &remainder);
259#elif HI_COMPILER == HI_CC_CLANG && HI_STD_LIBRARY == HI_STL_MS
263 :
"+d" (lhs_hi),
"+a" (lhs_lo)
270#elif HI_COMPILER == HI_CC_CLANG || HI_COMPILER == HI_CC_GCC
271 auto const lhs =
static_cast<unsigned __int128
>(lhs_hi) << 64 |
static_cast<unsigned __int128
>(lhs_lo);
272 return static_cast<uint64_t
>(lhs / rhs);
274 hi_axiom(rhs != 0,
"divide by zero.");
276 auto R = uint64_t{0};
277 auto Q_hi = uint64_t{0};
278 for (
auto mask = 0x8000'0000'0000'0000ULL; mask != 0; mask >>= 1) {
280 R |= (lhs_hi & mask) != 0 ? 1 : 0;
282 hi_axiom(R < rhs,
"result overflow");
289 auto Q_lo = uint64_t{0};
290 for (
auto mask = 0x8000'0000'0000'0000ULL; mask != 0; mask >>= 1) {
292 R |= (lhs_lo & mask) != 0 ? 1 : 0;
312template<std::
unsigned_
integral T>
313[[nodiscard]] hi_force_inline
constexpr ssize_t bsr_carry_chain(T
const *lhs,
std::size_t n)
noexcept
315 constexpr std::size_t bits_per_digit =
sizeof(T) * CHAR_BIT;
317 for (ssize_t i =
static_cast<ssize_t
>(n) - 1; i >= 0; i--) {
318 auto tmp = std::countl_zero(lhs[i]);
319 if (tmp < bits_per_digit) {
320 return i * bits_per_digit + bits_per_digit - tmp - 1;
333template<std::
unsigned_
integral T>
334hi_force_inline
constexpr void invert_carry_chain(T *r, T
const *rhs,
std::size_t n)
noexcept
348template<std::
unsigned_
integral T>
349hi_force_inline
constexpr void sll_carry_chain(T *r, T
const *lhs,
std::size_t rhs,
std::size_t n)
noexcept
351 constexpr std::size_t bits_per_digit =
sizeof(T) * CHAR_BIT;
353 auto const digit_count =
static_cast<ssize_t
>(rhs / bits_per_digit);
354 auto const bit_count = rhs % bits_per_digit;
356 if (r != lhs or digit_count > 0) {
358 for (i =
static_cast<ssize_t
>(n) - 1; i >= digit_count; --i) {
359 r[i] = lhs[i - digit_count];
361 for (; i >= 0; --i) {
369 std::tie(r[i], carry) = sll_carry(r[i], bit_count, carry);
381template<std::
unsigned_
integral T>
382hi_force_inline
constexpr void srl_carry_chain(T *r, T
const *lhs,
std::size_t rhs,
std::size_t n)
noexcept
384 constexpr std::size_t bits_per_digit =
sizeof(T) * CHAR_BIT;
386 auto const digit_count = rhs / bits_per_digit;
387 auto const bit_count = rhs % bits_per_digit;
389 if (r != lhs or digit_count > 0) {
391 for (; i != (n - digit_count); ++i) {
392 r[i] = lhs[i + digit_count];
394 for (; i != n; ++i) {
402 for (ssize_t i =
static_cast<ssize_t
>(n) - digit_count - 1; i >= 0; --i) {
403 std::tie(r[i], carry) = srl_carry(r[i], bit_count, carry);
416template<std::
unsigned_
integral T>
417hi_force_inline
constexpr void sra_carry_chain(T *r, T
const *lhs,
std::size_t rhs,
std::size_t n)
noexcept
419 using S = std::make_signed_t<T>;
420 constexpr std::size_t bits_per_digit =
sizeof(T) * CHAR_BIT;
422 auto const digit_count = rhs / bits_per_digit;
423 auto const bit_count = rhs % bits_per_digit;
425 if (r != lhs or digit_count > 0) {
426 hi_axiom(digit_count < n);
429 for (; i != (n - digit_count); ++i) {
430 r[i] = lhs[i + digit_count];
434 auto const sign = lhs[n - 1] < 0 ? S{-1} : S{0};
435 for (; i != n; ++i) {
445 ssize_t i =
static_cast<ssize_t
>(n) - digit_count - 1;
446 std::tie(r[i], carry) = sra_carry(r[i], bit_count);
450 for (; i >= 0; --i) {
451 std::tie(r[i], carry) = srl_carry(r[i], bit_count, carry);
463template<std::
unsigned_
integral T>
464hi_force_inline
constexpr void and_carry_chain(T *r, T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
467 r[i] = lhs[i] & rhs[i];
478template<std::
unsigned_
integral T>
479hi_force_inline
constexpr void or_carry_chain(T *r, T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
482 r[i] = lhs[i] | rhs[i];
493template<std::
unsigned_
integral T>
494hi_force_inline
constexpr void xor_carry_chain(T *r, T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
497 r[i] = lhs[i] ^ rhs[i];
501template<std::
unsigned_
integral T>
502[[nodiscard]] hi_force_inline
constexpr bool eq_carry_chain(T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
505 if (lhs[i] != rhs[i]) {
512template<std::
unsigned_
integral T>
513[[nodiscard]] hi_force_inline
constexpr bool ne_carry_chain(T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
515 return not eq_carry_chain(lhs, rhs, n);
518template<std::
unsigned_
integral T>
519[[nodiscard]] hi_force_inline
constexpr std::strong_ordering
520cmp_unsigned_carry_chain(T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
522 for (ssize_t i =
static_cast<ssize_t
>(n) - 1; i >= 0; --i) {
523 auto const r = lhs[i] <=> rhs[i];
524 if (r != std::strong_ordering::equal) {
528 return std::strong_ordering::equal;
531template<std::
unsigned_
integral T>
532[[nodiscard]] hi_force_inline
constexpr std::strong_ordering cmp_signed_carry_chain(T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
534 using S = std::make_signed_t<T>;
538 auto const r =
static_cast<S
>(lhs[n - 1]) <=>
static_cast<S
>(rhs[n - 1]);
539 if (r != std::strong_ordering::equal) {
546 for (ssize_t i =
static_cast<ssize_t
>(n) - 2; i >= 0; --i) {
547 auto const r = lhs[i] <=> rhs[i];
548 if (r != std::strong_ordering::equal) {
552 return std::strong_ordering::equal;
555template<std::
unsigned_
integral T>
556[[nodiscard]] hi_force_inline
constexpr bool lt_unsigned_carry_chain(T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
558 return cmp_unsigned_carry_chain(lhs, rhs, n) == std::strong_ordering::less;
561template<std::
unsigned_
integral T>
562[[nodiscard]] hi_force_inline
constexpr bool gt_unsigned_carry_chain(T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
564 return cmp_unsigned_carry_chain(lhs, rhs, n) == std::strong_ordering::greater;
567template<std::
unsigned_
integral T>
568[[nodiscard]] hi_force_inline
constexpr bool ge_unsigned_carry_chain(T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
570 return lt_unsigned_carry_chain(rhs, lhs, n);
573template<std::
unsigned_
integral T>
574[[nodiscard]] hi_force_inline
constexpr bool le_unsigned_carry_chain(T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
576 return gt_unsigned_carry_chain(rhs, lhs, n);
586template<std::
unsigned_
integral T>
587hi_force_inline
constexpr void neg_carry_chain(T *r, T
const *rhs,
std::size_t n)
noexcept
591 std::tie(r[i], carry) = add_carry(~rhs[i], T{0}, carry);
602template<std::
unsigned_
integral T>
603hi_force_inline
constexpr void add_carry_chain(T *r, T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
607 std::tie(r[i], carry) = add_carry(lhs[i], rhs[i], carry);
618template<std::
unsigned_
integral T>
619hi_force_inline
constexpr void sub_carry_chain(T *r, T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
623 std::tie(r[i], carry) = add_carry(lhs[i], ~rhs[i], carry);
635template<std::
unsigned_
integral T>
636hi_force_inline
constexpr void mul_carry_chain(T *hi_restrict r, T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
638 hi_axiom(r != lhs and r != rhs);
640 for (
auto rhs_index = 0; rhs_index < n; rhs_index++) {
641 auto const rhs_digit = rhs[rhs_index];
644 for (
auto lhs_index = 0; (lhs_index + rhs_index) < n; lhs_index++) {
645 auto const lhs_digit = lhs[lhs_index];
648 T accumulator = r[rhs_index + lhs_index];
649 std::tie(result, carry) = mul_carry(lhs_digit, rhs_digit, carry, accumulator);
650 r[rhs_index + lhs_index] = result;
665template<std::
unsigned_
integral T>
666constexpr void div_carry_chain(T *hi_restrict quotient, T *hi_restrict remainder, T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
668 hi_axiom(quotient != lhs and quotient != rhs and quotient != remainder);
669 hi_axiom(remainder != lhs and remainder != rhs);
671 auto const nr_bits =
static_cast<ssize_t
>(n *
sizeof(T) * CHAR_BIT);
673 for (ssize_t i = nr_bits - 1; i >= 0; i--) {
674 sll_carry_chain(remainder, remainder, 1, n);
675 remainder[0] |= get_bit(lhs, i);
676 if (ge_unsigned_carry_chain(remainder, rhs, n)) {
677 sub_carry_chain(remainder, remainder, rhs, n);
678 set_bit(quotient, i);
694template<std::
unsigned_
integral T>
696signed_div_carry_chain(T *hi_restrict quotient, T *hi_restrict remainder, T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
699 hi_axiom(quotient != lhs and quotient != rhs and quotient != remainder);
700 hi_axiom(remainder != lhs and remainder != rhs);
702 using signed_type = std::make_signed_t<T>;
704 auto const lhs_is_negative =
static_cast<signed_type
>(lhs[n - 1]) < 0;
705 auto const rhs_is_negative =
static_cast<signed_type
>(rhs[n - 1]) < 0;
708 if (lhs_is_negative or rhs_is_negative) {
712 if (lhs_is_negative) {
715 neg_carry_chain(p, lhs, n);
719 if (rhs_is_negative) {
721 T *p = tmp.data() * n;
722 neg_carry_chain(p, rhs, n);
727 div_carry_chain(quotient, remainder, lhs, rhs, n);
729 if (lhs_is_negative != rhs_is_negative) {
731 neg_carry_chain(quotient, quotient, n);
733 if (lhs_is_negative) {
735 neg_carry_chain(remainder, remainder, n);