18#if HI_COMPILER == HI_CC_MSVC
21#if HI_PROCESSOR == HI_CPU_X64
27hi_warning_ignore_msvc(4702);
38template<std::
unsigned_
integral T>
39[[nodiscard]] hi_force_inline
constexpr T get_bit(T
const *lhs,
std::size_t index)
noexcept
41 constexpr std::size_t bits_per_digit =
sizeof(T) * CHAR_BIT;
43 hilet digit_count = index / bits_per_digit;
44 hilet bit_count = index % bits_per_digit;
46 return (lhs[digit_count] >> bit_count) & 1;
56template<std::
unsigned_
integral T>
57constexpr void set_bit(T *r,
std::size_t index, T value = T{1})
noexcept
61 constexpr std::size_t bits_per_digit =
sizeof(T) * CHAR_BIT;
63 hilet digit_count = index / bits_per_digit;
64 hilet bit_count = index % bits_per_digit;
67 hilet mask = ~(T{1} << bit_count);
68 r[digit_count] = (r[digit_count] & mask) | value;
77template<std::
unsigned_
integral T>
80 constexpr auto num_bits =
sizeof(T) * CHAR_BIT;
81 hilet reverse_count = num_bits - rhs;
83 return {(lhs << rhs) | carry, lhs >> reverse_count};
92template<std::
unsigned_
integral T>
95 constexpr auto num_bits =
sizeof(T) * CHAR_BIT;
96 hilet reverse_count = num_bits - rhs;
98 return {(lhs >> rhs) | carry, lhs << reverse_count};
106template<std::
unsigned_
integral T>
109 using S = std::make_signed_t<T>;
111 constexpr auto num_bits =
sizeof(T) * CHAR_BIT;
112 hilet reverse_count = num_bits - rhs;
114 return {(
static_cast<S
>(lhs) >> rhs), lhs << reverse_count};
123template<std::
unsigned_
integral T>
124hi_force_inline
constexpr std::pair<T, T> add_carry(T lhs, T rhs, T carry = T{0})
noexcept
126 hi_axiom(carry <= 1);
128 constexpr std::size_t num_bits =
sizeof(T) * CHAR_BIT;
130 if constexpr (has_uintxx_v<num_bits * 2>) {
132 using U = make_uintxx_t<num_bits * 2>;
134 hilet r =
static_cast<U
>(lhs) +
static_cast<U
>(rhs) +
static_cast<U
>(carry);
135 return {
static_cast<T
>(r),
static_cast<T
>(r >> num_bits)};
137 }
else if (not std::is_constant_evaluated()) {
138#if HI_COMPILER == HI_CC_MSVC
140 hilet c = _addcarry_u64(
static_cast<unsigned char>(carry), lhs, rhs, &r);
141 return {r,
static_cast<T
>(c)};
146 hilet r =
static_cast<T
>(lhs + rhs + carry);
147 hilet c =
static_cast<T
>(r < lhs or r < rhs);
162template<std::
unsigned_
integral T>
163hi_force_inline
constexpr std::pair<T, T> mul_carry(T lhs, T rhs, T carry = T{0}, T accumulator = T{0})
noexcept
165 constexpr std::size_t num_bits =
sizeof(T) * CHAR_BIT;
167 if constexpr (has_uintxx_v<num_bits * 2>) {
169 using U = make_uintxx_t<num_bits * 2>;
171 hilet r =
static_cast<U
>(lhs) *
static_cast<U
>(rhs) +
static_cast<U
>(carry) +
static_cast<U
>(accumulator);
172 return {
static_cast<T
>(r),
static_cast<T
>(r >> num_bits)};
174 }
else if (not std::is_constant_evaluated()) {
175#if HI_COMPILER == HI_CC_MSVC
176 if constexpr (
sizeof(T) == 8) {
178 uint64_t lo = _umul128(lhs, rhs, &hi);
180 std::tie(lo, c) = add_carry(lo, carry, uint64_t{0});
181 std::tie(hi, c) = add_carry(hi, uint64_t{0}, c);
182 std::tie(lo, c) = add_carry(lo, accumulator, uint64_t{0});
183 std::tie(hi, c) = add_carry(hi, uint64_t{0}, c);
189 constexpr std::size_t num_half_bits = num_bits / 2;
190 constexpr T half_mask = (T{1} << num_half_bits) - T{1};
192 hilet A = lhs >> num_half_bits;
193 hilet B = lhs & half_mask;
194 hilet C = rhs >> num_half_bits;
195 hilet D = rhs & half_mask;
207 hilet AD_lo = AD << num_half_bits;
208 hilet AD_hi = AD >> num_half_bits;
209 hilet BC_lo = BC << num_half_bits;
210 hilet BC_hi = BC >> num_half_bits;
212 std::tie(lo, c) = add_carry(lo, AD_lo, T{0});
213 std::tie(hi, c) = add_carry(hi, AD_hi, c);
214 std::tie(lo, c) = add_carry(lo, BC_lo, T{0});
215 std::tie(hi, c) = add_carry(hi, BC_hi, c);
218 std::tie(lo, c) = add_carry(lo, carry, T{0});
219 std::tie(hi, c) = add_carry(hi, T{0}, c);
220 std::tie(lo, c) = add_carry(lo, accumulator, T{0});
221 std::tie(hi, c) = add_carry(hi, T{0}, c);
234template<std::
unsigned_
integral T>
235hi_force_inline
constexpr T wide_div(T lhs_lo, T lhs_hi, T rhs)
noexcept
237 if constexpr (
sizeof(T) == 1) {
238 hilet lhs =
static_cast<uint16_t
>(lhs_hi) << 8 |
static_cast<uint16_t
>(lhs_lo);
239 return narrow_cast<uint8_t>(lhs / rhs);
241 }
else if constexpr (
sizeof(T) == 2) {
242 hilet lhs =
static_cast<uint32_t
>(lhs_hi) << 16 |
static_cast<uint32_t
>(lhs_lo);
243 return narrow_cast<uint16_t>(lhs / rhs);
245 }
else if constexpr (
sizeof(T) == 4) {
246 hilet lhs =
static_cast<uint64_t
>(lhs_hi) << 32 |
static_cast<uint64_t
>(lhs_lo);
247 return narrow_cast<uint32_t>(lhs / rhs);
249 }
else if constexpr (
sizeof(T) == 8) {
250#if HI_COMPILER == HI_CC_MSVC
252 return _udiv128(lhs_hi, lhs_lo, rhs, &remainder);
254#elif HI_COMPILER == HI_CC_CLANG || HI_COMPILER == HI_CC_GCC
255 hilet lhs =
static_cast<__uint128_t
>(lhs_hi) << 64 |
static_cast<__uint128_t
>(lhs_lo);
256 return narrow_cast<uint64_t>(lhs / rhs);
258#error "Not implemented"
269template<std::
unsigned_
integral T>
270[[nodiscard]] hi_force_inline
constexpr ssize_t bsr_carry_chain(T
const *lhs,
std::size_t n)
noexcept
272 constexpr std::size_t bits_per_digit =
sizeof(T) * CHAR_BIT;
274 for (ssize_t i =
static_cast<ssize_t>(n) - 1; i >= 0; i--) {
275 auto tmp = std::countl_zero(lhs[i]);
276 if (tmp < bits_per_digit) {
277 return i * bits_per_digit + bits_per_digit - tmp - 1;
290template<std::
unsigned_
integral T>
291hi_force_inline
constexpr void invert_carry_chain(T *r, T
const *rhs,
std::size_t n)
noexcept
305template<std::
unsigned_
integral T>
306hi_force_inline
constexpr void sll_carry_chain(T *r, T
const *lhs,
std::size_t rhs,
std::size_t n)
noexcept
308 constexpr std::size_t bits_per_digit =
sizeof(T) * CHAR_BIT;
310 hilet digit_count =
static_cast<ssize_t>(rhs / bits_per_digit);
311 hilet bit_count = rhs % bits_per_digit;
313 if (r != lhs or digit_count > 0) {
315 for (i =
static_cast<ssize_t>(n) - 1; i >= digit_count; --i) {
316 r[i] = lhs[i - digit_count];
318 for (; i >= 0; --i) {
326 std::tie(r[i], carry) = sll_carry(r[i], bit_count, carry);
338template<std::
unsigned_
integral T>
339hi_force_inline
constexpr void srl_carry_chain(T *r, T
const *lhs,
std::size_t rhs,
std::size_t n)
noexcept
341 constexpr std::size_t bits_per_digit =
sizeof(T) * CHAR_BIT;
343 hilet digit_count = rhs / bits_per_digit;
344 hilet bit_count = rhs % bits_per_digit;
346 if (r != lhs or digit_count > 0) {
348 for (; i != (n - digit_count); ++i) {
349 r[i] = lhs[i + digit_count];
351 for (; i != n; ++i) {
359 for (ssize_t i =
static_cast<ssize_t>(n) - digit_count - 1; i >= 0; --i) {
360 std::tie(r[i], carry) = srl_carry(r[i], bit_count, carry);
373template<std::
unsigned_
integral T>
374hi_force_inline
constexpr void sra_carry_chain(T *r, T
const *lhs,
std::size_t rhs,
std::size_t n)
noexcept
376 using S = std::make_signed_t<T>;
377 constexpr std::size_t bits_per_digit =
sizeof(T) * CHAR_BIT;
379 hilet digit_count = rhs / bits_per_digit;
380 hilet bit_count = rhs % bits_per_digit;
382 if (r != lhs or digit_count > 0) {
383 hi_axiom(digit_count < n);
386 for (; i != (n - digit_count); ++i) {
387 r[i] = lhs[i + digit_count];
391 hilet sign = lhs[n - 1] < 0 ? S{-1} : S{0};
392 for (; i != n; ++i) {
403 std::tie(r[i], carry) = sra_carry(r[i], bit_count);
407 for (; i >= 0; --i) {
408 std::tie(r[i], carry) = srl_carry(r[i], bit_count, carry);
420template<std::
unsigned_
integral T>
421hi_force_inline
constexpr void and_carry_chain(T *r, T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
424 r[i] = lhs[i] & rhs[i];
435template<std::
unsigned_
integral T>
436hi_force_inline
constexpr void or_carry_chain(T *r, T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
439 r[i] = lhs[i] | rhs[i];
450template<std::
unsigned_
integral T>
451hi_force_inline
constexpr void xor_carry_chain(T *r, T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
454 r[i] = lhs[i] ^ rhs[i];
458template<std::
unsigned_
integral T>
459[[nodiscard]] hi_force_inline
constexpr bool eq_carry_chain(T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
462 if (lhs[i] != rhs[i]) {
469template<std::
unsigned_
integral T>
470[[nodiscard]] hi_force_inline
constexpr bool ne_carry_chain(T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
472 return not eq_carry_chain(lhs, rhs, n);
475template<std::
unsigned_
integral T>
476[[nodiscard]] hi_force_inline
constexpr std::strong_ordering
477cmp_unsigned_carry_chain(T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
479 for (ssize_t i =
static_cast<ssize_t>(n) - 1; i >= 0; --i) {
480 hilet r = lhs[i] <=> rhs[i];
481 if (r != std::strong_ordering::equal) {
485 return std::strong_ordering::equal;
488template<std::
unsigned_
integral T>
489[[nodiscard]] hi_force_inline
constexpr std::strong_ordering cmp_signed_carry_chain(T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
491 using S = std::make_signed_t<T>;
495 hilet r =
static_cast<S
>(lhs[n - 1]) <=>
static_cast<S
>(rhs[n - 1]);
496 if (r != std::strong_ordering::equal) {
503 for (ssize_t i =
static_cast<ssize_t>(n) - 2; i >= 0; --i) {
504 hilet r = lhs[i] <=> rhs[i];
505 if (r != std::strong_ordering::equal) {
509 return std::strong_ordering::equal;
512template<std::
unsigned_
integral T>
513[[nodiscard]] hi_force_inline
constexpr bool lt_unsigned_carry_chain(T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
515 return cmp_unsigned_carry_chain(lhs, rhs, n) == std::strong_ordering::less;
518template<std::
unsigned_
integral T>
519[[nodiscard]] hi_force_inline
constexpr bool gt_unsigned_carry_chain(T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
521 return cmp_unsigned_carry_chain(lhs, rhs, n) == std::strong_ordering::greater;
524template<std::
unsigned_
integral T>
525[[nodiscard]] hi_force_inline
constexpr bool ge_unsigned_carry_chain(T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
527 return lt_unsigned_carry_chain(rhs, lhs, n);
530template<std::
unsigned_
integral T>
531[[nodiscard]] hi_force_inline
constexpr bool le_unsigned_carry_chain(T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
533 return gt_unsigned_carry_chain(rhs, lhs, n);
543template<std::
unsigned_
integral T>
544hi_force_inline
constexpr void neg_carry_chain(T *r, T
const *rhs,
std::size_t n)
noexcept
548 std::tie(r[i], carry) = add_carry(~rhs[i], T{0}, carry);
559template<std::
unsigned_
integral T>
560hi_force_inline
constexpr void add_carry_chain(T *r, T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
564 std::tie(r[i], carry) = add_carry(lhs[i], rhs[i], carry);
575template<std::
unsigned_
integral T>
576hi_force_inline
constexpr void sub_carry_chain(T *r, T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
580 std::tie(r[i], carry) = add_carry(lhs[i], ~rhs[i], carry);
592template<std::
unsigned_
integral T>
593hi_force_inline
constexpr void mul_carry_chain(T *hi_restrict r, T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
595 hi_axiom(r != lhs and r != rhs);
597 for (
auto rhs_index = 0; rhs_index < n; rhs_index++) {
598 hilet rhs_digit = rhs[rhs_index];
601 for (
auto lhs_index = 0; (lhs_index + rhs_index) < n; lhs_index++) {
602 hilet lhs_digit = lhs[lhs_index];
605 T accumulator = r[rhs_index + lhs_index];
606 std::tie(result, carry) = mul_carry(lhs_digit, rhs_digit, carry, accumulator);
607 r[rhs_index + lhs_index] = result;
622template<std::
unsigned_
integral T>
623constexpr void div_carry_chain(T *hi_restrict quotient, T *hi_restrict remainder, T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
625 hi_axiom(quotient != lhs and quotient != rhs and quotient != remainder);
626 hi_axiom(remainder != lhs and remainder != rhs);
628 hilet nr_bits =
static_cast<ssize_t>(n *
sizeof(T) * CHAR_BIT);
630 for (ssize_t i = nr_bits - 1; i >= 0; i--) {
631 sll_carry_chain(remainder, remainder, 1, n);
633 if (ge_unsigned_carry_chain(remainder, rhs, n)) {
634 sub_carry_chain(remainder, remainder, rhs, n);
635 set_bit(quotient, i);
651template<std::
unsigned_
integral T>
653signed_div_carry_chain(T *hi_restrict quotient, T *hi_restrict remainder, T
const *lhs, T
const *rhs,
std::size_t n)
noexcept
656 hi_axiom(quotient != lhs and quotient != rhs and quotient != remainder);
657 hi_axiom(remainder != lhs and remainder != rhs);
659 using signed_type = std::make_signed_t<T>;
661 hilet lhs_is_negative =
static_cast<signed_type
>(lhs[n - 1]) < 0;
662 hilet rhs_is_negative =
static_cast<signed_type
>(rhs[n - 1]) < 0;
665 if (lhs_is_negative or rhs_is_negative) {
669 if (lhs_is_negative) {
672 neg_carry_chain(p, lhs, n);
676 if (rhs_is_negative) {
678 T *p = tmp.data() * n;
679 neg_carry_chain(p, rhs, n);
684 div_carry_chain(quotient, remainder, lhs, rhs, n);
686 if (lhs_is_negative != rhs_is_negative) {
688 neg_carry_chain(quotient, quotient, n);
690 if (lhs_is_negative) {
692 neg_carry_chain(remainder, remainder, n);
This file includes required definitions.
std::ptrdiff_t ssize_t
Signed size/index into an array.
Definition required.hpp:162
#define hilet
Invariant should be the default for variables.
Definition required.hpp:23
Functions and macros for handling architectural difference between compilers, CPUs and operating syst...