10#include "architecture.hpp"
13 tt_msvc_pragma("warning( disable : 4702 )")
22#if TT_COMPILER == TT_CC_MSVC
25#if TT_PROCESSOR == TT_CPU_X64
38 template<std::
unsigned_
integral T>
39 [[nodiscard]] tt_force_inline
constexpr T get_bit(T
const *lhs,
size_t index)
noexcept
41 constexpr size_t bits_per_digit =
sizeof(T) * CHAR_BIT;
43 ttlet digit_count = index / bits_per_digit;
44 ttlet bit_count = index % bits_per_digit;
46 return (lhs[digit_count] >> bit_count) & 1;
56 template<std::
unsigned_
integral T>
57 constexpr void set_bit(T * r,
size_t index, T value = T{1})
noexcept
61 constexpr size_t bits_per_digit =
sizeof(T) * CHAR_BIT;
63 ttlet digit_count = index / bits_per_digit;
64 ttlet bit_count = index % bits_per_digit;
67 ttlet mask = ~(T{1} << bit_count);
68 r[digit_count] = (r[digit_count] & mask) | value;
77 template<std::
unsigned_
integral T>
78 tt_force_inline
constexpr std::pair<T, T> sll_carry(T lhs,
size_t rhs, T carry = T{0})
noexcept
80 constexpr auto num_bits =
sizeof(T) * CHAR_BIT;
81 ttlet reverse_count = num_bits - rhs;
83 return {(lhs << rhs) | carry, lhs >> reverse_count};
92 template<std::
unsigned_
integral T>
93 tt_force_inline
constexpr std::pair<T, T> srl_carry(T lhs,
size_t rhs, T carry = T{0})
noexcept
95 constexpr auto num_bits =
sizeof(T) * CHAR_BIT;
96 ttlet reverse_count = num_bits - rhs;
98 return {(lhs >> rhs) | carry, lhs << reverse_count};
106 template<std::
unsigned_
integral T>
107 tt_force_inline
constexpr std::pair<T, T> sra_carry(T lhs,
size_t rhs)
noexcept
109 using S = std::make_signed_t<T>;
111 constexpr auto num_bits =
sizeof(T) * CHAR_BIT;
112 ttlet reverse_count = num_bits - rhs;
114 return {(
static_cast<S
>(lhs) >> rhs), lhs << reverse_count};
123 template<std::
unsigned_
integral T>
124 tt_force_inline
constexpr std::pair<T, T> add_carry(T lhs, T rhs, T carry = T{0})
noexcept
126 tt_axiom(carry <= 1);
128 constexpr size_t num_bits =
sizeof(T) * CHAR_BIT;
130 if constexpr (has_uintxx_v<num_bits * 2>) {
132 using U = make_uintxx_t<num_bits * 2>;
134 ttlet r =
static_cast<U
>(lhs) +
static_cast<U
>(rhs) +
static_cast<U
>(carry);
135 return {
static_cast<T
>(r),
static_cast<T
>(r >> num_bits)};
137 }
else if (not std::is_constant_evaluated()) {
138#if TT_COMPILER == TT_CC_MSVC
140 ttlet c = _addcarry_u64(
static_cast<unsigned char>(carry), lhs, rhs, &r);
141 return {r,
static_cast<T
>(c)};
146 ttlet r =
static_cast<T
>(lhs + rhs + carry);
147 ttlet c =
static_cast<T
>(r < lhs or r < rhs);
162 template<std::
unsigned_
integral T>
163 tt_force_inline
constexpr std::pair<T, T> mul_carry(T lhs, T rhs, T carry = T{0}, T accumulator = T{0})
noexcept
165 constexpr size_t num_bits =
sizeof(T) * CHAR_BIT;
167 if constexpr (has_uintxx_v<num_bits * 2>) {
169 using U = make_uintxx_t<num_bits * 2>;
171 ttlet r =
static_cast<U
>(lhs) *
static_cast<U
>(rhs) +
static_cast<U
>(carry) +
static_cast<U
>(accumulator);
172 return {
static_cast<T
>(r),
static_cast<T
>(r >> num_bits)};
174 }
else if (not std::is_constant_evaluated()) {
175#if TT_COMPILER == TT_CC_MSVC
176 if constexpr (
sizeof(T) == 8) {
178 uint64_t lo = _umul128(lhs, rhs, &hi);
180 std::tie(lo, c) = add_carry(lo, carry, uint64_t{0});
181 std::tie(hi, c) = add_carry(hi, uint64_t{0}, c);
182 std::tie(lo, c) = add_carry(lo, accumulator, uint64_t{0});
183 std::tie(hi, c) = add_carry(hi, uint64_t{0}, c);
189 constexpr size_t num_half_bits = num_bits / 2;
190 constexpr T half_mask = (T{1} << num_half_bits) - T{1};
192 ttlet A = lhs >> num_half_bits;
193 ttlet B = lhs & half_mask;
194 ttlet C = rhs >> num_half_bits;
195 ttlet D = rhs & half_mask;
207 ttlet AD_lo = AD << num_half_bits;
208 ttlet AD_hi = AD >> num_half_bits;
209 ttlet BC_lo = BC << num_half_bits;
210 ttlet BC_hi = BC >> num_half_bits;
212 std::tie(lo, c) = add_carry(lo, AD_lo, T{0});
213 std::tie(hi, c) = add_carry(hi, AD_hi, c);
214 std::tie(lo, c) = add_carry(lo, BC_lo, T{0});
215 std::tie(hi, c) = add_carry(hi, BC_hi, c);
218 std::tie(lo, c) = add_carry(lo, carry, T{0});
219 std::tie(hi, c) = add_carry(hi, T{0}, c);
220 std::tie(lo, c) = add_carry(lo, accumulator, T{0});
221 std::tie(hi, c) = add_carry(hi, T{0}, c);
234 template<std::
unsigned_
integral T>
235 tt_force_inline
constexpr T wide_div(T lhs_lo, T lhs_hi, T rhs)
noexcept
237 if constexpr (
sizeof(T) == 1) {
238 ttlet lhs =
static_cast<uint16_t
>(lhs_hi) << 8 |
static_cast<uint16_t
>(lhs_lo);
239 return narrow_cast<uint8_t>(lhs / rhs);
241 }
else if constexpr (
sizeof(T) == 2) {
242 ttlet lhs =
static_cast<uint32_t
>(lhs_hi) << 16 |
static_cast<uint32_t
>(lhs_lo);
243 return narrow_cast<uint16_t>(lhs / rhs);
245 }
else if constexpr (
sizeof(T) == 4) {
246 ttlet lhs =
static_cast<uint64_t
>(lhs_hi) << 32 |
static_cast<uint64_t
>(lhs_lo);
247 return narrow_cast<uint32_t>(lhs / rhs);
249 }
else if constexpr (
sizeof(T) == 8) {
250#if TT_COMPILER == TT_CC_MSVC
252 return _udiv128(lhs_hi, lhs_lo, rhs, &remainder);
254#elif TT_COMPILER == TT_CC_CLANG || TT_COMPILER == TT_CC_GCC
255 ttlet lhs =
static_cast<__uint128_t
>(lhs_hi) << 64 |
static_cast<__uint128_t
>(lhs_lo);
256 return narrow_cast<uint64_t>(lhs / rhs);
258#error "Not implemented"
269 template<std::
unsigned_
integral T>
270 [[nodiscard]] tt_force_inline
constexpr ssize_t bsr_carry_chain(T
const *lhs,
size_t n)
noexcept
272 constexpr size_t bits_per_digit =
sizeof(T) * CHAR_BIT;
274 for (ssize_t i =
static_cast<ssize_t
>(n) - 1; i >= 0; i--) {
275 auto tmp = std::countl_zero(lhs[i]);
276 if (tmp < bits_per_digit) {
277 return i * bits_per_digit + bits_per_digit - tmp - 1;
290 template<std::
unsigned_
integral T>
291 tt_force_inline
constexpr void invert_carry_chain(T * r, T
const *rhs,
size_t n)
noexcept
293 for (
size_t i = 0; i != n; ++i) {
305 template<std::
unsigned_
integral T>
306 tt_force_inline
constexpr void sll_carry_chain(T * r, T
const *lhs,
size_t rhs,
size_t n)
noexcept
308 constexpr size_t bits_per_digit =
sizeof(T) * CHAR_BIT;
310 ttlet digit_count =
static_cast<ssize_t
>(rhs / bits_per_digit);
311 ttlet bit_count = rhs % bits_per_digit;
313 if (r != lhs or digit_count > 0) {
315 for (i =
static_cast<ssize_t
>(n) - 1; i >= digit_count; --i) {
316 r[i] = lhs[i - digit_count];
318 for (; i >= 0; --i) {
325 for (
size_t i = 0; i != n; ++i) {
326 std::tie(r[i], carry) = sll_carry(r[i], bit_count, carry);
338 template<std::
unsigned_
integral T>
339 tt_force_inline
constexpr void srl_carry_chain(T * r, T
const *lhs,
size_t rhs,
size_t n)
noexcept
341 constexpr size_t bits_per_digit =
sizeof(T) * CHAR_BIT;
343 ttlet digit_count = rhs / bits_per_digit;
344 ttlet bit_count = rhs % bits_per_digit;
346 if (r != lhs or digit_count > 0) {
348 for (; i != (n - digit_count); ++i) {
349 r[i] = lhs[i + digit_count];
351 for (; i != n; ++i) {
359 for (ssize_t i =
static_cast<ssize_t
>(n) - digit_count - 1; i >= 0; --i) {
360 std::tie(r[i], carry) = srl_carry(r[i], bit_count, carry);
373 template<std::
unsigned_
integral T>
374 tt_force_inline
constexpr void sra_carry_chain(T * r, T
const *lhs,
size_t rhs,
size_t n)
noexcept
376 using S = std::make_signed_t<T>;
377 constexpr size_t bits_per_digit =
sizeof(T) * CHAR_BIT;
379 ttlet digit_count = rhs / bits_per_digit;
380 ttlet bit_count = rhs % bits_per_digit;
382 if (r != lhs or digit_count > 0) {
383 tt_axiom(digit_count < n);
386 for (; i != (n - digit_count); ++i) {
387 r[i] = lhs[i + digit_count];
391 ttlet sign = lhs[n - 1] < 0 ? S{-1} : S{0};
392 for (; i != n; ++i) {
402 ssize_t i =
static_cast<ssize_t
>(n) - digit_count - 1;
403 std::tie(r[i], carry) = sra_carry(r[i], bit_count);
407 for (; i >= 0; --i) {
408 std::tie(r[i], carry) = srl_carry(r[i], bit_count, carry);
420 template<std::
unsigned_
integral T>
421 tt_force_inline
constexpr void and_carry_chain(T * r, T
const *lhs, T
const *rhs,
size_t n)
noexcept
423 for (
size_t i = 0; i != n; ++i) {
424 r[i] = lhs[i] & rhs[i];
435 template<std::
unsigned_
integral T>
436 tt_force_inline
constexpr void or_carry_chain(T * r, T
const *lhs, T
const *rhs,
size_t n)
noexcept
438 for (
size_t i = 0; i != n; ++i) {
439 r[i] = lhs[i] | rhs[i];
450 template<std::
unsigned_
integral T>
451 tt_force_inline
constexpr void xor_carry_chain(T * r, T
const *lhs, T
const *rhs,
size_t n)
noexcept
453 for (
size_t i = 0; i != n; ++i) {
454 r[i] = lhs[i] ^ rhs[i];
458 template<std::
unsigned_
integral T>
459 [[nodiscard]] tt_force_inline
constexpr bool eq_carry_chain(T
const *lhs, T
const *rhs,
size_t n)
noexcept
461 for (
size_t i = 0; i != n; ++i) {
462 if (lhs[i] != rhs[i]) {
469 template<std::
unsigned_
integral T>
470 [[nodiscard]] tt_force_inline
constexpr bool ne_carry_chain(T
const *lhs, T
const *rhs,
size_t n)
noexcept
472 return not eq_carry_chain(lhs, rhs, n);
475 template<std::
unsigned_
integral T>
476 [[nodiscard]] tt_force_inline
constexpr std::strong_ordering cmp_unsigned_carry_chain(
477 T
const *lhs, T
const *rhs,
size_t n)
noexcept
479 for (ssize_t i =
static_cast<ssize_t
>(n) - 1; i >= 0; --i) {
480 ttlet r = lhs[i] <=> rhs[i];
481 if (r != std::strong_ordering::equal) {
485 return std::strong_ordering::equal;
488 template<std::
unsigned_
integral T>
489 [[nodiscard]] tt_force_inline
constexpr std::strong_ordering cmp_signed_carry_chain(
490 T
const *lhs, T
const *rhs,
size_t n)
noexcept
492 using S = std::make_signed_t<T>;
496 ttlet r =
static_cast<S
>(lhs[n - 1]) <=>
static_cast<S
>(rhs[n - 1]);
497 if (r != std::strong_ordering::equal) {
504 for (ssize_t i =
static_cast<ssize_t
>(n) - 2; i >= 0; --i) {
505 ttlet r = lhs[i] <=> rhs[i];
506 if (r != std::strong_ordering::equal) {
510 return std::strong_ordering::equal;
513 template<std::
unsigned_
integral T>
514 [[nodiscard]] tt_force_inline
constexpr bool lt_unsigned_carry_chain(T
const *lhs, T
const *rhs,
size_t n)
noexcept
516 return cmp_unsigned_carry_chain(lhs, rhs, n) == std::strong_ordering::less;
519 template<std::
unsigned_
integral T>
520 [[nodiscard]] tt_force_inline
constexpr bool gt_unsigned_carry_chain(T
const *lhs, T
const *rhs,
size_t n)
noexcept
522 return cmp_unsigned_carry_chain(lhs, rhs, n) == std::strong_ordering::greater;
525 template<std::
unsigned_
integral T>
526 [[nodiscard]] tt_force_inline
constexpr bool ge_unsigned_carry_chain(T
const *lhs, T
const *rhs,
size_t n)
noexcept
528 return lt_unsigned_carry_chain(rhs, lhs, n);
531 template<std::
unsigned_
integral T>
532 [[nodiscard]] tt_force_inline
constexpr bool le_unsigned_carry_chain(T
const *lhs, T
const *rhs,
size_t n)
noexcept
534 return gt_unsigned_carry_chain(rhs, lhs, n);
544 template<std::
unsigned_
integral T>
545 tt_force_inline
constexpr void neg_carry_chain(T * r, T
const *rhs,
size_t n)
noexcept
548 for (
size_t i = 0; i != n; ++i) {
549 std::tie(r[i], carry) = add_carry(~rhs[i], T{0}, carry);
560 template<std::
unsigned_
integral T>
561 tt_force_inline
constexpr void add_carry_chain(T * r, T
const *lhs, T
const *rhs,
size_t n)
noexcept
564 for (
size_t i = 0; i != n; ++i) {
565 std::tie(r[i], carry) = add_carry(lhs[i], rhs[i], carry);
576 template<std::
unsigned_
integral T>
577 tt_force_inline
constexpr void sub_carry_chain(T * r, T
const *lhs, T
const *rhs,
size_t n)
noexcept
580 for (
size_t i = 0; i != n; ++i) {
581 std::tie(r[i], carry) = add_carry(lhs[i], ~rhs[i], carry);
593 template<std::
unsigned_
integral T>
594 tt_force_inline
constexpr void mul_carry_chain(T * tt_restrict r, T
const *lhs, T
const *rhs,
size_t n)
noexcept
596 tt_axiom(r != lhs and r != rhs);
598 for (
auto rhs_index = 0; rhs_index < n; rhs_index++) {
599 ttlet rhs_digit = rhs[rhs_index];
602 for (
auto lhs_index = 0; (lhs_index + rhs_index) < n; lhs_index++) {
603 ttlet lhs_digit = lhs[lhs_index];
606 T accumulator = r[rhs_index + lhs_index];
607 std::tie(result, carry) = mul_carry(lhs_digit, rhs_digit, carry, accumulator);
608 r[rhs_index + lhs_index] = result;
623 template<std::
unsigned_
integral T>
624 constexpr void div_carry_chain(
625 T * tt_restrict quotient, T * tt_restrict remainder, T
const *lhs, T
const *rhs,
size_t n)
noexcept
627 tt_axiom(quotient != lhs and quotient != rhs and quotient != remainder);
628 tt_axiom(remainder != lhs and remainder != rhs);
630 ttlet nr_bits =
static_cast<ssize_t
>(n *
sizeof(T) * CHAR_BIT);
632 for (ssize_t i = nr_bits - 1; i >= 0; i--) {
633 sll_carry_chain(remainder, remainder, 1, n);
635 if (ge_unsigned_carry_chain(remainder, rhs, n)) {
636 sub_carry_chain(remainder, remainder, rhs, n);
637 set_bit(quotient, i);
653 template<std::
unsigned_
integral T>
654 constexpr void signed_div_carry_chain(
655 T * tt_restrict quotient, T * tt_restrict remainder, T
const *lhs, T
const *rhs,
size_t n)
noexcept
658 tt_axiom(quotient != lhs and quotient != rhs and quotient != remainder);
659 tt_axiom(remainder != lhs and remainder != rhs);
661 using signed_type = std::make_signed_t<T>;
663 ttlet lhs_is_negative =
static_cast<signed_type
>(lhs[n - 1]) < 0;
664 ttlet rhs_is_negative =
static_cast<signed_type
>(rhs[n - 1]) < 0;
667 if (lhs_is_negative or rhs_is_negative) {
671 if (lhs_is_negative) {
674 neg_carry_chain(p, lhs, n);
678 if (rhs_is_negative) {
680 T *p = tmp.data() * n;
681 neg_carry_chain(p, rhs, n);
686 div_carry_chain(quotient, remainder, lhs, rhs, n);
688 if (lhs_is_negative != rhs_is_negative) {
690 neg_carry_chain(quotient, quotient, n);
692 if (lhs_is_negative) {
694 neg_carry_chain(remainder, remainder, n);