HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
int_carry.hpp
1// Copyright Take Vos 2019-2021.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "required.hpp"
8#include "assert.hpp"
9#include "type_traits.hpp"
10#include "architecture.hpp"
11tt_warning_push()
12 // C4702 unreachable code: Suppressed due intrinsics and std::is_constant_evaluated()
13 tt_msvc_pragma("warning( disable : 4702 )")
14
15#include <complex>
16#include <cmath>
17#include <limits>
18#include <span>
19#include <tuple>
20#include <concepts>
21
22#if TT_COMPILER == TT_CC_MSVC
23#include <intrin.h>
24#endif
25#if TT_PROCESSOR == TT_CPU_X64
26#include <immintrin.h>
27#endif
28
29 namespace tt
30{
38 template<std::unsigned_integral T>
39 [[nodiscard]] tt_force_inline constexpr T get_bit(T const *lhs, size_t index) noexcept
40 {
41 constexpr size_t bits_per_digit = sizeof(T) * CHAR_BIT;
42
43 ttlet digit_count = index / bits_per_digit;
44 ttlet bit_count = index % bits_per_digit;
45
46 return (lhs[digit_count] >> bit_count) & 1;
47 }
48
56 template<std::unsigned_integral T>
57 constexpr void set_bit(T * r, size_t index, T value = T{1}) noexcept
58 {
59 tt_axiom(value <= 1);
60
61 constexpr size_t bits_per_digit = sizeof(T) * CHAR_BIT;
62
63 ttlet digit_count = index / bits_per_digit;
64 ttlet bit_count = index % bits_per_digit;
65
66 value <<= bit_count;
67 ttlet mask = ~(T{1} << bit_count);
68 r[digit_count] = (r[digit_count] & mask) | value;
69 }
70
77 template<std::unsigned_integral T>
78 tt_force_inline constexpr std::pair<T, T> sll_carry(T lhs, size_t rhs, T carry = T{0}) noexcept
79 {
80 constexpr auto num_bits = sizeof(T) * CHAR_BIT;
81 ttlet reverse_count = num_bits - rhs;
82
83 return {(lhs << rhs) | carry, lhs >> reverse_count};
84 }
85
92 template<std::unsigned_integral T>
93 tt_force_inline constexpr std::pair<T, T> srl_carry(T lhs, size_t rhs, T carry = T{0}) noexcept
94 {
95 constexpr auto num_bits = sizeof(T) * CHAR_BIT;
96 ttlet reverse_count = num_bits - rhs;
97
98 return {(lhs >> rhs) | carry, lhs << reverse_count};
99 }
100
106 template<std::unsigned_integral T>
107 tt_force_inline constexpr std::pair<T, T> sra_carry(T lhs, size_t rhs) noexcept
108 {
109 using S = std::make_signed_t<T>;
110
111 constexpr auto num_bits = sizeof(T) * CHAR_BIT;
112 ttlet reverse_count = num_bits - rhs;
113
114 return {(static_cast<S>(lhs) >> rhs), lhs << reverse_count};
115 }
116
123 template<std::unsigned_integral T>
124 tt_force_inline constexpr std::pair<T, T> add_carry(T lhs, T rhs, T carry = T{0}) noexcept
125 {
126 tt_axiom(carry <= 1);
127
128 constexpr size_t num_bits = sizeof(T) * CHAR_BIT;
129
130 if constexpr (has_uintxx_v<num_bits * 2>) {
131 // We can use a native type that has double the size.
132 using U = make_uintxx_t<num_bits * 2>;
133
134 ttlet r = static_cast<U>(lhs) + static_cast<U>(rhs) + static_cast<U>(carry);
135 return {static_cast<T>(r), static_cast<T>(r >> num_bits)};
136
137 } else if (not std::is_constant_evaluated()) {
138#if TT_COMPILER == TT_CC_MSVC
139 uint64_t r;
140 ttlet c = _addcarry_u64(static_cast<unsigned char>(carry), lhs, rhs, &r);
141 return {r, static_cast<T>(c)};
142#endif
143 }
144
145 // Carry can directly be added the sum without a double overflow.
146 ttlet r = static_cast<T>(lhs + rhs + carry);
147 ttlet c = static_cast<T>(r < lhs or r < rhs);
148 return {r, c};
149 }
150
162 template<std::unsigned_integral T>
163 tt_force_inline constexpr std::pair<T, T> mul_carry(T lhs, T rhs, T carry = T{0}, T accumulator = T{0}) noexcept
164 {
165 constexpr size_t num_bits = sizeof(T) * CHAR_BIT;
166
167 if constexpr (has_uintxx_v<num_bits * 2>) {
168 // We can use a native type that has double the size.
169 using U = make_uintxx_t<num_bits * 2>;
170
171 ttlet r = static_cast<U>(lhs) * static_cast<U>(rhs) + static_cast<U>(carry) + static_cast<U>(accumulator);
172 return {static_cast<T>(r), static_cast<T>(r >> num_bits)};
173
174 } else if (not std::is_constant_evaluated()) {
175#if TT_COMPILER == TT_CC_MSVC
176 if constexpr (sizeof(T) == 8) {
177 uint64_t hi = 0;
178 uint64_t lo = _umul128(lhs, rhs, &hi);
179 uint64_t c = 0;
180 std::tie(lo, c) = add_carry(lo, carry, uint64_t{0});
181 std::tie(hi, c) = add_carry(hi, uint64_t{0}, c);
182 std::tie(lo, c) = add_carry(lo, accumulator, uint64_t{0});
183 std::tie(hi, c) = add_carry(hi, uint64_t{0}, c);
184 return {lo, hi};
185 }
186#endif
187 }
188
189 constexpr size_t num_half_bits = num_bits / 2;
190 constexpr T half_mask = (T{1} << num_half_bits) - T{1};
191
192 ttlet A = lhs >> num_half_bits;
193 ttlet B = lhs & half_mask;
194 ttlet C = rhs >> num_half_bits;
195 ttlet D = rhs & half_mask;
196 ttlet AC = A * C;
197 ttlet AD = A * D;
198 ttlet BC = B * C;
199 ttlet BD = B * D;
200
201 // Provisional result.
202 auto hi = AC;
203 auto lo = BD;
204 auto c = T{0};
205
206 // AD and BC are shifted half way across the lo and hi of the result.
207 ttlet AD_lo = AD << num_half_bits;
208 ttlet AD_hi = AD >> num_half_bits;
209 ttlet BC_lo = BC << num_half_bits;
210 ttlet BC_hi = BC >> num_half_bits;
211
212 std::tie(lo, c) = add_carry(lo, AD_lo, T{0});
213 std::tie(hi, c) = add_carry(hi, AD_hi, c);
214 std::tie(lo, c) = add_carry(lo, BC_lo, T{0});
215 std::tie(hi, c) = add_carry(hi, BC_hi, c);
216
217 // Now add the carry and accumulator arguments.
218 std::tie(lo, c) = add_carry(lo, carry, T{0});
219 std::tie(hi, c) = add_carry(hi, T{0}, c);
220 std::tie(lo, c) = add_carry(lo, accumulator, T{0});
221 std::tie(hi, c) = add_carry(hi, T{0}, c);
222 return {lo, hi};
223 }
224
234 template<std::unsigned_integral T>
235 tt_force_inline constexpr T wide_div(T lhs_lo, T lhs_hi, T rhs) noexcept
236 {
237 if constexpr (sizeof(T) == 1) {
238 ttlet lhs = static_cast<uint16_t>(lhs_hi) << 8 | static_cast<uint16_t>(lhs_lo);
239 return narrow_cast<uint8_t>(lhs / rhs);
240
241 } else if constexpr (sizeof(T) == 2) {
242 ttlet lhs = static_cast<uint32_t>(lhs_hi) << 16 | static_cast<uint32_t>(lhs_lo);
243 return narrow_cast<uint16_t>(lhs / rhs);
244
245 } else if constexpr (sizeof(T) == 4) {
246 ttlet lhs = static_cast<uint64_t>(lhs_hi) << 32 | static_cast<uint64_t>(lhs_lo);
247 return narrow_cast<uint32_t>(lhs / rhs);
248
249 } else if constexpr (sizeof(T) == 8) {
250#if TT_COMPILER == TT_CC_MSVC
251 uint64_t remainder;
252 return _udiv128(lhs_hi, lhs_lo, rhs, &remainder);
253
254#elif TT_COMPILER == TT_CC_CLANG || TT_COMPILER == TT_CC_GCC
255 ttlet lhs = static_cast<__uint128_t>(lhs_hi) << 64 | static_cast<__uint128_t>(lhs_lo);
256 return narrow_cast<uint64_t>(lhs / rhs);
257#else
258#error "Not implemented"
259#endif
260 }
261 }
262
269 template<std::unsigned_integral T>
270 [[nodiscard]] tt_force_inline constexpr ssize_t bsr_carry_chain(T const *lhs, size_t n) noexcept
271 {
272 constexpr size_t bits_per_digit = sizeof(T) * CHAR_BIT;
273
274 for (ssize_t i = static_cast<ssize_t>(n) - 1; i >= 0; i--) {
275 auto tmp = std::countl_zero(lhs[i]);
276 if (tmp < bits_per_digit) {
277 return i * bits_per_digit + bits_per_digit - tmp - 1;
278 }
279 }
280 return -1;
281 }
282
290 template<std::unsigned_integral T>
291 tt_force_inline constexpr void invert_carry_chain(T * r, T const *rhs, size_t n) noexcept
292 {
293 for (size_t i = 0; i != n; ++i) {
294 r[i] = ~rhs[i];
295 }
296 }
297
305 template<std::unsigned_integral T>
306 tt_force_inline constexpr void sll_carry_chain(T * r, T const *lhs, size_t rhs, size_t n) noexcept
307 {
308 constexpr size_t bits_per_digit = sizeof(T) * CHAR_BIT;
309
310 ttlet digit_count = static_cast<ssize_t>(rhs / bits_per_digit);
311 ttlet bit_count = rhs % bits_per_digit;
312
313 if (r != lhs or digit_count > 0) {
314 ssize_t i;
315 for (i = static_cast<ssize_t>(n) - 1; i >= digit_count; --i) {
316 r[i] = lhs[i - digit_count];
317 }
318 for (; i >= 0; --i) {
319 r[i] = T{0};
320 }
321 }
322
323 if (bit_count > 0) {
324 auto carry = T{0};
325 for (size_t i = 0; i != n; ++i) {
326 std::tie(r[i], carry) = sll_carry(r[i], bit_count, carry);
327 }
328 }
329 }
330
338 template<std::unsigned_integral T>
339 tt_force_inline constexpr void srl_carry_chain(T * r, T const *lhs, size_t rhs, size_t n) noexcept
340 {
341 constexpr size_t bits_per_digit = sizeof(T) * CHAR_BIT;
342
343 ttlet digit_count = rhs / bits_per_digit;
344 ttlet bit_count = rhs % bits_per_digit;
345
346 if (r != lhs or digit_count > 0) {
347 size_t i = 0;
348 for (; i != (n - digit_count); ++i) {
349 r[i] = lhs[i + digit_count];
350 }
351 for (; i != n; ++i) {
352 r[i] = T{0};
353 }
354 }
355
356 if (bit_count > 0) {
357 auto carry = T{0};
358
359 for (ssize_t i = static_cast<ssize_t>(n) - digit_count - 1; i >= 0; --i) {
360 std::tie(r[i], carry) = srl_carry(r[i], bit_count, carry);
361 }
362 }
363 }
364
373 template<std::unsigned_integral T>
374 tt_force_inline constexpr void sra_carry_chain(T * r, T const *lhs, size_t rhs, size_t n) noexcept
375 {
376 using S = std::make_signed_t<T>;
377 constexpr size_t bits_per_digit = sizeof(T) * CHAR_BIT;
378
379 ttlet digit_count = rhs / bits_per_digit;
380 ttlet bit_count = rhs % bits_per_digit;
381
382 if (r != lhs or digit_count > 0) {
383 tt_axiom(digit_count < n);
384
385 size_t i = 0;
386 for (; i != (n - digit_count); ++i) {
387 r[i] = lhs[i + digit_count];
388 }
389
390 // Sign extent the digits that are unused after a large shift.
391 ttlet sign = lhs[n - 1] < 0 ? S{-1} : S{0};
392 for (; i != n; ++i) {
393 r[i] = sign;
394 }
395 }
396
397 if (bit_count > 0) {
398 tt_axiom(n > 0);
399 auto carry = T{};
400
401 // The most significant digit is sign extended.
402 ssize_t i = static_cast<ssize_t>(n) - digit_count - 1;
403 std::tie(r[i], carry) = sra_carry(r[i], bit_count);
404 --i;
405
406 // The rest of the digits pass through the carry.
407 for (; i >= 0; --i) {
408 std::tie(r[i], carry) = srl_carry(r[i], bit_count, carry);
409 }
410 }
411 }
412
420 template<std::unsigned_integral T>
421 tt_force_inline constexpr void and_carry_chain(T * r, T const *lhs, T const *rhs, size_t n) noexcept
422 {
423 for (size_t i = 0; i != n; ++i) {
424 r[i] = lhs[i] & rhs[i];
425 }
426 }
427
435 template<std::unsigned_integral T>
436 tt_force_inline constexpr void or_carry_chain(T * r, T const *lhs, T const *rhs, size_t n) noexcept
437 {
438 for (size_t i = 0; i != n; ++i) {
439 r[i] = lhs[i] | rhs[i];
440 }
441 }
442
450 template<std::unsigned_integral T>
451 tt_force_inline constexpr void xor_carry_chain(T * r, T const *lhs, T const *rhs, size_t n) noexcept
452 {
453 for (size_t i = 0; i != n; ++i) {
454 r[i] = lhs[i] ^ rhs[i];
455 }
456 }
457
458 template<std::unsigned_integral T>
459 [[nodiscard]] tt_force_inline constexpr bool eq_carry_chain(T const *lhs, T const *rhs, size_t n) noexcept
460 {
461 for (size_t i = 0; i != n; ++i) {
462 if (lhs[i] != rhs[i]) {
463 return false;
464 }
465 }
466 return true;
467 }
468
469 template<std::unsigned_integral T>
470 [[nodiscard]] tt_force_inline constexpr bool ne_carry_chain(T const *lhs, T const *rhs, size_t n) noexcept
471 {
472 return not eq_carry_chain(lhs, rhs, n);
473 }
474
475 template<std::unsigned_integral T>
476 [[nodiscard]] tt_force_inline constexpr std::strong_ordering cmp_unsigned_carry_chain(
477 T const *lhs, T const *rhs, size_t n) noexcept
478 {
479 for (ssize_t i = static_cast<ssize_t>(n) - 1; i >= 0; --i) {
480 ttlet r = lhs[i] <=> rhs[i];
481 if (r != std::strong_ordering::equal) {
482 return r;
483 }
484 }
485 return std::strong_ordering::equal;
486 }
487
488 template<std::unsigned_integral T>
489 [[nodiscard]] tt_force_inline constexpr std::strong_ordering cmp_signed_carry_chain(
490 T const *lhs, T const *rhs, size_t n) noexcept
491 {
492 using S = std::make_signed_t<T>;
493
494 // Compare the ms-digit using signed comparison, because it includes the sign-bit
495 if (n > 0) {
496 ttlet r = static_cast<S>(lhs[n - 1]) <=> static_cast<S>(rhs[n - 1]);
497 if (r != std::strong_ordering::equal) {
498 return r;
499 }
500 }
501
502 // At this point both values have the same sign, and since the rest of the digits
503 // do not have a sign bit, use unsigned comparison.
504 for (ssize_t i = static_cast<ssize_t>(n) - 2; i >= 0; --i) {
505 ttlet r = lhs[i] <=> rhs[i];
506 if (r != std::strong_ordering::equal) {
507 return r;
508 }
509 }
510 return std::strong_ordering::equal;
511 }
512
513 template<std::unsigned_integral T>
514 [[nodiscard]] tt_force_inline constexpr bool lt_unsigned_carry_chain(T const *lhs, T const *rhs, size_t n) noexcept
515 {
516 return cmp_unsigned_carry_chain(lhs, rhs, n) == std::strong_ordering::less;
517 }
518
519 template<std::unsigned_integral T>
520 [[nodiscard]] tt_force_inline constexpr bool gt_unsigned_carry_chain(T const *lhs, T const *rhs, size_t n) noexcept
521 {
522 return cmp_unsigned_carry_chain(lhs, rhs, n) == std::strong_ordering::greater;
523 }
524
525 template<std::unsigned_integral T>
526 [[nodiscard]] tt_force_inline constexpr bool ge_unsigned_carry_chain(T const *lhs, T const *rhs, size_t n) noexcept
527 {
528 return lt_unsigned_carry_chain(rhs, lhs, n);
529 }
530
531 template<std::unsigned_integral T>
532 [[nodiscard]] tt_force_inline constexpr bool le_unsigned_carry_chain(T const *lhs, T const *rhs, size_t n) noexcept
533 {
534 return gt_unsigned_carry_chain(rhs, lhs, n);
535 }
536
544 template<std::unsigned_integral T>
545 tt_force_inline constexpr void neg_carry_chain(T * r, T const *rhs, size_t n) noexcept
546 {
547 auto carry = T{1};
548 for (size_t i = 0; i != n; ++i) {
549 std::tie(r[i], carry) = add_carry(~rhs[i], T{0}, carry);
550 }
551 }
552
560 template<std::unsigned_integral T>
561 tt_force_inline constexpr void add_carry_chain(T * r, T const *lhs, T const *rhs, size_t n) noexcept
562 {
563 auto carry = T{0};
564 for (size_t i = 0; i != n; ++i) {
565 std::tie(r[i], carry) = add_carry(lhs[i], rhs[i], carry);
566 }
567 }
568
576 template<std::unsigned_integral T>
577 tt_force_inline constexpr void sub_carry_chain(T * r, T const *lhs, T const *rhs, size_t n) noexcept
578 {
579 auto carry = T{1};
580 for (size_t i = 0; i != n; ++i) {
581 std::tie(r[i], carry) = add_carry(lhs[i], ~rhs[i], carry);
582 }
583 }
584
593 template<std::unsigned_integral T>
594 tt_force_inline constexpr void mul_carry_chain(T * tt_restrict r, T const *lhs, T const *rhs, size_t n) noexcept
595 {
596 tt_axiom(r != lhs and r != rhs);
597
598 for (auto rhs_index = 0; rhs_index < n; rhs_index++) {
599 ttlet rhs_digit = rhs[rhs_index];
600
601 T carry = 0;
602 for (auto lhs_index = 0; (lhs_index + rhs_index) < n; lhs_index++) {
603 ttlet lhs_digit = lhs[lhs_index];
604
605 T result;
606 T accumulator = r[rhs_index + lhs_index];
607 std::tie(result, carry) = mul_carry(lhs_digit, rhs_digit, carry, accumulator);
608 r[rhs_index + lhs_index] = result;
609 }
610 }
611 }
612
623 template<std::unsigned_integral T>
624 constexpr void div_carry_chain(
625 T * tt_restrict quotient, T * tt_restrict remainder, T const *lhs, T const *rhs, size_t n) noexcept
626 {
627 tt_axiom(quotient != lhs and quotient != rhs and quotient != remainder);
628 tt_axiom(remainder != lhs and remainder != rhs);
629
630 ttlet nr_bits = static_cast<ssize_t>(n * sizeof(T) * CHAR_BIT);
631
632 for (ssize_t i = nr_bits - 1; i >= 0; i--) {
633 sll_carry_chain(remainder, remainder, 1, n);
634 remainder[0] |= get_bit(lhs, i);
635 if (ge_unsigned_carry_chain(remainder, rhs, n)) {
636 sub_carry_chain(remainder, remainder, rhs, n);
637 set_bit(quotient, i);
638 }
639 }
640 }
641
653 template<std::unsigned_integral T>
654 constexpr void signed_div_carry_chain(
655 T * tt_restrict quotient, T * tt_restrict remainder, T const *lhs, T const *rhs, size_t n) noexcept
656 {
657 tt_axiom(n > 0);
658 tt_axiom(quotient != lhs and quotient != rhs and quotient != remainder);
659 tt_axiom(remainder != lhs and remainder != rhs);
660
661 using signed_type = std::make_signed_t<T>;
662
663 ttlet lhs_is_negative = static_cast<signed_type>(lhs[n - 1]) < 0;
664 ttlet rhs_is_negative = static_cast<signed_type>(rhs[n - 1]) < 0;
665
666 auto tmp = std::vector<T>{};
667 if (lhs_is_negative or rhs_is_negative) {
668 // Allocate room for negative lhs and rhs together, so only one allocation is needed.
669 tmp.resize(n * 2);
670
671 if (lhs_is_negative) {
672 // Negate lhs and point it to the tmp array.
673 T *p = tmp.data();
674 neg_carry_chain(p, lhs, n);
675 lhs = p;
676 }
677
678 if (rhs_is_negative) {
679 // Negate rhs and point it to the tmp array.
680 T *p = tmp.data() * n;
681 neg_carry_chain(p, rhs, n);
682 rhs = p;
683 }
684 }
685
686 div_carry_chain(quotient, remainder, lhs, rhs, n);
687
688 if (lhs_is_negative != rhs_is_negative) {
689 // Negate the result if the sign of lhs and rhs are different.
690 neg_carry_chain(quotient, quotient, n);
691 }
692 if (lhs_is_negative) {
693 // Remainder has same sign as divisor.
694 neg_carry_chain(remainder, remainder, n);
695 }
696 }
697
698} // namespace tt
699
700tt_warning_pop()
T remainder(T... args)
T resize(T... args)
T tie(T... args)