HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
int_carry.hpp
1// Copyright Take Vos 2019, 2021-2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "utility/module.hpp"
8#include <complex>
9#include <cmath>
10#include <limits>
11#include <span>
12#include <tuple>
13#include <concepts>
14
15#if HI_COMPILER == HI_CC_MSVC
16#include <intrin.h>
17#endif
18#if HI_PROCESSOR == HI_CPU_X64
19#include <immintrin.h>
20#endif
21
22hi_warning_push();
23// C4702 unreachable code: Suppressed due intrinsics and std::is_constant_evaluated()
24hi_warning_ignore_msvc(4702);
25// C26472: Don't use static_cast for arithmetic conversions... (type.1)
26// int_carry does many static_casts on purpose.
27hi_warning_ignore_msvc(26472)
28
29namespace hi {
30
38template<std::unsigned_integral T>
39[[nodiscard]] hi_force_inline constexpr T get_bit(T const *lhs, std::size_t index) noexcept
40{
41 constexpr std::size_t bits_per_digit = sizeof(T) * CHAR_BIT;
42
43 hilet digit_count = index / bits_per_digit;
44 hilet bit_count = index % bits_per_digit;
45
46 return (lhs[digit_count] >> bit_count) & 1;
47}
48
56template<std::unsigned_integral T>
57constexpr void set_bit(T *r, std::size_t index, T value = T{1}) noexcept
58{
59 hi_axiom(value <= 1);
60
61 constexpr std::size_t bits_per_digit = sizeof(T) * CHAR_BIT;
62
63 hilet digit_count = index / bits_per_digit;
64 hilet bit_count = index % bits_per_digit;
65
66 value <<= bit_count;
67 hilet mask = ~(T{1} << bit_count);
68 r[digit_count] = (r[digit_count] & mask) | value;
69}
70
77template<std::unsigned_integral T>
78hi_force_inline constexpr std::pair<T, T> sll_carry(T lhs, std::size_t rhs, T carry = T{0}) noexcept
79{
80 constexpr auto num_bits = sizeof(T) * CHAR_BIT;
81 hilet reverse_count = num_bits - rhs;
82
83 return {(lhs << rhs) | carry, lhs >> reverse_count};
84}
85
92template<std::unsigned_integral T>
93hi_force_inline constexpr std::pair<T, T> srl_carry(T lhs, std::size_t rhs, T carry = T{0}) noexcept
94{
95 constexpr auto num_bits = sizeof(T) * CHAR_BIT;
96 hilet reverse_count = num_bits - rhs;
97
98 return {(lhs >> rhs) | carry, lhs << reverse_count};
99}
100
106template<std::unsigned_integral T>
107hi_force_inline constexpr std::pair<T, T> sra_carry(T lhs, std::size_t rhs) noexcept
108{
109 using S = std::make_signed_t<T>;
110
111 constexpr auto num_bits = sizeof(T) * CHAR_BIT;
112 hilet reverse_count = num_bits - rhs;
113
114 return {(static_cast<S>(lhs) >> rhs), lhs << reverse_count};
115}
116
123template<std::unsigned_integral T>
124hi_force_inline constexpr std::pair<T, T> add_carry(T lhs, T rhs, T carry = T{0}) noexcept
125{
126 hi_axiom(carry <= 1);
127
128 constexpr std::size_t num_bits = sizeof(T) * CHAR_BIT;
129
130 if constexpr (has_native_uintxx_v<num_bits * 2>) {
131 // We can use a native type that has double the size.
132 using U = make_uintxx_t<num_bits * 2>;
133
134 hilet r = static_cast<U>(lhs) + static_cast<U>(rhs) + static_cast<U>(carry);
135 return {static_cast<T>(r), static_cast<T>(r >> num_bits)};
136
137 } else if (not std::is_constant_evaluated()) {
138#if HI_COMPILER == HI_CC_MSVC
139 uint64_t r;
140 hilet c = _addcarry_u64(static_cast<unsigned char>(carry), lhs, rhs, &r);
141 return {r, static_cast<T>(c)};
142#endif
143 }
144
145 // Carry can directly be added the sum without a double overflow.
146 hilet r = static_cast<T>(lhs + rhs + carry);
147 hilet c = static_cast<T>(r < lhs or r < rhs);
148 return {r, c};
149}
150
162template<std::unsigned_integral T>
163hi_force_inline constexpr std::pair<T, T> mul_carry(T lhs, T rhs, T carry = T{0}, T accumulator = T{0}) noexcept
164{
165 constexpr std::size_t num_bits = sizeof(T) * CHAR_BIT;
166
167 if constexpr (has_native_uintxx_v<num_bits * 2>) {
168 // We can use a native type that has double the size.
169 using U = make_uintxx_t<num_bits * 2>;
170
171 hilet r = static_cast<U>(lhs) * static_cast<U>(rhs) + static_cast<U>(carry) + static_cast<U>(accumulator);
172 return {static_cast<T>(r), static_cast<T>(r >> num_bits)};
173
174 } else if (not std::is_constant_evaluated()) {
175#if HI_COMPILER == HI_CC_MSVC
176 if constexpr (sizeof(T) == 8) {
177 uint64_t hi = 0;
178 uint64_t lo = _umul128(lhs, rhs, &hi);
179 uint64_t c = 0;
180 std::tie(lo, c) = add_carry(lo, carry, uint64_t{0});
181 std::tie(hi, c) = add_carry(hi, uint64_t{0}, c);
182 std::tie(lo, c) = add_carry(lo, accumulator, uint64_t{0});
183 std::tie(hi, c) = add_carry(hi, uint64_t{0}, c);
184 return {lo, hi};
185 }
186#endif
187 }
188
189 constexpr std::size_t num_half_bits = num_bits / 2;
190 constexpr T half_mask = (T{1} << num_half_bits) - T{1};
191
192 hilet A = lhs >> num_half_bits;
193 hilet B = lhs & half_mask;
194 hilet C = rhs >> num_half_bits;
195 hilet D = rhs & half_mask;
196 hilet AC = A * C;
197 hilet AD = A * D;
198 hilet BC = B * C;
199 hilet BD = B * D;
200
201 // Provisional result.
202 auto hi = AC;
203 auto lo = BD;
204 auto c = T{0};
205
206 // AD and BC are shifted half way across the lo and hi of the result.
207 hilet AD_lo = AD << num_half_bits;
208 hilet AD_hi = AD >> num_half_bits;
209 hilet BC_lo = BC << num_half_bits;
210 hilet BC_hi = BC >> num_half_bits;
211
212 std::tie(lo, c) = add_carry(lo, AD_lo, T{0});
213 std::tie(hi, c) = add_carry(hi, AD_hi, c);
214 std::tie(lo, c) = add_carry(lo, BC_lo, T{0});
215 std::tie(hi, c) = add_carry(hi, BC_hi, c);
216
217 // Now add the carry and accumulator arguments.
218 std::tie(lo, c) = add_carry(lo, carry, T{0});
219 std::tie(hi, c) = add_carry(hi, T{0}, c);
220 std::tie(lo, c) = add_carry(lo, accumulator, T{0});
221 std::tie(hi, c) = add_carry(hi, T{0}, c);
222 return {lo, hi};
223}
224
234template<std::unsigned_integral T>
235hi_force_inline constexpr T wide_div(T lhs_lo, T lhs_hi, T rhs) noexcept
236{
237 if constexpr (sizeof(T) == 1) {
238 hilet lhs = static_cast<uint16_t>(lhs_hi) << 8 | static_cast<uint16_t>(lhs_lo);
239 return narrow_cast<uint8_t>(lhs / rhs);
240
241 } else if constexpr (sizeof(T) == 2) {
242 hilet lhs = static_cast<uint32_t>(lhs_hi) << 16 | static_cast<uint32_t>(lhs_lo);
243 return narrow_cast<uint16_t>(lhs / rhs);
244
245 } else if constexpr (sizeof(T) == 4) {
246 hilet lhs = static_cast<uint64_t>(lhs_hi) << 32 | static_cast<uint64_t>(lhs_lo);
247 return narrow_cast<uint32_t>(lhs / rhs);
248
249 } else if constexpr (sizeof(T) == 8) {
250#if HI_COMPILER == HI_CC_MSVC
251 uint64_t remainder;
252 return _udiv128(lhs_hi, lhs_lo, rhs, &remainder);
253
254#elif HI_COMPILER == HI_CC_CLANG || HI_COMPILER == HI_CC_GCC
255 hilet lhs = static_cast<__uint128_t>(lhs_hi) << 64 | static_cast<__uint128_t>(lhs_lo);
256 return narrow_cast<uint64_t>(lhs / rhs);
257#else
258#error "Not implemented"
259#endif
260 }
261}
262
269template<std::unsigned_integral T>
270[[nodiscard]] hi_force_inline constexpr ssize_t bsr_carry_chain(T const *lhs, std::size_t n) noexcept
271{
272 constexpr std::size_t bits_per_digit = sizeof(T) * CHAR_BIT;
273
274 for (ssize_t i = static_cast<ssize_t>(n) - 1; i >= 0; i--) {
275 auto tmp = std::countl_zero(lhs[i]);
276 if (tmp < bits_per_digit) {
277 return i * bits_per_digit + bits_per_digit - tmp - 1;
278 }
279 }
280 return -1;
281}
282
290template<std::unsigned_integral T>
291hi_force_inline constexpr void invert_carry_chain(T *r, T const *rhs, std::size_t n) noexcept
292{
293 for (std::size_t i = 0; i != n; ++i) {
294 r[i] = ~rhs[i];
295 }
296}
297
305template<std::unsigned_integral T>
306hi_force_inline constexpr void sll_carry_chain(T *r, T const *lhs, std::size_t rhs, std::size_t n) noexcept
307{
308 constexpr std::size_t bits_per_digit = sizeof(T) * CHAR_BIT;
309
310 hilet digit_count = static_cast<ssize_t>(rhs / bits_per_digit);
311 hilet bit_count = rhs % bits_per_digit;
312
313 if (r != lhs or digit_count > 0) {
314 ssize_t i;
315 for (i = static_cast<ssize_t>(n) - 1; i >= digit_count; --i) {
316 r[i] = lhs[i - digit_count];
317 }
318 for (; i >= 0; --i) {
319 r[i] = T{0};
320 }
321 }
322
323 if (bit_count > 0) {
324 auto carry = T{0};
325 for (std::size_t i = 0; i != n; ++i) {
326 std::tie(r[i], carry) = sll_carry(r[i], bit_count, carry);
327 }
328 }
329}
330
338template<std::unsigned_integral T>
339hi_force_inline constexpr void srl_carry_chain(T *r, T const *lhs, std::size_t rhs, std::size_t n) noexcept
340{
341 constexpr std::size_t bits_per_digit = sizeof(T) * CHAR_BIT;
342
343 hilet digit_count = rhs / bits_per_digit;
344 hilet bit_count = rhs % bits_per_digit;
345
346 if (r != lhs or digit_count > 0) {
347 std::size_t i = 0;
348 for (; i != (n - digit_count); ++i) {
349 r[i] = lhs[i + digit_count];
350 }
351 for (; i != n; ++i) {
352 r[i] = T{0};
353 }
354 }
355
356 if (bit_count > 0) {
357 auto carry = T{0};
358
359 for (ssize_t i = static_cast<ssize_t>(n) - digit_count - 1; i >= 0; --i) {
360 std::tie(r[i], carry) = srl_carry(r[i], bit_count, carry);
361 }
362 }
363}
364
373template<std::unsigned_integral T>
374hi_force_inline constexpr void sra_carry_chain(T *r, T const *lhs, std::size_t rhs, std::size_t n) noexcept
375{
376 using S = std::make_signed_t<T>;
377 constexpr std::size_t bits_per_digit = sizeof(T) * CHAR_BIT;
378
379 hilet digit_count = rhs / bits_per_digit;
380 hilet bit_count = rhs % bits_per_digit;
381
382 if (r != lhs or digit_count > 0) {
383 hi_axiom(digit_count < n);
384
385 std::size_t i = 0;
386 for (; i != (n - digit_count); ++i) {
387 r[i] = lhs[i + digit_count];
388 }
389
390 // Sign extent the digits that are unused after a large shift.
391 hilet sign = lhs[n - 1] < 0 ? S{-1} : S{0};
392 for (; i != n; ++i) {
393 r[i] = sign;
394 }
395 }
396
397 if (bit_count > 0) {
398 hi_axiom(n > 0);
399 auto carry = T{};
400
401 // The most significant digit is sign extended.
402 ssize_t i = static_cast<ssize_t>(n) - digit_count - 1;
403 std::tie(r[i], carry) = sra_carry(r[i], bit_count);
404 --i;
405
406 // The rest of the digits pass through the carry.
407 for (; i >= 0; --i) {
408 std::tie(r[i], carry) = srl_carry(r[i], bit_count, carry);
409 }
410 }
411}
412
420template<std::unsigned_integral T>
421hi_force_inline constexpr void and_carry_chain(T *r, T const *lhs, T const *rhs, std::size_t n) noexcept
422{
423 for (std::size_t i = 0; i != n; ++i) {
424 r[i] = lhs[i] & rhs[i];
425 }
426}
427
435template<std::unsigned_integral T>
436hi_force_inline constexpr void or_carry_chain(T *r, T const *lhs, T const *rhs, std::size_t n) noexcept
437{
438 for (std::size_t i = 0; i != n; ++i) {
439 r[i] = lhs[i] | rhs[i];
440 }
441}
442
450template<std::unsigned_integral T>
451hi_force_inline constexpr void xor_carry_chain(T *r, T const *lhs, T const *rhs, std::size_t n) noexcept
452{
453 for (std::size_t i = 0; i != n; ++i) {
454 r[i] = lhs[i] ^ rhs[i];
455 }
456}
457
458template<std::unsigned_integral T>
459[[nodiscard]] hi_force_inline constexpr bool eq_carry_chain(T const *lhs, T const *rhs, std::size_t n) noexcept
460{
461 for (std::size_t i = 0; i != n; ++i) {
462 if (lhs[i] != rhs[i]) {
463 return false;
464 }
465 }
466 return true;
467}
468
469template<std::unsigned_integral T>
470[[nodiscard]] hi_force_inline constexpr bool ne_carry_chain(T const *lhs, T const *rhs, std::size_t n) noexcept
471{
472 return not eq_carry_chain(lhs, rhs, n);
473}
474
475template<std::unsigned_integral T>
476[[nodiscard]] hi_force_inline constexpr std::strong_ordering
477cmp_unsigned_carry_chain(T const *lhs, T const *rhs, std::size_t n) noexcept
478{
479 for (ssize_t i = static_cast<ssize_t>(n) - 1; i >= 0; --i) {
480 hilet r = lhs[i] <=> rhs[i];
481 if (r != std::strong_ordering::equal) {
482 return r;
483 }
484 }
485 return std::strong_ordering::equal;
486}
487
488template<std::unsigned_integral T>
489[[nodiscard]] hi_force_inline constexpr std::strong_ordering cmp_signed_carry_chain(T const *lhs, T const *rhs, std::size_t n) noexcept
490{
491 using S = std::make_signed_t<T>;
492
493 // Compare the ms-digit using signed comparison, because it includes the sign-bit
494 if (n > 0) {
495 hilet r = static_cast<S>(lhs[n - 1]) <=> static_cast<S>(rhs[n - 1]);
496 if (r != std::strong_ordering::equal) {
497 return r;
498 }
499 }
500
501 // At this point both values have the same sign, and since the rest of the digits
502 // do not have a sign bit, use unsigned comparison.
503 for (ssize_t i = static_cast<ssize_t>(n) - 2; i >= 0; --i) {
504 hilet r = lhs[i] <=> rhs[i];
505 if (r != std::strong_ordering::equal) {
506 return r;
507 }
508 }
509 return std::strong_ordering::equal;
510}
511
512template<std::unsigned_integral T>
513[[nodiscard]] hi_force_inline constexpr bool lt_unsigned_carry_chain(T const *lhs, T const *rhs, std::size_t n) noexcept
514{
515 return cmp_unsigned_carry_chain(lhs, rhs, n) == std::strong_ordering::less;
516}
517
518template<std::unsigned_integral T>
519[[nodiscard]] hi_force_inline constexpr bool gt_unsigned_carry_chain(T const *lhs, T const *rhs, std::size_t n) noexcept
520{
521 return cmp_unsigned_carry_chain(lhs, rhs, n) == std::strong_ordering::greater;
522}
523
524template<std::unsigned_integral T>
525[[nodiscard]] hi_force_inline constexpr bool ge_unsigned_carry_chain(T const *lhs, T const *rhs, std::size_t n) noexcept
526{
527 return lt_unsigned_carry_chain(rhs, lhs, n);
528}
529
530template<std::unsigned_integral T>
531[[nodiscard]] hi_force_inline constexpr bool le_unsigned_carry_chain(T const *lhs, T const *rhs, std::size_t n) noexcept
532{
533 return gt_unsigned_carry_chain(rhs, lhs, n);
534}
535
543template<std::unsigned_integral T>
544hi_force_inline constexpr void neg_carry_chain(T *r, T const *rhs, std::size_t n) noexcept
545{
546 auto carry = T{1};
547 for (std::size_t i = 0; i != n; ++i) {
548 std::tie(r[i], carry) = add_carry(~rhs[i], T{0}, carry);
549 }
550}
551
559template<std::unsigned_integral T>
560hi_force_inline constexpr void add_carry_chain(T *r, T const *lhs, T const *rhs, std::size_t n) noexcept
561{
562 auto carry = T{0};
563 for (std::size_t i = 0; i != n; ++i) {
564 std::tie(r[i], carry) = add_carry(lhs[i], rhs[i], carry);
565 }
566}
567
575template<std::unsigned_integral T>
576hi_force_inline constexpr void sub_carry_chain(T *r, T const *lhs, T const *rhs, std::size_t n) noexcept
577{
578 auto carry = T{1};
579 for (std::size_t i = 0; i != n; ++i) {
580 std::tie(r[i], carry) = add_carry(lhs[i], ~rhs[i], carry);
581 }
582}
583
592template<std::unsigned_integral T>
593hi_force_inline constexpr void mul_carry_chain(T *hi_restrict r, T const *lhs, T const *rhs, std::size_t n) noexcept
594{
595 hi_axiom(r != lhs and r != rhs);
596
597 for (auto rhs_index = 0; rhs_index < n; rhs_index++) {
598 hilet rhs_digit = rhs[rhs_index];
599
600 T carry = 0;
601 for (auto lhs_index = 0; (lhs_index + rhs_index) < n; lhs_index++) {
602 hilet lhs_digit = lhs[lhs_index];
603
604 T result;
605 T accumulator = r[rhs_index + lhs_index];
606 std::tie(result, carry) = mul_carry(lhs_digit, rhs_digit, carry, accumulator);
607 r[rhs_index + lhs_index] = result;
608 }
609 }
610}
611
622template<std::unsigned_integral T>
623constexpr void div_carry_chain(T *hi_restrict quotient, T *hi_restrict remainder, T const *lhs, T const *rhs, std::size_t n) noexcept
624{
625 hi_axiom(quotient != lhs and quotient != rhs and quotient != remainder);
626 hi_axiom(remainder != lhs and remainder != rhs);
627
628 hilet nr_bits = static_cast<ssize_t>(n * sizeof(T) * CHAR_BIT);
629
630 for (ssize_t i = nr_bits - 1; i >= 0; i--) {
631 sll_carry_chain(remainder, remainder, 1, n);
632 remainder[0] |= get_bit(lhs, i);
633 if (ge_unsigned_carry_chain(remainder, rhs, n)) {
634 sub_carry_chain(remainder, remainder, rhs, n);
635 set_bit(quotient, i);
636 }
637 }
638}
639
651template<std::unsigned_integral T>
652constexpr void
653signed_div_carry_chain(T *hi_restrict quotient, T *hi_restrict remainder, T const *lhs, T const *rhs, std::size_t n) noexcept
654{
655 hi_axiom(n > 0);
656 hi_axiom(quotient != lhs and quotient != rhs and quotient != remainder);
657 hi_axiom(remainder != lhs and remainder != rhs);
658
659 using signed_type = std::make_signed_t<T>;
660
661 hilet lhs_is_negative = static_cast<signed_type>(lhs[n - 1]) < 0;
662 hilet rhs_is_negative = static_cast<signed_type>(rhs[n - 1]) < 0;
663
664 auto tmp = std::vector<T>{};
665 if (lhs_is_negative or rhs_is_negative) {
666 // Allocate room for negative lhs and rhs together, so only one allocation is needed.
667 tmp.resize(n * 2);
668
669 if (lhs_is_negative) {
670 // Negate lhs and point it to the tmp array.
671 T *p = tmp.data();
672 neg_carry_chain(p, lhs, n);
673 lhs = p;
674 }
675
676 if (rhs_is_negative) {
677 // Negate rhs and point it to the tmp array.
678 T *p = tmp.data() * n;
679 neg_carry_chain(p, rhs, n);
680 rhs = p;
681 }
682 }
683
684 div_carry_chain(quotient, remainder, lhs, rhs, n);
685
686 if (lhs_is_negative != rhs_is_negative) {
687 // Negate the result if the sign of lhs and rhs are different.
688 neg_carry_chain(quotient, quotient, n);
689 }
690 if (lhs_is_negative) {
691 // Remainder has same sign as divisor.
692 neg_carry_chain(remainder, remainder, n);
693 }
694}
695
696} // namespace hi
697
698hi_warning_pop();
#define hi_axiom(expression,...)
Specify an axiom; an expression that is true.
Definition assert.hpp:238
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
geometry/margins.hpp
Definition cache.hpp:11
T resize(T... args)
T tie(T... args)