HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
int_carry.hpp
1// Copyright Take Vos 2019, 2021-2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "../utility/utility.hpp"
8#include "../macros.hpp"
9#include <complex>
10#include <cmath>
11#include <limits>
12#include <span>
13#include <tuple>
14#include <concepts>
15
16
17
18#if HI_COMPILER == HI_CC_MSVC
19#include <intrin.h>
20#endif
21#if HI_PROCESSOR == HI_CPU_X64
22#include <immintrin.h>
23#endif
24
25hi_warning_push();
26// C4702 unreachable code: Suppressed due intrinsics and std::is_constant_evaluated()
27hi_warning_ignore_msvc(4702);
28// C26472: Don't use static_cast for arithmetic conversions... (type.1)
29// int_carry does many static_casts on purpose.
30hi_warning_ignore_msvc(26472)
31
32namespace hi {
33
41template<std::unsigned_integral T>
42[[nodiscard]] hi_force_inline constexpr T get_bit(T const *lhs, std::size_t index) noexcept
43{
44 constexpr std::size_t bits_per_digit = sizeof(T) * CHAR_BIT;
45
46 hilet digit_count = index / bits_per_digit;
47 hilet bit_count = index % bits_per_digit;
48
49 return (lhs[digit_count] >> bit_count) & 1;
50}
51
59template<std::unsigned_integral T>
60constexpr void set_bit(T *r, std::size_t index, T value = T{1}) noexcept
61{
62 hi_axiom(value <= 1);
63
64 constexpr std::size_t bits_per_digit = sizeof(T) * CHAR_BIT;
65
66 hilet digit_count = index / bits_per_digit;
67 hilet bit_count = index % bits_per_digit;
68
69 value <<= bit_count;
70 hilet mask = ~(T{1} << bit_count);
71 r[digit_count] = (r[digit_count] & mask) | value;
72}
73
80template<std::unsigned_integral T>
81hi_force_inline constexpr std::pair<T, T> sll_carry(T lhs, std::size_t rhs, T carry = T{0}) noexcept
82{
83 constexpr auto num_bits = sizeof(T) * CHAR_BIT;
84 hilet reverse_count = num_bits - rhs;
85
86 return {(lhs << rhs) | carry, lhs >> reverse_count};
87}
88
95template<std::unsigned_integral T>
96hi_force_inline constexpr std::pair<T, T> srl_carry(T lhs, std::size_t rhs, T carry = T{0}) noexcept
97{
98 constexpr auto num_bits = sizeof(T) * CHAR_BIT;
99 hilet reverse_count = num_bits - rhs;
100
101 return {(lhs >> rhs) | carry, lhs << reverse_count};
102}
103
109template<std::unsigned_integral T>
110hi_force_inline constexpr std::pair<T, T> sra_carry(T lhs, std::size_t rhs) noexcept
111{
112 using S = std::make_signed_t<T>;
113
114 constexpr auto num_bits = sizeof(T) * CHAR_BIT;
115 hilet reverse_count = num_bits - rhs;
116
117 return {(static_cast<S>(lhs) >> rhs), lhs << reverse_count};
118}
119
126template<std::unsigned_integral T>
127hi_force_inline constexpr std::pair<T, T> add_carry(T lhs, T rhs, T carry = T{0}) noexcept
128{
129 hi_axiom(carry <= 1);
130
131 constexpr std::size_t num_bits = sizeof(T) * CHAR_BIT;
132
133 if constexpr (has_native_uintxx_v<num_bits * 2>) {
134 // We can use a native type that has double the size.
135 using U = make_uintxx_t<num_bits * 2>;
136
137 hilet r = static_cast<U>(lhs) + static_cast<U>(rhs) + static_cast<U>(carry);
138 return {static_cast<T>(r), static_cast<T>(r >> num_bits)};
139
140 } else if (not std::is_constant_evaluated()) {
141#if HI_COMPILER == HI_CC_MSVC
142 uint64_t r;
143 hilet c = _addcarry_u64(static_cast<unsigned char>(carry), lhs, rhs, &r);
144 return {r, static_cast<T>(c)};
145#endif
146 }
147
148 // Carry can directly be added the sum without a double overflow.
149 hilet r = static_cast<T>(lhs + rhs + carry);
150 hilet c = static_cast<T>(r < lhs or r < rhs);
151 return {r, c};
152}
153
165template<std::unsigned_integral T>
166hi_force_inline constexpr std::pair<T, T> mul_carry(T lhs, T rhs, T carry = T{0}, T accumulator = T{0}) noexcept
167{
168 constexpr std::size_t num_bits = sizeof(T) * CHAR_BIT;
169
170 if constexpr (has_native_uintxx_v<num_bits * 2>) {
171 // We can use a native type that has double the size.
172 using U = make_uintxx_t<num_bits * 2>;
173
174 hilet r = static_cast<U>(lhs) * static_cast<U>(rhs) + static_cast<U>(carry) + static_cast<U>(accumulator);
175 return {static_cast<T>(r), static_cast<T>(r >> num_bits)};
176
177 } else if (not std::is_constant_evaluated()) {
178#if HI_COMPILER == HI_CC_MSVC
179 if constexpr (sizeof(T) == 8) {
180 uint64_t hi = 0;
181 uint64_t lo = _umul128(lhs, rhs, &hi);
182 uint64_t c = 0;
183 std::tie(lo, c) = add_carry(lo, carry, uint64_t{0});
184 std::tie(hi, c) = add_carry(hi, uint64_t{0}, c);
185 std::tie(lo, c) = add_carry(lo, accumulator, uint64_t{0});
186 std::tie(hi, c) = add_carry(hi, uint64_t{0}, c);
187 return {lo, hi};
188 }
189#endif
190 }
191
192 constexpr std::size_t num_half_bits = num_bits / 2;
193 constexpr T half_mask = (T{1} << num_half_bits) - T{1};
194
195 hilet A = lhs >> num_half_bits;
196 hilet B = lhs & half_mask;
197 hilet C = rhs >> num_half_bits;
198 hilet D = rhs & half_mask;
199 hilet AC = A * C;
200 hilet AD = A * D;
201 hilet BC = B * C;
202 hilet BD = B * D;
203
204 // Provisional result.
205 auto hi = AC;
206 auto lo = BD;
207 auto c = T{0};
208
209 // AD and BC are shifted half way across the lo and hi of the result.
210 hilet AD_lo = AD << num_half_bits;
211 hilet AD_hi = AD >> num_half_bits;
212 hilet BC_lo = BC << num_half_bits;
213 hilet BC_hi = BC >> num_half_bits;
214
215 std::tie(lo, c) = add_carry(lo, AD_lo, T{0});
216 std::tie(hi, c) = add_carry(hi, AD_hi, c);
217 std::tie(lo, c) = add_carry(lo, BC_lo, T{0});
218 std::tie(hi, c) = add_carry(hi, BC_hi, c);
219
220 // Now add the carry and accumulator arguments.
221 std::tie(lo, c) = add_carry(lo, carry, T{0});
222 std::tie(hi, c) = add_carry(hi, T{0}, c);
223 std::tie(lo, c) = add_carry(lo, accumulator, T{0});
224 std::tie(hi, c) = add_carry(hi, T{0}, c);
225 return {lo, hi};
226}
227
237template<std::unsigned_integral T>
238hi_force_inline constexpr T wide_div(T lhs_lo, T lhs_hi, T rhs) noexcept
239{
240 if constexpr (sizeof(T) == 1) {
241 hilet lhs = static_cast<uint16_t>(lhs_hi) << 8 | static_cast<uint16_t>(lhs_lo);
242 return narrow_cast<uint8_t>(lhs / rhs);
243
244 } else if constexpr (sizeof(T) == 2) {
245 hilet lhs = static_cast<uint32_t>(lhs_hi) << 16 | static_cast<uint32_t>(lhs_lo);
246 return narrow_cast<uint16_t>(lhs / rhs);
247
248 } else if constexpr (sizeof(T) == 4) {
249 hilet lhs = static_cast<uint64_t>(lhs_hi) << 32 | static_cast<uint64_t>(lhs_lo);
250 return narrow_cast<uint32_t>(lhs / rhs);
251
252 } else if constexpr (sizeof(T) == 8) {
253#if HI_COMPILER == HI_CC_MSVC
254 uint64_t remainder;
255 return _udiv128(lhs_hi, lhs_lo, rhs, &remainder);
256
257#elif HI_COMPILER == HI_CC_CLANG || HI_COMPILER == HI_CC_GCC
258 hilet lhs = static_cast<__uint128_t>(lhs_hi) << 64 | static_cast<__uint128_t>(lhs_lo);
259 return narrow_cast<uint64_t>(lhs / rhs);
260#else
261#error "Not implemented"
262#endif
263 }
264}
265
272template<std::unsigned_integral T>
273[[nodiscard]] hi_force_inline constexpr ssize_t bsr_carry_chain(T const *lhs, std::size_t n) noexcept
274{
275 constexpr std::size_t bits_per_digit = sizeof(T) * CHAR_BIT;
276
277 for (ssize_t i = static_cast<ssize_t>(n) - 1; i >= 0; i--) {
278 auto tmp = std::countl_zero(lhs[i]);
279 if (tmp < bits_per_digit) {
280 return i * bits_per_digit + bits_per_digit - tmp - 1;
281 }
282 }
283 return -1;
284}
285
293template<std::unsigned_integral T>
294hi_force_inline constexpr void invert_carry_chain(T *r, T const *rhs, std::size_t n) noexcept
295{
296 for (std::size_t i = 0; i != n; ++i) {
297 r[i] = ~rhs[i];
298 }
299}
300
308template<std::unsigned_integral T>
309hi_force_inline constexpr void sll_carry_chain(T *r, T const *lhs, std::size_t rhs, std::size_t n) noexcept
310{
311 constexpr std::size_t bits_per_digit = sizeof(T) * CHAR_BIT;
312
313 hilet digit_count = static_cast<ssize_t>(rhs / bits_per_digit);
314 hilet bit_count = rhs % bits_per_digit;
315
316 if (r != lhs or digit_count > 0) {
317 ssize_t i;
318 for (i = static_cast<ssize_t>(n) - 1; i >= digit_count; --i) {
319 r[i] = lhs[i - digit_count];
320 }
321 for (; i >= 0; --i) {
322 r[i] = T{0};
323 }
324 }
325
326 if (bit_count > 0) {
327 auto carry = T{0};
328 for (std::size_t i = 0; i != n; ++i) {
329 std::tie(r[i], carry) = sll_carry(r[i], bit_count, carry);
330 }
331 }
332}
333
341template<std::unsigned_integral T>
342hi_force_inline constexpr void srl_carry_chain(T *r, T const *lhs, std::size_t rhs, std::size_t n) noexcept
343{
344 constexpr std::size_t bits_per_digit = sizeof(T) * CHAR_BIT;
345
346 hilet digit_count = rhs / bits_per_digit;
347 hilet bit_count = rhs % bits_per_digit;
348
349 if (r != lhs or digit_count > 0) {
350 std::size_t i = 0;
351 for (; i != (n - digit_count); ++i) {
352 r[i] = lhs[i + digit_count];
353 }
354 for (; i != n; ++i) {
355 r[i] = T{0};
356 }
357 }
358
359 if (bit_count > 0) {
360 auto carry = T{0};
361
362 for (ssize_t i = static_cast<ssize_t>(n) - digit_count - 1; i >= 0; --i) {
363 std::tie(r[i], carry) = srl_carry(r[i], bit_count, carry);
364 }
365 }
366}
367
376template<std::unsigned_integral T>
377hi_force_inline constexpr void sra_carry_chain(T *r, T const *lhs, std::size_t rhs, std::size_t n) noexcept
378{
379 using S = std::make_signed_t<T>;
380 constexpr std::size_t bits_per_digit = sizeof(T) * CHAR_BIT;
381
382 hilet digit_count = rhs / bits_per_digit;
383 hilet bit_count = rhs % bits_per_digit;
384
385 if (r != lhs or digit_count > 0) {
386 hi_axiom(digit_count < n);
387
388 std::size_t i = 0;
389 for (; i != (n - digit_count); ++i) {
390 r[i] = lhs[i + digit_count];
391 }
392
393 // Sign extent the digits that are unused after a large shift.
394 hilet sign = lhs[n - 1] < 0 ? S{-1} : S{0};
395 for (; i != n; ++i) {
396 r[i] = sign;
397 }
398 }
399
400 if (bit_count > 0) {
401 hi_axiom(n > 0);
402 auto carry = T{};
403
404 // The most significant digit is sign extended.
405 ssize_t i = static_cast<ssize_t>(n) - digit_count - 1;
406 std::tie(r[i], carry) = sra_carry(r[i], bit_count);
407 --i;
408
409 // The rest of the digits pass through the carry.
410 for (; i >= 0; --i) {
411 std::tie(r[i], carry) = srl_carry(r[i], bit_count, carry);
412 }
413 }
414}
415
423template<std::unsigned_integral T>
424hi_force_inline constexpr void and_carry_chain(T *r, T const *lhs, T const *rhs, std::size_t n) noexcept
425{
426 for (std::size_t i = 0; i != n; ++i) {
427 r[i] = lhs[i] & rhs[i];
428 }
429}
430
438template<std::unsigned_integral T>
439hi_force_inline constexpr void or_carry_chain(T *r, T const *lhs, T const *rhs, std::size_t n) noexcept
440{
441 for (std::size_t i = 0; i != n; ++i) {
442 r[i] = lhs[i] | rhs[i];
443 }
444}
445
453template<std::unsigned_integral T>
454hi_force_inline constexpr void xor_carry_chain(T *r, T const *lhs, T const *rhs, std::size_t n) noexcept
455{
456 for (std::size_t i = 0; i != n; ++i) {
457 r[i] = lhs[i] ^ rhs[i];
458 }
459}
460
461template<std::unsigned_integral T>
462[[nodiscard]] hi_force_inline constexpr bool eq_carry_chain(T const *lhs, T const *rhs, std::size_t n) noexcept
463{
464 for (std::size_t i = 0; i != n; ++i) {
465 if (lhs[i] != rhs[i]) {
466 return false;
467 }
468 }
469 return true;
470}
471
472template<std::unsigned_integral T>
473[[nodiscard]] hi_force_inline constexpr bool ne_carry_chain(T const *lhs, T const *rhs, std::size_t n) noexcept
474{
475 return not eq_carry_chain(lhs, rhs, n);
476}
477
478template<std::unsigned_integral T>
479[[nodiscard]] hi_force_inline constexpr std::strong_ordering
480cmp_unsigned_carry_chain(T const *lhs, T const *rhs, std::size_t n) noexcept
481{
482 for (ssize_t i = static_cast<ssize_t>(n) - 1; i >= 0; --i) {
483 hilet r = lhs[i] <=> rhs[i];
484 if (r != std::strong_ordering::equal) {
485 return r;
486 }
487 }
488 return std::strong_ordering::equal;
489}
490
491template<std::unsigned_integral T>
492[[nodiscard]] hi_force_inline constexpr std::strong_ordering cmp_signed_carry_chain(T const *lhs, T const *rhs, std::size_t n) noexcept
493{
494 using S = std::make_signed_t<T>;
495
496 // Compare the ms-digit using signed comparison, because it includes the sign-bit
497 if (n > 0) {
498 hilet r = static_cast<S>(lhs[n - 1]) <=> static_cast<S>(rhs[n - 1]);
499 if (r != std::strong_ordering::equal) {
500 return r;
501 }
502 }
503
504 // At this point both values have the same sign, and since the rest of the digits
505 // do not have a sign bit, use unsigned comparison.
506 for (ssize_t i = static_cast<ssize_t>(n) - 2; i >= 0; --i) {
507 hilet r = lhs[i] <=> rhs[i];
508 if (r != std::strong_ordering::equal) {
509 return r;
510 }
511 }
512 return std::strong_ordering::equal;
513}
514
515template<std::unsigned_integral T>
516[[nodiscard]] hi_force_inline constexpr bool lt_unsigned_carry_chain(T const *lhs, T const *rhs, std::size_t n) noexcept
517{
518 return cmp_unsigned_carry_chain(lhs, rhs, n) == std::strong_ordering::less;
519}
520
521template<std::unsigned_integral T>
522[[nodiscard]] hi_force_inline constexpr bool gt_unsigned_carry_chain(T const *lhs, T const *rhs, std::size_t n) noexcept
523{
524 return cmp_unsigned_carry_chain(lhs, rhs, n) == std::strong_ordering::greater;
525}
526
527template<std::unsigned_integral T>
528[[nodiscard]] hi_force_inline constexpr bool ge_unsigned_carry_chain(T const *lhs, T const *rhs, std::size_t n) noexcept
529{
530 return lt_unsigned_carry_chain(rhs, lhs, n);
531}
532
533template<std::unsigned_integral T>
534[[nodiscard]] hi_force_inline constexpr bool le_unsigned_carry_chain(T const *lhs, T const *rhs, std::size_t n) noexcept
535{
536 return gt_unsigned_carry_chain(rhs, lhs, n);
537}
538
546template<std::unsigned_integral T>
547hi_force_inline constexpr void neg_carry_chain(T *r, T const *rhs, std::size_t n) noexcept
548{
549 auto carry = T{1};
550 for (std::size_t i = 0; i != n; ++i) {
551 std::tie(r[i], carry) = add_carry(~rhs[i], T{0}, carry);
552 }
553}
554
562template<std::unsigned_integral T>
563hi_force_inline constexpr void add_carry_chain(T *r, T const *lhs, T const *rhs, std::size_t n) noexcept
564{
565 auto carry = T{0};
566 for (std::size_t i = 0; i != n; ++i) {
567 std::tie(r[i], carry) = add_carry(lhs[i], rhs[i], carry);
568 }
569}
570
578template<std::unsigned_integral T>
579hi_force_inline constexpr void sub_carry_chain(T *r, T const *lhs, T const *rhs, std::size_t n) noexcept
580{
581 auto carry = T{1};
582 for (std::size_t i = 0; i != n; ++i) {
583 std::tie(r[i], carry) = add_carry(lhs[i], ~rhs[i], carry);
584 }
585}
586
595template<std::unsigned_integral T>
596hi_force_inline constexpr void mul_carry_chain(T *hi_restrict r, T const *lhs, T const *rhs, std::size_t n) noexcept
597{
598 hi_axiom(r != lhs and r != rhs);
599
600 for (auto rhs_index = 0; rhs_index < n; rhs_index++) {
601 hilet rhs_digit = rhs[rhs_index];
602
603 T carry = 0;
604 for (auto lhs_index = 0; (lhs_index + rhs_index) < n; lhs_index++) {
605 hilet lhs_digit = lhs[lhs_index];
606
607 T result;
608 T accumulator = r[rhs_index + lhs_index];
609 std::tie(result, carry) = mul_carry(lhs_digit, rhs_digit, carry, accumulator);
610 r[rhs_index + lhs_index] = result;
611 }
612 }
613}
614
625template<std::unsigned_integral T>
626constexpr void div_carry_chain(T *hi_restrict quotient, T *hi_restrict remainder, T const *lhs, T const *rhs, std::size_t n) noexcept
627{
628 hi_axiom(quotient != lhs and quotient != rhs and quotient != remainder);
629 hi_axiom(remainder != lhs and remainder != rhs);
630
631 hilet nr_bits = static_cast<ssize_t>(n * sizeof(T) * CHAR_BIT);
632
633 for (ssize_t i = nr_bits - 1; i >= 0; i--) {
634 sll_carry_chain(remainder, remainder, 1, n);
635 remainder[0] |= get_bit(lhs, i);
636 if (ge_unsigned_carry_chain(remainder, rhs, n)) {
637 sub_carry_chain(remainder, remainder, rhs, n);
638 set_bit(quotient, i);
639 }
640 }
641}
642
654template<std::unsigned_integral T>
655constexpr void
656signed_div_carry_chain(T *hi_restrict quotient, T *hi_restrict remainder, T const *lhs, T const *rhs, std::size_t n) noexcept
657{
658 hi_axiom(n > 0);
659 hi_axiom(quotient != lhs and quotient != rhs and quotient != remainder);
660 hi_axiom(remainder != lhs and remainder != rhs);
661
662 using signed_type = std::make_signed_t<T>;
663
664 hilet lhs_is_negative = static_cast<signed_type>(lhs[n - 1]) < 0;
665 hilet rhs_is_negative = static_cast<signed_type>(rhs[n - 1]) < 0;
666
667 auto tmp = std::vector<T>{};
668 if (lhs_is_negative or rhs_is_negative) {
669 // Allocate room for negative lhs and rhs together, so only one allocation is needed.
670 tmp.resize(n * 2);
671
672 if (lhs_is_negative) {
673 // Negate lhs and point it to the tmp array.
674 T *p = tmp.data();
675 neg_carry_chain(p, lhs, n);
676 lhs = p;
677 }
678
679 if (rhs_is_negative) {
680 // Negate rhs and point it to the tmp array.
681 T *p = tmp.data() * n;
682 neg_carry_chain(p, rhs, n);
683 rhs = p;
684 }
685 }
686
687 div_carry_chain(quotient, remainder, lhs, rhs, n);
688
689 if (lhs_is_negative != rhs_is_negative) {
690 // Negate the result if the sign of lhs and rhs are different.
691 neg_carry_chain(quotient, quotient, n);
692 }
693 if (lhs_is_negative) {
694 // Remainder has same sign as divisor.
695 neg_carry_chain(remainder, remainder, n);
696 }
697}
698
699} // namespace hi
700
701hi_warning_pop();
geometry/margins.hpp
Definition lookahead_iterator.hpp:5
T resize(T... args)
T tie(T... args)