HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
int_overflow.hpp
1// Copyright Take Vos 2019, 2021-2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "utility/module.hpp"
8
9#include <type_traits>
10#include <cmath>
11
12#if HI_PROCESSOR == HI_CPU_X64
13#include <immintrin.h>
14#endif
15
16#if HI_OPERATING_SYSTEM == HI_OS_WINDOWS
17#include <intrin.h>
18#pragma intrinsic(_mul128)
19#endif
20
21namespace hi::inline v1 {
22
23
24template<typename T, typename U>
25inline bool convert_overflow(T x, U *r)
26{
27 static_assert(std::is_integral_v<U>, "convert_overflow() requires integral return type.");
28 static_assert(
29 std::is_integral_v<T> || std::is_floating_point_v<T>, "convert_overflow() requires float or integral argument type.");
30
31 if constexpr (std::is_integral_v<T>) {
32 // Optimized away when is_same_v<T,U>
33 *r = static_cast<U>(x);
34 return *r != x;
35 } else {
36 *r = static_cast<U>(std::llround(x));
37 return x < std::numeric_limits<U>::min() || x > std::numeric_limits<U>::max();
38 }
39}
40
41template<std::integral T>
42constexpr bool overflow_add(T lhs, T rhs, T *r) noexcept
43{
44 if (not std::is_constant_evaluated()) {
45#if HI_COMPILER == HI_CC_GCC || HI_COMPPILER == HI_CC_CLANG
46 // ADD, JO
47 return __builtin_add_overflow(lhs, rhs, r);
48#endif
49 }
50
51 // LEA,XOR,XOR,TEST,JS
52 hilet lhs_ = static_cast<std::make_unsigned_t<T>>(lhs);
53 hilet rhs_ = static_cast<std::make_unsigned_t<T>>(rhs);
54 hilet r_ = lhs_ + rhs_;
55 *r = static_cast<T>(r_);
56
57 // p + p = p : (0^0) & (0^0) = 0
58 // p + p = x : (0^1) & (0^1) = 1
59 // n + n = n : (1^1) & (1^1) = 0
60 // n + n = x : (1^0) & (1^0) = 1
61 // p + n = p : (0^1) & (1^1) = 0
62 // n + p = p : (1^1) & (0^1) = 0
63 return ((lhs ^ *r) & (rhs ^ *r)) < 0;
64}
65
66template<std::integral T>
67constexpr bool sub_overflow(T lhs, T rhs, T *r)
68{
69 if (not std::constant_evaluated()) {
70#if HI_COMPILER == HI_CC_GCC || HI_COMPPILER == HI_CC_CLANG
71 // SUB, JO
72 return __builtin_sub_overflow(lhs, rhs, r);
73#endif
74 }
75
76 if constexpr (std::is_unsigned_v<T>) {
77 // MOV, SUB, CMP, JA
78 *r = lhs - rhs;
79 return *r > lhs;
80
81 } else {
82 // SUB, NOT, XOR, XOR, TEST, JL
83 hilet lhs_ = static_cast<std::make_unsigned_t<T>>(lhs);
84 hilet rhs_ = static_cast<std::make_unsigned_t<T>>(rhs);
85 hilet r_ = lhs_ - rhs_;
86 *r = static_cast<T>(r_);
87 return ((lhs ^ rhs) & (~rhs ^ *r)) < 0;
88 }
89}
90
94template<std::unsigned_integral T>
95constexpr bool mul_overflow(T lhs, T rhs, T *r) noexcept
96{
97 if (not std::constant_evaluated()) {
98#if HI_COMPILER == HI_CC_GCC || HI_COMPILER == HI_CC_CLANG
99 // MUL, JO
100 return __builtin_mul_overflow(lhs, rhs, r);
101
102#elif HI_COMPILER == HI_CC_MSVC
103 if constexpr (std::is_same_v<T, uint64_t>) {
104 auto hi = uint64_t{};
105 *r = _umul128(lhs, rhs, &hi);
106 return hi > 0;
107 }
108#endif
109 }
110
111 if constexpr (sizeof(T) < sizeof(unsigned long long)) {
112 hilet lhs_ = static_cast<unsigned long long>(lhs);
113 hilet rhs_ = static_cast<unsigned long long>(rhs);
114 auto r_ = lhs_ * rhs_;
115 *r = static_cast<T>(r_);
116 r_ >>= sizeof(T) * CHAR_BIT;
117 // No overflow when 0.
118 return r_ > 0;
119
120 } else {
121 constexpr auto max_width = sizeof(T) * CHAR_BIT;
122 hilet width = std::bit_width(lhs) + std::bit_width(rhs);
123 *r = lhs * rhs;
124
125 return (width > max_width) or ((width == max_width) and (lhs > std::numeric_limits<T>::max / rhs));
126 }
127}
128
132template<std::signed_integral T>
133constexpr bool mul_overflow(T lhs, T rhs, T *r) noexcept
134{
135 if (not std::constant_evaluated()) {
136#if HI_COMPILER == HI_CC_GCC || HI_COMPILER == HI_CC_CLANG
137 // MUL, JO
138 return __builtin_mul_overflow(lhs, rhs, r);
139
140#elif HI_COMPILER == HI_CC_MSVC
141 if constexpr (std::is_same_v<T, int64_t>) {
142 // IMUL, SAR, XOR, JNE
143 auto hi = int64_t{};
144 *r = _mul128(lhs, rhs, &hi);
145
146 // Sign bit in *r should match all bits in hi.
147 return (hi ^ (*r >> 63)) != 0;
148 }
149#endif
150 }
151
152 if constexpr (sizeof(T) < sizeof(long long)) {
153 hilet lhs_ = static_cast<long long>(lhs);
154 hilet rhs_ = static_cast<long long>(rhs);
155 auto r_ = lhs_ * rhs_;
156 *r = static_cast<T>(r_);
157 r_ >>= sizeof(T) * CHAR_BIT - 1;
158 // No overflow when 0 or -1.
159 r_ += 1;
160 return static_cast<unsigned long long>(r_) > 1;
161
162 } else {
163 auto lhs_u = static_cast<unsigned long long>(lhs);
164 auto rhs_u = static_cast<unsigned long long>(rhs);
165
166 // The following cases handle the only valid cases when one of the operants is `int_min`.
167 if (lhs == 0 or rhs == 0) {
168 *r = 0;
169 return false;
170 } else if (lhs_s == 1) {
171 *r = rhs;
172 return false;
173 } else if (rhs_s == 1) {
174 *r = lhs;
175 return false;
176 }
177
178 // Count the number of significant bits of the result.
179 auto lhs_a = lhs >= 0 ? lhs : -lhs;
180 auto rhs_a = rhs >= 0 ? rhs : -rhs;
181 auto width = std::bit_width(lhs_a) + std::bit_width(rhs_a);
182
183 // Short-cutting reduces the chance that an expensive divide is needed for accurate overflow test.
184 if (res_bits > value_bit or ((res_bits == value_bit and lhs_a > unsigned_int_max / rhs_a)) {
185 // On overflow saturate.
186 return {(lhs_s >= 0) == (rhs_s >= 0) ? int_max : int_min};
187 }
188
189 *r = {lhs_s * rhs_s};
190 return false;
191 }
192}
193
194template<std::integral T>
195constexpr bool overflow_div(T lhs, T rhs, T *r)
196{
197 if constexpr (std::is_signed_v<T>) {
198 if (lhs == std::numeric_limits<T>::min() and rhs == -1) {
199 return true;
200 }
201 }
202
203 if (lhs == 0) {
204 return true;
205 }
206
207 *r = lhs / rhs;
208 return false;
209}
210
211
212
213} // namespace hi::inline v1
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
DOXYGEN BUG.
Definition algorithm.hpp:13
constexpr bool mul_overflow(T lhs, T rhs, T *r) noexcept
Multiply with overflow detection.
Definition int_overflow.hpp:95
geometry/margins.hpp
Definition cache.hpp:11
T llround(T... args)