HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
ivec.hpp
1// Copyright Take Vos 2020.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "required.hpp"
8#include "numeric_array.hpp"
9#include <fmt/format.h>
10#include <xmmintrin.h>
11#include <immintrin.h>
12#include <smmintrin.h>
13#include <emmintrin.h>
14#include <cstdint>
15#include <stdexcept>
16#include <array>
17#include <type_traits>
18#include <ostream>
19
20namespace tt {
21
38class i32x4 {
39 /* Intrinsic value of the f32x4.
40 * The elements in __m128i are assigned as follows.
41 * - [127:96] w
42 * - [95:64] z
43 * - [63:32] y
44 * - [31:0] x
45 */
46 __m128i v;
47
48public:
49 /* Create a zeroed out f32x4.
50 */
51 i32x4() noexcept : i32x4(_mm_setzero_si128()) {}
52 i32x4(i32x4 const &rhs) = default;
53 i32x4 &operator=(i32x4 const &rhs) = default;
54 i32x4(i32x4 &&rhs) = default;
55 i32x4 &operator=(i32x4 &&rhs) = default;
56
59 i32x4(__m128i rhs) noexcept :
60 v(rhs) {}
61
64 i32x4 &operator=(__m128i rhs) noexcept {
65 v = rhs;
66 return *this;
67 }
68
71 operator __m128i () const noexcept {
72 return v;
73 }
74
75 i32x4(f32x4 const &rhs) noexcept :
76 i32x4(_mm_cvtps_epi32(rhs)) {}
77
78 i32x4 &operator=(f32x4 const &rhs) noexcept {
79 return *this = _mm_cvtps_epi32(rhs);
80 }
81
82 operator f32x4 () const noexcept {
83 return _mm_cvtepi32_ps(*this);
84 }
85
86 explicit operator std::array<int32_t,4> () const noexcept {
88 _mm_storeu_si128(reinterpret_cast<__m128i*>(r.data()), *this);
89 return r;
90 }
91
96 template<typename T, std::enable_if_t<std::is_arithmetic_v<T>,int> = 0>
97 explicit i32x4(T rhs) noexcept:
98 i32x4(_mm_set1_epi32(narrow_cast<int32_t>(rhs))) {}
99
104 template<typename T, std::enable_if_t<std::is_arithmetic_v<T>,int> = 0>
105 i32x4 &operator=(T rhs) noexcept {
106 return *this = _mm_set1_epi32(narrow_cast<int32_t>(rhs));
107 }
108
114 template<typename T, typename U, typename V=int, typename W=int,
115 std::enable_if_t<std::is_arithmetic_v<T> && std::is_arithmetic_v<U> && std::is_arithmetic_v<V> && std::is_arithmetic_v<W>,int> = 0>
116 i32x4(T x, U y, V z=0, W w=0) noexcept :
117 i32x4(_mm_set_epi32(
118 narrow_cast<int32_t>(w),
119 narrow_cast<int32_t>(z),
120 narrow_cast<int32_t>(y),
121 narrow_cast<int32_t>(x)
122 )) {}
123
129 template<typename T, typename U, typename V=int, typename W=int,
130 std::enable_if_t<std::is_arithmetic_v<T> && std::is_arithmetic_v<U> && std::is_arithmetic_v<V> && std::is_arithmetic_v<W>,int> = 0>
131 [[nodiscard]] static i32x4 point(T x, U y, V z=0, W w=1) noexcept {
132 return i32x4(x, y, z, w);
133 }
134
135 template<size_t I, typename T, std::enable_if_t<std::is_arithmetic_v<T>,int> = 0>
136 i32x4 &set(T rhs) noexcept {
137 static_assert(I <= 3);
138 return *this = _mm_insert_epi32(*this, narrow_cast<int32_t>(rhs), I);
139 }
140
141 template<size_t I>
142 int get() const noexcept {
143 static_assert(I <= 3);
144 return _mm_extract_epi32(*this, I);
145 }
146
147 constexpr size_t size() const noexcept { return 4; }
148
149 template<typename T, std::enable_if_t<std::is_arithmetic_v<T>,int> = 0>
150 i32x4 &x(T rhs) noexcept { return set<0>(rhs); }
151
152 template<typename T, std::enable_if_t<std::is_arithmetic_v<T>,int> = 0>
153 i32x4 &y(T rhs) noexcept { return set<1>(rhs); }
154
155 template<typename T, std::enable_if_t<std::is_arithmetic_v<T>,int> = 0>
156 i32x4 &z(T rhs) noexcept { return set<2>(rhs); }
157
158 template<typename T, std::enable_if_t<std::is_arithmetic_v<T>,int> = 0>
159 i32x4 &w(T rhs) noexcept { return set<3>(rhs); }
160
161 int x() const noexcept { return get<0>(); }
162 int y() const noexcept { return get<1>(); }
163 int z() const noexcept { return get<2>(); }
164 int w() const noexcept { return get<3>(); }
165 int width() const noexcept { return get<0>(); }
166 int height() const noexcept { return get<1>(); }
167
168 i32x4 &operator+=(i32x4 const &rhs) noexcept {
169 return *this = _mm_add_epi32(*this, rhs);
170 }
171
172 i32x4 &operator-=(i32x4 const &rhs) noexcept {
173 return *this = _mm_sub_epi32(*this, rhs);
174 }
175
176 i32x4 &operator*=(i32x4 const &rhs) noexcept {
177 return *this = _mm_mullo_epi32(*this, rhs);
178 }
179
180
181 [[nodiscard]] friend i32x4 operator+(i32x4 const &lhs, i32x4 const &rhs) noexcept {
182 return _mm_add_epi32(lhs, rhs);
183 }
184
185 [[nodiscard]] friend i32x4 operator-(i32x4 const &lhs, i32x4 const &rhs) noexcept {
186 return _mm_sub_epi32(lhs, rhs);
187 }
188
189 [[nodiscard]] friend i32x4 operator*(i32x4 const &lhs, i32x4 const &rhs) noexcept {
190 return _mm_mullo_epi32(lhs, rhs);
191 }
192
193 [[nodiscard]] friend i32x4 max(i32x4 const &lhs, i32x4 const &rhs) noexcept {
194 return _mm_max_epi32(lhs, rhs);
195 }
196
197 [[nodiscard]] friend i32x4 min(i32x4 const &lhs, i32x4 const &rhs) noexcept {
198 return _mm_min_epi32(lhs, rhs);
199 }
200
201 [[nodiscard]] friend bool operator==(i32x4 const &lhs, i32x4 const &rhs) noexcept {
202 ttlet tmp2 = _mm_movemask_epi8(_mm_cmpeq_epi32(lhs, rhs));
203 return tmp2 == 0xffff;
204 }
205
206 [[nodiscard]] friend bool operator!=(i32x4 const &lhs, i32x4 const &rhs) noexcept {
207 return !(lhs == rhs);
208 }
209
213 [[nodiscard]] friend int eq(i32x4 const &lhs, i32x4 const &rhs) noexcept {
214 return _mm_movemask_epi8(_mm_cmpeq_epi32(lhs, rhs));
215 }
216
220 [[nodiscard]] friend int operator<(i32x4 const &lhs, i32x4 const &rhs) noexcept {
221 return _mm_movemask_epi8(_mm_cmplt_epi32(lhs, rhs));
222 }
223
227 [[nodiscard]] friend int operator>(i32x4 const &lhs, i32x4 const &rhs) noexcept {
228 return _mm_movemask_epi8(_mm_cmpgt_epi32(lhs, rhs));
229 }
230
231 [[nodiscard]] friend int operator<=(i32x4 const &lhs, i32x4 const &rhs) noexcept {
232 return (~(lhs > rhs)) & 0xffff;
233 }
234
235 [[nodiscard]] friend int operator>=(i32x4 const &lhs, i32x4 const &rhs) noexcept {
236 return (~(lhs < rhs)) & 0xffff;
237 }
238
239 [[nodiscard]] friend std::string to_string(i32x4 const &rhs) noexcept {
240 return fmt::format("({}, {}, {}, {})", rhs.x(), rhs.y(), rhs.z(), rhs.w());
241 }
242
243 std::ostream friend &operator<<(std::ostream &lhs, i32x4 const &rhs) noexcept {
244 return lhs << to_string(rhs);
245 }
246
247 template<std::size_t I>
248 [[nodiscard]] friend int get(i32x4 const &rhs) noexcept {
249 return rhs.get<I>();
250 }
251
252 template<char a, char b, char c, char d>
253 [[nodiscard]] constexpr static int swizzle_permute_mask() noexcept {
254 int r = 0;
255 switch (a) {
256 case 'x': r |= 0b00'00'00'00; break;
257 case 'y': r |= 0b00'00'00'01; break;
258 case 'z': r |= 0b00'00'00'10; break;
259 case 'w': r |= 0b00'00'00'11; break;
260 case '0': r |= 0b00'00'00'00; break;
261 case '1': r |= 0b00'00'00'00; break;
262 }
263 switch (b) {
264 case 'x': r |= 0b00'00'00'00; break;
265 case 'y': r |= 0b00'00'01'00; break;
266 case 'z': r |= 0b00'00'10'00; break;
267 case 'w': r |= 0b00'00'11'00; break;
268 case '0': r |= 0b00'00'01'00; break;
269 case '1': r |= 0b00'00'01'00; break;
270 }
271 switch (c) {
272 case 'x': r |= 0b00'00'00'00; break;
273 case 'y': r |= 0b00'01'00'00; break;
274 case 'z': r |= 0b00'10'00'00; break;
275 case 'w': r |= 0b00'11'00'00; break;
276 case '0': r |= 0b00'10'00'00; break;
277 case '1': r |= 0b00'10'00'00; break;
278 }
279 switch (d) {
280 case 'x': r |= 0b00'00'00'00; break;
281 case 'y': r |= 0b01'00'00'00; break;
282 case 'z': r |= 0b10'00'00'00; break;
283 case 'w': r |= 0b11'00'00'00; break;
284 case '0': r |= 0b11'00'00'00; break;
285 case '1': r |= 0b11'00'00'00; break;
286 }
287 return r;
288 }
289
290 template<char a, char b, char c, char d>
291 [[nodiscard]] i32x4 swizzle() const noexcept {
292 constexpr int permute_mask = f32x4::swizzle_permute_mask<a,b,c,d>();
293
294 __m128i swizzled;
295 // Clang is able to optimize these intrinsics, MSVC is not.
296 if constexpr (permute_mask != 0b11'10'01'00) {
297 swizzled = _mm_shuffle_epi32(*this, permute_mask);
298 } else {
299 swizzled = *this;
300 }
301
302 if constexpr (a == '0' || a == '1') {
303 swizzled = _mm_insert_epi32(swizzled, a == '0' ? 0 : 1, 0);
304 }
305 if constexpr (b == '0' || b == '1') {
306 swizzled = _mm_insert_epi32(swizzled, b == '0' ? 0 : 1, 1);
307 }
308 if constexpr (c == '0' || c == '1') {
309 swizzled = _mm_insert_epi32(swizzled, c == '0' ? 0 : 1, 2);
310 }
311 if constexpr (d == '0' || d == '1') {
312 swizzled = _mm_insert_epi32(swizzled, d == '0' ? 0 : 1, 3);
313 }
314
315 return swizzled;
316 }
317
318#define SWIZZLE4(name, A, B, C, D)\
319 [[nodiscard]] i32x4 name() const noexcept {\
320 return swizzle<A, B, C, D>();\
321 }
322
323#define SWIZZLE4_GEN3(name, A, B, C)\
324 SWIZZLE4(name ## 0, A, B, C, '0')\
325 SWIZZLE4(name ## 1, A, B, C, '1')\
326 SWIZZLE4(name ## x, A, B, C, 'x')\
327 SWIZZLE4(name ## y, A, B, C, 'y')\
328 SWIZZLE4(name ## z, A, B, C, 'z')\
329 SWIZZLE4(name ## w, A, B, C, 'w')
330
331#define SWIZZLE4_GEN2(name, A, B)\
332 SWIZZLE4_GEN3(name ## 0, A, B, '0')\
333 SWIZZLE4_GEN3(name ## 1, A, B, '1')\
334 SWIZZLE4_GEN3(name ## x, A, B, 'x')\
335 SWIZZLE4_GEN3(name ## y, A, B, 'y')\
336 SWIZZLE4_GEN3(name ## z, A, B, 'z')\
337 SWIZZLE4_GEN3(name ## w, A, B, 'w')
338
339#define SWIZZLE4_GEN1(name, A)\
340 SWIZZLE4_GEN2(name ## 0, A, '0')\
341 SWIZZLE4_GEN2(name ## 1, A, '1')\
342 SWIZZLE4_GEN2(name ## x, A, 'x')\
343 SWIZZLE4_GEN2(name ## y, A, 'y')\
344 SWIZZLE4_GEN2(name ## z, A, 'z')\
345 SWIZZLE4_GEN2(name ## w, A, 'w')
346
347 SWIZZLE4_GEN1(_0, '0')
348 SWIZZLE4_GEN1(_1, '1')
349 SWIZZLE4_GEN1(x, 'x')
350 SWIZZLE4_GEN1(y, 'y')
351 SWIZZLE4_GEN1(z, 'z')
352 SWIZZLE4_GEN1(w, 'w')
353
354#define SWIZZLE3(name, A, B, C)\
355 [[nodiscard]] i32x4 name() const noexcept {\
356 return swizzle<A,B,C,'w'>();\
357 }
358
359#define SWIZZLE3_GEN2(name, A, B)\
360 SWIZZLE3(name ## 0, A, B, '0')\
361 SWIZZLE3(name ## 1, A, B, '1')\
362 SWIZZLE3(name ## x, A, B, 'x')\
363 SWIZZLE3(name ## y, A, B, 'y')\
364 SWIZZLE3(name ## z, A, B, 'z')\
365 SWIZZLE3(name ## w, A, B, 'w')
366
367#define SWIZZLE3_GEN1(name, A)\
368 SWIZZLE3_GEN2(name ## 0, A, '0')\
369 SWIZZLE3_GEN2(name ## 1, A, '1')\
370 SWIZZLE3_GEN2(name ## x, A, 'x')\
371 SWIZZLE3_GEN2(name ## y, A, 'y')\
372 SWIZZLE3_GEN2(name ## z, A, 'z')\
373 SWIZZLE3_GEN2(name ## w, A, 'w')
374
375 SWIZZLE3_GEN1(_0, '0')
376 SWIZZLE3_GEN1(_1, '1')
377 SWIZZLE3_GEN1(x, 'x')
378 SWIZZLE3_GEN1(y, 'y')
379 SWIZZLE3_GEN1(z, 'z')
380 SWIZZLE3_GEN1(w, 'w')
381
382#define SWIZZLE2(name, A, B)\
383 [[nodiscard]] i32x4 name() const noexcept {\
384 return swizzle<A,B,'0','w'>();\
385 }
386
387#define SWIZZLE2_GEN1(name, A)\
388 SWIZZLE2(name ## 0, A, '0')\
389 SWIZZLE2(name ## 1, A, '1')\
390 SWIZZLE2(name ## x, A, 'x')\
391 SWIZZLE2(name ## y, A, 'y')\
392 SWIZZLE2(name ## z, A, 'z')\
393 SWIZZLE2(name ## w, A, 'w')
394
395 SWIZZLE2_GEN1(_0, '0')
396 SWIZZLE2_GEN1(_1, '1')
397 SWIZZLE2_GEN1(x, 'x')
398 SWIZZLE2_GEN1(y, 'y')
399 SWIZZLE2_GEN1(z, 'z')
400 SWIZZLE2_GEN1(w, 'w')
401};
402
403}
404
// Retire the swizzle generator macros once the class has been defined, so the
// names are no longer defined for any code that follows this header.
#undef SWIZZLE2_GEN1
#undef SWIZZLE2
#undef SWIZZLE3_GEN2
#undef SWIZZLE3_GEN1
#undef SWIZZLE3
#undef SWIZZLE4_GEN3
#undef SWIZZLE4_GEN2
#undef SWIZZLE4_GEN1
#undef SWIZZLE4
A 4D vector.
Definition ivec.hpp:38
i32x4(__m128i rhs) noexcept
Create an i32x4 out of a __m128i.
Definition ivec.hpp:59
i32x4(T rhs) noexcept
Initialize an i32x4 with all elements set to a value.
Definition ivec.hpp:97
friend int operator<(i32x4 const &lhs, i32x4 const &rhs) noexcept
Less than.
Definition ivec.hpp:220
i32x4 & operator=(T rhs) noexcept
Set all elements of an i32x4 to a value.
Definition ivec.hpp:105
i32x4(T x, U y, V z=0, W w=0) noexcept
Create an i32x4 out of 2 to 4 values.
Definition ivec.hpp:116
i32x4 & operator=(__m128i rhs) noexcept
Assign a __m128i to an i32x4.
Definition ivec.hpp:64
friend int operator>(i32x4 const &lhs, i32x4 const &rhs) noexcept
Greater than.
Definition ivec.hpp:227
friend int eq(i32x4 const &lhs, i32x4 const &rhs) noexcept
Equal to.
Definition ivec.hpp:213
static i32x4 point(T x, U y, V z=0, W w=1) noexcept
Create an i32x4 point out of 2 to 4 values; w defaults to 1.
Definition ivec.hpp:131
T data(T... args)