HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
ivec.hpp
1// Copyright 2020 Pokitec
2// All rights reserved.
3
4#pragma once
5
6#include "TTauri/Foundation/required.hpp"
7#include "TTauri/Foundation/vec.hpp"
8#include <fmt/format.h>
9#include <xmmintrin.h>
10#include <immintrin.h>
11#include <smmintrin.h>
12#include <emmintrin.h>
13#include <cstdint>
14#include <stdexcept>
15#include <array>
16#include <type_traits>
17#include <ostream>
18
19namespace tt {
20
37class ivec {
38 /* Intrinsic value of the vec.
39 * The elements in __m128i are assigned as follows.
40 * - [127:96] w
41 * - [95:64] z
42 * - [63:32] y
43 * - [31:0] x
44 */
45 __m128i v;
46
47public:
48 /* Create a zeroed out vec.
49 */
50 tt_force_inline ivec() noexcept : ivec(_mm_setzero_si128()) {}
51 tt_force_inline ivec(ivec const &rhs) = default;
52 tt_force_inline ivec &operator=(ivec const &rhs) = default;
53 tt_force_inline ivec(ivec &&rhs) = default;
54 tt_force_inline ivec &operator=(ivec &&rhs) = default;
55
58 tt_force_inline ivec(__m128i rhs) noexcept :
59 v(rhs) {}
60
63 tt_force_inline ivec &operator=(__m128i rhs) noexcept {
64 v = rhs;
65 return *this;
66 }
67
70 tt_force_inline operator __m128i () const noexcept {
71 return v;
72 }
73
74 tt_force_inline ivec(vec const &rhs) noexcept :
75 ivec(_mm_cvtps_epi32(rhs)) {}
76
77 tt_force_inline ivec &operator=(vec const &rhs) noexcept {
78 return *this = _mm_cvtps_epi32(rhs);
79 }
80
81 tt_force_inline operator vec () const noexcept {
82 return _mm_cvtepi32_ps(*this);
83 }
84
85 explicit tt_force_inline operator std::array<int32_t,4> () const noexcept {
87 _mm_storeu_si128(reinterpret_cast<__m128i*>(r.data()), *this);
88 return r;
89 }
90
95 template<typename T, std::enable_if_t<std::is_arithmetic_v<T>,int> = 0>
96 explicit tt_force_inline ivec(T rhs) noexcept:
97 ivec(_mm_set1_epi32(numeric_cast<int32_t>(rhs))) {}
98
103 template<typename T, std::enable_if_t<std::is_arithmetic_v<T>,int> = 0>
104 tt_force_inline ivec &operator=(T rhs) noexcept {
105 return *this = _mm_set1_epi32(numeric_cast<int32_t>(rhs));
106 }
107
113 template<typename T, typename U, typename V=int, typename W=int,
114 std::enable_if_t<std::is_arithmetic_v<T> && std::is_arithmetic_v<U> && std::is_arithmetic_v<V> && std::is_arithmetic_v<W>,int> = 0>
115 tt_force_inline ivec(T x, U y, V z=0, W w=0) noexcept :
116 ivec(_mm_set_epi32(
117 numeric_cast<int32_t>(w),
118 numeric_cast<int32_t>(z),
119 numeric_cast<int32_t>(y),
120 numeric_cast<int32_t>(x)
121 )) {}
122
128 template<typename T, typename U, typename V=int, typename W=int,
129 std::enable_if_t<std::is_arithmetic_v<T> && std::is_arithmetic_v<U> && std::is_arithmetic_v<V> && std::is_arithmetic_v<W>,int> = 0>
130 [[nodiscard]] tt_force_inline static ivec point(T x, U y, V z=0, W w=1) noexcept {
131 return ivec(x, y, z, w);
132 }
133
134 template<size_t I, typename T, std::enable_if_t<std::is_arithmetic_v<T>,int> = 0>
135 tt_force_inline ivec &set(T rhs) noexcept {
136 static_assert(I <= 3);
137 return *this = _mm_insert_epi32(*this, numeric_cast<int32_t>(rhs), I);
138 }
139
140 template<size_t I>
141 tt_force_inline int get() const noexcept {
142 static_assert(I <= 3);
143 return _mm_extract_epi32(*this, I);
144 }
145
146 constexpr size_t size() const noexcept { return 4; }
147
148 template<typename T, std::enable_if_t<std::is_arithmetic_v<T>,int> = 0>
149 tt_force_inline ivec &x(T rhs) noexcept { return set<0>(rhs); }
150
151 template<typename T, std::enable_if_t<std::is_arithmetic_v<T>,int> = 0>
152 tt_force_inline ivec &y(T rhs) noexcept { return set<1>(rhs); }
153
154 template<typename T, std::enable_if_t<std::is_arithmetic_v<T>,int> = 0>
155 tt_force_inline ivec &z(T rhs) noexcept { return set<2>(rhs); }
156
157 template<typename T, std::enable_if_t<std::is_arithmetic_v<T>,int> = 0>
158 tt_force_inline ivec &w(T rhs) noexcept { return set<3>(rhs); }
159
160 tt_force_inline int x() const noexcept { return get<0>(); }
161 tt_force_inline int y() const noexcept { return get<1>(); }
162 tt_force_inline int z() const noexcept { return get<2>(); }
163 tt_force_inline int w() const noexcept { return get<3>(); }
164 tt_force_inline int width() const noexcept { return get<0>(); }
165 tt_force_inline int height() const noexcept { return get<1>(); }
166
167 tt_force_inline ivec &operator+=(ivec const &rhs) noexcept {
168 return *this = _mm_add_epi32(*this, rhs);
169 }
170
171 tt_force_inline ivec &operator-=(ivec const &rhs) noexcept {
172 return *this = _mm_sub_epi32(*this, rhs);
173 }
174
175 tt_force_inline ivec &operator*=(ivec const &rhs) noexcept {
176 return *this = _mm_mullo_epi32(*this, rhs);
177 }
178
179
180 [[nodiscard]] tt_force_inline friend ivec operator+(ivec const &lhs, ivec const &rhs) noexcept {
181 return _mm_add_epi32(lhs, rhs);
182 }
183
184 [[nodiscard]] tt_force_inline friend ivec operator-(ivec const &lhs, ivec const &rhs) noexcept {
185 return _mm_sub_epi32(lhs, rhs);
186 }
187
188 [[nodiscard]] tt_force_inline friend ivec operator*(ivec const &lhs, ivec const &rhs) noexcept {
189 return _mm_mullo_epi32(lhs, rhs);
190 }
191
192 [[nodiscard]] tt_force_inline friend ivec max(ivec const &lhs, ivec const &rhs) noexcept {
193 return _mm_max_epi32(lhs, rhs);
194 }
195
196 [[nodiscard]] tt_force_inline friend ivec min(ivec const &lhs, ivec const &rhs) noexcept {
197 return _mm_min_epi32(lhs, rhs);
198 }
199
200 [[nodiscard]] tt_force_inline friend bool operator==(ivec const &lhs, ivec const &rhs) noexcept {
201 ttlet tmp2 = _mm_movemask_epi8(_mm_cmpeq_epi32(lhs, rhs));
202 return tmp2 == 0xffff;
203 }
204
205 [[nodiscard]] tt_force_inline friend bool operator!=(ivec const &lhs, ivec const &rhs) noexcept {
206 return !(lhs == rhs);
207 }
208
212 [[nodiscard]] tt_force_inline friend int eq(ivec const &lhs, ivec const &rhs) noexcept {
213 return _mm_movemask_epi8(_mm_cmpeq_epi32(lhs, rhs));
214 }
215
219 [[nodiscard]] tt_force_inline friend int operator<(ivec const &lhs, ivec const &rhs) noexcept {
220 return _mm_movemask_epi8(_mm_cmplt_epi32(lhs, rhs));
221 }
222
226 [[nodiscard]] tt_force_inline friend int operator>(ivec const &lhs, ivec const &rhs) noexcept {
227 return _mm_movemask_epi8(_mm_cmpgt_epi32(lhs, rhs));
228 }
229
230 [[nodiscard]] tt_force_inline friend int operator<=(ivec const &lhs, ivec const &rhs) noexcept {
231 return (~(lhs > rhs)) & 0xffff;
232 }
233
234 [[nodiscard]] tt_force_inline friend int operator>=(ivec const &lhs, ivec const &rhs) noexcept {
235 return (~(lhs < rhs)) & 0xffff;
236 }
237
238 [[nodiscard]] friend std::string to_string(ivec const &rhs) noexcept {
239 return fmt::format("({}, {}, {}, {})", rhs.x(), rhs.y(), rhs.z(), rhs.w());
240 }
241
242 std::ostream friend &operator<<(std::ostream &lhs, ivec const &rhs) noexcept {
243 return lhs << to_string(rhs);
244 }
245
246 template<std::size_t I>
247 [[nodiscard]] tt_force_inline friend int get(ivec const &rhs) noexcept {
248 return rhs.get<I>();
249 }
250
251 template<char a, char b, char c, char d>
252 [[nodiscard]] constexpr static int swizzle_permute_mask() noexcept {
253 int r = 0;
254 switch (a) {
255 case 'x': r |= 0b00'00'00'00; break;
256 case 'y': r |= 0b00'00'00'01; break;
257 case 'z': r |= 0b00'00'00'10; break;
258 case 'w': r |= 0b00'00'00'11; break;
259 case '0': r |= 0b00'00'00'00; break;
260 case '1': r |= 0b00'00'00'00; break;
261 }
262 switch (b) {
263 case 'x': r |= 0b00'00'00'00; break;
264 case 'y': r |= 0b00'00'01'00; break;
265 case 'z': r |= 0b00'00'10'00; break;
266 case 'w': r |= 0b00'00'11'00; break;
267 case '0': r |= 0b00'00'01'00; break;
268 case '1': r |= 0b00'00'01'00; break;
269 }
270 switch (c) {
271 case 'x': r |= 0b00'00'00'00; break;
272 case 'y': r |= 0b00'01'00'00; break;
273 case 'z': r |= 0b00'10'00'00; break;
274 case 'w': r |= 0b00'11'00'00; break;
275 case '0': r |= 0b00'10'00'00; break;
276 case '1': r |= 0b00'10'00'00; break;
277 }
278 switch (d) {
279 case 'x': r |= 0b00'00'00'00; break;
280 case 'y': r |= 0b01'00'00'00; break;
281 case 'z': r |= 0b10'00'00'00; break;
282 case 'w': r |= 0b11'00'00'00; break;
283 case '0': r |= 0b11'00'00'00; break;
284 case '1': r |= 0b11'00'00'00; break;
285 }
286 return r;
287 }
288
289 template<char a, char b, char c, char d>
290 [[nodiscard]] tt_force_inline ivec swizzle() const noexcept {
291 constexpr int permute_mask = vec::swizzle_permute_mask<a,b,c,d>();
292
293 __m128i swizzled;
294 // Clang is able to optimize these intrinsics, MSVC is not.
295 if constexpr (permute_mask != 0b11'10'01'00) {
296 swizzled = _mm_shuffle_epi32(*this, permute_mask);
297 } else {
298 swizzled = *this;
299 }
300
301 if constexpr (a == '0' || a == '1') {
302 swizzled = _mm_insert_epi32(swizzled, a == '0' ? 0 : 1, 0);
303 }
304 if constexpr (b == '0' || b == '1') {
305 swizzled = _mm_insert_epi32(swizzled, b == '0' ? 0 : 1, 1);
306 }
307 if constexpr (c == '0' || c == '1') {
308 swizzled = _mm_insert_epi32(swizzled, c == '0' ? 0 : 1, 2);
309 }
310 if constexpr (d == '0' || d == '1') {
311 swizzled = _mm_insert_epi32(swizzled, d == '0' ? 0 : 1, 3);
312 }
313
314 return swizzled;
315 }
316
317#define SWIZZLE4(name, A, B, C, D)\
318 [[nodiscard]] ivec name() const noexcept {\
319 return swizzle<A, B, C, D>();\
320 }
321
322#define SWIZZLE4_GEN3(name, A, B, C)\
323 SWIZZLE4(name ## 0, A, B, C, '0')\
324 SWIZZLE4(name ## 1, A, B, C, '1')\
325 SWIZZLE4(name ## x, A, B, C, 'x')\
326 SWIZZLE4(name ## y, A, B, C, 'y')\
327 SWIZZLE4(name ## z, A, B, C, 'z')\
328 SWIZZLE4(name ## w, A, B, C, 'w')
329
330#define SWIZZLE4_GEN2(name, A, B)\
331 SWIZZLE4_GEN3(name ## 0, A, B, '0')\
332 SWIZZLE4_GEN3(name ## 1, A, B, '1')\
333 SWIZZLE4_GEN3(name ## x, A, B, 'x')\
334 SWIZZLE4_GEN3(name ## y, A, B, 'y')\
335 SWIZZLE4_GEN3(name ## z, A, B, 'z')\
336 SWIZZLE4_GEN3(name ## w, A, B, 'w')
337
338#define SWIZZLE4_GEN1(name, A)\
339 SWIZZLE4_GEN2(name ## 0, A, '0')\
340 SWIZZLE4_GEN2(name ## 1, A, '1')\
341 SWIZZLE4_GEN2(name ## x, A, 'x')\
342 SWIZZLE4_GEN2(name ## y, A, 'y')\
343 SWIZZLE4_GEN2(name ## z, A, 'z')\
344 SWIZZLE4_GEN2(name ## w, A, 'w')
345
346 SWIZZLE4_GEN1(_0, '0')
347 SWIZZLE4_GEN1(_1, '1')
348 SWIZZLE4_GEN1(x, 'x')
349 SWIZZLE4_GEN1(y, 'y')
350 SWIZZLE4_GEN1(z, 'z')
351 SWIZZLE4_GEN1(w, 'w')
352
353#define SWIZZLE3(name, A, B, C)\
354 [[nodiscard]] ivec name() const noexcept {\
355 return swizzle<A,B,C,'w'>();\
356 }
357
358#define SWIZZLE3_GEN2(name, A, B)\
359 SWIZZLE3(name ## 0, A, B, '0')\
360 SWIZZLE3(name ## 1, A, B, '1')\
361 SWIZZLE3(name ## x, A, B, 'x')\
362 SWIZZLE3(name ## y, A, B, 'y')\
363 SWIZZLE3(name ## z, A, B, 'z')\
364 SWIZZLE3(name ## w, A, B, 'w')
365
366#define SWIZZLE3_GEN1(name, A)\
367 SWIZZLE3_GEN2(name ## 0, A, '0')\
368 SWIZZLE3_GEN2(name ## 1, A, '1')\
369 SWIZZLE3_GEN2(name ## x, A, 'x')\
370 SWIZZLE3_GEN2(name ## y, A, 'y')\
371 SWIZZLE3_GEN2(name ## z, A, 'z')\
372 SWIZZLE3_GEN2(name ## w, A, 'w')
373
374 SWIZZLE3_GEN1(_0, '0')
375 SWIZZLE3_GEN1(_1, '1')
376 SWIZZLE3_GEN1(x, 'x')
377 SWIZZLE3_GEN1(y, 'y')
378 SWIZZLE3_GEN1(z, 'z')
379 SWIZZLE3_GEN1(w, 'w')
380
381#define SWIZZLE2(name, A, B)\
382 [[nodiscard]] ivec name() const noexcept {\
383 return swizzle<A,B,'0','w'>();\
384 }
385
386#define SWIZZLE2_GEN1(name, A)\
387 SWIZZLE2(name ## 0, A, '0')\
388 SWIZZLE2(name ## 1, A, '1')\
389 SWIZZLE2(name ## x, A, 'x')\
390 SWIZZLE2(name ## y, A, 'y')\
391 SWIZZLE2(name ## z, A, 'z')\
392 SWIZZLE2(name ## w, A, 'w')
393
394 SWIZZLE2_GEN1(_0, '0')
395 SWIZZLE2_GEN1(_1, '1')
396 SWIZZLE2_GEN1(x, 'x')
397 SWIZZLE2_GEN1(y, 'y')
398 SWIZZLE2_GEN1(z, 'z')
399 SWIZZLE2_GEN1(w, 'w')
400};
401
402}
403
// The swizzle generator macros are only needed while defining ivec;
// remove them so they do not leak into files that include this header.
#undef SWIZZLE2_GEN1
#undef SWIZZLE2
#undef SWIZZLE3_GEN2
#undef SWIZZLE3_GEN1
#undef SWIZZLE3
#undef SWIZZLE4_GEN3
#undef SWIZZLE4_GEN2
#undef SWIZZLE4_GEN1
#undef SWIZZLE4
A 4D vector.
Definition ivec.hpp:37
static tt_force_inline ivec point(T x, U y, V z=0, W w=1) noexcept
Create an ivec out of 2 to 4 values.
Definition ivec.hpp:130
tt_force_inline friend int operator>(ivec const &lhs, ivec const &rhs) noexcept
Greater than.
Definition ivec.hpp:226
tt_force_inline ivec(T x, U y, V z=0, W w=0) noexcept
Create an ivec out of 2 to 4 values.
Definition ivec.hpp:115
tt_force_inline ivec & operator=(__m128i rhs) noexcept
Assign a __m128i to an ivec.
Definition ivec.hpp:63
tt_force_inline ivec(__m128i rhs) noexcept
Create an ivec out of a __m128i.
Definition ivec.hpp:58
tt_force_inline friend int operator<(ivec const &lhs, ivec const &rhs) noexcept
Less than.
Definition ivec.hpp:219
tt_force_inline ivec & operator=(T rhs) noexcept
Set all elements of an ivec to a value.
Definition ivec.hpp:104
tt_force_inline ivec(T rhs) noexcept
Initialize an ivec with all elements set to a value.
Definition ivec.hpp:96
tt_force_inline friend int eq(ivec const &lhs, ivec const &rhs) noexcept
Equal to.
Definition ivec.hpp:212
A 4D vector.
Definition vec.hpp:37
T data(T... args)