HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
numeric_array.hpp
1// Copyright Take Vos 2020-2021.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "../architecture.hpp"
8#include "../concepts.hpp"
9#include "../cast.hpp"
10#include "../type_traits.hpp"
11#include "raw_numeric_array.hpp"
12#if TT_X86_64_V2
13#include "f32x4_x64v2.hpp"
14#include "i8x16_x64v2.hpp"
15#endif
16
17#if TT_X86_64_V1
18#include <xmmintrin.h> // SSE
19#include <emmintrin.h> // SSE2
20#endif
21#if TT_X86_64_V2
22#include <pmmintrin.h> // SSE3
23#include <tmmintrin.h> // SSSE3
24#include <smmintrin.h> // SSE4.1
25#include <nmmintrin.h> // SSE4.2
26#include <ammintrin.h> // SSE4A
27#endif
28#if TT_X86_64_V2_5
29#include <immintrin.h> // AVX, AVX2, FMA
30#endif
31
32#include <cstdint>
33#include <ostream>
34#include <string>
35#include <array>
36#include <type_traits>
37#include <concepts>
38#include <bit>
39#include <climits>
40
41namespace tt {
42
43template<arithmetic T, size_t N>
45public:
47 using value_type = typename container_type::value_type;
48 using size_type = typename container_type::size_type;
49 using difference_type = typename container_type::difference_type;
50 using reference = typename container_type::reference;
51 using const_reference = typename container_type::const_reference;
52 using pointer = typename container_type::pointer;
53 using const_pointer = typename container_type::const_pointer;
54 using iterator = typename container_type::iterator;
55 using const_iterator = typename container_type::const_iterator;
56
57 constexpr static bool is_i8x1 = std::is_same_v<T, int8_t> && N == 1;
58 constexpr static bool is_i8x2 = std::is_same_v<T, int8_t> && N == 2;
59 constexpr static bool is_i8x4 = std::is_same_v<T, int8_t> && N == 4;
60 constexpr static bool is_i8x8 = std::is_same_v<T, int8_t> && N == 8;
61 constexpr static bool is_i8x16 = std::is_same_v<T, int8_t> && N == 16;
62 constexpr static bool is_i8x32 = std::is_same_v<T, int8_t> && N == 32;
63 constexpr static bool is_i8x64 = std::is_same_v<T, int8_t> && N == 64;
64 constexpr static bool is_u8x1 = std::is_same_v<T, uint8_t> && N == 1;
65 constexpr static bool is_u8x2 = std::is_same_v<T, uint8_t> && N == 2;
66 constexpr static bool is_u8x4 = std::is_same_v<T, uint8_t> && N == 4;
67 constexpr static bool is_u8x8 = std::is_same_v<T, uint8_t> && N == 8;
68 constexpr static bool is_u8x16 = std::is_same_v<T, uint8_t> && N == 16;
69 constexpr static bool is_u8x32 = std::is_same_v<T, uint8_t> && N == 32;
70 constexpr static bool is_u8x64 = std::is_same_v<T, uint8_t> && N == 64;
71
72 constexpr static bool is_i16x1 = std::is_same_v<T, int16_t> && N == 1;
73 constexpr static bool is_i16x2 = std::is_same_v<T, int16_t> && N == 2;
74 constexpr static bool is_i16x4 = std::is_same_v<T, int16_t> && N == 4;
75 constexpr static bool is_i16x8 = std::is_same_v<T, int16_t> && N == 8;
76 constexpr static bool is_i16x16 = std::is_same_v<T, int16_t> && N == 16;
77 constexpr static bool is_i16x32 = std::is_same_v<T, int16_t> && N == 32;
78 constexpr static bool is_u16x1 = std::is_same_v<T, uint16_t> && N == 1;
79 constexpr static bool is_u16x2 = std::is_same_v<T, uint16_t> && N == 2;
80 constexpr static bool is_u16x4 = std::is_same_v<T, uint16_t> && N == 4;
81 constexpr static bool is_u16x8 = std::is_same_v<T, uint16_t> && N == 8;
82 constexpr static bool is_u16x16 = std::is_same_v<T, uint16_t> && N == 16;
83 constexpr static bool is_u16x32 = std::is_same_v<T, uint16_t> && N == 32;
84
85 constexpr static bool is_i32x1 = std::is_same_v<T, int32_t> && N == 1;
86 constexpr static bool is_i32x2 = std::is_same_v<T, int32_t> && N == 2;
87 constexpr static bool is_i32x4 = std::is_same_v<T, int32_t> && N == 4;
88 constexpr static bool is_i32x8 = std::is_same_v<T, int32_t> && N == 8;
89 constexpr static bool is_i32x16 = std::is_same_v<T, int32_t> && N == 16;
90 constexpr static bool is_u32x1 = std::is_same_v<T, uint32_t> && N == 1;
91 constexpr static bool is_u32x2 = std::is_same_v<T, uint32_t> && N == 2;
92 constexpr static bool is_u32x4 = std::is_same_v<T, uint32_t> && N == 4;
93 constexpr static bool is_u32x8 = std::is_same_v<T, uint32_t> && N == 8;
94 constexpr static bool is_u32x16 = std::is_same_v<T, uint32_t> && N == 16;
95 constexpr static bool is_f32x1 = std::is_same_v<T, float> && N == 1;
96 constexpr static bool is_f32x2 = std::is_same_v<T, float> && N == 2;
97 constexpr static bool is_f32x4 = std::is_same_v<T, float> && N == 4;
98 constexpr static bool is_f32x8 = std::is_same_v<T, float> && N == 8;
99 constexpr static bool is_f32x16 = std::is_same_v<T, float> && N == 16;
100
101 constexpr static bool is_i64x1 = std::is_same_v<T, int64_t> && N == 1;
102 constexpr static bool is_i64x2 = std::is_same_v<T, int64_t> && N == 2;
103 constexpr static bool is_i64x4 = std::is_same_v<T, int64_t> && N == 4;
104 constexpr static bool is_i64x8 = std::is_same_v<T, int64_t> && N == 8;
105 constexpr static bool is_u64x1 = std::is_same_v<T, uint64_t> && N == 1;
106 constexpr static bool is_u64x2 = std::is_same_v<T, uint64_t> && N == 2;
107 constexpr static bool is_u64x4 = std::is_same_v<T, uint64_t> && N == 4;
108 constexpr static bool is_u64x8 = std::is_same_v<T, uint64_t> && N == 8;
109 constexpr static bool is_f64x1 = std::is_same_v<T, double> && N == 1;
110 constexpr static bool is_f64x2 = std::is_same_v<T, double> && N == 2;
111 constexpr static bool is_f64x4 = std::is_same_v<T, double> && N == 4;
112 constexpr static bool is_f64x8 = std::is_same_v<T, double> && N == 8;
113
114 constexpr numeric_array() noexcept = default;
115 constexpr numeric_array(numeric_array const &rhs) noexcept = default;
116 constexpr numeric_array(numeric_array &&rhs) noexcept = default;
117 constexpr numeric_array &operator=(numeric_array const &rhs) noexcept = default;
118 constexpr numeric_array &operator=(numeric_array &&rhs) noexcept = default;
119
120 template<arithmetic U, size_t M>
121 [[nodiscard]] constexpr explicit numeric_array(numeric_array<U, M> const &other) noexcept : v()
122 {
123 if (!std::is_constant_evaluated()) {
124 if constexpr (x86_64_v2 and is_f64x2 and other.is_i32x4) {
125 *this = numeric_array{_mm_cvtepi32_pd(other.reg())};
126 return;
127 } else if constexpr (x86_64_v2 and is_f32x4 and other.is_i32x4) {
128 *this = numeric_array{_mm_cvtepi32_ps(other.reg())};
129 return;
130 } else if constexpr (x86_64_v2 and is_i64x4 and other.is_i32x4) {
131 *this = numeric_array{_mm_cvtepi32_epi64(other.reg())};
132 return;
133 } else if constexpr (x86_64_v2 and is_i64x4 and other.is_i16x8) {
134 *this = numeric_array{_mm_cvtepi16_epi64(other.reg())};
135 return;
136 } else if constexpr (x86_64_v2 and is_i32x4 and other.is_i16x8) {
137 *this = numeric_array{_mm_cvtepi16_epi32(other.reg())};
138 return;
139 } else if constexpr (x86_64_v2 and is_i64x2 and other.is_i8x16) {
140 *this = numeric_array{_mm_cvtepi8_epi64(other.reg())};
141 return;
142 } else if constexpr (x86_64_v2 and is_i32x4 and other.is_f32x4) {
143 *this = numeric_array{_mm_cvtps_epi32(other.reg())};
144 return;
145 } else if constexpr (x86_64_v2 and is_i32x4 and other.is_i8x16) {
146 *this = numeric_array{_mm_cvtepi8_epi32(other.reg())};
147 return;
148 } else if constexpr (x86_64_v2 and is_i16x8 and other.is_i8x16) {
149 *this = numeric_array{_mm_cvtepi8_epi16(other.reg())};
150 return;
151 } else if constexpr (x86_64_v2_5 and is_f64x4 and other.is_f32x4) {
152 *this = numeric_array{_mm256_cvteps_pd(other.reg())};
153 return;
154 } else if constexpr (x86_64_v2_5 and is_f64x4 and other.is_i32x4) {
155 *this = numeric_array{_mm256_cvtepi32_pd(other.reg())};
156 return;
157 } else if constexpr (x86_64_v2_5 and is_f32x4 and other.is_f64x4) {
158 *this = numeric_array{_mm256_cvtpd_ps(other.reg())};
159 return;
160 } else if constexpr (x86_64_v2_5 and is_i32x4 and other.is_f64x4) {
161 *this = numeric_array{_mm256_cvtpd_epi32(other.reg())};
162 return;
163 } else if constexpr (x86_64_v2_5 and is_i32x8 and other.is_f32x8) {
164 *this = numeric_array{_mm256_cvtps_epi32(other.reg())};
165 return;
166 } else if constexpr (x86_64_v2_5 and is_f32x8 and other.is_i32x8) {
167 *this = numeric_array{_mm256_cvtepi32_ps(other.reg())};
168 return;
169 }
170 }
171
172 for (size_t i = 0; i != N; ++i) {
173 if (i < M) {
174 if constexpr (std::is_integral_v<T> and std::is_floating_point_v<U>) {
175 // SSE conversion round floats before converting to integer.
176 v[i] = static_cast<value_type>(std::round(other[i]));
177 } else {
178 v[i] = static_cast<value_type>(other[i]);
179 }
180 } else {
181 v[i] = T{};
182 }
183 }
184 }
185
186 template<arithmetic U, size_t M>
187 [[nodiscard]] constexpr explicit numeric_array(numeric_array<U, M> const &other1, numeric_array<U, M> const &other2) noexcept
188 :
189 v()
190 {
191 if (!std::is_constant_evaluated()) {
192 if constexpr (x86_64_v2_5 and is_f32x8 and other1.is_f32x4 and other2.is_f32x4) {
193 *this = numeric_array{_mm256_set_m128(other2.reg(), other1.reg())};
194 return;
195 } else if constexpr (x86_64_v2_5 and is_f64x4 and other1.is_f64x2 and other2.is_f64x2) {
196 *this = numeric_array{_mm256_set_m128d(other2.reg(), other1.reg())};
197 return;
198 } else if constexpr (
199 x86_64_v2_5 and std::is_integral_v<T> and std::is_integral_v<U> and (sizeof(T) * N == 32) and
200 (sizeof(U) * M == 16)) {
201 *this = numeric_array{_mm256_set_m128i(other2.reg(), other1.reg())};
202 return;
203 } else if constexpr (x86_64_v2 and is_i16x8 and other1.is_i32x4 and other2.is_i32x4) {
204 *this = numeric_array{_mm_packs_epi32(other2.reg(), other1.reg())};
205 return;
206 } else if constexpr (x86_64_v2 and is_i8x16 and other1.is_i16x8 and other2.is_i16x8) {
207 *this = numeric_array{_mm_packs_epi16(other2.reg(), other1.reg())};
208 return;
209 } else if constexpr (x86_64_v2 and is_u16x8 and other1.is_u32x4 and other2.is_u32x4) {
210 *this = numeric_array{_mm_packus_epu32(other2.reg(), other1.reg())};
211 return;
212 } else if constexpr (x86_64_v2 and is_u8x16 and other1.is_u16x8 and other2.is_u16x8) {
213 *this = numeric_array{_mm_packus_epu16(other2.reg(), other1.reg())};
214 return;
215 }
216 }
217
218 for (size_t i = 0; i != N; ++i) {
219 if (i < M) {
220 if constexpr (std::is_integral_v<T> and std::is_floating_point_v<U>) {
221 // SSE conversion round floats before converting to integer.
222 v[i] = static_cast<value_type>(std::round(other1[i]));
223 } else {
224 v[i] = static_cast<value_type>(other1[i]);
225 }
226 } else if (i < M * 2) {
227 if constexpr (std::is_integral_v<T> and std::is_floating_point_v<U>) {
228 // SSE conversion round floats before converting to integer.
229 v[i] = static_cast<value_type>(std::round(other2[i - M]));
230 } else {
231 v[i] = static_cast<value_type>(other2[i - M]);
232 }
233 } else {
234 v[i] = U{};
235 }
236 }
237 }
238
239 [[nodiscard]] constexpr numeric_array(std::initializer_list<T> rhs) noexcept : v()
240 {
241 auto src = std::begin(rhs);
242 auto dst = std::begin(v);
243
244 // Copy all values from the initializer list.
245 while (src != std::end(rhs) && dst != std::end(v)) {
246 *(dst++) = *(src++);
247 }
248
249 tt_axiom(
250 dst != std::end(v) || src == std::end(rhs),
251 "Expecting the std:initializer_list size to be <= to the size of the numeric array");
252
253 // Set all other elements to zero
254 while (dst != std::end(v)) {
255 *(dst++) = {};
256 }
257 }
258
259 [[nodiscard]] constexpr numeric_array(T const &first) noexcept requires(N == 1) : numeric_array({first}) {}
260
261 template<arithmetic... Rest>
262 requires(sizeof...(Rest) + 2 <= N)
263 [[nodiscard]] constexpr numeric_array(T const &first, T const &second, Rest const &...rest) noexcept :
264 numeric_array({first, second, narrow_cast<T>(rest)...})
265 {
266 }
267
268 [[nodiscard]] static constexpr numeric_array broadcast(T rhs) noexcept
269 {
270 auto r = numeric_array{};
271 for (size_t i = 0; i != N; ++i) {
272 r[i] = rhs;
273 }
274 return r;
275 }
276
277 [[nodiscard]] numeric_array(std::array<T, N> const &rhs) noexcept : v(rhs) {}
278
279 numeric_array &operator=(std::array<T, N> const &rhs) noexcept
280 {
281 v = rhs;
282 return *this;
283 }
284
285 [[nodiscard]] operator std::array<T, N>() const noexcept
286 {
287 return v;
288 }
289
290 [[nodiscard]] __m128i reg() const noexcept requires(x86_64_v2 and std::is_integral_v<T> and sizeof(T) * N == 16)
291 {
292 return _mm_loadu_si128(reinterpret_cast<__m128i const *>(v.data()));
293 }
294
295 [[nodiscard]] __m128 reg() const noexcept requires(x86_64_v2 and is_f32x4)
296 {
297 return _mm_loadu_ps(v.data());
298 }
299
300 [[nodiscard]] __m128d reg() const noexcept requires(x86_64_v2 and is_f64x2)
301 {
302 return _mm_loadu_pd(v.data());
303 }
304
305 [[nodiscard]] explicit numeric_array(__m128i const &rhs) noexcept
306 requires(x86_64_v2 and std::is_integral_v<T> and sizeof(T) * N == 16)
307 {
308 _mm_storeu_si128(reinterpret_cast<__m128i *>(v.data()), rhs);
309 }
310
311 [[nodiscard]] explicit numeric_array(__m128 const &rhs) noexcept requires(x86_64_v2 and is_f32x4)
312 {
313 _mm_storeu_ps(v.data(), rhs);
314 }
315
316 [[nodiscard]] explicit numeric_array(__m128d const &rhs) noexcept requires(x86_64_v2 and is_f64x2)
317 {
318 _mm_storeu_pd(v.data(), rhs);
319 }
320
321 numeric_array &operator=(__m128i const &rhs) noexcept requires(x86_64_v2 and std::is_integral_v<T> and sizeof(T) * N == 16)
322 {
323 _mm_storeu_si128(reinterpret_cast<__m128i *>(v.data()), rhs);
324 return *this;
325 }
326
327 numeric_array &operator=(__m128 const &rhs) noexcept requires(x86_64_v2 and is_f32x4)
328 {
329 _mm_storeu_ps(v.data(), rhs);
330 return *this;
331 }
332
333 numeric_array &operator=(__m128d const &rhs) noexcept requires(x86_64_v2 and is_f64x2)
334 {
335 _mm_storeu_pd(v.data(), rhs);
336 return *this;
337 }
338
339 [[nodiscard]] __m256i reg() const noexcept requires(x86_64_v2_5 and std::is_integral_v<T> and sizeof(T) * N == 32)
340 {
341 return _mm256_loadu_si256(reinterpret_cast<__m256i const *>(v.data()));
342 }
343
344 [[nodiscard]] __m256 reg() const noexcept requires(x86_64_v2_5 and is_f32x8)
345 {
346 return _mm256_loadu_ps(v.data());
347 }
348
349 [[nodiscard]] __m256d reg() const noexcept requires(x86_64_v2_5 and is_f64x4)
350 {
351 return _mm256_loadu_pd(v.data());
352 }
353
354 [[nodiscard]] explicit numeric_array(__m256i const &rhs) noexcept
355 requires(x86_64_v2_5 and std::is_integral_v<T> and sizeof(T) * N == 32)
356 {
357 _mm256_storeu_si256(reinterpret_cast<__m256i *>(v.data()), rhs);
358 }
359
360 [[nodiscard]] explicit numeric_array(__m256 const &rhs) noexcept requires(x86_64_v2_5 and is_f32x8)
361 {
362 _mm256_storeu_ps(v.data(), rhs);
363 }
364
365 [[nodiscard]] explicit numeric_array(__m256d const &rhs) noexcept requires(x86_64_v2_5 and is_f64x4)
366 {
367 _mm256_storeu_pd(v.data(), rhs);
368 }
369
370 numeric_array &operator=(__m256i const &rhs) noexcept requires(x86_64_v2_5 and std::is_integral_v<T> and sizeof(T) * N == 32)
371 {
372 _mm256_storeu_si256(reinterpret_cast<__m256i *>(v.data()), rhs);
373 return *this;
374 }
375
376 numeric_array &operator=(__m256 const &rhs) noexcept requires(x86_64_v2_5 and is_f32x8)
377 {
378 _mm256_storeu_ps(v.data(), rhs);
379 return *this;
380 }
381
382 numeric_array &operator=(__m256d const &rhs) noexcept requires(x86_64_v2_5 and is_f64x4)
383 {
384 _mm256_storeu_pd(v.data(), rhs);
385 return *this;
386 }
387
388 template<typename Other>
389 requires(sizeof(Other) == sizeof(numeric_array)) [[nodiscard]] constexpr friend Other
390 bit_cast(numeric_array const &rhs) noexcept
391 {
392 using rhs_value_type = typename std::remove_cvref_t<decltype(rhs)>::value_type;
393
394 if (not std::is_constant_evaluated()) {
395 if constexpr (Other::is_f32x4 and std::is_integral_v<rhs_value_type> and x86_64_v2) {
396 return Other{_mm_castsi128_ps(rhs.reg())};
397 } else if constexpr (Other::is_f32x4 and rhs.is_f64x2 and x86_64_v2) {
398 return Other{_mm_castpd_ps(rhs.reg())};
399 } else if constexpr (Other::is_f64x2 and std::is_integral_v<rhs_value_type> and x86_64_v2) {
400 return Other{_mm_castsi128_pd(rhs.reg())};
401 } else if constexpr (Other::is_f64x2 and rhs.is_f32x4 and x86_64_v2) {
402 return Other{_mm_castps_pd(rhs.reg())};
403 } else if constexpr (std::is_integral_v<Other::value_type> and rhs.is_f32x4 and x86_64_v2) {
404 return Other{_mm_castps_si128(rhs.reg())};
405 } else if constexpr (std::is_integral_v<Other::value_type> and rhs.is_f64x2 and x86_64_v2) {
406 return Other{_mm_castpd_si128(rhs.reg())};
407 } else if constexpr (std::is_integral_v<Other::value_type> and std::is_integral_v<rhs_value_type> and x86_64_v2) {
408 return Other{rhs.reg()};
409 }
410 }
411 return std::bit_cast<Other>(rhs);
412 }
413
417 {
418 if (not std::is_constant_evaluated()) {
419 if constexpr (x86_64_v2 and is_f64x2) {
420 return numeric_array{_mm_unpacklo_pd(a.reg(), b.reg())};
421 } else if constexpr (x86_64_v2 and is_f32x4) {
422 return numeric_array{_mm_unpacklo_ps(a.reg(), b.reg())};
423 } else if constexpr (x86_64_v2 and is_i64x2) {
424 return numeric_array{_mm_unpacklo_epi64(a.reg(), b.reg())};
425 } else if constexpr (x86_64_v2 and is_i32x4) {
426 return numeric_array{_mm_unpacklo_epi32(a.reg(), b.reg())};
427 } else if constexpr (x86_64_v2 and is_i16x8) {
428 return numeric_array{_mm_unpacklo_epi16(a.reg(), b.reg())};
429 } else if constexpr (x86_64_v2 and is_i8x16) {
430 return numeric_array{_mm_unpacklo_epi8(a.reg(), b.reg())};
431 }
432 }
433
434 auto r = numeric_array{};
435 for (size_t i = 0; i != N; ++i) {
436 r[i] = (i % 2 == 0) ? a[i / 2] : b[i / 2];
437 }
438 return r;
439 }
440
445 template<size_t S>
446 [[nodiscard]] static constexpr numeric_array load(std::byte const *ptr) noexcept
447 {
448 auto r = numeric_array{};
449 std::memcpy(&r, ptr, S);
450 return r;
451 }
452
453
458 [[nodiscard]] static constexpr numeric_array load(std::byte const *ptr) noexcept
459 {
460 auto r = numeric_array{};
461 std::memcpy(&r, ptr, sizeof(r));
462 return r;
463 }
464
469 [[nodiscard]] static constexpr numeric_array load(T const *ptr) noexcept
470 {
471 auto r = numeric_array{};
472 std::memcpy(&r, ptr, sizeof(r));
473 return r;
474 }
475
476 template<size_t S>
477 constexpr void store(std::byte *ptr) const noexcept
478 {
479 std::memcpy(ptr, this, S);
480 }
481
485 constexpr void store(std::byte *ptr) const noexcept
486 {
487 store<sizeof(*this)>(ptr);
488 }
489
490 [[nodiscard]] constexpr T const &operator[](size_t i) const noexcept
491 {
492 static_assert(std::endian::native == std::endian::little, "Indices need to be reversed on big endian machines");
493 tt_axiom(i < N);
494 return v[i];
495 }
496
497 [[nodiscard]] constexpr T &operator[](size_t i) noexcept
498 {
499 static_assert(std::endian::native == std::endian::little, "Indices need to be reversed on big endian machines");
500 tt_axiom(i < N);
501 return v[i];
502 }
503
504 [[nodiscard]] constexpr reference front() noexcept
505 {
506 return v.front();
507 }
508
509 [[nodiscard]] constexpr const_reference front() const noexcept
510 {
511 return v.front();
512 }
513
514 [[nodiscard]] constexpr reference back() noexcept
515 {
516 return v.back();
517 }
518
519 [[nodiscard]] constexpr const_reference back() const noexcept
520 {
521 return v.back();
522 }
523
524 [[nodiscard]] constexpr pointer data() noexcept
525 {
526 return v.data();
527 }
528
529 [[nodiscard]] constexpr const_pointer data() const noexcept
530 {
531 return v.data();
532 }
533
534 [[nodiscard]] constexpr iterator begin() noexcept
535 {
536 return v.begin();
537 }
538
539 [[nodiscard]] constexpr const_iterator begin() const noexcept
540 {
541 return v.begin();
542 }
543
544 [[nodiscard]] constexpr const_iterator cbegin() const noexcept
545 {
546 return v.cbegin();
547 }
548
549 [[nodiscard]] constexpr iterator end() noexcept
550 {
551 return v.end();
552 }
553
554 [[nodiscard]] constexpr const_iterator end() const noexcept
555 {
556 return v.end();
557 }
558
559 [[nodiscard]] constexpr const_iterator cend() const noexcept
560 {
561 return v.cend();
562 }
563
564 [[nodiscard]] constexpr bool empty() const noexcept
565 {
566 return v.empty();
567 }
568
569 [[nodiscard]] constexpr size_type size() const noexcept
570 {
571 return v.size();
572 }
573
574 [[nodiscard]] constexpr size_type max_size() const noexcept
575 {
576 return v.max_size();
577 }
578
579 constexpr bool is_point() const noexcept
580 {
581 return v.back() != T{};
582 }
583
584 constexpr bool is_vector() const noexcept
585 {
586 return v.back() == T{};
587 }
588
589 constexpr bool is_opaque() const noexcept
590 {
591 return a() == T{1};
592 }
593
594 constexpr bool is_transparent() const noexcept
595 {
596 return a() == T{0};
597 }
598
599 [[nodiscard]] constexpr T const &x() const noexcept requires(N >= 1)
600 {
601 return std::get<0>(v);
602 }
603
604 [[nodiscard]] constexpr T const &y() const noexcept requires(N >= 2)
605 {
606 return std::get<1>(v);
607 }
608
609 [[nodiscard]] constexpr T const &z() const noexcept requires(N >= 3)
610 {
611 return std::get<2>(v);
612 }
613
614 [[nodiscard]] constexpr T const &w() const noexcept requires(N >= 4)
615 {
616 return std::get<3>(v);
617 }
618
619 [[nodiscard]] constexpr T &x() noexcept requires(N >= 1)
620 {
621 return std::get<0>(v);
622 }
623
624 [[nodiscard]] constexpr T &y() noexcept requires(N >= 2)
625 {
626 return std::get<1>(v);
627 }
628
629 [[nodiscard]] constexpr T &z() noexcept requires(N >= 3)
630 {
631 return std::get<2>(v);
632 }
633
634 [[nodiscard]] constexpr T &w() noexcept requires(N >= 4)
635 {
636 return std::get<3>(v);
637 }
638
639 [[nodiscard]] constexpr T const &r() const noexcept requires(N >= 1)
640 {
641 return std::get<0>(v);
642 }
643
644 [[nodiscard]] constexpr T const &g() const noexcept requires(N >= 2)
645 {
646 return std::get<1>(v);
647 }
648
649 [[nodiscard]] constexpr T const &b() const noexcept requires(N >= 3)
650 {
651 return std::get<2>(v);
652 }
653
654 [[nodiscard]] constexpr T const &a() const noexcept requires(N >= 4)
655 {
656 return std::get<3>(v);
657 }
658
659 [[nodiscard]] constexpr T &r() noexcept requires(N >= 1)
660 {
661 return std::get<0>(v);
662 }
663
664 [[nodiscard]] constexpr T &g() noexcept requires(N >= 2)
665 {
666 return std::get<1>(v);
667 }
668
669 [[nodiscard]] constexpr T &b() noexcept requires(N >= 3)
670 {
671 return std::get<2>(v);
672 }
673
674 [[nodiscard]] constexpr T &a() noexcept requires(N >= 4)
675 {
676 return std::get<3>(v);
677 }
678
679 [[nodiscard]] constexpr T const &width() const noexcept requires(N >= 1)
680 {
681 return std::get<0>(v);
682 }
683
684 [[nodiscard]] constexpr T const &height() const noexcept requires(N >= 2)
685 {
686 return std::get<1>(v);
687 }
688
689 [[nodiscard]] constexpr T const &depth() const noexcept requires(N >= 3)
690 {
691 return std::get<2>(v);
692 }
693
694 [[nodiscard]] constexpr T &width() noexcept requires(N >= 1)
695 {
696 return std::get<0>(v);
697 }
698
699 [[nodiscard]] constexpr T &height() noexcept requires(N >= 2)
700 {
701 return std::get<1>(v);
702 }
703
704 [[nodiscard]] constexpr T &depth() noexcept requires(N >= 3)
705 {
706 return std::get<2>(v);
707 }
708
709 constexpr numeric_array &operator<<=(unsigned int rhs) noexcept
710 {
711 return *this = *this << rhs;
712 }
713
714 constexpr numeric_array &operator>>=(unsigned int rhs) noexcept
715 {
716 return *this = *this >> rhs;
717 }
718
719 constexpr numeric_array &operator|=(numeric_array const &rhs) noexcept
720 {
721 return *this = *this | rhs;
722 }
723
724 constexpr numeric_array &operator|=(T const &rhs) noexcept
725 {
726 return *this = *this | rhs;
727 }
728
729 constexpr numeric_array &operator&=(numeric_array const &rhs) noexcept
730 {
731 return *this = *this & rhs;
732 }
733
734 constexpr numeric_array &operator&=(T const &rhs) noexcept
735 {
736 return *this = *this & rhs;
737 }
738
739 constexpr numeric_array &operator^=(numeric_array const &rhs) noexcept
740 {
741 return *this = *this ^ rhs;
742 }
743
744 constexpr numeric_array &operator^=(T const &rhs) noexcept
745 {
746 return *this = *this ^ rhs;
747 }
748
749 constexpr numeric_array &operator+=(numeric_array const &rhs) noexcept
750 {
751 return *this = *this + rhs;
752 }
753
754 constexpr numeric_array &operator+=(T const &rhs) noexcept
755 {
756 return *this = *this + rhs;
757 }
758
759 constexpr numeric_array &operator-=(numeric_array const &rhs) noexcept
760 {
761 return *this = *this - rhs;
762 }
763
764 constexpr numeric_array &operator-=(T const &rhs) noexcept
765 {
766 return *this = *this - rhs;
767 }
768
769 constexpr numeric_array &operator*=(numeric_array const &rhs) noexcept
770 {
771 return *this = *this * rhs;
772 }
773
774 constexpr numeric_array &operator*=(T const &rhs) noexcept
775 {
776 return *this = *this * rhs;
777 }
778
779 constexpr numeric_array &operator/=(numeric_array const &rhs) noexcept
780 {
781 return *this = *this / rhs;
782 }
783
784 constexpr numeric_array &operator/=(T const &rhs) noexcept
785 {
786 return *this = *this / rhs;
787 }
788
789 constexpr numeric_array &operator%=(numeric_array const &rhs) noexcept
790 {
791 return *this = *this % rhs;
792 }
793
794 constexpr numeric_array &operator%=(T const &rhs) noexcept
795 {
796 return *this = *this % rhs;
797 }
798
799 constexpr static ssize_t get_zero = -1;
800 constexpr static ssize_t get_one = -2;
801
806 template<size_t I>
807 [[nodiscard]] friend constexpr T &get(numeric_array &rhs) noexcept
808 {
809 static_assert(I < N, "Index out of bounds");
810 return std::get<I>(rhs.v);
811 }
812
818 template<ssize_t I>
819 [[nodiscard]] friend constexpr T get(numeric_array &&rhs) noexcept
820 {
821 static_assert(std::endian::native == std::endian::little, "Indices need to be reversed on big endian machines");
822 static_assert(I >= -2 && I < narrow_cast<ssize_t>(N), "Index out of bounds");
823 if constexpr (I == get_zero) {
824 return T{0};
825 } else if constexpr (I == get_one) {
826 return T{1};
827 } else {
828 return std::get<I>(rhs.v);
829 }
830 }
831
837 template<ssize_t I>
838 [[nodiscard]] friend constexpr T get(numeric_array const &rhs) noexcept
839 {
840 static_assert(std::endian::native == std::endian::little, "Indices need to be reversed on big endian machines");
841 static_assert(I >= -2 && I < narrow_cast<ssize_t>(N), "Index out of bounds");
842 if constexpr (I == get_zero) {
843 return T{0};
844 } else if constexpr (I == get_one) {
845 return T{1};
846 } else {
847 return std::get<I>(rhs.v);
848 }
849 }
850
855 template<size_t Mask = ~size_t{0}>
856 [[nodiscard]] friend constexpr numeric_array zero(numeric_array rhs) noexcept
857 {
858 if (!std::is_constant_evaluated()) {
859 if constexpr (is_f32x4 && x86_64_v2) {
860 return numeric_array{f32x4_x64v2_zero<Mask & 0xf>(rhs.v)};
861 }
862 }
863
864 auto r = numeric_array{};
865 for (size_t i = 0; i != N; ++i) {
866 if (static_cast<bool>((Mask >> i) & 1)) {
867 r.v[i] = T{0};
868 } else {
869 r.v[i] = rhs.v[i];
870 }
871 }
872 return r;
873 }
874
879 template<size_t Mask = ~size_t{0}>
880 [[nodiscard]] friend constexpr numeric_array neg(numeric_array rhs) noexcept
881 {
882 if (!std::is_constant_evaluated()) {
883 if constexpr (is_f32x4 && x86_64_v2) {
884 return numeric_array{f32x4_x64v2_neg<Mask & 0xf>(rhs.v)};
885 }
886 }
887
888 auto r = numeric_array{};
889 for (size_t i = 0; i != N; ++i) {
890 if (static_cast<bool>((Mask >> i) & 1)) {
891 r.v[i] = -rhs.v[i];
892 } else {
893 r.v[i] = rhs.v[i];
894 }
895 }
896 return r;
897 }
898
899 [[nodiscard]] friend constexpr numeric_array operator-(numeric_array const &rhs) noexcept
900 {
901 auto r = numeric_array{};
902 for (size_t i = 0; i != N; ++i) {
903 // -rhs.v[i] will cause a memory load with msvc.
904 r.v[i] = T{} - rhs.v[i];
905 }
906 return r;
907 }
908
909 [[nodiscard]] friend constexpr numeric_array abs(numeric_array const &rhs) noexcept
910 {
911 auto neg_rhs = -rhs;
912
913 auto r = numeric_array{};
914 for (size_t i = 0; i != N; ++i) {
915 r.v[i] = rhs.v[i] < T{} ? neg_rhs.v[i] : rhs.v[i];
916 }
917 return r;
918 }
919
920 [[nodiscard]] friend constexpr numeric_array rcp(numeric_array const &rhs) noexcept
921 {
922 if (!std::is_constant_evaluated()) {
923 if constexpr (is_f32x4 and x86_64_v2) {
924 return numeric_array{_mm_rcp_ps(rhs.reg())};
925 }
926 }
927
928 auto r = numeric_array{};
929 for (size_t i = 0; i != N; ++i) {
930 r[i] = 1.0f / rhs.v[i];
931 }
932 return r;
933 }
934
935 [[nodiscard]] friend constexpr numeric_array sqrt(numeric_array const &rhs) noexcept
936 {
937 if (!std::is_constant_evaluated()) {
938 if constexpr (is_f32x4 and x86_64_v2) {
939 return numeric_array{_mm_sqrt_ps(rhs.reg())};
940 }
941 }
942
943 auto r = numeric_array{};
944 for (size_t i = 0; i != N; ++i) {
945 r[i] = std::sqrt(rhs.v[i]);
946 }
947 return r;
948 }
949
950 [[nodiscard]] friend constexpr numeric_array rcp_sqrt(numeric_array const &rhs) noexcept
951 {
952 if (!std::is_constant_evaluated()) {
953 if constexpr (is_f32x4 and x86_64_v2) {
954 return numeric_array{_mm_rcp_sqrt_ps(rhs.reg())};
955 }
956 }
957
958 auto r = numeric_array{};
959 for (size_t i = 0; i != N; ++i) {
960 r[i] = 1.0f / std::sqrt(rhs.v[i]);
961 }
962 return r;
963 }
964
965 [[nodiscard]] friend constexpr numeric_array floor(numeric_array const &rhs) noexcept
966 {
967 if (!std::is_constant_evaluated()) {
968 if constexpr (is_f32x4 and x86_64_v2) {
969 return numeric_array{_mm_floor_ps(rhs.reg())};
970 }
971 }
972
973 auto r = numeric_array{};
974 for (size_t i = 0; i != N; ++i) {
975 r[i] = std::floor(rhs.v[i]);
976 }
977 return r;
978 }
979
980 [[nodiscard]] friend constexpr numeric_array ceil(numeric_array const &rhs) noexcept
981 {
982 if (!std::is_constant_evaluated()) {
983 if constexpr (is_f32x4 and x86_64_v2) {
984 return numeric_array{_mm_ceil_ps(rhs.reg())};
985 }
986 }
987
988 auto r = numeric_array{};
989 for (size_t i = 0; i != N; ++i) {
990 r[i] = std::ceil(rhs.v[i]);
991 }
992 return r;
993 }
994
995 [[nodiscard]] friend constexpr numeric_array round(numeric_array const &rhs) noexcept
996 {
997 if (!std::is_constant_evaluated()) {
998 if constexpr (is_f32x4 and x86_64_v2) {
999 return numeric_array{_mm_round_ps(rhs.reg(), _MM_FROUND_CUR_DIRECTION)};
1000 }
1001 }
1002
1003 auto r = numeric_array{};
1004 for (size_t i = 0; i != N; ++i) {
1005 r[i] = std::round(rhs.v[i]);
1006 }
1007 return r;
1008 }
1009
1017 template<size_t Mask>
1018 [[nodiscard]] friend constexpr T dot(numeric_array const &lhs, numeric_array const &rhs) noexcept
1019 {
1020 if (!std::is_constant_evaluated()) {
1021 if constexpr (is_f32x4 and x86_64_v2) {
1022 return f32x4_x64v2_dot<Mask>(lhs.v, rhs.v);
1023 }
1024 }
1025
1026 auto r = T{};
1027 for (size_t i = 0; i != N; ++i) {
1028 if (static_cast<bool>(Mask & (1_uz << i))) {
1029 r += lhs.v[i] * rhs.v[i];
1030 }
1031 }
1032 return r;
1033 }
1034
1042 template<size_t Mask>
1043 [[nodiscard]] friend constexpr T hypot(numeric_array const &rhs) noexcept
1044 {
1045 if (is_f32x4 && x86_64_v2 && !std::is_constant_evaluated()) {
1046 return f32x4_x64v2_hypot<Mask>(rhs.v);
1047 }
1048 return std::sqrt(dot<Mask>(rhs, rhs));
1049 }
1050
1058 template<size_t Mask>
1059 [[nodiscard]] friend constexpr T squared_hypot(numeric_array const &rhs) noexcept
1060 {
1061 return dot<Mask>(rhs, rhs);
1062 }
1063
1070 template<size_t Mask>
1071 [[nodiscard]] friend constexpr T rcp_hypot(numeric_array const &rhs) noexcept
1072 {
1073 if (is_f32x4 && x86_64_v2 && !std::is_constant_evaluated()) {
1074 return f32x4_x64v2_rcp_hypot<Mask>(rhs.v);
1075 }
1076
1077 return 1.0f / hypot<Mask>(rhs);
1078 }
1079
1088 template<size_t Mask>
1089 [[nodiscard]] friend constexpr numeric_array normalize(numeric_array const &rhs) noexcept
1090 {
1091 tt_axiom(rhs.is_vector());
1092
1093 if (is_f32x4 && x86_64_v2 && !std::is_constant_evaluated()) {
1094 return numeric_array{f32x4_x64v2_normalize<Mask>(rhs.v)};
1095 }
1096
1097 ttlet rcp_hypot_ = rcp_hypot<Mask>(rhs);
1098
1099 auto r = numeric_array{};
1100 for (size_t i = 0; i != N; ++i) {
1101 if (static_cast<bool>(Mask & (1_uz << i))) {
1102 r.v[i] = rhs.v[i] * rcp_hypot_;
1103 }
1104 }
1105 return r;
1106 }
1107
1108 [[nodiscard]] friend constexpr unsigned int eq(numeric_array const &lhs, numeric_array const &rhs) noexcept
1109 requires(N <= sizeof(unsigned int) * CHAR_BIT)
1110 {
1111 if (!std::is_constant_evaluated()) {
1112 if constexpr (is_f32x4 and x86_64_v2) {
1113 return static_cast<unsigned int>(_mm_movemask_ps(_mm_cmpeq_ps(lhs.reg(), rhs.reg())));
1114 }
1115 }
1116
1117 unsigned int r = 0;
1118 for (size_t i = 0; i != N; ++i) {
1119 r |= static_cast<unsigned int>(lhs.v[i] == rhs.v[i]) << i;
1120 }
1121 return r;
1122 }
1123
1124 [[nodiscard]] friend constexpr unsigned int ne(numeric_array const &lhs, numeric_array const &rhs) noexcept
1125 requires(N <= sizeof(unsigned int) * CHAR_BIT)
1126 {
1127 if (!std::is_constant_evaluated()) {
1128 if constexpr (is_f32x4 and x86_64_v2) {
1129 return static_cast<unsigned int>(_mm_movemask_ps(_mm_cmpne_ps(lhs.reg(), rhs.reg())));
1130 }
1131 }
1132 unsigned int r = 0;
1133 for (size_t i = 0; i != N; ++i) {
1134 r |= static_cast<unsigned int>(lhs.v[i] != rhs.v[i]) << i;
1135 }
1136 return r;
1137 }
1138
1139 [[nodiscard]] friend constexpr unsigned int lt(numeric_array const &lhs, numeric_array const &rhs) noexcept
1140 requires(N <= sizeof(unsigned int) * CHAR_BIT)
1141 {
1142 if (!std::is_constant_evaluated()) {
1143 if constexpr (is_f32x4 and x86_64_v2) {
1144 return static_cast<unsigned int>(_mm_movemask_ps(_mm_cmplt_ps(lhs.reg(), rhs.reg())));
1145 }
1146 }
1147 unsigned int r = 0;
1148 for (size_t i = 0; i != N; ++i) {
1149 r |= static_cast<unsigned int>(lhs.v[i] < rhs.v[i]) << i;
1150 }
1151 return r;
1152 }
1153
1154 [[nodiscard]] friend constexpr unsigned int gt(numeric_array const &lhs, numeric_array const &rhs) noexcept
1155 requires(N <= sizeof(unsigned int) * CHAR_BIT)
1156 {
1157 if (!std::is_constant_evaluated()) {
1158 if constexpr (is_f32x4 and x86_64_v2) {
1159 return static_cast<unsigned int>(_mm_movemask_ps(_mm_cmpgt_ps(lhs.reg(), rhs.reg())));
1160 }
1161 }
1162 unsigned int r = 0;
1163 for (size_t i = 0; i != N; ++i) {
1164 r |= static_cast<unsigned int>(lhs.v[i] > rhs.v[i]) << i;
1165 }
1166 return r;
1167 }
1168
1169 [[nodiscard]] friend constexpr unsigned int le(numeric_array const &lhs, numeric_array const &rhs) noexcept
1170 requires(N <= sizeof(unsigned int) * CHAR_BIT)
1171 {
1172 if (!std::is_constant_evaluated()) {
1173 if constexpr (is_f32x4 and x86_64_v2) {
1174 return static_cast<unsigned int>(_mm_movemask_ps(_mm_cmple_ps(lhs.reg(), rhs.reg())));
1175 }
1176 }
1177 unsigned int r = 0;
1178 for (size_t i = 0; i != N; ++i) {
1179 r |= static_cast<unsigned int>(lhs.v[i] <= rhs.v[i]) << i;
1180 }
1181 return r;
1182 }
1183
1184 [[nodiscard]] friend constexpr unsigned int ge(numeric_array const &lhs, numeric_array const &rhs) noexcept
1185 requires(N <= sizeof(unsigned int) * CHAR_BIT)
1186 {
1187 if (!std::is_constant_evaluated()) {
1188 if constexpr (is_f32x4 and x86_64_v2) {
1189 return static_cast<unsigned int>(_mm_movemask_ps(_mm_cmpge_ps(lhs.reg(), rhs.reg())));
1190 }
1191 }
1192 unsigned int r = 0;
1193 for (size_t i = 0; i != N; ++i) {
1194 r |= static_cast<unsigned int>(lhs.v[i] >= rhs.v[i]) << i;
1195 }
1196 return r;
1197 }
1198
1199 [[nodiscard]] friend constexpr numeric_array gt_mask(numeric_array const &lhs, numeric_array const &rhs) noexcept
1200 {
1201 if (not std::is_constant_evaluated()) {
1202 if constexpr (is_f32x4 and x86_64_v2) {
1203 return numeric_array{_mm_cmpgt_ps(lhs.reg(), rhs.reg())};
1204 } else if constexpr (is_i64x4 and x86_64_v2) {
1205 return numeric_array{_mm_cmpgt_epi64(lhs.reg(), rhs.reg())};
1206 } else if constexpr (is_i32x4 and x86_64_v2) {
1207 return numeric_array{_mm_cmpgt_epi32(lhs.reg(), rhs.reg())};
1208 } else if constexpr (is_i16x4 and x86_64_v2) {
1209 return numeric_array{_mm_cmpgt_epi16(lhs.reg(), rhs.reg())};
1210 }
1211 }
1212
1213 auto r = numeric_array{};
1214 for (size_t i = 0; i != N; ++i) {
1215 if constexpr (sizeof(value_type) == 4) {
1216 r[i] = std::bit_cast<value_type>((static_cast<int32_t>(lhs.v[i] > rhs.v[i]) << 31) >> 31);
1217 } else {
1218 tt_static_not_implemented();
1219 }
1220 }
1221 return r;
1222 }
1223
1224 [[nodiscard]] friend constexpr numeric_array ge_mask(numeric_array const &lhs, numeric_array const &rhs) noexcept
1225 {
1226 if (not std::is_constant_evaluated()) {
1227 if constexpr (is_f32x4 and x86_64_v2) {
1228 return numeric_array{_mm_cmpge_ps(lhs.reg(), rhs.reg())};
1229 }
1230 }
1231
1232 auto r = numeric_array{};
1233 for (size_t i = 0; i != N; ++i) {
1234 if constexpr (sizeof(value_type) == 4) {
1235 r[i] = std::bit_cast<value_type>((static_cast<int32_t>(lhs.v[i] >= rhs.v[i]) << 31) >> 31);
1236 } else {
1237 tt_static_not_implemented();
1238 }
1239 }
1240 return r;
1241 }
1242
1243 [[nodiscard]] friend constexpr bool operator==(numeric_array const &lhs, numeric_array const &rhs) noexcept
1244 {
1245 if (!std::is_constant_evaluated()) {
1246 if constexpr (is_f32x4 && x86_64_v2) {
1247 // MSVC cannot vectorize comparison.
1248 return f32x4_x64v2_eq(lhs.v, rhs.v);
1249 }
1250 }
1251
1252 auto r = true;
1253 for (size_t i = 0; i != N; ++i) {
1254 r &= (lhs.v[i] == rhs.v[i]);
1255 }
1256 return r;
1257 }
1258
1259 [[nodiscard]] friend constexpr bool operator!=(numeric_array const &lhs, numeric_array const &rhs) noexcept
1260 {
1261 return !(lhs == rhs);
1262 }
1263
1264 [[nodiscard]] friend constexpr numeric_array operator<<(numeric_array const &lhs, unsigned int rhs) noexcept
1265 {
1266 if (not std::is_constant_evaluated()) {
1267 if constexpr (x86_64_v2 and is_i64x2) {
1268 return numeric_array{_mm_slli_epi64(lhs.reg(), rhs)};
1269 } else if constexpr (x86_64_v2 and is_i32x4) {
1270 return numeric_array{_mm_slli_epi32(lhs.reg(), rhs)};
1271 } else if constexpr (x86_64_v2 and is_i16x8) {
1272 return numeric_array{_mm_slli_epi32(lhs.reg(), rhs)};
1273 } else if constexpr (x86_64_v2 and is_u64x2) {
1274 return numeric_array{_mm_slli_epi64(lhs.reg(), rhs)};
1275 } else if constexpr (x86_64_v2 and is_u32x4) {
1276 return numeric_array{_mm_slli_epi32(lhs.reg(), rhs)};
1277 } else if constexpr (x86_64_v2 and is_u16x8) {
1278 return numeric_array{_mm_slli_epi32(lhs.reg(), rhs)};
1279 }
1280 }
1281
1282 auto r = numeric_array{};
1283 for (size_t i = 0; i != N; ++i) {
1284 r.v[i] = lhs.v[i] << rhs;
1285 }
1286 return r;
1287 }
1288
1289 [[nodiscard]] friend constexpr numeric_array operator>>(numeric_array const &lhs, unsigned int rhs) noexcept
1290 {
1291 if (not std::is_constant_evaluated()) {
1292 if constexpr (x86_64_v2 and is_i32x4) {
1293 return numeric_array{_mm_srai_epi32(lhs.reg(), rhs)};
1294 } else if constexpr (x86_64_v2 and is_i16x8) {
1295 return numeric_array{_mm_srai_epi16(lhs.reg(), rhs)};
1296 } else if constexpr (x86_64_v2 and is_u64x2) {
1297 return numeric_array{_mm_srli_epi64(lhs.reg(), rhs)};
1298 } else if constexpr (x86_64_v2 and is_u32x4) {
1299 return numeric_array{_mm_srli_epi32(lhs.reg(), rhs)};
1300 } else if constexpr (x86_64_v2 and is_u16x8) {
1301 return numeric_array{_mm_srli_epi16(lhs.reg(), rhs)};
1302 }
1303 }
1304
1305 auto r = numeric_array{};
1306 for (size_t i = 0; i != N; ++i) {
1307 r.v[i] = lhs.v[i] >> rhs;
1308 }
1309 return r;
1310 }
1311
1312 [[nodiscard]] friend constexpr numeric_array operator|(numeric_array const &lhs, numeric_array const &rhs) noexcept
1313 {
1314 if (!std::is_constant_evaluated()) {
1315 if constexpr (std::is_integral_v<T> and x86_64_v2) {
1316 return numeric_array{_mm_or_si128(lhs.reg(), rhs.reg())};
1317 }
1318 }
1319 auto r = numeric_array{};
1320 for (size_t i = 0; i != N; ++i) {
1321 r.v[i] = lhs.v[i] | rhs.v[i];
1322 }
1323 return r;
1324 }
1325
1326 [[nodiscard]] friend constexpr numeric_array operator|(numeric_array const &lhs, T const &rhs) noexcept
1327 {
1328 return lhs | broadcast(rhs);
1329 }
1330
1331 [[nodiscard]] friend constexpr numeric_array operator|(T const &lhs, numeric_array const &rhs) noexcept
1332 {
1333 return broadcast(lhs) | rhs;
1334 }
1335
1336 [[nodiscard]] friend constexpr numeric_array operator&(numeric_array const &lhs, numeric_array const &rhs) noexcept
1337 {
1338 if (!std::is_constant_evaluated()) {
1339 if constexpr (std::is_integral_v<T> and x86_64_v2) {
1340 return numeric_array{_mm_and_si128(lhs.reg(), rhs.reg())};
1341 }
1342 }
1343 auto r = numeric_array{};
1344 for (size_t i = 0; i != N; ++i) {
1345 r.v[i] = lhs.v[i] & rhs.v[i];
1346 }
1347 return r;
1348 }
1349
1350 [[nodiscard]] friend constexpr numeric_array operator&(numeric_array const &lhs, T const &rhs) noexcept
1351 {
1352 return lhs & broadcast(rhs);
1353 }
1354
1355 [[nodiscard]] friend constexpr numeric_array operator&(T const &lhs, numeric_array const &rhs) noexcept
1356 {
1357 return broadcast(lhs) & rhs;
1358 }
1359
1360 [[nodiscard]] friend constexpr numeric_array operator^(numeric_array const &lhs, numeric_array const &rhs) noexcept
1361 {
1362 if (!std::is_constant_evaluated()) {
1363 if constexpr (std::is_integral_v<T> and x86_64_v2) {
1364 return numeric_array{_mm_xor_si128(lhs.reg(), rhs.reg())};
1365 }
1366 }
1367 auto r = numeric_array{};
1368 for (size_t i = 0; i != N; ++i) {
1369 r.v[i] = lhs.v[i] ^ rhs.v[i];
1370 }
1371 return r;
1372 }
1373
1374 [[nodiscard]] friend constexpr numeric_array operator^(numeric_array const &lhs, T const &rhs) noexcept
1375 {
1376 return lhs ^ broadcast(rhs);
1377 }
1378
1379 [[nodiscard]] friend constexpr numeric_array operator^(T const &lhs, numeric_array const &rhs) noexcept
1380 {
1381 return broadcast(lhs) ^ rhs;
1382 }
1383
1384 [[nodiscard]] friend constexpr numeric_array operator+(numeric_array const &lhs, numeric_array const &rhs) noexcept
1385 {
1386 if (!std::is_constant_evaluated()) {
1387 if constexpr (x86_64_v2_5 and lhs.is_f32x8 and rhs.is_f32x8) {
1388 return numeric_array{_mm256_add_ps(lhs.reg(), rhs.reg())};
1389 }
1390 }
1391
1392 auto r = numeric_array{};
1393 for (size_t i = 0; i != N; ++i) {
1394 r.v[i] = lhs.v[i] + rhs.v[i];
1395 }
1396 return r;
1397 }
1398
1399 [[nodiscard]] friend constexpr numeric_array operator+(numeric_array const &lhs, T const &rhs) noexcept
1400 {
1401 return lhs + broadcast(rhs);
1402 }
1403
1404 [[nodiscard]] friend constexpr numeric_array operator+(T const &lhs, numeric_array const &rhs) noexcept
1405 {
1406 return broadcast(lhs) + rhs;
1407 }
1408
1409 [[nodiscard]] friend constexpr numeric_array hadd(numeric_array const &lhs, numeric_array const &rhs) noexcept
1410 {
1411 if (!std::is_constant_evaluated()) {
1412 if constexpr (is_f64x2 and x86_64_v2) {
1413 return numeric_array{_mm_hadd_pd(lhs.reg(), rhs.reg())};
1414 } else if constexpr (is_f32x4 and x86_64_v2) {
1415 return numeric_array{_mm_hadd_ps(lhs.reg(), rhs.reg())};
1416 } else if constexpr (is_i32x4 and x86_64_v2) {
1417 return numeric_array{_mm_hadd_epi32(lhs.reg(), rhs.reg())};
1418 } else if constexpr (is_i16x8 and x86_64_v2) {
1419 return numeric_array{_mm_hadd_epi16(lhs.reg(), rhs.reg())};
1420 } else if constexpr (is_i8x16 and x86_64_v2) {
1421 return numeric_array{_mm_hadd_epi8(lhs.reg(), rhs.reg())};
1422 }
1423 }
1424
1425 tt_axiom(N % 2 == 0);
1426
1427 auto r = numeric_array{};
1428
1429 size_t src_i = 0;
1430 size_t dst_i = 0;
1431 while (src_i != N) {
1432 auto tmp = lhs[src_i++];
1433 tmp += lhs[src_i++];
1434 r.v[dst_i++] = tmp;
1435 }
1436
1437 src_i = 0;
1438 while (src_i != N) {
1439 auto tmp = rhs[src_i++];
1440 tmp += rhs[src_i++];
1441 r.v[dst_i++] = tmp;
1442 }
1443 return r;
1444 }
1445
1446 [[nodiscard]] friend constexpr numeric_array hsub(numeric_array const &lhs, numeric_array const &rhs) noexcept
1447 {
1448 if (!std::is_constant_evaluated()) {
1449 if constexpr (is_f64x2 and x86_64_v2) {
1450 return numeric_array{_mm_hsub_pd(lhs.reg(), rhs.reg())};
1451 } else if constexpr (is_f32x4 and x86_64_v2) {
1452 return numeric_array{_mm_hsub_ps(lhs.reg(), rhs.reg())};
1453 } else if constexpr (is_i32x4 and x86_64_v2) {
1454 return numeric_array{_mm_hsub_epi32(lhs.reg(), rhs.reg())};
1455 } else if constexpr (is_i16x8 and x86_64_v2) {
1456 return numeric_array{_mm_hsub_epi16(lhs.reg(), rhs.reg())};
1457 } else if constexpr (is_i8x16 and x86_64_v2) {
1458 return numeric_array{_mm_hsub_epi8(lhs.reg(), rhs.reg())};
1459 }
1460 }
1461
1462 tt_axiom(N % 2 == 0);
1463
1464 auto r = numeric_array{};
1465
1466 size_t src_i = 0;
1467 size_t dst_i = 0;
1468 while (src_i != N) {
1469 auto tmp = lhs[src_i++];
1470 tmp -= lhs[src_i++];
1471 r.v[dst_i++] = tmp;
1472 }
1473
1474 src_i = 0;
1475 while (src_i != N) {
1476 auto tmp = rhs[src_i++];
1477 tmp -= rhs[src_i++];
1478 r.v[dst_i++] = tmp;
1479 }
1480 return r;
1481 }
1482
1483 [[nodiscard]] friend constexpr numeric_array operator-(numeric_array const &lhs, numeric_array const &rhs) noexcept
1484 {
1485 if (!std::is_constant_evaluated()) {
1486 if constexpr (x86_64_v2_5 and lhs.is_f32x8 and rhs.is_f32x8) {
1487 return numeric_array{_mm256_sub_ps(lhs.reg(), rhs.reg())};
1488 }
1489 }
1490
1491 auto r = numeric_array{};
1492 for (size_t i = 0; i != N; ++i) {
1493 r.v[i] = lhs.v[i] - rhs.v[i];
1494 }
1495 return r;
1496 }
1497
1498 [[nodiscard]] friend constexpr numeric_array operator-(numeric_array const &lhs, T const &rhs) noexcept
1499 {
1500 return lhs - broadcast(rhs);
1501 }
1502
1503 [[nodiscard]] friend constexpr numeric_array operator-(T const &lhs, numeric_array const &rhs) noexcept
1504 {
1505 return broadcast(lhs) - rhs;
1506 }
1507
1512 template<size_t Mask = ~size_t{0}>
1513 [[nodiscard]] friend constexpr numeric_array addsub(numeric_array const &lhs, numeric_array const &rhs) noexcept
1514 {
1515 if (!std::is_constant_evaluated()) {
1516 if constexpr (is_f32x4 && x86_64_v2) {
1517 return numeric_array{f32x4_x64v2_addsub<Mask & 0xf>(lhs.v, rhs.v)};
1518 }
1519 }
1520
1521 auto r = numeric_array{};
1522 for (size_t i = 0; i != N; ++i) {
1523 if (static_cast<bool>((Mask >> i) & 1)) {
1524 r.v[i] = lhs.v[i] + rhs.v[i];
1525 } else {
1526 r.v[i] = lhs.v[i] - rhs.v[i];
1527 }
1528 }
1529 return r;
1530 }
1531
1532 [[nodiscard]] friend constexpr numeric_array operator*(numeric_array const &lhs, numeric_array const &rhs) noexcept
1533 {
1534 if (!std::is_constant_evaluated()) {
1535 if constexpr (x86_64_v2_5 and lhs.is_f32x8 and rhs.is_f32x8) {
1536 return numeric_array{_mm256_mul_ps(lhs.reg(), rhs.reg())};
1537 }
1538 }
1539
1540 auto r = numeric_array{};
1541 for (size_t i = 0; i != N; ++i) {
1542 r.v[i] = lhs.v[i] * rhs.v[i];
1543 }
1544 return r;
1545 }
1546
1547 [[nodiscard]] friend constexpr numeric_array operator*(numeric_array const &lhs, T const &rhs) noexcept
1548 {
1549 return lhs * broadcast(rhs);
1550 }
1551
1552 [[nodiscard]] friend constexpr numeric_array operator*(T const &lhs, numeric_array const &rhs) noexcept
1553 {
1554 return broadcast(lhs) * rhs;
1555 }
1556
1557 [[nodiscard]] friend constexpr numeric_array operator/(numeric_array const &lhs, numeric_array const &rhs) noexcept
1558 {
1559 if (!std::is_constant_evaluated()) {
1560 if constexpr (x86_64_v2_5 and lhs.is_f32x8 and rhs.is_f32x8) {
1561 return numeric_array{_mm256_div_ps(lhs.reg(), rhs.reg())};
1562 }
1563 }
1564
1565 auto r = numeric_array{};
1566 for (size_t i = 0; i != N; ++i) {
1567 r.v[i] = lhs.v[i] / rhs.v[i];
1568 }
1569 return r;
1570 }
1571
1572 [[nodiscard]] friend constexpr numeric_array operator/(numeric_array const &lhs, T const &rhs) noexcept
1573 {
1574 return lhs / broadcast(rhs);
1575 }
1576
1577 [[nodiscard]] friend constexpr numeric_array operator/(T const &lhs, numeric_array const &rhs) noexcept
1578 {
1579 return broadcast(lhs) / rhs;
1580 }
1581
1582 [[nodiscard]] friend constexpr numeric_array operator%(numeric_array const &lhs, numeric_array const &rhs) noexcept
1583 {
1584 auto r = numeric_array{};
1585 for (size_t i = 0; i != N; ++i) {
1586 r.v[i] = lhs.v[i] % rhs.v[i];
1587 }
1588 return r;
1589 }
1590
1591 [[nodiscard]] friend constexpr numeric_array operator%(numeric_array const &lhs, T const &rhs) noexcept
1592 {
1593 return lhs % broadcast(rhs);
1594 }
1595
1596 [[nodiscard]] friend constexpr numeric_array operator%(T const &lhs, numeric_array const &rhs) noexcept
1597 {
1598 return broadcast(lhs) % rhs;
1599 }
1600
1601 [[nodiscard]] friend constexpr numeric_array min(numeric_array const &lhs, numeric_array const &rhs) noexcept
1602 {
1603 auto r = numeric_array{};
1604 for (size_t i = 0; i != N; ++i) {
1605 // std::min() causes vectorization failure with msvc
1606 r.v[i] = lhs.v[i] < rhs.v[i] ? lhs.v[i] : rhs.v[i];
1607 }
1608 return r;
1609 }
1610
1611 [[nodiscard]] friend constexpr numeric_array max(numeric_array const &lhs, numeric_array const &rhs) noexcept
1612 {
1613 auto r = numeric_array{};
1614 for (size_t i = 0; i != N; ++i) {
1615 // std::max() causes vectorization failure with msvc
1616 r.v[i] = lhs.v[i] > rhs.v[i] ? lhs.v[i] : rhs.v[i];
1617 }
1618 return r;
1619 }
1620
1621 [[nodiscard]] friend constexpr numeric_array
1622 clamp(numeric_array const &lhs, numeric_array const &low, numeric_array const &high) noexcept
1623 {
1624 auto r = numeric_array{};
1625 for (size_t i = 0; i != N; ++i) {
1626 // std::clamp() causes vectorization failure with msvc
1627 r.v[i] = lhs.v[i] < low.v[i] ? low.v[i] : lhs.v[i] > high.v[i] ? high.v[i] : lhs.v[i];
1628 }
1629 return r;
1630 }
1631
1634 [[nodiscard]] friend constexpr numeric_array cross_2D(numeric_array const &rhs) noexcept requires(N >= 2)
1635 {
1636 tt_axiom(rhs.z() == 0.0f && rhs.is_vector());
1637 return numeric_array{-rhs.y(), rhs.x()};
1638 }
1639
1642 [[nodiscard]] friend constexpr numeric_array normal_2D(numeric_array const &rhs) noexcept requires(N >= 2)
1643 {
1644 return normalize<0b0011>(cross_2D(rhs));
1645 }
1646
1649 [[nodiscard]] friend constexpr float cross_2D(numeric_array const &lhs, numeric_array const &rhs) noexcept requires(N >= 2)
1650 {
1651 if (is_f32x4 && x86_64_v2 && !std::is_constant_evaluated()) {
1652 return f32x4_x64v2_viktor_cross(lhs.v, rhs.v);
1653
1654 } else {
1655 return lhs.x() * rhs.y() - lhs.y() * rhs.x();
1656 }
1657 }
1658
1659 // x=a.y*b.z - a.z*b.y
1660 // y=a.z*b.x - a.x*b.z
1661 // z=a.x*b.y - a.y*b.x
1662 // w=a.w*b.w - a.w*b.w
1663 [[nodiscard]] constexpr friend numeric_array cross_3D(numeric_array const &lhs, numeric_array const &rhs) noexcept
1664 {
1665 if (!std::is_constant_evaluated()) {
1666 if constexpr (is_f32x4 && x86_64_v2) {
1667 return numeric_array{f32x4_x64v2_cross(lhs.v, rhs.v)};
1668 }
1669 }
1670
1671 return numeric_array{
1672 lhs.y() * rhs.z() - lhs.z() * rhs.y(),
1673 lhs.z() * rhs.x() - lhs.x() * rhs.z(),
1674 lhs.x() * rhs.y() - lhs.y() * rhs.x(),
1675 0.0f};
1676 }
1677
1678 // w + x*i + y*j + z*k
1679 //
1680 // (w1*x2 + x1*w2 + y1*z2 - z1*y2)i
1681 // + (w1*y2 - x1*z2 + y1*w2 + z1*x2)j
1682 // + (w1*z2 + x1*y2 - y1*x2 + z1*w2)k
1683 // + (w1*w2 - x1*x2 - y1*y2 - z1*z2)
1684 template<int D>
1685 requires(D == 4) [[nodiscard]] friend numeric_array
1686 hamilton_cross(numeric_array const &lhs, numeric_array const &rhs) noexcept
1687 {
1688 ttlet col0 = lhs.wwww() * rhs;
1689 ttlet col1 = lhs.xxxx() * rhs.wzyx();
1690 ttlet col2 = lhs.yyyy() * rhs.zwxy();
1691 ttlet col3 = lhs.zzzz() * rhs.yxwz();
1692
1693 ttlet col01 = addsub(col0, col1);
1694 ttlet col012 = addsub(col01.xzyw(), col2.xzyw()).xzyw();
1695
1696 return numeric_array{
1697
1698 };
1699 }
1700
1704 [[nodiscard]] friend constexpr numeric_array shift_left(numeric_array const &lhs, unsigned int rhs) noexcept
1705 {
1706 numeric_array r;
1707 for (ssize_t i = 0; i != N; ++i) {
1708 if ((i - rhs) >= 0) {
1709 r[i] = lhs[i - rhs];
1710 } else {
1711 r[i] = T{};
1712 }
1713 }
1714 return r;
1715 }
1716
1720 [[nodiscard]] friend constexpr numeric_array shift_right(numeric_array const &lhs, unsigned int rhs) noexcept
1721 {
1722 numeric_array r;
1723 for (ssize_t i = 0; i != N; ++i) {
1724 if ((i + rhs) < N) {
1725 r[i] = lhs[i + rhs];
1726 } else {
1727 r[i] = T{};
1728 }
1729 }
1730 return r;
1731 }
1732
1733 [[nodiscard]] friend constexpr numeric_array
1734 blend(numeric_array const &a, numeric_array const &b, numeric_array const &mask) requires(is_i8x16)
1735 {
1736 if (!std::is_constant_evaluated()) {
1737 if constexpr (x86_64_v2) {
1738 return numeric_array{_mm_blendv_epi8(a.reg(), b.reg(), mask.reg())};
1739 }
1740 }
1741
1742 auto r = numeric_array{};
1743
1744 for (size_t i = 0; i != N; ++i) {
1745 r[i] = mask[i] >= 0 ? a[i] : b[i];
1746 }
1747
1748 return r;
1749 }
1750
1751 [[nodiscard]] static constexpr numeric_array byte_srl_shuffle_indices(unsigned int rhs) requires(is_i8x16)
1752 {
1753 static_assert(std::endian::native == std::endian::little);
1754
1755 auto r = numeric_array{};
1756 for (auto i = 0; i != 16; ++i) {
1757 if ((i + rhs) < 16) {
1758 r[i] = narrow_cast<int8_t>(i + rhs);
1759 } else {
1760 // Indices set to -1 result in a zero after a byte shuffle.
1761 r[i] = -1;
1762 }
1763 }
1764 return r;
1765 }
1766
1767 [[nodiscard]] static constexpr numeric_array byte_sll_shuffle_indices(unsigned int rhs) requires(is_i8x16)
1768 {
1769 static_assert(std::endian::native == std::endian::little);
1770
1771 auto r = numeric_array{};
1772 for (auto i = 0; i != 16; ++i) {
1773 if ((i - rhs) >= 0) {
1774 r[i] = narrow_cast<int8_t>(i - rhs);
1775 } else {
1776 // Indices set to -1 result in a zero after a byte shuffle.
1777 r[i] = -1;
1778 }
1779 }
1780 return r;
1781 }
1782
1785 [[nodiscard]] friend constexpr numeric_array shuffle(numeric_array const &lhs, numeric_array const &rhs) requires(is_i8x16)
1786 {
1787 if (!std::is_constant_evaluated()) {
1788 if constexpr (x86_64_v2) {
1789 return numeric_array{_mm_shuffle_epi8(lhs.reg(), rhs.reg())};
1790 }
1791 }
1792
1793 auto r = numeric_array{};
1794
1795 for (size_t i = 0; i != N; ++i) {
1796 if (rhs[i] >= 0) {
1797 r[i] = lhs[rhs[i] & 0xf];
1798 } else {
1799 r[i] = 0;
1800 }
1801 }
1802
1803 return r;
1804 }
1805
1808 [[nodiscard]] friend constexpr numeric_array midpoint(numeric_array const &p1, numeric_array const &p2) noexcept
1809 {
1810 tt_axiom(p1.is_point());
1811 tt_axiom(p2.is_point());
1812 return (p1 + p2) * 0.5f;
1813 }
1814
1817 [[nodiscard]] friend constexpr numeric_array reflect_point(numeric_array const &p, numeric_array const anchor) noexcept
1818 {
1819 tt_axiom(p.is_point());
1820 tt_axiom(anchor.is_point());
1821 return anchor - (p - anchor);
1822 }
1823
1824 template<typename... Columns>
1825 [[nodiscard]] friend constexpr std::array<numeric_array, N> transpose(Columns const &...columns) noexcept
1826 {
1827 static_assert(sizeof...(Columns) == N, "Can only transpose square matrices");
1828
1830
1831 if (is_f32x4 && x86_64_v2 && !std::is_constant_evaluated()) {
1832 auto tmp = f32x4_x64v2_transpose(columns.v...);
1833 for (int i = 0; i != N; ++i) {
1834 r[i] = numeric_array{tmp[i]};
1835 }
1836
1837 } else {
1838 transpose_detail<0, Columns...>(columns..., r);
1839 }
1840
1841 return r;
1842 }
1843
1844 [[nodiscard]] constexpr friend numeric_array composit(numeric_array const &under, numeric_array const &over) noexcept
1845 requires(N == 4 && std::is_floating_point_v<T>)
1846 {
1847 if (over.is_transparent()) {
1848 return under;
1849 }
1850 if (over.is_opaque()) {
1851 return over;
1852 }
1853
1854 ttlet over_alpha = over.wwww();
1855 ttlet under_alpha = under.wwww();
1856
1857 ttlet over_color = over.xyz1();
1858 ttlet under_color = under.xyz1();
1859
1860 ttlet output_color = over_color * over_alpha + under_color * under_alpha * (T{1} - over_alpha);
1861
1862 return output_color / output_color.www1();
1863 }
1864
/** Format an array as text.
 *
 * @param rhs The array to format.
 * @return The elements formatted with `std::format("{}", ...)`, separated by
 *         "; " and wrapped in parentheses.
 */
[[nodiscard]] friend std::string to_string(numeric_array const &rhs) noexcept
{
    auto text = std::string{};
    text += '(';

    for (size_t idx = 0; idx != N; ++idx) {
        if (idx != 0) {
            text += "; ";
        }
        text += std::format("{}", rhs[idx]);
    }

    text += ')';
    return text;
}
1879
1880 friend std::ostream &operator<<(std::ostream &lhs, numeric_array const &rhs)
1881 {
1882 return lhs << to_string(rhs);
1883 }
1884
1889 template<size_t FromElement, size_t ToElement, size_t ZeroMask = 0>
1890 [[nodiscard]] constexpr friend numeric_array insert(numeric_array const &lhs, numeric_array const &rhs)
1891 {
1892 auto r = numeric_array{};
1893
1894 if (!std::is_constant_evaluated()) {
1895 if constexpr (is_f32x4 && x86_64_v2) {
1896 return numeric_array{f32x4_x64v2_insert<FromElement, ToElement, ZeroMask>(lhs.v, rhs.v)};
1897 } else if constexpr (is_u64x2 and x86_64_v2) {
1898 return numeric_array{u64x2_x64v2_insert<FromElement, ToElement, ZeroMask>(lhs.v, rhs.v)};
1899 }
1900 }
1901
1902 for (size_t i = 0; i != N; ++i) {
1903 if ((ZeroMask >> i) & 1) {
1904 r[i] = T{};
1905 } else if (i == ToElement) {
1906 r[i] = rhs[FromElement];
1907 } else {
1908 r[i] = lhs[i];
1909 }
1910 }
1911
1912 return r;
1913 }
1914
1922 template<ssize_t... Elements>
1923 [[nodiscard]] constexpr numeric_array swizzle() const
1924 {
1925 static_assert(sizeof...(Elements) <= N);
1926
1927 if (!std::is_constant_evaluated()) {
1928 if constexpr (is_f32x4 && x86_64_v2) {
1929 return numeric_array{f32x4_x64v2_swizzle<Elements...>(v)};
1930 } else if constexpr (is_i32x4 && x86_64_v2) {
1931 return numeric_array{i32x4_x64v2_swizzle<Elements...>(v)};
1932 } else if constexpr (is_u32x4 && x86_64_v2) {
1933 return numeric_array{u32x4_x64v2_swizzle<Elements...>(v)};
1934 } else if constexpr (is_u64x2 and x86_64_v2) {
1935 return numeric_array{u64x2_x64v2_swizzle<Elements...>(v)};
1936 }
1937 }
1938
1939 auto r = numeric_array{};
1940 swizzle_detail<0, Elements...>(r);
1941 return r;
1942 }
1943
// SWIZZLE(swizzle_name, D, ...) defines a const noexcept member function named
// `swizzle_name` that is only available when D == N and that returns
// swizzle<__VA_ARGS__>().
1944#define SWIZZLE(swizzle_name, D, ...) \
1945 [[nodiscard]] constexpr numeric_array swizzle_name() const noexcept requires(D == N) \
1946 { \
1947 return swizzle<__VA_ARGS__>(); \
1948 }
1949
// The *_GEN1 macros append the last letter of the name and its swizzle argument.
// Letters 0 and 1 map to get_zero and get_one; x, y, z and w map to indices 0-3.
1950#define SWIZZLE_4D_GEN1(name, ...) \
1951 SWIZZLE(name##0, 4, __VA_ARGS__, get_zero) \
1952 SWIZZLE(name##1, 4, __VA_ARGS__, get_one) \
1953 SWIZZLE(name##x, 4, __VA_ARGS__, 0) \
1954 SWIZZLE(name##y, 4, __VA_ARGS__, 1) \
1955 SWIZZLE(name##z, 4, __VA_ARGS__, 2) \
1956 SWIZZLE(name##w, 4, __VA_ARGS__, 3)
1957
// SWIZZLE_4D_GEN2 appends one letter and defers to SWIZZLE_4D_GEN1 for the next.
1958#define SWIZZLE_4D_GEN2(name, ...) \
1959 SWIZZLE_4D_GEN1(name##0, __VA_ARGS__, get_zero) \
1960 SWIZZLE_4D_GEN1(name##1, __VA_ARGS__, get_one) \
1961 SWIZZLE_4D_GEN1(name##x, __VA_ARGS__, 0) \
1962 SWIZZLE_4D_GEN1(name##y, __VA_ARGS__, 1) \
1963 SWIZZLE_4D_GEN1(name##z, __VA_ARGS__, 2) \
1964 SWIZZLE_4D_GEN1(name##w, __VA_ARGS__, 3)
1965
// SWIZZLE_4D_GEN3 appends one letter and defers to SWIZZLE_4D_GEN2 for the rest.
1966#define SWIZZLE_4D_GEN3(name, ...) \
1967 SWIZZLE_4D_GEN2(name##0, __VA_ARGS__, get_zero) \
1968 SWIZZLE_4D_GEN2(name##1, __VA_ARGS__, get_one) \
1969 SWIZZLE_4D_GEN2(name##x, __VA_ARGS__, 0) \
1970 SWIZZLE_4D_GEN2(name##y, __VA_ARGS__, 1) \
1971 SWIZZLE_4D_GEN2(name##z, __VA_ARGS__, 2) \
1972 SWIZZLE_4D_GEN2(name##w, __VA_ARGS__, 3)
1973
// Generate every four-letter swizzle; each invocation supplies the first letter.
1974 SWIZZLE_4D_GEN3(_0, get_zero)
1975 SWIZZLE_4D_GEN3(_1, get_one)
1976 SWIZZLE_4D_GEN3(x, 0)
1977 SWIZZLE_4D_GEN3(y, 1)
1978 SWIZZLE_4D_GEN3(z, 2)
1979 SWIZZLE_4D_GEN3(w, 3)
1980
// Three-letter swizzles (D == 3); the letter w is not offered.
1981#define SWIZZLE_3D_GEN1(name, ...) \
1982 SWIZZLE(name##0, 3, __VA_ARGS__, get_zero) \
1983 SWIZZLE(name##1, 3, __VA_ARGS__, get_one) \
1984 SWIZZLE(name##x, 3, __VA_ARGS__, 0) \
1985 SWIZZLE(name##y, 3, __VA_ARGS__, 1) \
1986 SWIZZLE(name##z, 3, __VA_ARGS__, 2)
1987
1988#define SWIZZLE_3D_GEN2(name, ...) \
1989 SWIZZLE_3D_GEN1(name##0, __VA_ARGS__, get_zero) \
1990 SWIZZLE_3D_GEN1(name##1, __VA_ARGS__, get_one) \
1991 SWIZZLE_3D_GEN1(name##x, __VA_ARGS__, 0) \
1992 SWIZZLE_3D_GEN1(name##y, __VA_ARGS__, 1) \
1993 SWIZZLE_3D_GEN1(name##z, __VA_ARGS__, 2)
1994
1995 SWIZZLE_3D_GEN2(_0, get_zero)
1996 SWIZZLE_3D_GEN2(_1, get_one)
1997 SWIZZLE_3D_GEN2(x, 0)
1998 SWIZZLE_3D_GEN2(y, 1)
1999 SWIZZLE_3D_GEN2(z, 2)
2000
// Two-letter swizzles (D == 2); only the letters 0, 1, x and y are offered.
2001#define SWIZZLE_2D_GEN1(name, ...) \
2002 SWIZZLE(name##0, 2, __VA_ARGS__, get_zero) \
2003 SWIZZLE(name##1, 2, __VA_ARGS__, get_one) \
2004 SWIZZLE(name##x, 2, __VA_ARGS__, 0) \
2005 SWIZZLE(name##y, 2, __VA_ARGS__, 1)
2006
2007 SWIZZLE_2D_GEN1(_0, get_zero)
2008 SWIZZLE_2D_GEN1(_1, get_one)
2009 SWIZZLE_2D_GEN1(x, 0)
2010 SWIZZLE_2D_GEN1(y, 1)
2011
// The generator macros are only needed inside the class body.
2012#undef SWIZZLE
2013#undef SWIZZLE_4D_GEN1
2014#undef SWIZZLE_4D_GEN2
2015#undef SWIZZLE_4D_GEN3
2016#undef SWIZZLE_3D_GEN1
2017#undef SWIZZLE_3D_GEN2
2018#undef SWIZZLE_2D_GEN1
2019
2020private:
// Element storage; the operations in this class read and write it as `v[i]`.
2021 container_type v;
2022
2023 template<int I, typename First, typename... Rest>
2024 friend constexpr void transpose_detail(First const &first, Rest const &...rest, std::array<numeric_array, N> &r) noexcept
2025 {
2026 for (size_t j = 0; j != N; ++j) {
2027 r[j][I] = first[j];
2028 }
2029
2030 if constexpr (sizeof...(Rest) != 0) {
2031 transpose_detail<I + 1, Rest...>(rest..., r);
2032 }
2033 }
2034
2035 template<ssize_t I, ssize_t FirstElement, ssize_t... RestElements>
2036 constexpr void swizzle_detail(numeric_array &r) const noexcept
2037 {
2038 static_assert(I < narrow_cast<ssize_t>(N));
2039 static_assert(FirstElement >= -2 && FirstElement < narrow_cast<ssize_t>(N), "Index out of bounds");
2040
2041 get<I>(r) = get<FirstElement>(*this);
2042 if constexpr (sizeof...(RestElements) != 0) {
2043 swizzle_detail<I + 1, RestElements...>(r);
2044 }
2045 }
2046};
2047
// Short aliases for numeric_array instantiations. The name spells the element
// kind (i = signed integer, u = unsigned integer, f = floating point), the
// element width in bits and, after the 'x', the number of elements.

// 8-bit signed integer elements.
2048using i8x1 = numeric_array<int8_t, 1>;
2049using i8x2 = numeric_array<int8_t, 2>;
2050using i8x4 = numeric_array<int8_t, 4>;
2051using i8x8 = numeric_array<int8_t, 8>;
2052using i8x16 = numeric_array<int8_t, 16>;
2053using i8x32 = numeric_array<int8_t, 32>;
2054using i8x64 = numeric_array<int8_t, 64>;
2055
// 8-bit unsigned integer elements.
2056using u8x1 = numeric_array<uint8_t, 1>;
2057using u8x2 = numeric_array<uint8_t, 2>;
2058using u8x4 = numeric_array<uint8_t, 4>;
2059using u8x8 = numeric_array<uint8_t, 8>;
2060using u8x16 = numeric_array<uint8_t, 16>;
2061using u8x32 = numeric_array<uint8_t, 32>;
2062using u8x64 = numeric_array<uint8_t, 64>;
2063
// 16-bit signed integer elements.
2064using i16x1 = numeric_array<int16_t, 1>;
2065using i16x2 = numeric_array<int16_t, 2>;
2066using i16x4 = numeric_array<int16_t, 4>;
2067using i16x8 = numeric_array<int16_t, 8>;
2068using i16x16 = numeric_array<int16_t, 16>;
2069using i16x32 = numeric_array<int16_t, 32>;
2070
// 16-bit unsigned integer elements.
2071using u16x1 = numeric_array<uint16_t, 1>;
2072using u16x2 = numeric_array<uint16_t, 2>;
2073using u16x4 = numeric_array<uint16_t, 4>;
2074using u16x8 = numeric_array<uint16_t, 8>;
2075using u16x16 = numeric_array<uint16_t, 16>;
2076using u16x32 = numeric_array<uint16_t, 32>;
2077
// 32-bit signed integer elements.
2078using i32x1 = numeric_array<int32_t, 1>;
2079using i32x2 = numeric_array<int32_t, 2>;
2080using i32x4 = numeric_array<int32_t, 4>;
2081using i32x8 = numeric_array<int32_t, 8>;
2082using i32x16 = numeric_array<int32_t, 16>;
2083
// 32-bit unsigned integer elements.
2084using u32x1 = numeric_array<uint32_t, 1>;
2085using u32x2 = numeric_array<uint32_t, 2>;
2086using u32x4 = numeric_array<uint32_t, 4>;
2087using u32x8 = numeric_array<uint32_t, 8>;
2088using u32x16 = numeric_array<uint32_t, 16>;
2089
// float elements.
2090using f32x1 = numeric_array<float, 1>;
2091using f32x2 = numeric_array<float, 2>;
2092using f32x4 = numeric_array<float, 4>;
2093using f32x8 = numeric_array<float, 8>;
2094using f32x16 = numeric_array<float, 16>;
2095
// 64-bit signed integer elements.
2096using i64x1 = numeric_array<int64_t, 1>;
2097using i64x2 = numeric_array<int64_t, 2>;
2098using i64x4 = numeric_array<int64_t, 4>;
2099using i64x8 = numeric_array<int64_t, 8>;
2100
// 64-bit unsigned integer elements.
2101using u64x1 = numeric_array<uint64_t, 1>;
2102using u64x2 = numeric_array<uint64_t, 2>;
2103using u64x4 = numeric_array<uint64_t, 4>;
2104using u64x8 = numeric_array<uint64_t, 8>;
2105
// double elements.
2106using f64x1 = numeric_array<double, 1>;
2107using f64x2 = numeric_array<double, 2>;
2108using f64x4 = numeric_array<double, 4>;
2109using f64x8 = numeric_array<double, 8>;
2110
2111} // namespace tt
2112
2113namespace std {
// std::tuple_size specialization: a numeric_array<T, N> reports N elements.
2114template<class T, std::size_t N>
2115struct tuple_size<tt::numeric_array<T, N>> : std::integral_constant<std::size_t, N> {
2116};
2117
// std::tuple_element specialization: every element of a numeric_array<T, N>
// has type T, whatever the index I.
2118template<std::size_t I, class T, std::size_t N>
2119struct tuple_element<I, tt::numeric_array<T, N>> {
2120 using type = T;
2121};
2122
2123} // namespace std
STL namespace.
Definition numeric_array.hpp:44
friend constexpr T get(numeric_array &&rhs) noexcept
Get an element from the numeric array.
Definition numeric_array.hpp:819
static constexpr numeric_array load(std::byte const *ptr) noexcept
Load a numeric array from memory.
Definition numeric_array.hpp:446
friend constexpr numeric_array neg(numeric_array rhs) noexcept
Negate individual elements.
Definition numeric_array.hpp:880
friend constexpr T get(numeric_array const &rhs) noexcept
Get an element from the numeric array.
Definition numeric_array.hpp:838
friend constexpr T squared_hypot(numeric_array const &rhs) noexcept
Take the squared length of the vector.
Definition numeric_array.hpp:1059
friend constexpr numeric_array cross_2D(numeric_array const &rhs) noexcept
Calculate the 2D normal on a 2D vector.
Definition numeric_array.hpp:1634
friend constexpr numeric_array reflect_point(numeric_array const &p, numeric_array const anchor) noexcept
Find the point on the other side and at the same distance of an anchor-point.
Definition numeric_array.hpp:1817
constexpr void store(std::byte *ptr) const noexcept
Store a numeric array into memory.
Definition numeric_array.hpp:485
friend constexpr numeric_array midpoint(numeric_array const &p1, numeric_array const &p2) noexcept
Find a point at the midpoint between two points.
Definition numeric_array.hpp:1808
friend constexpr numeric_array shift_right(numeric_array const &lhs, unsigned int rhs) noexcept
Shift the elements right.
Definition numeric_array.hpp:1720
friend constexpr T & get(numeric_array &rhs) noexcept
Get an element from the numeric array.
Definition numeric_array.hpp:807
static constexpr numeric_array interleave_lo(numeric_array a, numeric_array b) noexcept
Interleave the first words in both arrays.
Definition numeric_array.hpp:416
friend constexpr T dot(numeric_array const &lhs, numeric_array const &rhs) noexcept
Take a dot product.
Definition numeric_array.hpp:1018
friend constexpr T rcp_hypot(numeric_array const &rhs) noexcept
Take a reciprocal of the length.
Definition numeric_array.hpp:1071
friend constexpr numeric_array shuffle(numeric_array const &lhs, numeric_array const &rhs)
Shuffle a 16x byte array, using the indices from the right-hand-side.
Definition numeric_array.hpp:1785
friend constexpr T hypot(numeric_array const &rhs) noexcept
Take the length of the vector.
Definition numeric_array.hpp:1043
friend constexpr float cross_2D(numeric_array const &lhs, numeric_array const &rhs) noexcept
Calculate the cross-product between two 2D vectors.
Definition numeric_array.hpp:1649
friend constexpr numeric_array normal_2D(numeric_array const &rhs) noexcept
Calculate the 2D unit-normal on a 2D vector.
Definition numeric_array.hpp:1642
constexpr friend numeric_array insert(numeric_array const &lhs, numeric_array const &rhs)
Insert an element from rhs into the result.
Definition numeric_array.hpp:1890
constexpr numeric_array swizzle() const
Swizzle around the elements of the numeric array.
Definition numeric_array.hpp:1923
friend constexpr numeric_array addsub(numeric_array const &lhs, numeric_array const &rhs) noexcept
Add or subtract individual elements.
Definition numeric_array.hpp:1513
static constexpr numeric_array load(std::byte const *ptr) noexcept
Load a numeric array from memory.
Definition numeric_array.hpp:458
friend constexpr numeric_array normalize(numeric_array const &rhs) noexcept
Normalize a vector.
Definition numeric_array.hpp:1089
friend constexpr numeric_array shift_left(numeric_array const &lhs, unsigned int rhs) noexcept
Shift the elements left.
Definition numeric_array.hpp:1704
static constexpr numeric_array load(T const *ptr) noexcept
Load a numeric array from memory.
Definition numeric_array.hpp:469
friend constexpr numeric_array zero(numeric_array rhs) noexcept
Set individual elements to zero.
Definition numeric_array.hpp:856
Definition concepts.hpp:15
Definition concepts.hpp:18
Definition concepts.hpp:21
T back(T... args)
T begin(T... args)
T ceil(T... args)
T data(T... args)
T empty(T... args)
T end(T... args)
T floor(T... args)
T front(T... args)
T max_size(T... args)
T memcpy(T... args)
T round(T... args)
T size(T... args)
T sqrt(T... args)