// Standard container-style member type aliases.
// Each one is forwarded unchanged from the corresponding alias of container_type.
46 using value_type =
typename container_type::value_type;
47 using size_type =
typename container_type::size_type;
48 using difference_type =
typename container_type::difference_type;
49 using reference =
typename container_type::reference;
50 using const_reference =
typename container_type::const_reference;
51 using pointer =
typename container_type::pointer;
52 using const_pointer =
typename container_type::const_pointer;
53 using iterator =
typename container_type::iterator;
54 using const_iterator =
typename container_type::const_iterator;
// Compile-time shape predicates used to select SIMD code paths with `if constexpr`.
// Each is_<type><bits>x<lanes> constant is true only when T is exactly that element
// type and N is exactly that lane count. Every group enumerates the power-of-two
// lane counts from 1 up to 64 bytes of total storage.

// 8-bit signed and unsigned integer lanes.
56 constexpr static bool is_i8x1 = std::is_same_v<T, int8_t> && N == 1;
57 constexpr static bool is_i8x2 = std::is_same_v<T, int8_t> && N == 2;
58 constexpr static bool is_i8x4 = std::is_same_v<T, int8_t> && N == 4;
59 constexpr static bool is_i8x8 = std::is_same_v<T, int8_t> && N == 8;
60 constexpr static bool is_i8x16 = std::is_same_v<T, int8_t> && N == 16;
61 constexpr static bool is_i8x32 = std::is_same_v<T, int8_t> && N == 32;
62 constexpr static bool is_i8x64 = std::is_same_v<T, int8_t> && N == 64;
63 constexpr static bool is_u8x1 = std::is_same_v<T, uint8_t> && N == 1;
64 constexpr static bool is_u8x2 = std::is_same_v<T, uint8_t> && N == 2;
65 constexpr static bool is_u8x4 = std::is_same_v<T, uint8_t> && N == 4;
66 constexpr static bool is_u8x8 = std::is_same_v<T, uint8_t> && N == 8;
67 constexpr static bool is_u8x16 = std::is_same_v<T, uint8_t> && N == 16;
68 constexpr static bool is_u8x32 = std::is_same_v<T, uint8_t> && N == 32;
69 constexpr static bool is_u8x64 = std::is_same_v<T, uint8_t> && N == 64;
// 16-bit signed and unsigned integer lanes.
71 constexpr static bool is_i16x1 = std::is_same_v<T, int16_t> && N == 1;
72 constexpr static bool is_i16x2 = std::is_same_v<T, int16_t> && N == 2;
73 constexpr static bool is_i16x4 = std::is_same_v<T, int16_t> && N == 4;
74 constexpr static bool is_i16x8 = std::is_same_v<T, int16_t> && N == 8;
75 constexpr static bool is_i16x16 = std::is_same_v<T, int16_t> && N == 16;
76 constexpr static bool is_i16x32 = std::is_same_v<T, int16_t> && N == 32;
77 constexpr static bool is_u16x1 = std::is_same_v<T, uint16_t> && N == 1;
78 constexpr static bool is_u16x2 = std::is_same_v<T, uint16_t> && N == 2;
79 constexpr static bool is_u16x4 = std::is_same_v<T, uint16_t> && N == 4;
80 constexpr static bool is_u16x8 = std::is_same_v<T, uint16_t> && N == 8;
81 constexpr static bool is_u16x16 = std::is_same_v<T, uint16_t> && N == 16;
82 constexpr static bool is_u16x32 = std::is_same_v<T, uint16_t> && N == 32;
// 32-bit signed/unsigned integer lanes and single-precision float lanes.
84 constexpr static bool is_i32x1 = std::is_same_v<T, int32_t> && N == 1;
85 constexpr static bool is_i32x2 = std::is_same_v<T, int32_t> && N == 2;
86 constexpr static bool is_i32x4 = std::is_same_v<T, int32_t> && N == 4;
87 constexpr static bool is_i32x8 = std::is_same_v<T, int32_t> && N == 8;
88 constexpr static bool is_i32x16 = std::is_same_v<T, int32_t> && N == 16;
89 constexpr static bool is_u32x1 = std::is_same_v<T, uint32_t> && N == 1;
90 constexpr static bool is_u32x2 = std::is_same_v<T, uint32_t> && N == 2;
91 constexpr static bool is_u32x4 = std::is_same_v<T, uint32_t> && N == 4;
92 constexpr static bool is_u32x8 = std::is_same_v<T, uint32_t> && N == 8;
93 constexpr static bool is_u32x16 = std::is_same_v<T, uint32_t> && N == 16;
94 constexpr static bool is_f32x1 = std::is_same_v<T, float> && N == 1;
95 constexpr static bool is_f32x2 = std::is_same_v<T, float> && N == 2;
96 constexpr static bool is_f32x4 = std::is_same_v<T, float> && N == 4;
97 constexpr static bool is_f32x8 = std::is_same_v<T, float> && N == 8;
98 constexpr static bool is_f32x16 = std::is_same_v<T, float> && N == 16;
// 64-bit signed/unsigned integer lanes and double-precision float lanes.
100 constexpr static bool is_i64x1 = std::is_same_v<T, int64_t> && N == 1;
101 constexpr static bool is_i64x2 = std::is_same_v<T, int64_t> && N == 2;
102 constexpr static bool is_i64x4 = std::is_same_v<T, int64_t> && N == 4;
103 constexpr static bool is_i64x8 = std::is_same_v<T, int64_t> && N == 8;
104 constexpr static bool is_u64x1 = std::is_same_v<T, uint64_t> && N == 1;
105 constexpr static bool is_u64x2 = std::is_same_v<T, uint64_t> && N == 2;
106 constexpr static bool is_u64x4 = std::is_same_v<T, uint64_t> && N == 4;
107 constexpr static bool is_u64x8 = std::is_same_v<T, uint64_t> && N == 8;
108 constexpr static bool is_f64x1 = std::is_same_v<T, double> && N == 1;
109 constexpr static bool is_f64x2 = std::is_same_v<T, double> && N == 2;
110 constexpr static bool is_f64x4 = std::is_same_v<T, double> && N == 4;
111 constexpr static bool is_f64x8 = std::is_same_v<T, double> && N == 8;
121 template<arithmetic U,
size_t M>
124 if (!std::is_constant_evaluated()) {
125 if constexpr (is_f64x2 and other.is_i32x4) {
126#if defined(TT_HAS_SSE2)
130 }
else if constexpr (is_f32x4 and other.is_i32x4) {
131#if defined(TT_HAS_SSE2)
135 }
else if constexpr (is_i32x4 and other.is_f32x4) {
136#if defined(TT_HAS_SSE2)
140 }
else if constexpr (is_i64x4 and other.is_i32x4) {
141#if defined(TT_HAS_SSE4_1)
145 }
else if constexpr (is_i64x4 and other.is_i16x8) {
146#if defined(TT_HAS_SSE4_1)
150 }
else if constexpr (is_i32x4 and other.is_i16x8) {
151#if defined(TT_HAS_SSE4_1)
155 }
else if constexpr (is_i64x2 and other.is_i8x16) {
156#if defined(TT_HAS_SSE4_1)
160 }
else if constexpr (is_i32x4 and other.is_i8x16) {
161#if defined(TT_HAS_SSE4_1)
165 }
else if constexpr (is_i16x8 and other.is_i8x16) {
166#if defined(TT_HAS_SSE4_1)
170 }
else if constexpr (is_f64x4 and other.is_f32x4) {
171#if defined(TT_HAS_AVX)
175 }
else if constexpr (is_f64x4 and other.is_i32x4) {
176#if defined(TT_HAS_AVX)
180 }
else if constexpr (is_f32x4 and other.is_f64x4) {
181#if defined(TT_HAS_AVX)
185 }
else if constexpr (is_i32x4 and other.is_f64x4) {
186#if defined(TT_HAS_AVX)
190 }
else if constexpr (is_i32x8 and other.is_f32x8) {
191#if defined(TT_HAS_AVX)
195 }
else if constexpr (is_f32x8 and other.is_i32x8) {
196#if defined(TT_HAS_AVX)
203 for (
size_t i = 0; i != N; ++i) {
205 if constexpr (std::is_integral_v<T> and std::is_floating_point_v<U>) {
207 v[i] =
static_cast<value_type
>(
std::round(other[i]));
209 v[i] =
static_cast<value_type
>(other[i]);
// Two-source constructor fragment (the signature itself is outside this excerpt;
// presumably it takes `other1` and `other2` as arrays of element type U and size M).
// The scalar fallback below writes other1 into elements [0, M) and other2 into
// elements [M, 2*M); every SIMD branch must produce that same element order.
217 template<arithmetic U,
size_t M>
222 if (!std::is_constant_evaluated()) {
223 if constexpr (is_i16x8 and other1.is_i32x4 and other2.is_i32x4) {
224#if defined(TT_HAS_SSE2)
// The pack intrinsics place their FIRST operand in the low lanes, so other1 goes
// first (unlike the _mm256_set_m128* family further down, which takes hi, lo).
225 *
this =
numeric_array{_mm_packs_epi32(other1.reg(), other2.reg())};
228 }
else if constexpr (is_i8x16 and other1.is_i16x8 and other2.is_i16x8) {
229#if defined(TT_HAS_SSE2)
230 *
this =
numeric_array{_mm_packs_epi16(other1.reg(), other2.reg())};
233 }
else if constexpr (is_u8x16 and other1.is_u16x8 and other2.is_u16x8) {
234#if defined(TT_HAS_SSE2)
// The intrinsic is spelled _mm_packus_epi16; no *_epu16 variant exists.
// NOTE(review): it reads the source lanes as signed 16-bit values, so uint16_t
// inputs above INT16_MAX saturate to 0 - confirm this is acceptable for callers.
235 *
this =
numeric_array{_mm_packus_epi16(other1.reg(), other2.reg())};
238 }
else if constexpr (is_u16x8 and other1.is_u32x4 and other2.is_u32x4) {
239#if defined(TT_HAS_SSE4_1)
// The intrinsic is spelled _mm_packus_epi32; no *_epu32 variant exists.
// NOTE(review): source lanes are read as signed 32-bit values - same caveat as above.
240 *
this =
numeric_array{_mm_packus_epi32(other1.reg(), other2.reg())};
243 }
else if constexpr (is_f32x8 and other1.is_f32x4 and other2.is_f32x4) {
244#if defined(TT_HAS_AVX)
// _mm256_set_m128*(hi, lo): other2 is the high half, other1 the low half.
245 *
this =
numeric_array{_mm256_set_m128(other2.reg(), other1.reg())};
248 }
else if constexpr (is_f64x4 and other1.is_f64x2 and other2.is_f64x2) {
249#if defined(TT_HAS_AVX)
250 *
this =
numeric_array{_mm256_set_m128d(other2.reg(), other1.reg())};
253 }
else if constexpr (
254 std::is_integral_v<T> and std::is_integral_v<U> and (
sizeof(T) * N == 32) and (
sizeof(U) * M == 16)) {
255#if defined(TT_HAS_AVX)
256 *
this =
numeric_array{_mm256_set_m128i(other2.reg(), other1.reg())};
// Scalar fallback: element-wise conversion; floating point sources are rounded
// before being cast to an integral destination type.
262 for (
size_t i = 0; i != N; ++i) {
264 if constexpr (std::is_integral_v<T> and std::is_floating_point_v<U>) {
266 v[i] =
static_cast<value_type
>(
std::round(other1[i]));
268 v[i] =
static_cast<value_type
>(other1[i]);
270 }
else if (i < M * 2) {
271 if constexpr (std::is_integral_v<T> and std::is_floating_point_v<U>) {
273 v[i] =
static_cast<value_type
>(
std::round(other2[i - M]));
275 v[i] =
static_cast<value_type
>(other2[i - M]);
295 "Expecting the std:initializer_list size to be <= to the size of the numeric array");
306 requires(
sizeof...(Rest) + 2 <= N)
307 [[nodiscard]]
constexpr numeric_array(T
const &first, T
const &second, Rest
const &...rest) noexcept :
312 [[nodiscard]]
static constexpr numeric_array broadcast(T rhs)
noexcept
315 for (
size_t i = 0; i != N; ++i) {
334#if defined(TT_HAS_SSE2)
335 [[nodiscard]] __m128i reg()
const noexcept requires(std::is_integral_v<T> and
sizeof(T) * N == 16)
337 return _mm_loadu_si128(
reinterpret_cast<__m128i
const *
>(v.
data()));
341#if defined(TT_HAS_SSE2)
342 [[nodiscard]] __m128 reg()
const noexcept requires(is_f32x4)
344 return _mm_loadu_ps(v.
data());
348#if defined(TT_HAS_SSE2)
349 [[nodiscard]] __m128d reg()
const noexcept requires(is_f64x2)
351 return _mm_loadu_pd(v.
data());
355#if defined(TT_HAS_SSE2)
356 [[nodiscard]]
explicit numeric_array(__m128i
const &rhs)
noexcept requires(std::is_integral_v<T> and
sizeof(T) * N == 16)
358 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(v.
data()), rhs);
362#if defined(TT_HAS_SSE2)
363 [[nodiscard]]
explicit numeric_array(__m128
const &rhs)
noexcept requires(is_f32x4)
365 _mm_storeu_ps(v.
data(), rhs);
369#if defined(TT_HAS_SSE2)
370 [[nodiscard]]
explicit numeric_array(__m128d
const &rhs)
noexcept requires(is_f64x2)
372 _mm_storeu_pd(v.
data(), rhs);
376#if defined(TT_HAS_SSE2)
377 numeric_array &operator=(__m128i
const &rhs)
noexcept requires(std::is_integral_v<T> and
sizeof(T) * N == 16)
379 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(v.
data()), rhs);
384#if defined(TT_HAS_SSE2)
385 numeric_array &operator=(__m128
const &rhs)
noexcept requires(is_f32x4)
387 _mm_storeu_ps(v.
data(), rhs);
392#if defined(TT_HAS_SSE2)
393 numeric_array &operator=(__m128d
const &rhs)
noexcept requires(is_f64x2)
395 _mm_storeu_pd(v.
data(), rhs);
400#if defined(TT_HAS_AVX)
401 [[nodiscard]] __m256i reg()
const noexcept requires(std::is_integral_v<T> and
sizeof(T) * N == 32)
403 return _mm256_loadu_si256(
reinterpret_cast<__m256i
const *
>(v.
data()));
407#if defined(TT_HAS_AVX)
408 [[nodiscard]] __m256 reg()
const noexcept requires(is_f32x8)
410 return _mm256_loadu_ps(v.
data());
414#if defined(TT_HAS_AVX)
415 [[nodiscard]] __m256d reg()
const noexcept requires(is_f64x4)
417 return _mm256_loadu_pd(v.
data());
421#if defined(TT_HAS_AVX)
422 [[nodiscard]]
explicit numeric_array(__m256i
const &rhs)
noexcept requires(std::is_integral_v<T> and
sizeof(T) * N == 32)
424 _mm256_storeu_si256(
reinterpret_cast<__m256i *
>(v.
data()), rhs);
428#if defined(TT_HAS_AVX)
429 [[nodiscard]]
explicit numeric_array(__m256
const &rhs)
noexcept requires(is_f32x8)
431 _mm256_storeu_ps(v.
data(), rhs);
435#if defined(TT_HAS_AVX)
436 [[nodiscard]]
explicit numeric_array(__m256d
const &rhs)
noexcept requires(is_f64x4)
438 _mm256_storeu_pd(v.
data(), rhs);
442#if defined(TT_HAS_AVX)
443 numeric_array &operator=(__m256i
const &rhs)
noexcept requires(std::is_integral_v<T> and
sizeof(T) * N == 32)
445 _mm256_storeu_si256(
reinterpret_cast<__m256i *
>(v.
data()), rhs);
450#if defined(TT_HAS_AVX)
451 numeric_array &operator=(__m256
const &rhs)
noexcept requires(is_f32x8)
453 _mm256_storeu_ps(v.
data(), rhs);
458#if defined(TT_HAS_AVX)
459 numeric_array &operator=(__m256d
const &rhs)
noexcept requires(is_f64x4)
461 _mm256_storeu_pd(v.
data(), rhs);
// bit_cast<Other>(rhs): reinterpret the bits of `rhs` as another numeric_array type.
// At run time the SSE2 cast intrinsics are used for the float/double/integer
// combinations listed below; the final statement falls back to std::bit_cast.
466 template<
typename Other>
467 [[nodiscard]]
constexpr friend Other bit_cast(
numeric_array const &rhs)
noexcept
470 using rhs_value_type =
typename std::remove_cvref_t<
decltype(rhs)>::value_type;
472 if (not std::is_constant_evaluated()) {
473 if constexpr (Other::is_f32x4 and std::is_integral_v<rhs_value_type>) {
474#if defined(TT_HAS_SSE2)
475 return Other{_mm_castsi128_ps(rhs.reg())};
477 }
else if constexpr (Other::is_f32x4 and rhs.is_f64x2) {
478#if defined(TT_HAS_SSE2)
479 return Other{_mm_castpd_ps(rhs.reg())};
481 }
else if constexpr (Other::is_f64x2 and std::is_integral_v<rhs_value_type>) {
482#if defined(TT_HAS_SSE2)
483 return Other{_mm_castsi128_pd(rhs.reg())};
485 }
else if constexpr (Other::is_f64x2 and rhs.is_f32x4) {
486#if defined(TT_HAS_SSE2)
487 return Other{_mm_castps_pd(rhs.reg())};
489 }
// `Other::value_type` is a dependent type used as a template argument, so it
// needs `typename`; without it conforming compilers parse it as a value.
else if constexpr (std::is_integral_v<typename Other::value_type> and rhs.is_f32x4) {
490#if defined(TT_HAS_SSE2)
491 return Other{_mm_castps_si128(rhs.reg())};
493 }
else if constexpr (std::is_integral_v<typename Other::value_type> and rhs.is_f64x2) {
494#if defined(TT_HAS_SSE2)
495 return Other{_mm_castpd_si128(rhs.reg())};
497 }
else if constexpr (std::is_integral_v<typename Other::value_type> and std::is_integral_v<rhs_value_type>) {
498#if defined(TT_HAS_SSE2)
499 return Other{rhs.reg()};
503 return std::bit_cast<Other>(rhs);
510 if (not std::is_constant_evaluated()) {
511 if constexpr (x86_64_v2 and is_f64x2) {
513 }
else if constexpr (x86_64_v2 and is_f32x4) {
515 }
else if constexpr (x86_64_v2 and is_i64x2) {
517 }
else if constexpr (x86_64_v2 and is_i32x4) {
519 }
else if constexpr (x86_64_v2 and is_i16x8) {
521 }
else if constexpr (x86_64_v2 and is_i8x16) {
527 for (
size_t i = 0; i != N; ++i) {
528 r[i] = (i % 2 == 0) ? a[i / 2] : b[i / 2];
568 constexpr void store(std::byte *ptr)
const noexcept
576 constexpr void store(std::byte *ptr)
const noexcept
578 store<sizeof(*this)>(ptr);
581 [[nodiscard]]
constexpr T
const &operator[](
size_t i)
const noexcept
583 static_assert(std::endian::native == std::endian::little,
"Indices need to be reversed on big endian machines");
588 [[nodiscard]]
constexpr T &operator[](
size_t i)
noexcept
590 static_assert(std::endian::native == std::endian::little,
"Indices need to be reversed on big endian machines");
595 [[nodiscard]]
constexpr reference front() noexcept
600 [[nodiscard]]
constexpr const_reference front() const noexcept
605 [[nodiscard]]
constexpr reference back() noexcept
610 [[nodiscard]]
constexpr const_reference back() const noexcept
615 [[nodiscard]]
constexpr pointer data() noexcept
620 [[nodiscard]]
constexpr const_pointer data() const noexcept
625 [[nodiscard]]
constexpr iterator begin() noexcept
630 [[nodiscard]]
constexpr const_iterator begin() const noexcept
635 [[nodiscard]]
constexpr const_iterator cbegin() const noexcept
640 [[nodiscard]]
constexpr iterator end() noexcept
645 [[nodiscard]]
constexpr const_iterator end() const noexcept
650 [[nodiscard]]
constexpr const_iterator cend() const noexcept
655 [[nodiscard]]
constexpr bool empty() const noexcept
660 [[nodiscard]]
constexpr size_type size() const noexcept
665 [[nodiscard]]
constexpr size_type max_size() const noexcept
670 constexpr bool is_point() const noexcept
672 return v.
back() != T{};
675 constexpr bool is_vector() const noexcept
677 return v.
back() == T{};
680 constexpr bool is_opaque() const noexcept
685 constexpr bool is_transparent() const noexcept
690 [[nodiscard]]
constexpr T
const &x() const noexcept requires(N >= 1)
692 return std::get<0>(v);
695 [[nodiscard]]
constexpr T
const &y() const noexcept requires(N >= 2)
697 return std::get<1>(v);
700 [[nodiscard]]
constexpr T
const &z() const noexcept requires(N >= 3)
702 return std::get<2>(v);
705 [[nodiscard]]
constexpr T
const &w() const noexcept requires(N >= 4)
707 return std::get<3>(v);
710 [[nodiscard]]
constexpr T &x() noexcept requires(N >= 1)
712 return std::get<0>(v);
715 [[nodiscard]]
constexpr T &y() noexcept requires(N >= 2)
717 return std::get<1>(v);
720 [[nodiscard]]
constexpr T &z() noexcept requires(N >= 3)
722 return std::get<2>(v);
725 [[nodiscard]]
constexpr T &w() noexcept requires(N >= 4)
727 return std::get<3>(v);
730 [[nodiscard]]
constexpr T
const &r() const noexcept requires(N >= 1)
732 return std::get<0>(v);
735 [[nodiscard]]
constexpr T
const &g() const noexcept requires(N >= 2)
737 return std::get<1>(v);
740 [[nodiscard]]
constexpr T
const &b() const noexcept requires(N >= 3)
742 return std::get<2>(v);
745 [[nodiscard]]
constexpr T
const &a() const noexcept requires(N >= 4)
747 return std::get<3>(v);
750 [[nodiscard]]
constexpr T &r() noexcept requires(N >= 1)
752 return std::get<0>(v);
755 [[nodiscard]]
constexpr T &g() noexcept requires(N >= 2)
757 return std::get<1>(v);
760 [[nodiscard]]
constexpr T &b() noexcept requires(N >= 3)
762 return std::get<2>(v);
765 [[nodiscard]]
constexpr T &a() noexcept requires(N >= 4)
767 return std::get<3>(v);
770 [[nodiscard]]
constexpr T
const &width() const noexcept requires(N >= 1)
772 return std::get<0>(v);
775 [[nodiscard]]
constexpr T
const &height() const noexcept requires(N >= 2)
777 return std::get<1>(v);
780 [[nodiscard]]
constexpr T
const &depth() const noexcept requires(N >= 3)
782 return std::get<2>(v);
785 [[nodiscard]]
constexpr T &width() noexcept requires(N >= 1)
787 return std::get<0>(v);
790 [[nodiscard]]
constexpr T &height() noexcept requires(N >= 2)
792 return std::get<1>(v);
795 [[nodiscard]]
constexpr T &depth() noexcept requires(N >= 3)
797 return std::get<2>(v);
800 constexpr numeric_array &operator<<=(
unsigned int rhs)
noexcept
802 return *
this = *
this << rhs;
805 constexpr numeric_array &operator>>=(
unsigned int rhs)
noexcept
807 return *
this = *
this >> rhs;
810 constexpr numeric_array &operator|=(numeric_array
const &rhs)
noexcept
812 return *
this = *
this | rhs;
815 constexpr numeric_array &operator|=(T
const &rhs)
noexcept
817 return *
this = *
this | rhs;
820 constexpr numeric_array &operator&=(numeric_array
const &rhs)
noexcept
822 return *
this = *
this & rhs;
825 constexpr numeric_array &operator&=(T
const &rhs)
noexcept
827 return *
this = *
this & rhs;
830 constexpr numeric_array &operator^=(numeric_array
const &rhs)
noexcept
832 return *
this = *
this ^ rhs;
835 constexpr numeric_array &operator^=(T
const &rhs)
noexcept
837 return *
this = *
this ^ rhs;
840 constexpr numeric_array &operator+=(numeric_array
const &rhs)
noexcept
842 return *
this = *
this + rhs;
845 constexpr numeric_array &operator+=(T
const &rhs)
noexcept
847 return *
this = *
this + rhs;
850 constexpr numeric_array &operator-=(numeric_array
const &rhs)
noexcept
852 return *
this = *
this - rhs;
855 constexpr numeric_array &operator-=(T
const &rhs)
noexcept
857 return *
this = *
this - rhs;
860 constexpr numeric_array &operator*=(numeric_array
const &rhs)
noexcept
862 return *
this = *
this * rhs;
865 constexpr numeric_array &operator*=(T
const &rhs)
noexcept
867 return *
this = *
this * rhs;
870 constexpr numeric_array &operator/=(numeric_array
const &rhs)
noexcept
872 return *
this = *
this / rhs;
875 constexpr numeric_array &operator/=(T
const &rhs)
noexcept
877 return *
this = *
this / rhs;
880 constexpr numeric_array &operator%=(numeric_array
const &rhs)
noexcept
882 return *
this = *
this % rhs;
885 constexpr numeric_array &operator%=(T
const &rhs)
noexcept
887 return *
this = *
this % rhs;
890 constexpr static ssize_t get_zero = -1;
891 constexpr static ssize_t get_one = -2;
900 static_assert(I < N,
"Index out of bounds");
901 return std::get<I>(rhs.v);
912 static_assert(std::endian::native == std::endian::little,
"Indices need to be reversed on big endian machines");
913 static_assert(I >= -2 && I < narrow_cast<ssize_t>(N),
"Index out of bounds");
914 if constexpr (I == get_zero) {
916 }
else if constexpr (I == get_one) {
919 return std::get<I>(rhs.v);
931 static_assert(std::endian::native == std::endian::little,
"Indices need to be reversed on big endian machines");
932 static_assert(I >= -2 && I < narrow_cast<ssize_t>(N),
"Index out of bounds");
933 if constexpr (I == get_zero) {
935 }
else if constexpr (I == get_one) {
938 return std::get<I>(rhs.v);
946 template<
size_t Mask = ~size_t{0}>
949 if (!std::is_constant_evaluated()) {
950 if constexpr (is_f32x4 && x86_64_v2) {
956 for (
size_t i = 0; i != N; ++i) {
957 if (
static_cast<bool>((Mask >> i) & 1)) {
970 template<
size_t Mask = ~size_t{0}>
973 if (!std::is_constant_evaluated()) {
974 if constexpr (is_f32x4 && x86_64_v2) {
980 for (
size_t i = 0; i != N; ++i) {
981 if (
static_cast<bool>((Mask >> i) & 1)) {
993 for (
size_t i = 0; i != N; ++i) {
995 r.v[i] = T{} - rhs.v[i];
1000 [[nodiscard]]
friend constexpr numeric_array abs(numeric_array
const &rhs)
noexcept
1002 auto neg_rhs = -rhs;
1004 auto r = numeric_array{};
1005 for (
size_t i = 0; i != N; ++i) {
1006 r.v[i] = rhs.v[i] < T{} ? neg_rhs.v[i] : rhs.v[i];
1011 [[nodiscard]]
friend constexpr numeric_array rcp(numeric_array
const &rhs)
noexcept
1013 if (!std::is_constant_evaluated()) {
1014 if constexpr (is_f32x4 and x86_64_v2) {
1015 return numeric_array{_mm_rcp_ps(rhs.reg())};
1019 auto r = numeric_array{};
1020 for (
size_t i = 0; i != N; ++i) {
1021 r[i] = 1.0f / rhs.v[i];
1026 [[nodiscard]]
friend constexpr numeric_array sqrt(numeric_array
const &rhs)
noexcept
1028 if (!std::is_constant_evaluated()) {
1029 if constexpr (is_f32x4 and x86_64_v2) {
1030 return numeric_array{_mm_sqrt_ps(rhs.reg())};
1034 auto r = numeric_array{};
1035 for (
size_t i = 0; i != N; ++i) {
// rcp_sqrt(rhs): reciprocal square root (per the name and the SIMD branch).
// Uses the packed SSE instruction for f32x4 at run time; otherwise a per-element loop.
1041 [[nodiscard]]
friend constexpr numeric_array rcp_sqrt(numeric_array
const &rhs)
noexcept
1043 if (!std::is_constant_evaluated()) {
1044 if constexpr (is_f32x4 and x86_64_v2) {
// The packed reciprocal-square-root intrinsic is spelled _mm_rsqrt_ps.
1045 return numeric_array{_mm_rsqrt_ps(rhs.reg())};
1049 auto r = numeric_array{};
1050 for (
size_t i = 0; i != N; ++i) {
1056 [[nodiscard]]
friend constexpr numeric_array floor(numeric_array
const &rhs)
noexcept
1058 if (!std::is_constant_evaluated()) {
1059 if constexpr (is_f32x4 and x86_64_v2) {
1060 return numeric_array{_mm_floor_ps(rhs.reg())};
1064 auto r = numeric_array{};
1065 for (
size_t i = 0; i != N; ++i) {
1071 [[nodiscard]]
friend constexpr numeric_array ceil(numeric_array
const &rhs)
noexcept
1073 if (!std::is_constant_evaluated()) {
1074 if constexpr (is_f32x4 and x86_64_v2) {
1075 return numeric_array{_mm_ceil_ps(rhs.reg())};
1079 auto r = numeric_array{};
1080 for (
size_t i = 0; i != N; ++i) {
1086 [[nodiscard]]
friend constexpr numeric_array round(numeric_array
const &rhs)
noexcept
1088 if (!std::is_constant_evaluated()) {
1089 if constexpr (is_f32x4 and x86_64_v2) {
1090 return numeric_array{_mm_round_ps(rhs.reg(), _MM_FROUND_CUR_DIRECTION)};
1094 auto r = numeric_array{};
1095 for (
size_t i = 0; i != N; ++i) {
1108 template<
size_t Mask>
1111 if (!std::is_constant_evaluated()) {
1112 if constexpr (is_f32x4 and x86_64_v2) {
1113 return f32x4_x64v2_dot<Mask>(lhs.v, rhs.v);
1118 for (
size_t i = 0; i != N; ++i) {
1119 if (
static_cast<bool>(Mask & (1_uz << i))) {
1120 r += lhs.v[i] * rhs.v[i];
1133 template<
size_t Mask>
1136 if (is_f32x4 && x86_64_v2 && !std::is_constant_evaluated()) {
1137 return f32x4_x64v2_hypot<Mask>(rhs.v);
1149 template<
size_t Mask>
1152 return dot<Mask>(rhs, rhs);
1161 template<
size_t Mask>
1164 if (is_f32x4 && x86_64_v2 && !std::is_constant_evaluated()) {
1165 return f32x4_x64v2_rcp_hypot<Mask>(rhs.v);
1168 return 1.0f / hypot<Mask>(rhs);
1179 template<
size_t Mask>
1182 tt_axiom(rhs.is_vector());
1184 if (is_f32x4 && x86_64_v2 && !std::is_constant_evaluated()) {
1188 ttlet rcp_hypot_ = rcp_hypot<Mask>(rhs);
1191 for (
size_t i = 0; i != N; ++i) {
1192 if (
static_cast<bool>(Mask & (1_uz << i))) {
1193 r.v[i] = rhs.v[i] * rcp_hypot_;
1200 requires(N <=
sizeof(
unsigned int) * CHAR_BIT)
1202 if (!std::is_constant_evaluated()) {
1203 if constexpr (is_f32x4 and x86_64_v2) {
1204 return static_cast<unsigned int>(_mm_movemask_ps(_mm_cmpeq_ps(lhs.reg(), rhs.reg())));
1209 for (
size_t i = 0; i != N; ++i) {
1210 r |=
static_cast<unsigned int>(lhs.v[i] == rhs.v[i]) << i;
// ne(lhs, rhs): returns a bit mask in which bit i is set when lhs[i] != rhs[i].
// Only available when N fits in the bits of an unsigned int.
1215 [[nodiscard]]
friend constexpr unsigned int ne(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1216 requires(N <=
sizeof(
unsigned int) * CHAR_BIT)
1218 if (!std::is_constant_evaluated()) {
1219 if constexpr (is_f32x4 and x86_64_v2) {
// The packed "not equal" comparison intrinsic is spelled _mm_cmpneq_ps.
1220 return static_cast<unsigned int>(_mm_movemask_ps(_mm_cmpneq_ps(lhs.reg(), rhs.reg())));
1224 for (
size_t i = 0; i != N; ++i) {
1225 r |=
static_cast<unsigned int>(lhs.v[i] != rhs.v[i]) << i;
1230 [[nodiscard]]
friend constexpr unsigned int lt(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1231 requires(N <=
sizeof(
unsigned int) * CHAR_BIT)
1233 if (!std::is_constant_evaluated()) {
1234 if constexpr (is_f32x4 and x86_64_v2) {
1235 return static_cast<unsigned int>(_mm_movemask_ps(_mm_cmplt_ps(lhs.reg(), rhs.reg())));
1239 for (
size_t i = 0; i != N; ++i) {
1240 r |=
static_cast<unsigned int>(lhs.v[i] < rhs.v[i]) << i;
1245 [[nodiscard]]
friend constexpr unsigned int gt(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1246 requires(N <=
sizeof(
unsigned int) * CHAR_BIT)
1248 if (!std::is_constant_evaluated()) {
1249 if constexpr (is_f32x4 and x86_64_v2) {
1250 return static_cast<unsigned int>(_mm_movemask_ps(_mm_cmpgt_ps(lhs.reg(), rhs.reg())));
1254 for (
size_t i = 0; i != N; ++i) {
1255 r |=
static_cast<unsigned int>(lhs.v[i] > rhs.v[i]) << i;
1260 [[nodiscard]]
friend constexpr unsigned int le(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1261 requires(N <=
sizeof(
unsigned int) * CHAR_BIT)
1263 if (!std::is_constant_evaluated()) {
1264 if constexpr (is_f32x4 and x86_64_v2) {
1265 return static_cast<unsigned int>(_mm_movemask_ps(_mm_cmple_ps(lhs.reg(), rhs.reg())));
1269 for (
size_t i = 0; i != N; ++i) {
1270 r |=
static_cast<unsigned int>(lhs.v[i] <= rhs.v[i]) << i;
1275 [[nodiscard]]
friend constexpr unsigned int ge(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1276 requires(N <=
sizeof(
unsigned int) * CHAR_BIT)
1278 if (!std::is_constant_evaluated()) {
1279 if constexpr (is_f32x4 and x86_64_v2) {
1280 return static_cast<unsigned int>(_mm_movemask_ps(_mm_cmpge_ps(lhs.reg(), rhs.reg())));
1284 for (
size_t i = 0; i != N; ++i) {
1285 r |=
static_cast<unsigned int>(lhs.v[i] >= rhs.v[i]) << i;
1290 [[nodiscard]]
static constexpr value_type zero_mask() noexcept
1292 std::array<
unsigned char,
sizeof(value_type)> bytes;
1293 for (
size_t i = 0; i != bytes.size(); ++i) {
1296 return std::bit_cast<value_type>(bytes);
1299 [[nodiscard]]
static constexpr value_type ones_mask() noexcept
1301 static_assert(CHAR_BIT == 8);
1303 std::array<
unsigned char,
sizeof(value_type)> bytes;
1304 for (
size_t i = 0; i != bytes.size(); ++i) {
1307 return std::bit_cast<value_type>(bytes);
// gt_mask(lhs, rhs): per-element "greater than" producing a mask array; each element
// becomes ones_mask() where lhs[i] > rhs[i] and zero_mask() otherwise.
1310 [[nodiscard]]
friend constexpr numeric_array gt_mask(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1312 if (not std::is_constant_evaluated()) {
// All intrinsics below operate on 128-bit registers, so each guard must name a
// 16-byte array shape (i64x2, i32x4, i16x8); other shapes use the scalar loop.
1313 if constexpr (is_f32x4 and x86_64_v2) {
1314 return numeric_array{_mm_cmpgt_ps(lhs.reg(), rhs.reg())};
1315 }
else if constexpr (is_i64x2 and x86_64_v2) {
1316 return numeric_array{_mm_cmpgt_epi64(lhs.reg(), rhs.reg())};
1317 }
else if constexpr (is_i32x4 and x86_64_v2) {
1318 return numeric_array{_mm_cmpgt_epi32(lhs.reg(), rhs.reg())};
1319 }
else if constexpr (is_i16x8 and x86_64_v2) {
1320 return numeric_array{_mm_cmpgt_epi16(lhs.reg(), rhs.reg())};
1324 constexpr value_type
zero = zero_mask();
1325 constexpr value_type ones = ones_mask();
1327 auto r = numeric_array{};
1328 for (
size_t i = 0; i != N; ++i) {
1329 r[i] = lhs.v[i] > rhs.v[i] ? ones :
zero;
1335 [[nodiscard]]
friend constexpr numeric_array ge_mask(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1337 if (not std::is_constant_evaluated()) {
1338 if constexpr (is_f32x4 and x86_64_v2) {
1339 return numeric_array{_mm_cmpge_ps(lhs.reg(), rhs.reg())};
1343 constexpr value_type
zero = zero_mask();
1344 constexpr value_type ones = ones_mask();
1346 auto r = numeric_array{};
1347 for (
size_t i = 0; i != N; ++i) {
1348 r[i] = lhs.v[i] >= rhs.v[i] ? ones :
zero;
1353 [[nodiscard]]
friend constexpr bool operator==(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1355 if (!std::is_constant_evaluated()) {
1356 if constexpr (is_f32x4 && x86_64_v2) {
1358 return f32x4_x64v2_eq(lhs.v, rhs.v);
1363 for (
size_t i = 0; i != N; ++i) {
1364 r &= (lhs.v[i] == rhs.v[i]);
1369 [[nodiscard]]
friend constexpr bool operator!=(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1371 return !(lhs == rhs);
// operator<<(lhs, rhs): shift every element of lhs left by rhs bits.
// Each SIMD branch must use the shift whose lane width matches the element width,
// otherwise bits leak from one element into its neighbour.
1374 [[nodiscard]]
friend constexpr numeric_array operator<<(numeric_array
const &lhs,
unsigned int rhs)
noexcept
1376 if (not std::is_constant_evaluated()) {
1377 if constexpr (x86_64_v2 and is_i64x2) {
1378 return numeric_array{_mm_slli_epi64(lhs.reg(), rhs)};
1379 }
else if constexpr (x86_64_v2 and is_i32x4) {
1380 return numeric_array{_mm_slli_epi32(lhs.reg(), rhs)};
1381 }
else if constexpr (x86_64_v2 and is_i16x8) {
// 16-bit elements need the 16-bit lane shift.
1382 return numeric_array{_mm_slli_epi16(lhs.reg(), rhs)};
1383 }
else if constexpr (x86_64_v2 and is_u64x2) {
1384 return numeric_array{_mm_slli_epi64(lhs.reg(), rhs)};
1385 }
else if constexpr (x86_64_v2 and is_u32x4) {
1386 return numeric_array{_mm_slli_epi32(lhs.reg(), rhs)};
1387 }
else if constexpr (x86_64_v2 and is_u16x8) {
// 16-bit elements need the 16-bit lane shift.
1388 return numeric_array{_mm_slli_epi16(lhs.reg(), rhs)};
1392 auto r = numeric_array{};
1393 for (
size_t i = 0; i != N; ++i) {
1394 r.v[i] = lhs.v[i] << rhs;
1399 [[nodiscard]]
friend constexpr numeric_array operator>>(numeric_array
const &lhs,
unsigned int rhs)
noexcept
1401 if (not std::is_constant_evaluated()) {
1402 if constexpr (x86_64_v2 and is_i32x4) {
1403 return numeric_array{_mm_srai_epi32(lhs.reg(), rhs)};
1404 }
else if constexpr (x86_64_v2 and is_i16x8) {
1405 return numeric_array{_mm_srai_epi16(lhs.reg(), rhs)};
1406 }
else if constexpr (x86_64_v2 and is_u64x2) {
1407 return numeric_array{_mm_srli_epi64(lhs.reg(), rhs)};
1408 }
else if constexpr (x86_64_v2 and is_u32x4) {
1409 return numeric_array{_mm_srli_epi32(lhs.reg(), rhs)};
1410 }
else if constexpr (x86_64_v2 and is_u16x8) {
1411 return numeric_array{_mm_srli_epi16(lhs.reg(), rhs)};
1415 auto r = numeric_array{};
1416 for (
size_t i = 0; i != N; ++i) {
1417 r.v[i] = lhs.v[i] >> rhs;
// operator|(lhs, rhs): element-wise bitwise OR of two arrays.
1422 [[nodiscard]]
friend constexpr numeric_array operator|(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1424 if (!std::is_constant_evaluated()) {
// _mm_or_si128 works on a 128-bit register, so only 16-byte integral arrays may
// take this branch; every other shape uses the scalar loop below.
1425 if constexpr (std::is_integral_v<T> and sizeof(T) * N == 16 and x86_64_v2) {
1426 return numeric_array{_mm_or_si128(lhs.reg(), rhs.reg())};
1429 auto r = numeric_array{};
1430 for (
size_t i = 0; i != N; ++i) {
1431 r.v[i] = lhs.v[i] | rhs.v[i];
1436 [[nodiscard]]
friend constexpr numeric_array operator|(numeric_array
const &lhs, T
const &rhs)
noexcept
1438 return lhs | broadcast(rhs);
1441 [[nodiscard]]
friend constexpr numeric_array operator|(T
const &lhs, numeric_array
const &rhs)
noexcept
1443 return broadcast(lhs) | rhs;
// operator&(lhs, rhs): element-wise bitwise AND of two arrays.
1446 [[nodiscard]]
friend constexpr numeric_array operator&(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1448 if (!std::is_constant_evaluated()) {
// _mm_and_si128 works on a 128-bit register, so only 16-byte integral arrays may
// take this branch; every other shape uses the scalar loop below.
1449 if constexpr (std::is_integral_v<T> and sizeof(T) * N == 16 and x86_64_v2) {
1450 return numeric_array{_mm_and_si128(lhs.reg(), rhs.reg())};
1453 auto r = numeric_array{};
1454 for (
size_t i = 0; i != N; ++i) {
1455 r.v[i] = lhs.v[i] & rhs.v[i];
1460 [[nodiscard]]
friend constexpr numeric_array operator&(numeric_array
const &lhs, T
const &rhs)
noexcept
1462 return lhs & broadcast(rhs);
1465 [[nodiscard]]
friend constexpr numeric_array operator&(T
const &lhs, numeric_array
const &rhs)
noexcept
1467 return broadcast(lhs) & rhs;
// operator^(lhs, rhs): element-wise bitwise XOR of two arrays.
1470 [[nodiscard]]
friend constexpr numeric_array operator^(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1472 if (!std::is_constant_evaluated()) {
// _mm_xor_si128 works on a 128-bit register, so only 16-byte integral arrays may
// take this branch; every other shape uses the scalar loop below.
1473 if constexpr (std::is_integral_v<T> and sizeof(T) * N == 16 and x86_64_v2) {
1474 return numeric_array{_mm_xor_si128(lhs.reg(), rhs.reg())};
1477 auto r = numeric_array{};
1478 for (
size_t i = 0; i != N; ++i) {
1479 r.v[i] = lhs.v[i] ^ rhs.v[i];
1484 [[nodiscard]]
friend constexpr numeric_array operator^(numeric_array
const &lhs, T
const &rhs)
noexcept
1486 return lhs ^ broadcast(rhs);
1489 [[nodiscard]]
friend constexpr numeric_array operator^(T
const &lhs, numeric_array
const &rhs)
noexcept
1491 return broadcast(lhs) ^ rhs;
1494 [[nodiscard]]
friend constexpr numeric_array operator+(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1496 if (!std::is_constant_evaluated()) {
1497 if constexpr (x86_64_v2_5 and lhs.is_f32x8 and rhs.is_f32x8) {
1498 return numeric_array{_mm256_add_ps(lhs.reg(), rhs.reg())};
1502 auto r = numeric_array{};
1503 for (
size_t i = 0; i != N; ++i) {
1504 r.v[i] = lhs.v[i] + rhs.v[i];
1509 [[nodiscard]]
friend constexpr numeric_array operator+(numeric_array
const &lhs, T
const &rhs)
noexcept
1511 return lhs + broadcast(rhs);
1514 [[nodiscard]]
friend constexpr numeric_array operator+(T
const &lhs, numeric_array
const &rhs)
noexcept
1516 return broadcast(lhs) + rhs;
1519 [[nodiscard]]
friend constexpr numeric_array hadd(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1521 if (!std::is_constant_evaluated()) {
1522 if constexpr (is_f64x2 and x86_64_v2) {
1523 return numeric_array{_mm_hadd_pd(lhs.reg(), rhs.reg())};
1524 }
else if constexpr (is_f32x4 and x86_64_v2) {
1525 return numeric_array{_mm_hadd_ps(lhs.reg(), rhs.reg())};
1526 }
else if constexpr (is_i32x4 and x86_64_v2) {
1527 return numeric_array{_mm_hadd_epi32(lhs.reg(), rhs.reg())};
1528 }
else if constexpr (is_i16x8 and x86_64_v2) {
1529 return numeric_array{_mm_hadd_epi16(lhs.reg(), rhs.reg())};
1530 }
else if constexpr (is_i8x16 and x86_64_v2) {
1531 return numeric_array{_mm_hadd_epi8(lhs.reg(), rhs.reg())};
1535 tt_axiom(N % 2 == 0);
1537 auto r = numeric_array{};
1541 while (src_i != N) {
1542 auto tmp = lhs[src_i++];
1543 tmp += lhs[src_i++];
1548 while (src_i != N) {
1549 auto tmp = rhs[src_i++];
1550 tmp += rhs[src_i++];
1556 [[nodiscard]]
friend constexpr numeric_array hsub(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1558 if (!std::is_constant_evaluated()) {
1559 if constexpr (is_f64x2 and x86_64_v2) {
1560 return numeric_array{_mm_hsub_pd(lhs.reg(), rhs.reg())};
1561 }
else if constexpr (is_f32x4 and x86_64_v2) {
1562 return numeric_array{_mm_hsub_ps(lhs.reg(), rhs.reg())};
1563 }
else if constexpr (is_i32x4 and x86_64_v2) {
1564 return numeric_array{_mm_hsub_epi32(lhs.reg(), rhs.reg())};
1565 }
else if constexpr (is_i16x8 and x86_64_v2) {
1566 return numeric_array{_mm_hsub_epi16(lhs.reg(), rhs.reg())};
1567 }
else if constexpr (is_i8x16 and x86_64_v2) {
1568 return numeric_array{_mm_hsub_epi8(lhs.reg(), rhs.reg())};
1572 tt_axiom(N % 2 == 0);
1574 auto r = numeric_array{};
1578 while (src_i != N) {
1579 auto tmp = lhs[src_i++];
1580 tmp -= lhs[src_i++];
1585 while (src_i != N) {
1586 auto tmp = rhs[src_i++];
1587 tmp -= rhs[src_i++];
1593 [[nodiscard]]
friend constexpr numeric_array operator-(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1595 if (!std::is_constant_evaluated()) {
1596 if constexpr (x86_64_v2_5 and lhs.is_f32x8 and rhs.is_f32x8) {
1597 return numeric_array{_mm256_sub_ps(lhs.reg(), rhs.reg())};
1601 auto r = numeric_array{};
1602 for (
size_t i = 0; i != N; ++i) {
1603 r.v[i] = lhs.v[i] - rhs.v[i];
1608 [[nodiscard]]
friend constexpr numeric_array operator-(numeric_array
const &lhs, T
const &rhs)
noexcept
1610 return lhs - broadcast(rhs);
1613 [[nodiscard]]
friend constexpr numeric_array operator-(T
const &lhs, numeric_array
const &rhs)
noexcept
1615 return broadcast(lhs) - rhs;
// Per-element add-or-subtract, selected by the bits of the Mask template
// argument (default: every bit set).
// The declaration line is not part of this excerpt.
// NOTE(review): presumably this is the addsub() called by hamilton_cross() - confirm.
1622 template<
size_t Mask = ~size_t{0}>
// Run time only: f32x4 on x86_64_v2 is delegated to f32x4_x64v2_addsub,
// which receives the low four bits of Mask.
1625 if (!std::is_constant_evaluated()) {
1626 if constexpr (is_f32x4 && x86_64_v2) {
1627 return numeric_array{f32x4_x64v2_addsub<Mask & 0xf>(lhs.v, rhs.v)};
// Scalar path: when bit i of Mask is set, element i is lhs + rhs.
1632 for (
size_t i = 0; i != N; ++i) {
1633 if (
static_cast<bool>((Mask >> i) & 1)) {
1634 r.v[i] = lhs.v[i] + rhs.v[i];
// NOTE(review): the line between these two assignments is elided here;
// presumably the subtraction is the else-branch (bit i clear) - confirm.
1636 r.v[i] = lhs.v[i] - rhs.v[i];
// Element-wise multiplication of lhs and rhs.
// The declaration line and the body of the SIMD branch are not part of this
// excerpt. NOTE(review): presumably operator*(numeric_array, numeric_array),
// the overload the scalar operator* wrappers forward to - confirm.
// Run time only: a dedicated branch exists for f32x8 on x86_64_v2_5.
// NOTE(review): the condition reads the trait through the reference
// parameters (lhs.is_f32x8); that is not a constant expression before C++23
// (P2280). The unqualified is_f32x8 would be equivalent - confirm and align.
1644 if (!std::is_constant_evaluated()) {
1645 if constexpr (x86_64_v2_5 and lhs.is_f32x8 and rhs.is_f32x8) {
// Scalar path: multiply the elements one at a time.
1650 auto r = numeric_array{};
1651 for (
size_t i = 0; i != N; ++i) {
1652 r.v[i] = lhs.v[i] * rhs.v[i];
1657 [[nodiscard]]
friend constexpr numeric_array operator*(numeric_array
const &lhs, T
const &rhs)
noexcept
1659 return lhs * broadcast(rhs);
1662 [[nodiscard]]
friend constexpr numeric_array operator*(T
const &lhs, numeric_array
const &rhs)
noexcept
1664 return broadcast(lhs) * rhs;
1667 [[nodiscard]]
friend constexpr numeric_array operator/(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1669 if (!std::is_constant_evaluated()) {
1670 if constexpr (x86_64_v2_5 and lhs.is_f32x8 and rhs.is_f32x8) {
1671 return numeric_array{_mm256_div_ps(lhs.reg(), rhs.reg())};
1675 auto r = numeric_array{};
1676 for (
size_t i = 0; i != N; ++i) {
1677 r.v[i] = lhs.v[i] / rhs.v[i];
1682 [[nodiscard]]
friend constexpr numeric_array operator/(numeric_array
const &lhs, T
const &rhs)
noexcept
1684 return lhs / broadcast(rhs);
1687 [[nodiscard]]
friend constexpr numeric_array operator/(T
const &lhs, numeric_array
const &rhs)
noexcept
1689 return broadcast(lhs) / rhs;
1692 [[nodiscard]]
friend constexpr numeric_array operator%(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1694 auto r = numeric_array{};
1695 for (
size_t i = 0; i != N; ++i) {
1696 r.v[i] = lhs.v[i] % rhs.v[i];
1701 [[nodiscard]]
friend constexpr numeric_array operator%(numeric_array
const &lhs, T
const &rhs)
noexcept
1703 return lhs % broadcast(rhs);
1706 [[nodiscard]]
friend constexpr numeric_array operator%(T
const &lhs, numeric_array
const &rhs)
noexcept
1708 return broadcast(lhs) % rhs;
1711 [[nodiscard]]
friend constexpr numeric_array min(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1713 auto r = numeric_array{};
1714 for (
size_t i = 0; i != N; ++i) {
1716 r.v[i] = lhs.v[i] < rhs.v[i] ? lhs.v[i] : rhs.v[i];
1721 [[nodiscard]]
friend constexpr numeric_array max(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1723 auto r = numeric_array{};
1724 for (
size_t i = 0; i != N; ++i) {
1726 r.v[i] = lhs.v[i] > rhs.v[i] ? lhs.v[i] : rhs.v[i];
1731 [[nodiscard]]
friend constexpr numeric_array
1732 clamp(numeric_array
const &lhs, numeric_array
const &low, numeric_array
const &high)
noexcept
1734 auto r = numeric_array{};
1735 for (
size_t i = 0; i != N; ++i) {
1737 r.v[i] = lhs.v[i] < low.v[i] ? low.v[i] : lhs.v[i] > high.v[i] ? high.v[i] : lhs.v[i];
// Precondition of a function whose declaration and remaining body are not
// part of this excerpt: rhs must have z == 0 and satisfy is_vector().
// NOTE(review): presumably this belongs to the one-argument cross_2D(rhs) - confirm.
1746 tt_axiom(rhs.z() == 0.0f && rhs.is_vector());
// Returns the one-argument cross_2D() of rhs, passed through
// normalize<0b0011>. The declaration is not part of this excerpt.
// NOTE(review): the 0b0011 mask presumably selects the x and y elements - confirm
// against normalize().
1754 return normalize<0b0011>(
cross_2D(rhs));
// Two-operand 2D cross product; the declaration is not part of this excerpt.
// Run-time f32x4 on x86_64_v2 is delegated to f32x4_x64v2_viktor_cross.
// NOTE(review): this is a plain `if`, not `if constexpr`, so the helper call
// must compile for every instantiation that reaches this function - confirm
// the function is constrained accordingly.
1761 if (is_f32x4 && x86_64_v2 && !std::is_constant_evaluated()) {
1762 return f32x4_x64v2_viktor_cross(lhs.v, rhs.v);
// Scalar path: x1*y2 - y1*x2.
1765 return lhs.x() * rhs.y() - lhs.y() * rhs.x();
// 3D cross product of lhs and rhs; the declaration and the body of the SIMD
// branch are not part of this excerpt.
// Run time only: a dedicated branch exists for f32x4 on x86_64_v2.
1775 if (!std::is_constant_evaluated()) {
1776 if constexpr (is_f32x4 && x86_64_v2) {
// Scalar path: the three components of the cross product, in x, y, z order.
// Any further initializer elements are elided in this excerpt.
1781 return numeric_array{
1782 lhs.y() * rhs.z() - lhs.z() * rhs.y(),
1783 lhs.z() * rhs.x() - lhs.x() * rhs.z(),
1784 lhs.x() * rhs.y() - lhs.y() * rhs.x(),
// hamilton_cross(lhs, rhs): only available when D == 4 (the template header
// that declares D is not part of this excerpt).
// NOTE(review): the name suggests the Hamilton (quaternion) product - confirm;
// the final return expression is elided here.
1795 requires(D == 4) [[nodiscard]]
friend numeric_array
1796 hamilton_cross(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
// Four partial products: each element of lhs, replicated by its swizzle,
// multiplied by a permutation of rhs.
1798 ttlet col0 = lhs.wwww() * rhs;
1799 ttlet col1 = lhs.xxxx() * rhs.wzyx();
1800 ttlet col2 = lhs.yyyy() * rhs.zwxy();
1801 ttlet col3 = lhs.zzzz() * rhs.yxwz();
// Combine the partial products with addsub(); col2 is combined after
// swapping the middle two elements (xzyw) of both operands, and the swap is
// undone on the result.
1803 ttlet col01 =
addsub(col0, col1);
1804 ttlet col012 =
addsub(col01.xzyw(), col2.xzyw()).xzyw();
// The initializer of the returned array is elided in this excerpt.
1806 return numeric_array{
// Moves elements toward higher indices: r[i] takes lhs[i - rhs] whenever
// i - rhs is not negative. The declaration and the handling of the remaining
// elements are not part of this excerpt.
// NOTE(review): if rhs has an unsigned type wider than or equal to ssize_t,
// `i - rhs` is computed unsigned and `>= 0` is always true - confirm the
// parameter type. `i != N` also compares a signed index with N.
1817 for (
ssize_t i = 0; i != N; ++i) {
1818 if ((i - rhs) >= 0) {
1819 r[i] = lhs[i - rhs];
// Moves elements toward lower indices: r[i] takes lhs[i + rhs] whenever
// i + rhs is still below N. The declaration and the handling of the remaining
// elements are not part of this excerpt.
// NOTE(review): `i` is signed (ssize_t); the comparisons against N and the sum
// with rhs mix signedness depending on the types of N and rhs - confirm.
1833 for (
ssize_t i = 0; i != N; ++i) {
1834 if ((i + rhs) < N) {
1835 r[i] = lhs[i + rhs];
// Per-element select between a and b controlled by mask; the declaration is
// not part of this excerpt.
// Run time on x86_64_v2: one _mm_blendv_epi8(a, b, mask).
// NOTE(review): the SIMD branch does not test the element type or N here;
// presumably the (elided) declaration constrains this to i8x16 - confirm.
1846 if (!std::is_constant_evaluated()) {
1847 if constexpr (x86_64_v2) {
1848 return numeric_array{_mm_blendv_epi8(a.reg(), b.reg(), mask.reg())};
// Scalar path: a non-negative mask element selects a[i], a negative one b[i].
1852 auto r = numeric_array{};
1854 for (
size_t i = 0; i != N; ++i) {
1855 r[i] = mask[i] >= 0 ? a[i] : b[i];
// Builds the 16 index values for shifting right by rhs elements: entry i is
// i + rhs as long as that stays below 16. Only available for i8x16, and only
// compiles on little-endian targets.
// The handling of entries where i + rhs >= 16 is elided in this excerpt.
// NOTE(review): presumably the result is meant as the index operand of a byte
// shuffle - confirm against the callers.
1861 [[nodiscard]]
static constexpr numeric_array byte_srl_shuffle_indices(
unsigned int rhs)
requires(is_i8x16)
1863 static_assert(std::endian::native == std::endian::little);
1865 auto r = numeric_array{};
// `i` is int and rhs is unsigned int, so `i + rhs` is evaluated as unsigned
// int; it wraps for rhs values close to the unsigned maximum.
1866 for (
auto i = 0; i != 16; ++i) {
1867 if ((i + rhs) < 16) {
1868 r[i] = narrow_cast<int8_t>(i + rhs);
1877 [[nodiscard]]
static constexpr numeric_array byte_sll_shuffle_indices(
unsigned int rhs)
requires(is_i8x16)
1879 static_assert(std::endian::native == std::endian::little);
1881 auto r = numeric_array{};
1882 for (
auto i = 0; i != 16; ++i) {
1883 if ((i - rhs) >= 0) {
1884 r[i] = narrow_cast<int8_t>(i - rhs);
// Rearranges the elements of lhs according to the indices in rhs; the
// declaration is not part of this excerpt.
// Run time on x86_64_v2: one _mm_shuffle_epi8(lhs, rhs).
1897 if (!std::is_constant_evaluated()) {
1898 if constexpr (x86_64_v2) {
1899 return numeric_array{_mm_shuffle_epi8(lhs.reg(), rhs.reg())};
// Scalar path: only the low four bits of each index are used to pick the
// source element. The line that precedes the assignment inside the loop is
// elided in this excerpt.
1905 for (
size_t i = 0; i != N; ++i) {
1907 r[i] = lhs[rhs[i] & 0xf];
// Returns the average of two points: (p1 + p2) * 0.5. Both arguments must
// satisfy is_point(). The declaration is not part of this excerpt.
1920 tt_axiom(p1.is_point());
1921 tt_axiom(p2.is_point());
1922 return (p1 + p2) * 0.5f;
// Returns p mirrored through anchor: anchor - (p - anchor). Both arguments
// must satisfy is_point(). The declaration is not part of this excerpt.
1929 tt_axiom(p.is_point());
1930 tt_axiom(anchor.is_point());
1931 return anchor - (p - anchor);
// Transposes a matrix given as a pack of column arrays; the declaration line
// is not part of this excerpt.
1934 template<
typename... Columns>
// Exactly N columns are required, i.e. the matrix must be N x N.
1937 static_assert(
sizeof...(Columns) == N,
"Can only transpose square matrices");
// Run-time f32x4 on x86_64_v2: f32x4_x64v2_transpose does the work and each
// of its results is wrapped back into a numeric_array.
// NOTE(review): plain `if`, not `if constexpr`, so the helper call must
// compile for every instantiation - confirm.
1941 if (is_f32x4 && x86_64_v2 && !std::is_constant_evaluated()) {
1942 auto tmp = f32x4_x64v2_transpose(columns.v...);
1943 for (
int i = 0; i != N; ++i) {
1944 r[i] = numeric_array{tmp[i]};
// Generic path: transpose_detail handles the columns recursively, starting
// at index 0.
1948 transpose_detail<0, Columns...>(columns..., r);
// composit(under, over): blends the colour `over` on top of `under`.
// Only available for four-element floating-point arrays; the w element is
// used as alpha.
1954 [[nodiscard]]
constexpr friend numeric_array composit(numeric_array
const &under, numeric_array
const &over)
noexcept
1955 requires(N == 4 && std::is_floating_point_v<T>)
// Special cases for a fully transparent or fully opaque `over`; what each
// case returns is elided in this excerpt.
1957 if (over.is_transparent()) {
1960 if (over.is_opaque()) {
// Alpha replicated to all four elements.
1964 ttlet over_alpha = over.wwww();
1965 ttlet under_alpha = under.wwww();
// Colour elements with the fourth element replaced by 1.
1967 ttlet over_color = over.xyz1();
1968 ttlet under_color = under.xyz1();
// over weighted by its alpha, plus under weighted by its alpha and by what
// `over` leaves uncovered (1 - over_alpha).
1970 ttlet output_color = over_color * over_alpha + under_color * under_alpha * (T{1} - over_alpha);
// Divide x, y, z by the resulting w; the w element itself is divided by 1.
1972 return output_color / output_color.www1();
// to_string(rhs): builds a std::string from the elements of rhs, appending
// each element formatted with std::format's default "{}" presentation.
// Any delimiters or separators added around the elements are elided in this
// excerpt.
1975 [[nodiscard]]
friend std::string to_string(numeric_array
const &rhs)
noexcept
1980 for (
size_t i = 0; i != N; ++i) {
1984 r += std::format(
"{}", rhs[i]);
// Writes the to_string() representation of rhs to lhs and returns the result
// of that insertion. The declaration is not part of this excerpt.
1992 return lhs << to_string(rhs);
// Copies element FromElement of rhs into position ToElement of the result;
// elements whose bit is set in ZeroMask are handled separately first.
// The declaration line is not part of this excerpt.
1999 template<
size_t FromElement,
size_t ToElement,
size_t ZeroMask = 0>
// Run time only: f32x4 and u64x2 on x86_64_v2 are delegated to their
// dedicated insert helpers with the same template arguments.
2004 if (!std::is_constant_evaluated()) {
2005 if constexpr (is_f32x4 && x86_64_v2) {
2006 return numeric_array{f32x4_x64v2_insert<FromElement, ToElement, ZeroMask>(lhs.v, rhs.v)};
2007 }
else if constexpr (is_u64x2 and x86_64_v2) {
2008 return numeric_array{u64x2_x64v2_insert<FromElement, ToElement, ZeroMask>(lhs.v, rhs.v)};
// Scalar path: the ZeroMask bit is tested before the ToElement match, so it
// takes precedence for an element that qualifies for both.
// NOTE(review): the bodies of the ZeroMask branch and of the final fallback
// are elided here; presumably they store zero and lhs[i] respectively - confirm.
2012 for (
size_t i = 0; i != N; ++i) {
2013 if ((ZeroMask >> i) & 1) {
2015 }
else if (i == ToElement) {
2016 r[i] = rhs[FromElement];
// Builds a new array whose elements are chosen by the Elements pack; the
// declaration is not part of this excerpt.
// At most N element selectors may be given.
2035 static_assert(
sizeof...(Elements) <= N);
// Run time only: dedicated branches exist for f32x4, i32x4, u32x4 and u64x2
// on x86_64_v2; their bodies are elided in this excerpt.
2037 if (!std::is_constant_evaluated()) {
2038 if constexpr (is_f32x4 && x86_64_v2) {
2040 }
else if constexpr (is_i32x4 && x86_64_v2) {
2042 }
else if constexpr (is_u32x4 && x86_64_v2) {
2044 }
else if constexpr (is_u64x2 and x86_64_v2) {
// Generic path: swizzle_detail fills r one element at a time, starting at
// destination index 0.
2050 swizzle_detail<0, Elements...>(r);
// Generators for the named swizzle accessors.
// SWIZZLE(swizzle_name, D, ...) defines one const member function
// swizzle_name() that is only available when D == N and forwards the
// remaining arguments as the template arguments of swizzle<>().
// Each *_GENn macro appends one more character to the name - '0', '1', 'x',
// 'y', 'z' (and 'w' for 4D) - together with the matching selector: get_zero,
// get_one, or the element index 0..3.
// NOTE(review): get_zero / get_one are presumably the negative selector
// values accepted by swizzle_detail - confirm.
2054#define SWIZZLE(swizzle_name, D, ...) \
2055 [[nodiscard]] constexpr numeric_array swizzle_name() const noexcept requires(D == N) \
2057 return swizzle<__VA_ARGS__>(); \
2060#define SWIZZLE_4D_GEN1(name, ...) \
2061 SWIZZLE(name##0, 4, __VA_ARGS__, get_zero) \
2062 SWIZZLE(name##1, 4, __VA_ARGS__, get_one) \
2063 SWIZZLE(name##x, 4, __VA_ARGS__, 0) \
2064 SWIZZLE(name##y, 4, __VA_ARGS__, 1) \
2065 SWIZZLE(name##z, 4, __VA_ARGS__, 2) \
2066 SWIZZLE(name##w, 4, __VA_ARGS__, 3)
// 4D, second-to-last character: expands to six SWIZZLE_4D_GEN1 invocations.
2068#define SWIZZLE_4D_GEN2(name, ...) \
2069 SWIZZLE_4D_GEN1(name##0, __VA_ARGS__, get_zero) \
2070 SWIZZLE_4D_GEN1(name##1, __VA_ARGS__, get_one) \
2071 SWIZZLE_4D_GEN1(name##x, __VA_ARGS__, 0) \
2072 SWIZZLE_4D_GEN1(name##y, __VA_ARGS__, 1) \
2073 SWIZZLE_4D_GEN1(name##z, __VA_ARGS__, 2) \
2074 SWIZZLE_4D_GEN1(name##w, __VA_ARGS__, 3)
// 4D, second character: expands to six SWIZZLE_4D_GEN2 invocations.
2076#define SWIZZLE_4D_GEN3(name, ...) \
2077 SWIZZLE_4D_GEN2(name##0, __VA_ARGS__, get_zero) \
2078 SWIZZLE_4D_GEN2(name##1, __VA_ARGS__, get_one) \
2079 SWIZZLE_4D_GEN2(name##x, __VA_ARGS__, 0) \
2080 SWIZZLE_4D_GEN2(name##y, __VA_ARGS__, 1) \
2081 SWIZZLE_4D_GEN2(name##z, __VA_ARGS__, 2) \
2082 SWIZZLE_4D_GEN2(name##w, __VA_ARGS__, 3)
// All four-character accessors. Names that would begin with a digit get a
// leading underscore instead (_0..., _1...).
2084 SWIZZLE_4D_GEN3(_0, get_zero)
2085 SWIZZLE_4D_GEN3(_1, get_one)
2086 SWIZZLE_4D_GEN3(x, 0)
2087 SWIZZLE_4D_GEN3(y, 1)
2088 SWIZZLE_4D_GEN3(z, 2)
2089 SWIZZLE_4D_GEN3(w, 3)
// 3D variants: same scheme without 'w', for three-element arrays.
2091#define SWIZZLE_3D_GEN1(name, ...) \
2092 SWIZZLE(name##0, 3, __VA_ARGS__, get_zero) \
2093 SWIZZLE(name##1, 3, __VA_ARGS__, get_one) \
2094 SWIZZLE(name##x, 3, __VA_ARGS__, 0) \
2095 SWIZZLE(name##y, 3, __VA_ARGS__, 1) \
2096 SWIZZLE(name##z, 3, __VA_ARGS__, 2)
// 3D, second character: expands to five SWIZZLE_3D_GEN1 invocations.
2098#define SWIZZLE_3D_GEN2(name, ...) \
2099 SWIZZLE_3D_GEN1(name##0, __VA_ARGS__, get_zero) \
2100 SWIZZLE_3D_GEN1(name##1, __VA_ARGS__, get_one) \
2101 SWIZZLE_3D_GEN1(name##x, __VA_ARGS__, 0) \
2102 SWIZZLE_3D_GEN1(name##y, __VA_ARGS__, 1) \
2103 SWIZZLE_3D_GEN1(name##z, __VA_ARGS__, 2)
// All three-character accessors.
2105 SWIZZLE_3D_GEN2(_0, get_zero)
2106 SWIZZLE_3D_GEN2(_1, get_one)
2107 SWIZZLE_3D_GEN2(x, 0)
2108 SWIZZLE_3D_GEN2(y, 1)
2109 SWIZZLE_3D_GEN2(z, 2)
// 2D variants: only 'x' and 'y' element selectors, for two-element arrays.
2111#define SWIZZLE_2D_GEN1(name, ...) \
2112 SWIZZLE(name##0, 2, __VA_ARGS__, get_zero) \
2113 SWIZZLE(name##1, 2, __VA_ARGS__, get_one) \
2114 SWIZZLE(name##x, 2, __VA_ARGS__, 0) \
2115 SWIZZLE(name##y, 2, __VA_ARGS__, 1)
// All two-character accessors.
2117 SWIZZLE_2D_GEN1(_0, get_zero)
2118 SWIZZLE_2D_GEN1(_1, get_one)
2119 SWIZZLE_2D_GEN1(x, 0)
2120 SWIZZLE_2D_GEN1(y, 1)
// The generator macros are removed again after use.
2123#undef SWIZZLE_4D_GEN1
2124#undef SWIZZLE_4D_GEN2
2125#undef SWIZZLE_4D_GEN3
2126#undef SWIZZLE_3D_GEN1
2127#undef SWIZZLE_3D_GEN2
2128#undef SWIZZLE_2D_GEN1
// Recursive helper of the generic transpose path: handles the column at
// index I (First) and then recurses over the remaining columns (Rest).
// The declaration line is not part of this excerpt.
2131 template<
int I,
typename First,
typename... Rest>
// Visits all N elements for the current column; the loop body is elided in
// this excerpt.
2134 for (
size_t j = 0; j != N; ++j) {
// Recursion stops once no columns are left.
2138 if constexpr (
sizeof...(Rest) != 0) {
2139 transpose_detail<I + 1, Rest...>(rest..., r);
// Recursive helper of swizzle(): writes destination element I of r from the
// element of *this selected by FirstElement, then continues with the
// remaining selectors at destination index I + 1.
2143 template<ssize_t I, ssize_t FirstElement, ssize_t... RestElements>
2144 constexpr void swizzle_detail(numeric_array &r)
const noexcept
// The destination index must be inside the array. Selectors may be a real
// element index (0 .. N-1) or one of the two negative values -1 and -2.
// NOTE(review): -1 / -2 are presumably get_zero / get_one - confirm.
2146 static_assert(I < narrow_cast<ssize_t>(N));
2147 static_assert(FirstElement >= -2 && FirstElement < narrow_cast<ssize_t>(N),
"Index out of bounds");
// Copy one element using the compile-time indexed get<>().
2149 get<I>(r) = get<FirstElement>(*
this);
// Recursion stops once no selectors are left.
2150 if constexpr (
sizeof...(RestElements) != 0) {
2151 swizzle_detail<I + 1, RestElements...>(r);