// Standard container-style member types.
// Each alias is forwarded unchanged from the corresponding nested type of
// `container_type` (declared outside this view).
47 using value_type =
typename container_type::value_type;
48 using size_type =
typename container_type::size_type;
49 using difference_type =
typename container_type::difference_type;
50 using reference =
typename container_type::reference;
51 using const_reference =
typename container_type::const_reference;
52 using pointer =
typename container_type::pointer;
53 using const_pointer =
typename container_type::const_pointer;
54 using iterator =
typename container_type::iterator;
55 using const_iterator =
typename container_type::const_iterator;
// Compile-time shape predicates.
// `is_<t><bits>x<count>` is true exactly when the element type T is the named
// fixed-width type (i = signed integer, u = unsigned integer, f = floating
// point) and the element count N equals <count>. They are used by the
// `if constexpr` chains and `requires` clauses in this class to select
// intrinsic code paths.

// 8-bit element shapes.
57 constexpr static bool is_i8x1 = std::is_same_v<T, int8_t> && N == 1;
58 constexpr static bool is_i8x2 = std::is_same_v<T, int8_t> && N == 2;
59 constexpr static bool is_i8x4 = std::is_same_v<T, int8_t> && N == 4;
60 constexpr static bool is_i8x8 = std::is_same_v<T, int8_t> && N == 8;
61 constexpr static bool is_i8x16 = std::is_same_v<T, int8_t> && N == 16;
62 constexpr static bool is_i8x32 = std::is_same_v<T, int8_t> && N == 32;
63 constexpr static bool is_i8x64 = std::is_same_v<T, int8_t> && N == 64;
64 constexpr static bool is_u8x1 = std::is_same_v<T, uint8_t> && N == 1;
65 constexpr static bool is_u8x2 = std::is_same_v<T, uint8_t> && N == 2;
66 constexpr static bool is_u8x4 = std::is_same_v<T, uint8_t> && N == 4;
67 constexpr static bool is_u8x8 = std::is_same_v<T, uint8_t> && N == 8;
68 constexpr static bool is_u8x16 = std::is_same_v<T, uint8_t> && N == 16;
69 constexpr static bool is_u8x32 = std::is_same_v<T, uint8_t> && N == 32;
70 constexpr static bool is_u8x64 = std::is_same_v<T, uint8_t> && N == 64;
// 16-bit element shapes.
72 constexpr static bool is_i16x1 = std::is_same_v<T, int16_t> && N == 1;
73 constexpr static bool is_i16x2 = std::is_same_v<T, int16_t> && N == 2;
74 constexpr static bool is_i16x4 = std::is_same_v<T, int16_t> && N == 4;
75 constexpr static bool is_i16x8 = std::is_same_v<T, int16_t> && N == 8;
76 constexpr static bool is_i16x16 = std::is_same_v<T, int16_t> && N == 16;
77 constexpr static bool is_i16x32 = std::is_same_v<T, int16_t> && N == 32;
78 constexpr static bool is_u16x1 = std::is_same_v<T, uint16_t> && N == 1;
79 constexpr static bool is_u16x2 = std::is_same_v<T, uint16_t> && N == 2;
80 constexpr static bool is_u16x4 = std::is_same_v<T, uint16_t> && N == 4;
81 constexpr static bool is_u16x8 = std::is_same_v<T, uint16_t> && N == 8;
82 constexpr static bool is_u16x16 = std::is_same_v<T, uint16_t> && N == 16;
83 constexpr static bool is_u16x32 = std::is_same_v<T, uint16_t> && N == 32;
// 32-bit element shapes (integer and float).
85 constexpr static bool is_i32x1 = std::is_same_v<T, int32_t> && N == 1;
86 constexpr static bool is_i32x2 = std::is_same_v<T, int32_t> && N == 2;
87 constexpr static bool is_i32x4 = std::is_same_v<T, int32_t> && N == 4;
88 constexpr static bool is_i32x8 = std::is_same_v<T, int32_t> && N == 8;
89 constexpr static bool is_i32x16 = std::is_same_v<T, int32_t> && N == 16;
90 constexpr static bool is_u32x1 = std::is_same_v<T, uint32_t> && N == 1;
91 constexpr static bool is_u32x2 = std::is_same_v<T, uint32_t> && N == 2;
92 constexpr static bool is_u32x4 = std::is_same_v<T, uint32_t> && N == 4;
93 constexpr static bool is_u32x8 = std::is_same_v<T, uint32_t> && N == 8;
94 constexpr static bool is_u32x16 = std::is_same_v<T, uint32_t> && N == 16;
95 constexpr static bool is_f32x1 = std::is_same_v<T, float> && N == 1;
96 constexpr static bool is_f32x2 = std::is_same_v<T, float> && N == 2;
97 constexpr static bool is_f32x4 = std::is_same_v<T, float> && N == 4;
98 constexpr static bool is_f32x8 = std::is_same_v<T, float> && N == 8;
99 constexpr static bool is_f32x16 = std::is_same_v<T, float> && N == 16;
// 64-bit element shapes (integer and double).
101 constexpr static bool is_i64x1 = std::is_same_v<T, int64_t> && N == 1;
102 constexpr static bool is_i64x2 = std::is_same_v<T, int64_t> && N == 2;
103 constexpr static bool is_i64x4 = std::is_same_v<T, int64_t> && N == 4;
104 constexpr static bool is_i64x8 = std::is_same_v<T, int64_t> && N == 8;
105 constexpr static bool is_u64x1 = std::is_same_v<T, uint64_t> && N == 1;
106 constexpr static bool is_u64x2 = std::is_same_v<T, uint64_t> && N == 2;
107 constexpr static bool is_u64x4 = std::is_same_v<T, uint64_t> && N == 4;
108 constexpr static bool is_u64x8 = std::is_same_v<T, uint64_t> && N == 8;
109 constexpr static bool is_f64x1 = std::is_same_v<T, double> && N == 1;
110 constexpr static bool is_f64x2 = std::is_same_v<T, double> && N == 2;
111 constexpr static bool is_f64x4 = std::is_same_v<T, double> && N == 4;
112 constexpr static bool is_f64x8 = std::is_same_v<T, double> && N == 8;
// Converting constructor from a numeric_array with element type U and size M
// (the signature line itself is outside this view).
//
// Outside constant evaluation an `if constexpr` chain selects a code path per
// (target shape, source shape) pair, gated on the x86_64_v2 / x86_64_v2_5
// feature flags. The bodies of those branches are not visible here.
//
// NOTE(review): the two `is_i64x4` targets below are gated on x86_64_v2 while
// every other 256-bit target shape in this chain is gated on x86_64_v2_5 —
// confirm that the (unseen) branch bodies only need x86_64_v2.
120 template<arithmetic U,
size_t M>
123 if (!std::is_constant_evaluated()) {
124 if constexpr (x86_64_v2 and is_f64x2 and other.is_i32x4) {
127 }
else if constexpr (x86_64_v2 and is_f32x4 and other.is_i32x4) {
130 }
else if constexpr (x86_64_v2 and is_i64x4 and other.is_i32x4) {
133 }
else if constexpr (x86_64_v2 and is_i64x4 and other.is_i16x8) {
136 }
else if constexpr (x86_64_v2 and is_i32x4 and other.is_i16x8) {
139 }
else if constexpr (x86_64_v2 and is_i64x2 and other.is_i8x16) {
142 }
else if constexpr (x86_64_v2 and is_i32x4 and other.is_f32x4) {
145 }
else if constexpr (x86_64_v2 and is_i32x4 and other.is_i8x16) {
148 }
else if constexpr (x86_64_v2 and is_i16x8 and other.is_i8x16) {
151 }
else if constexpr (x86_64_v2_5 and is_f64x4 and other.is_f32x4) {
154 }
else if constexpr (x86_64_v2_5 and is_f64x4 and other.is_i32x4) {
157 }
else if constexpr (x86_64_v2_5 and is_f32x4 and other.is_f64x4) {
160 }
else if constexpr (x86_64_v2_5 and is_i32x4 and other.is_f64x4) {
163 }
else if constexpr (x86_64_v2_5 and is_i32x8 and other.is_f32x8) {
166 }
else if constexpr (x86_64_v2_5 and is_f32x8 and other.is_i32x8) {
// Element-wise conversion loop: a floating-point source converted to an
// integral target is rounded with std::round before the cast; every other
// combination is a plain static_cast.
172 for (
size_t i = 0; i != N; ++i) {
174 if constexpr (std::is_integral_v<T> and std::is_floating_point_v<U>) {
176 v[i] =
static_cast<value_type
>(
std::round(other[i]));
178 v[i] =
static_cast<value_type
>(other[i]);
// Constructor that builds this array from two source arrays of element type U
// and size M: `other1` supplies the low elements [0, M) and `other2` the next
// elements [M, 2*M), as the element-wise loop at the bottom shows. (The
// signature line itself is outside this view.)
//
// Intrinsic paths (outside constant evaluation):
//  - 256-bit concatenation via _mm256_set_m128*(hi, lo): `other2` is the high
//    half, `other1` the low half.
//  - 128-bit narrowing via the pack intrinsics, pack(a, b): `a` lands in the
//    LOW half of the result, so `other1` must be the first argument.
//
// Fixes relative to the previous revision:
//  - The pack intrinsics were called as (other2, other1), which swapped the
//    two halves compared to the element-wise loop.
//  - `_mm_packus_epu32` / `_mm_packus_epu16` do not exist. The unsigned
//    sources are now clamped with _mm_min_epu32 / _mm_min_epu16 and then packed
//    with _mm_packus_epi32 / _mm_packus_epi16; after clamping every lane is
//    non-negative when read as signed, so the pack is an exact unsigned
//    saturation.
//  - The integral 256-bit concatenation copies raw bits, so it is now limited
//    to sources whose element size equals the target's element size.
186 template<arithmetic U,
size_t M>
191 if (!std::is_constant_evaluated()) {
192 if constexpr (x86_64_v2_5 and is_f32x8 and other1.is_f32x4 and other2.is_f32x4) {
193 *
this =
numeric_array{_mm256_set_m128(other2.reg(), other1.reg())};
195 }
else if constexpr (x86_64_v2_5 and is_f64x4 and other1.is_f64x2 and other2.is_f64x2) {
196 *
this =
numeric_array{_mm256_set_m128d(other2.reg(), other1.reg())};
198 }
else if constexpr (
199 x86_64_v2_5 and std::is_integral_v<T> and std::is_integral_v<U> and (
sizeof(T) * N == 32) and
200 (
sizeof(U) * M == 16) and (
sizeof(T) == sizeof(U))) {
201 *
this =
numeric_array{_mm256_set_m128i(other2.reg(), other1.reg())};
203 }
else if constexpr (x86_64_v2 and is_i16x8 and other1.is_i32x4 and other2.is_i32x4) {
204 *
this =
numeric_array{_mm_packs_epi32(other1.reg(), other2.reg())};
206 }
else if constexpr (x86_64_v2 and is_i8x16 and other1.is_i16x8 and other2.is_i16x8) {
207 *
this =
numeric_array{_mm_packs_epi16(other1.reg(), other2.reg())};
209 }
else if constexpr (x86_64_v2 and is_u16x8 and other1.is_u32x4 and other2.is_u32x4) {
210 *
this =
numeric_array{_mm_packus_epi32(
_mm_min_epu32(other1.reg(), _mm_set1_epi32(0xffff)),
_mm_min_epu32(other2.reg(), _mm_set1_epi32(0xffff)))};
212 }
else if constexpr (x86_64_v2 and is_u8x16 and other1.is_u16x8 and other2.is_u16x8) {
213 *
this =
numeric_array{_mm_packus_epi16(
_mm_min_epu16(other1.reg(), _mm_set1_epi16(0xff)),
_mm_min_epu16(other2.reg(), _mm_set1_epi16(0xff)))};
// Element-wise loop: elements below M come from other1, elements below 2*M
// from other2. Floating-point sources converted to an integral target are
// rounded with std::round before the cast.
218 for (
size_t i = 0; i != N; ++i) {
220 if constexpr (std::is_integral_v<T> and std::is_floating_point_v<U>) {
222 v[i] =
static_cast<value_type
>(
std::round(other1[i]));
224 v[i] =
static_cast<value_type
>(other1[i]);
226 }
else if (i < M * 2) {
227 if constexpr (std::is_integral_v<T> and std::is_floating_point_v<U>) {
229 v[i] =
static_cast<value_type
>(
std::round(other2[i - M]));
231 v[i] =
static_cast<value_type
>(other2[i - M]);
251 "Expecting the std:initializer_list size to be <= to the size of the numeric array");
262 requires(
sizeof...(Rest) + 2 <= N)
263 [[nodiscard]]
constexpr numeric_array(T
const &first, T
const &second, Rest
const &...rest) noexcept :
268 [[nodiscard]]
static constexpr numeric_array broadcast(T rhs)
noexcept
271 for (
size_t i = 0; i != N; ++i) {
290 [[nodiscard]] __m128i reg()
const noexcept requires(x86_64_v2 and std::is_integral_v<T> and
sizeof(T) * N == 16)
292 return _mm_loadu_si128(
reinterpret_cast<__m128i
const *
>(v.
data()));
295 [[nodiscard]] __m128 reg()
const noexcept requires(x86_64_v2 and is_f32x4)
297 return _mm_loadu_ps(v.
data());
300 [[nodiscard]] __m128d reg()
const noexcept requires(x86_64_v2 and is_f64x2)
302 return _mm_loadu_pd(v.
data());
305 [[nodiscard]]
explicit numeric_array(__m128i
const &rhs)
noexcept
306 requires(x86_64_v2 and std::is_integral_v<T> and
sizeof(T) * N == 16)
308 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(v.
data()), rhs);
311 [[nodiscard]]
explicit numeric_array(__m128
const &rhs)
noexcept requires(x86_64_v2 and is_f32x4)
313 _mm_storeu_ps(v.
data(), rhs);
316 [[nodiscard]]
explicit numeric_array(__m128d
const &rhs)
noexcept requires(x86_64_v2 and is_f64x2)
318 _mm_storeu_pd(v.
data(), rhs);
321 numeric_array &operator=(__m128i
const &rhs)
noexcept requires(x86_64_v2 and std::is_integral_v<T> and
sizeof(T) * N == 16)
323 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(v.
data()), rhs);
327 numeric_array &operator=(__m128
const &rhs)
noexcept requires(x86_64_v2 and is_f32x4)
329 _mm_storeu_ps(v.
data(), rhs);
333 numeric_array &operator=(__m128d
const &rhs)
noexcept requires(x86_64_v2 and is_f64x2)
335 _mm_storeu_pd(v.
data(), rhs);
339 [[nodiscard]] __m256i reg()
const noexcept requires(x86_64_v2_5 and std::is_integral_v<T> and
sizeof(T) * N == 32)
341 return _mm256_loadu_si256(
reinterpret_cast<__m256i
const *
>(v.
data()));
344 [[nodiscard]] __m256 reg()
const noexcept requires(x86_64_v2_5 and is_f32x8)
346 return _mm256_loadu_ps(v.
data());
349 [[nodiscard]] __m256d reg()
const noexcept requires(x86_64_v2_5 and is_f64x4)
351 return _mm256_loadu_pd(v.
data());
354 [[nodiscard]]
explicit numeric_array(__m256i
const &rhs)
noexcept
355 requires(x86_64_v2_5 and std::is_integral_v<T> and
sizeof(T) * N == 32)
357 _mm256_storeu_si256(
reinterpret_cast<__m256i *
>(v.
data()), rhs);
360 [[nodiscard]]
explicit numeric_array(__m256
const &rhs)
noexcept requires(x86_64_v2_5 and is_f32x8)
362 _mm256_storeu_ps(v.
data(), rhs);
365 [[nodiscard]]
explicit numeric_array(__m256d
const &rhs)
noexcept requires(x86_64_v2_5 and is_f64x4)
367 _mm256_storeu_pd(v.
data(), rhs);
370 numeric_array &operator=(__m256i
const &rhs)
noexcept requires(x86_64_v2_5 and std::is_integral_v<T> and
sizeof(T) * N == 32)
372 _mm256_storeu_si256(
reinterpret_cast<__m256i *
>(v.
data()), rhs);
376 numeric_array &operator=(__m256
const &rhs)
noexcept requires(x86_64_v2_5 and is_f32x8)
378 _mm256_storeu_ps(v.
data(), rhs);
382 numeric_array &operator=(__m256d
const &rhs)
noexcept requires(x86_64_v2_5 and is_f64x4)
384 _mm256_storeu_pd(v.
data(), rhs);
// Reinterpret the bits of `rhs` as another numeric_array type `Other` of the
// same total size (enforced by the requires-clause).
//
// Outside constant evaluation, on x86_64_v2, the register cast intrinsics are
// used for float <-> double <-> integer reinterpretation; the final statement
// is the generic std::bit_cast.
//
// Fix: `Other::value_type` is a dependent type used as a template argument,
// which needs the `typename` keyword (it was missing in three branches and
// is rejected by conforming compilers).
//
// NOTE(review): the integral -> integral branch calls rhs.reg() without
// checking the register width; confirm it is never instantiated for arrays
// that are neither 16 nor 32 bytes.
388 template<
typename Other>
389 requires(
sizeof(Other) ==
sizeof(
numeric_array)) [[nodiscard]]
constexpr friend Other
392 using rhs_value_type =
typename std::remove_cvref_t<
decltype(rhs)>::value_type;
394 if (not std::is_constant_evaluated()) {
395 if constexpr (Other::is_f32x4 and std::is_integral_v<rhs_value_type> and x86_64_v2) {
396 return Other{_mm_castsi128_ps(rhs.reg())};
397 }
else if constexpr (Other::is_f32x4 and rhs.is_f64x2 and x86_64_v2) {
398 return Other{_mm_castpd_ps(rhs.reg())};
399 }
else if constexpr (Other::is_f64x2 and std::is_integral_v<rhs_value_type> and x86_64_v2) {
400 return Other{_mm_castsi128_pd(rhs.reg())};
401 }
else if constexpr (Other::is_f64x2 and rhs.is_f32x4 and x86_64_v2) {
402 return Other{_mm_castps_pd(rhs.reg())};
403 }
else if constexpr (std::is_integral_v<typename Other::value_type> and rhs.is_f32x4 and x86_64_v2) {
404 return Other{_mm_castps_si128(rhs.reg())};
405 }
else if constexpr (std::is_integral_v<typename Other::value_type> and rhs.is_f64x2 and x86_64_v2) {
406 return Other{_mm_castpd_si128(rhs.reg())};
407 }
else if constexpr (std::is_integral_v<typename Other::value_type> and std::is_integral_v<rhs_value_type> and x86_64_v2) {
408 return Other{rhs.reg()};
411 return std::bit_cast<Other>(rhs);
418 if (not std::is_constant_evaluated()) {
419 if constexpr (x86_64_v2 and is_f64x2) {
421 }
else if constexpr (x86_64_v2 and is_f32x4) {
423 }
else if constexpr (x86_64_v2 and is_i64x2) {
425 }
else if constexpr (x86_64_v2 and is_i32x4) {
427 }
else if constexpr (x86_64_v2 and is_i16x8) {
429 }
else if constexpr (x86_64_v2 and is_i8x16) {
435 for (
size_t i = 0; i != N; ++i) {
436 r[i] = (i % 2 == 0) ? a[i / 2] : b[i / 2];
477 constexpr void store(std::byte *ptr)
const noexcept
485 constexpr void store(std::byte *ptr)
const noexcept
487 store<sizeof(*this)>(ptr);
490 [[nodiscard]]
constexpr T
const &operator[](
size_t i)
const noexcept
492 static_assert(std::endian::native == std::endian::little,
"Indices need to be reversed on big endian machines");
497 [[nodiscard]]
constexpr T &operator[](
size_t i)
noexcept
499 static_assert(std::endian::native == std::endian::little,
"Indices need to be reversed on big endian machines");
504 [[nodiscard]]
constexpr reference front() noexcept
509 [[nodiscard]]
constexpr const_reference front() const noexcept
514 [[nodiscard]]
constexpr reference back() noexcept
519 [[nodiscard]]
constexpr const_reference back() const noexcept
524 [[nodiscard]]
constexpr pointer data() noexcept
529 [[nodiscard]]
constexpr const_pointer data() const noexcept
534 [[nodiscard]]
constexpr iterator begin() noexcept
539 [[nodiscard]]
constexpr const_iterator begin() const noexcept
544 [[nodiscard]]
constexpr const_iterator cbegin() const noexcept
549 [[nodiscard]]
constexpr iterator end() noexcept
554 [[nodiscard]]
constexpr const_iterator end() const noexcept
559 [[nodiscard]]
constexpr const_iterator cend() const noexcept
564 [[nodiscard]]
constexpr bool empty() const noexcept
569 [[nodiscard]]
constexpr size_type size() const noexcept
574 [[nodiscard]]
constexpr size_type max_size() const noexcept
579 constexpr bool is_point() const noexcept
581 return v.
back() != T{};
584 constexpr bool is_vector() const noexcept
586 return v.
back() == T{};
589 constexpr bool is_opaque() const noexcept
594 constexpr bool is_transparent() const noexcept
599 [[nodiscard]]
constexpr T
const &x() const noexcept requires(N >= 1)
601 return std::get<0>(v);
604 [[nodiscard]]
constexpr T
const &y() const noexcept requires(N >= 2)
606 return std::get<1>(v);
609 [[nodiscard]]
constexpr T
const &z() const noexcept requires(N >= 3)
611 return std::get<2>(v);
614 [[nodiscard]]
constexpr T
const &w() const noexcept requires(N >= 4)
616 return std::get<3>(v);
619 [[nodiscard]]
constexpr T &x() noexcept requires(N >= 1)
621 return std::get<0>(v);
624 [[nodiscard]]
constexpr T &y() noexcept requires(N >= 2)
626 return std::get<1>(v);
629 [[nodiscard]]
constexpr T &z() noexcept requires(N >= 3)
631 return std::get<2>(v);
634 [[nodiscard]]
constexpr T &w() noexcept requires(N >= 4)
636 return std::get<3>(v);
639 [[nodiscard]]
constexpr T
const &r() const noexcept requires(N >= 1)
641 return std::get<0>(v);
644 [[nodiscard]]
constexpr T
const &g() const noexcept requires(N >= 2)
646 return std::get<1>(v);
649 [[nodiscard]]
constexpr T
const &b() const noexcept requires(N >= 3)
651 return std::get<2>(v);
654 [[nodiscard]]
constexpr T
const &a() const noexcept requires(N >= 4)
656 return std::get<3>(v);
659 [[nodiscard]]
constexpr T &r() noexcept requires(N >= 1)
661 return std::get<0>(v);
664 [[nodiscard]]
constexpr T &g() noexcept requires(N >= 2)
666 return std::get<1>(v);
669 [[nodiscard]]
constexpr T &b() noexcept requires(N >= 3)
671 return std::get<2>(v);
674 [[nodiscard]]
constexpr T &a() noexcept requires(N >= 4)
676 return std::get<3>(v);
679 [[nodiscard]]
constexpr T
const &width() const noexcept requires(N >= 1)
681 return std::get<0>(v);
684 [[nodiscard]]
constexpr T
const &height() const noexcept requires(N >= 2)
686 return std::get<1>(v);
689 [[nodiscard]]
constexpr T
const &depth() const noexcept requires(N >= 3)
691 return std::get<2>(v);
694 [[nodiscard]]
constexpr T &width() noexcept requires(N >= 1)
696 return std::get<0>(v);
699 [[nodiscard]]
constexpr T &height() noexcept requires(N >= 2)
701 return std::get<1>(v);
704 [[nodiscard]]
constexpr T &depth() noexcept requires(N >= 3)
706 return std::get<2>(v);
709 constexpr numeric_array &operator<<=(
unsigned int rhs)
noexcept
711 return *
this = *
this << rhs;
714 constexpr numeric_array &operator>>=(
unsigned int rhs)
noexcept
716 return *
this = *
this >> rhs;
719 constexpr numeric_array &operator|=(numeric_array
const &rhs)
noexcept
721 return *
this = *
this | rhs;
724 constexpr numeric_array &operator|=(T
const &rhs)
noexcept
726 return *
this = *
this | rhs;
729 constexpr numeric_array &operator&=(numeric_array
const &rhs)
noexcept
731 return *
this = *
this & rhs;
734 constexpr numeric_array &operator&=(T
const &rhs)
noexcept
736 return *
this = *
this & rhs;
739 constexpr numeric_array &operator^=(numeric_array
const &rhs)
noexcept
741 return *
this = *
this ^ rhs;
744 constexpr numeric_array &operator^=(T
const &rhs)
noexcept
746 return *
this = *
this ^ rhs;
749 constexpr numeric_array &operator+=(numeric_array
const &rhs)
noexcept
751 return *
this = *
this + rhs;
754 constexpr numeric_array &operator+=(T
const &rhs)
noexcept
756 return *
this = *
this + rhs;
759 constexpr numeric_array &operator-=(numeric_array
const &rhs)
noexcept
761 return *
this = *
this - rhs;
764 constexpr numeric_array &operator-=(T
const &rhs)
noexcept
766 return *
this = *
this - rhs;
769 constexpr numeric_array &operator*=(numeric_array
const &rhs)
noexcept
771 return *
this = *
this * rhs;
774 constexpr numeric_array &operator*=(T
const &rhs)
noexcept
776 return *
this = *
this * rhs;
779 constexpr numeric_array &operator/=(numeric_array
const &rhs)
noexcept
781 return *
this = *
this / rhs;
784 constexpr numeric_array &operator/=(T
const &rhs)
noexcept
786 return *
this = *
this / rhs;
789 constexpr numeric_array &operator%=(numeric_array
const &rhs)
noexcept
791 return *
this = *
this % rhs;
794 constexpr numeric_array &operator%=(T
const &rhs)
noexcept
796 return *
this = *
this % rhs;
799 constexpr static ssize_t get_zero = -1;
800 constexpr static ssize_t get_one = -2;
809 static_assert(I < N,
"Index out of bounds");
810 return std::get<I>(rhs.v);
821 static_assert(std::endian::native == std::endian::little,
"Indices need to be reversed on big endian machines");
822 static_assert(I >= -2 && I < narrow_cast<ssize_t>(N),
"Index out of bounds");
823 if constexpr (I == get_zero) {
825 }
else if constexpr (I == get_one) {
828 return std::get<I>(rhs.v);
840 static_assert(std::endian::native == std::endian::little,
"Indices need to be reversed on big endian machines");
841 static_assert(I >= -2 && I < narrow_cast<ssize_t>(N),
"Index out of bounds");
842 if constexpr (I == get_zero) {
844 }
else if constexpr (I == get_one) {
847 return std::get<I>(rhs.v);
855 template<
size_t Mask = ~size_t{0}>
858 if (!std::is_constant_evaluated()) {
859 if constexpr (is_f32x4 && x86_64_v2) {
865 for (
size_t i = 0; i != N; ++i) {
866 if (
static_cast<bool>((Mask >> i) & 1)) {
879 template<
size_t Mask = ~size_t{0}>
882 if (!std::is_constant_evaluated()) {
883 if constexpr (is_f32x4 && x86_64_v2) {
889 for (
size_t i = 0; i != N; ++i) {
890 if (
static_cast<bool>((Mask >> i) & 1)) {
902 for (
size_t i = 0; i != N; ++i) {
904 r.v[i] = T{} - rhs.v[i];
909 [[nodiscard]]
friend constexpr numeric_array abs(numeric_array
const &rhs)
noexcept
913 auto r = numeric_array{};
914 for (
size_t i = 0; i != N; ++i) {
915 r.v[i] = rhs.v[i] < T{} ? neg_rhs.v[i] : rhs.v[i];
// Element-wise reciprocal of `rhs`.
// Outside constant evaluation, f32x4 on x86_64_v2 uses _mm_rcp_ps, which is a
// hardware approximation rather than an exact division. The loop below
// computes `1.0f / element` for each element instead.
920 [[nodiscard]]
friend constexpr numeric_array rcp(numeric_array
const &rhs)
noexcept
922 if (!std::is_constant_evaluated()) {
923 if constexpr (is_f32x4 and x86_64_v2) {
924 return numeric_array{_mm_rcp_ps(rhs.reg())};
928 auto r = numeric_array{};
929 for (
size_t i = 0; i != N; ++i) {
930 r[i] = 1.0f / rhs.v[i];
935 [[nodiscard]]
friend constexpr numeric_array sqrt(numeric_array
const &rhs)
noexcept
937 if (!std::is_constant_evaluated()) {
938 if constexpr (is_f32x4 and x86_64_v2) {
939 return numeric_array{_mm_sqrt_ps(rhs.reg())};
943 auto r = numeric_array{};
944 for (
size_t i = 0; i != N; ++i) {
// Element-wise reciprocal square root of `rhs`.
// Outside constant evaluation, f32x4 on x86_64_v2 uses the SSE approximate
// reciprocal-square-root instruction; an element-wise loop follows (its body
// is outside this view).
//
// Fix: the intrinsic is named _mm_rsqrt_ps; `_mm_rcp_sqrt_ps` does not exist
// and failed to compile once this function was instantiated for f32x4.
950 [[nodiscard]]
friend constexpr numeric_array rcp_sqrt(numeric_array
const &rhs)
noexcept
952 if (!std::is_constant_evaluated()) {
953 if constexpr (is_f32x4 and x86_64_v2) {
954 return numeric_array{_mm_rsqrt_ps(rhs.reg())};
958 auto r = numeric_array{};
959 for (
size_t i = 0; i != N; ++i) {
965 [[nodiscard]]
friend constexpr numeric_array floor(numeric_array
const &rhs)
noexcept
967 if (!std::is_constant_evaluated()) {
968 if constexpr (is_f32x4 and x86_64_v2) {
969 return numeric_array{_mm_floor_ps(rhs.reg())};
973 auto r = numeric_array{};
974 for (
size_t i = 0; i != N; ++i) {
980 [[nodiscard]]
friend constexpr numeric_array ceil(numeric_array
const &rhs)
noexcept
982 if (!std::is_constant_evaluated()) {
983 if constexpr (is_f32x4 and x86_64_v2) {
984 return numeric_array{_mm_ceil_ps(rhs.reg())};
988 auto r = numeric_array{};
989 for (
size_t i = 0; i != N; ++i) {
995 [[nodiscard]]
friend constexpr numeric_array round(numeric_array
const &rhs)
noexcept
997 if (!std::is_constant_evaluated()) {
998 if constexpr (is_f32x4 and x86_64_v2) {
999 return numeric_array{_mm_round_ps(rhs.reg(), _MM_FROUND_CUR_DIRECTION)};
1003 auto r = numeric_array{};
1004 for (
size_t i = 0; i != N; ++i) {
1017 template<
size_t Mask>
1020 if (!std::is_constant_evaluated()) {
1021 if constexpr (is_f32x4 and x86_64_v2) {
1022 return f32x4_x64v2_dot<Mask>(lhs.v, rhs.v);
1027 for (
size_t i = 0; i != N; ++i) {
1028 if (
static_cast<bool>(Mask & (1_uz << i))) {
1029 r += lhs.v[i] * rhs.v[i];
1042 template<
size_t Mask>
1045 if (is_f32x4 && x86_64_v2 && !std::is_constant_evaluated()) {
1046 return f32x4_x64v2_hypot<Mask>(rhs.v);
1058 template<
size_t Mask>
1061 return dot<Mask>(rhs, rhs);
1070 template<
size_t Mask>
1073 if (is_f32x4 && x86_64_v2 && !std::is_constant_evaluated()) {
1074 return f32x4_x64v2_rcp_hypot<Mask>(rhs.v);
1077 return 1.0f / hypot<Mask>(rhs);
1088 template<
size_t Mask>
1091 tt_axiom(rhs.is_vector());
1093 if (is_f32x4 && x86_64_v2 && !std::is_constant_evaluated()) {
1097 ttlet rcp_hypot_ = rcp_hypot<Mask>(rhs);
1100 for (
size_t i = 0; i != N; ++i) {
1101 if (
static_cast<bool>(Mask & (1_uz << i))) {
1102 r.v[i] = rhs.v[i] * rcp_hypot_;
1109 requires(N <=
sizeof(
unsigned int) * CHAR_BIT)
1111 if (!std::is_constant_evaluated()) {
1112 if constexpr (is_f32x4 and x86_64_v2) {
1113 return static_cast<unsigned int>(_mm_movemask_ps(_mm_cmpeq_ps(lhs.reg(), rhs.reg())));
1118 for (
size_t i = 0; i != N; ++i) {
1119 r |=
static_cast<unsigned int>(lhs.v[i] == rhs.v[i]) << i;
// Element-wise "not equal" comparison returning a bit mask: bit i of the
// result is set when lhs[i] != rhs[i]. Requires N to fit in the bits of an
// unsigned int.
// Outside constant evaluation, f32x4 on x86_64_v2 uses an SSE compare followed
// by _mm_movemask_ps; the loop below builds the same mask per element.
//
// Fix: the SSE intrinsic is _mm_cmpneq_ps; `_mm_cmpne_ps` does not exist and
// failed to compile once this function was instantiated for f32x4.
1124 [[nodiscard]]
friend constexpr unsigned int ne(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1125 requires(N <=
sizeof(
unsigned int) * CHAR_BIT)
1127 if (!std::is_constant_evaluated()) {
1128 if constexpr (is_f32x4 and x86_64_v2) {
1129 return static_cast<unsigned int>(_mm_movemask_ps(_mm_cmpneq_ps(lhs.reg(), rhs.reg())));
1133 for (
size_t i = 0; i != N; ++i) {
1134 r |=
static_cast<unsigned int>(lhs.v[i] != rhs.v[i]) << i;
1139 [[nodiscard]]
friend constexpr unsigned int lt(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1140 requires(N <=
sizeof(
unsigned int) * CHAR_BIT)
1142 if (!std::is_constant_evaluated()) {
1143 if constexpr (is_f32x4 and x86_64_v2) {
1144 return static_cast<unsigned int>(_mm_movemask_ps(_mm_cmplt_ps(lhs.reg(), rhs.reg())));
1148 for (
size_t i = 0; i != N; ++i) {
1149 r |=
static_cast<unsigned int>(lhs.v[i] < rhs.v[i]) << i;
1154 [[nodiscard]]
friend constexpr unsigned int gt(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1155 requires(N <=
sizeof(
unsigned int) * CHAR_BIT)
1157 if (!std::is_constant_evaluated()) {
1158 if constexpr (is_f32x4 and x86_64_v2) {
1159 return static_cast<unsigned int>(_mm_movemask_ps(_mm_cmpgt_ps(lhs.reg(), rhs.reg())));
1163 for (
size_t i = 0; i != N; ++i) {
1164 r |=
static_cast<unsigned int>(lhs.v[i] > rhs.v[i]) << i;
1169 [[nodiscard]]
friend constexpr unsigned int le(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1170 requires(N <=
sizeof(
unsigned int) * CHAR_BIT)
1172 if (!std::is_constant_evaluated()) {
1173 if constexpr (is_f32x4 and x86_64_v2) {
1174 return static_cast<unsigned int>(_mm_movemask_ps(_mm_cmple_ps(lhs.reg(), rhs.reg())));
1178 for (
size_t i = 0; i != N; ++i) {
1179 r |=
static_cast<unsigned int>(lhs.v[i] <= rhs.v[i]) << i;
1184 [[nodiscard]]
friend constexpr unsigned int ge(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1185 requires(N <=
sizeof(
unsigned int) * CHAR_BIT)
1187 if (!std::is_constant_evaluated()) {
1188 if constexpr (is_f32x4 and x86_64_v2) {
1189 return static_cast<unsigned int>(_mm_movemask_ps(_mm_cmpge_ps(lhs.reg(), rhs.reg())));
1193 for (
size_t i = 0; i != N; ++i) {
1194 r |=
static_cast<unsigned int>(lhs.v[i] >= rhs.v[i]) << i;
// Element-wise "greater than" comparison returning a per-element mask:
// an element of the result has all bits set where lhs[i] > rhs[i] and all
// bits clear otherwise.
//
// Outside constant evaluation, 128-bit shapes on x86_64_v2 use the SSE compare
// intrinsics. The element-wise loop below is only implemented for 4-byte
// elements; other sizes reach tt_static_not_implemented().
//
// Fix: the 128-bit integer compares were selected for `is_i64x4` (a 256-bit
// shape) and `is_i16x4` (a 64-bit shape), neither of which has a __m128i
// reg(). The shapes matching _mm_cmpgt_epi64 / _mm_cmpgt_epi16 are i64x2 and
// i16x8.
1199 [[nodiscard]]
friend constexpr numeric_array gt_mask(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1201 if (not std::is_constant_evaluated()) {
1202 if constexpr (is_f32x4 and x86_64_v2) {
1203 return numeric_array{_mm_cmpgt_ps(lhs.reg(), rhs.reg())};
1204 }
else if constexpr (is_i64x2 and x86_64_v2) {
1205 return numeric_array{_mm_cmpgt_epi64(lhs.reg(), rhs.reg())};
1206 }
else if constexpr (is_i32x4 and x86_64_v2) {
1207 return numeric_array{_mm_cmpgt_epi32(lhs.reg(), rhs.reg())};
1208 }
else if constexpr (is_i16x8 and x86_64_v2) {
1209 return numeric_array{_mm_cmpgt_epi16(lhs.reg(), rhs.reg())};
1213 auto r = numeric_array{};
1214 for (
size_t i = 0; i != N; ++i) {
1215 if constexpr (
sizeof(value_type) == 4) {
// Turn the bool into 0 or -1 (all bits set) by shifting it into the sign
// bit and arithmetic-shifting it back, then reinterpret as value_type.
1216 r[i] = std::bit_cast<value_type>((
static_cast<int32_t
>(lhs.v[i] > rhs.v[i]) << 31) >> 31);
1218 tt_static_not_implemented();
1224 [[nodiscard]]
friend constexpr numeric_array ge_mask(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1226 if (not std::is_constant_evaluated()) {
1227 if constexpr (is_f32x4 and x86_64_v2) {
1228 return numeric_array{_mm_cmpge_ps(lhs.reg(), rhs.reg())};
1232 auto r = numeric_array{};
1233 for (
size_t i = 0; i != N; ++i) {
1234 if constexpr (
sizeof(value_type) == 4) {
1235 r[i] = std::bit_cast<value_type>((
static_cast<int32_t
>(lhs.v[i] >= rhs.v[i]) << 31) >> 31);
1237 tt_static_not_implemented();
1243 [[nodiscard]]
friend constexpr bool operator==(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1245 if (!std::is_constant_evaluated()) {
1246 if constexpr (is_f32x4 && x86_64_v2) {
1248 return f32x4_x64v2_eq(lhs.v, rhs.v);
1253 for (
size_t i = 0; i != N; ++i) {
1254 r &= (lhs.v[i] == rhs.v[i]);
1259 [[nodiscard]]
friend constexpr bool operator!=(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1261 return !(lhs == rhs);
// Shift every element of `lhs` left by `rhs` bits.
// Outside constant evaluation, 128-bit integer shapes with 16-, 32- or 64-bit
// lanes on x86_64_v2 use the SSE shift intrinsic matching the lane width; the
// loop below shifts element by element.
//
// Fix: the i16x8 and u16x8 branches used _mm_slli_epi32, which shifts 32-bit
// lanes and therefore carries bits from each even 16-bit element into its
// odd neighbour. They now use _mm_slli_epi16.
1264 [[nodiscard]]
friend constexpr numeric_array operator<<(numeric_array
const &lhs,
unsigned int rhs)
noexcept
1266 if (not std::is_constant_evaluated()) {
1267 if constexpr (x86_64_v2 and is_i64x2) {
1268 return numeric_array{_mm_slli_epi64(lhs.reg(), rhs)};
1269 }
else if constexpr (x86_64_v2 and is_i32x4) {
1270 return numeric_array{_mm_slli_epi32(lhs.reg(), rhs)};
1271 }
else if constexpr (x86_64_v2 and is_i16x8) {
1272 return numeric_array{_mm_slli_epi16(lhs.reg(), rhs)};
1273 }
else if constexpr (x86_64_v2 and is_u64x2) {
1274 return numeric_array{_mm_slli_epi64(lhs.reg(), rhs)};
1275 }
else if constexpr (x86_64_v2 and is_u32x4) {
1276 return numeric_array{_mm_slli_epi32(lhs.reg(), rhs)};
1277 }
else if constexpr (x86_64_v2 and is_u16x8) {
1278 return numeric_array{_mm_slli_epi16(lhs.reg(), rhs)};
1282 auto r = numeric_array{};
1283 for (
size_t i = 0; i != N; ++i) {
1284 r.v[i] = lhs.v[i] << rhs;
// Shift every element of `lhs` right by `rhs` bits.
// Outside constant evaluation, on x86_64_v2:
//  - signed i32x4 / i16x8 use the arithmetic shifts (_mm_srai_*),
//  - unsigned u64x2 / u32x4 / u16x8 use the logical shifts (_mm_srli_*).
// There is no i64x2 branch in this chain. The loop below applies `>>`
// element by element.
1289 [[nodiscard]]
friend constexpr numeric_array operator>>(numeric_array
const &lhs,
unsigned int rhs)
noexcept
1291 if (not std::is_constant_evaluated()) {
1292 if constexpr (x86_64_v2 and is_i32x4) {
1293 return numeric_array{_mm_srai_epi32(lhs.reg(), rhs)};
1294 }
else if constexpr (x86_64_v2 and is_i16x8) {
1295 return numeric_array{_mm_srai_epi16(lhs.reg(), rhs)};
1296 }
else if constexpr (x86_64_v2 and is_u64x2) {
1297 return numeric_array{_mm_srli_epi64(lhs.reg(), rhs)};
1298 }
else if constexpr (x86_64_v2 and is_u32x4) {
1299 return numeric_array{_mm_srli_epi32(lhs.reg(), rhs)};
1300 }
else if constexpr (x86_64_v2 and is_u16x8) {
1301 return numeric_array{_mm_srli_epi16(lhs.reg(), rhs)};
1305 auto r = numeric_array{};
1306 for (
size_t i = 0; i != N; ++i) {
1307 r.v[i] = lhs.v[i] >> rhs;
// Bitwise OR.
//  - array | array: element-wise OR. Outside constant evaluation, 128-bit
//    integral arrays on x86_64_v2 use _mm_or_si128; the loop below ORs element
//    by element.
//  - array | scalar and scalar | array: the scalar is broadcast to every
//    element first.
//
// Fix: the SSE path was selected for every integral T, but the __m128i reg()
// overload only exists when sizeof(T) * N == 16, so other integral sizes
// failed to compile. The branch is now guarded on the register size.
1312 [[nodiscard]]
friend constexpr numeric_array operator|(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1314 if (!std::is_constant_evaluated()) {
1315 if constexpr (std::is_integral_v<T> and x86_64_v2 and sizeof(T) * N == 16) {
1316 return numeric_array{_mm_or_si128(lhs.reg(), rhs.reg())};
1319 auto r = numeric_array{};
1320 for (
size_t i = 0; i != N; ++i) {
1321 r.v[i] = lhs.v[i] | rhs.v[i];
1326 [[nodiscard]]
friend constexpr numeric_array operator|(numeric_array
const &lhs, T
const &rhs)
noexcept
1328 return lhs | broadcast(rhs);
1331 [[nodiscard]]
friend constexpr numeric_array operator|(T
const &lhs, numeric_array
const &rhs)
noexcept
1333 return broadcast(lhs) | rhs;
// Bitwise AND.
//  - array & array: element-wise AND. Outside constant evaluation, 128-bit
//    integral arrays on x86_64_v2 use _mm_and_si128; the loop below ANDs
//    element by element.
//  - array & scalar and scalar & array: the scalar is broadcast to every
//    element first.
//
// Fix: the SSE path was selected for every integral T, but the __m128i reg()
// overload only exists when sizeof(T) * N == 16, so other integral sizes
// failed to compile. The branch is now guarded on the register size.
1336 [[nodiscard]]
friend constexpr numeric_array operator&(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1338 if (!std::is_constant_evaluated()) {
1339 if constexpr (std::is_integral_v<T> and x86_64_v2 and sizeof(T) * N == 16) {
1340 return numeric_array{_mm_and_si128(lhs.reg(), rhs.reg())};
1343 auto r = numeric_array{};
1344 for (
size_t i = 0; i != N; ++i) {
1345 r.v[i] = lhs.v[i] & rhs.v[i];
1350 [[nodiscard]]
friend constexpr numeric_array operator&(numeric_array
const &lhs, T
const &rhs)
noexcept
1352 return lhs & broadcast(rhs);
1355 [[nodiscard]]
friend constexpr numeric_array operator&(T
const &lhs, numeric_array
const &rhs)
noexcept
1357 return broadcast(lhs) & rhs;
// Bitwise XOR.
//  - array ^ array: element-wise XOR. Outside constant evaluation, 128-bit
//    integral arrays on x86_64_v2 use _mm_xor_si128; the loop below XORs
//    element by element.
//  - array ^ scalar and scalar ^ array: the scalar is broadcast to every
//    element first.
//
// Fix: the SSE path was selected for every integral T, but the __m128i reg()
// overload only exists when sizeof(T) * N == 16, so other integral sizes
// failed to compile. The branch is now guarded on the register size.
1360 [[nodiscard]]
friend constexpr numeric_array operator^(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1362 if (!std::is_constant_evaluated()) {
1363 if constexpr (std::is_integral_v<T> and x86_64_v2 and sizeof(T) * N == 16) {
1364 return numeric_array{_mm_xor_si128(lhs.reg(), rhs.reg())};
1367 auto r = numeric_array{};
1368 for (
size_t i = 0; i != N; ++i) {
1369 r.v[i] = lhs.v[i] ^ rhs.v[i];
1374 [[nodiscard]]
friend constexpr numeric_array operator^(numeric_array
const &lhs, T
const &rhs)
noexcept
1376 return lhs ^ broadcast(rhs);
1379 [[nodiscard]]
friend constexpr numeric_array operator^(T
const &lhs, numeric_array
const &rhs)
noexcept
1381 return broadcast(lhs) ^ rhs;
1384 [[nodiscard]]
friend constexpr numeric_array operator+(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1386 if (!std::is_constant_evaluated()) {
1387 if constexpr (x86_64_v2_5 and lhs.is_f32x8 and rhs.is_f32x8) {
1388 return numeric_array{_mm256_add_ps(lhs.reg(), rhs.reg())};
1392 auto r = numeric_array{};
1393 for (
size_t i = 0; i != N; ++i) {
1394 r.v[i] = lhs.v[i] + rhs.v[i];
1399 [[nodiscard]]
friend constexpr numeric_array operator+(numeric_array
const &lhs, T
const &rhs)
noexcept
1401 return lhs + broadcast(rhs);
1404 [[nodiscard]]
friend constexpr numeric_array operator+(T
const &lhs, numeric_array
const &rhs)
noexcept
1406 return broadcast(lhs) + rhs;
// Horizontal add: sums adjacent element pairs, first over `lhs` and then over
// `rhs` (see the two loops below). N is expected to be even.
//
// Outside constant evaluation, on x86_64_v2, f64x2 / f32x4 / i32x4 / i16x8 use
// the matching _mm_hadd_* intrinsic.
//
// Fix: the i8x16 branch called `_mm_hadd_epi8`, which does not exist (the
// horizontal adds are only provided for 16- and 32-bit integer lanes), so
// instantiating hadd for i8x16 failed to compile. That branch is removed and
// i8x16 now uses the pair-summing loops like every other shape.
1409 [[nodiscard]]
friend constexpr numeric_array hadd(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1411 if (!std::is_constant_evaluated()) {
1412 if constexpr (is_f64x2 and x86_64_v2) {
1413 return numeric_array{_mm_hadd_pd(lhs.reg(), rhs.reg())};
1414 }
else if constexpr (is_f32x4 and x86_64_v2) {
1415 return numeric_array{_mm_hadd_ps(lhs.reg(), rhs.reg())};
1416 }
else if constexpr (is_i32x4 and x86_64_v2) {
1417 return numeric_array{_mm_hadd_epi32(lhs.reg(), rhs.reg())};
1418 }
else if constexpr (is_i16x8 and x86_64_v2) {
1419 return numeric_array{_mm_hadd_epi16(lhs.reg(), rhs.reg())};
1420 }
1425 tt_axiom(N % 2 == 0);
1427 auto r = numeric_array{};
// Sum each adjacent pair of lhs.
1431 while (src_i != N) {
1432 auto tmp = lhs[src_i++];
1433 tmp += lhs[src_i++];
// Sum each adjacent pair of rhs.
1438 while (src_i != N) {
1439 auto tmp = rhs[src_i++];
1440 tmp += rhs[src_i++];
// Horizontal subtract: for each adjacent element pair computes
// (first - second), first over `lhs` and then over `rhs` (see the two loops
// below). N is expected to be even.
//
// Outside constant evaluation, on x86_64_v2, f64x2 / f32x4 / i32x4 / i16x8 use
// the matching _mm_hsub_* intrinsic.
//
// Fix: the i8x16 branch called `_mm_hsub_epi8`, which does not exist (the
// horizontal subtracts are only provided for 16- and 32-bit integer lanes),
// so instantiating hsub for i8x16 failed to compile. That branch is removed
// and i8x16 now uses the pair-subtracting loops like every other shape.
1446 [[nodiscard]]
friend constexpr numeric_array hsub(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1448 if (!std::is_constant_evaluated()) {
1449 if constexpr (is_f64x2 and x86_64_v2) {
1450 return numeric_array{_mm_hsub_pd(lhs.reg(), rhs.reg())};
1451 }
else if constexpr (is_f32x4 and x86_64_v2) {
1452 return numeric_array{_mm_hsub_ps(lhs.reg(), rhs.reg())};
1453 }
else if constexpr (is_i32x4 and x86_64_v2) {
1454 return numeric_array{_mm_hsub_epi32(lhs.reg(), rhs.reg())};
1455 }
else if constexpr (is_i16x8 and x86_64_v2) {
1456 return numeric_array{_mm_hsub_epi16(lhs.reg(), rhs.reg())};
1457 }
1462 tt_axiom(N % 2 == 0);
1464 auto r = numeric_array{};
// Subtract within each adjacent pair of lhs.
1468 while (src_i != N) {
1469 auto tmp = lhs[src_i++];
1470 tmp -= lhs[src_i++];
// Subtract within each adjacent pair of rhs.
1475 while (src_i != N) {
1476 auto tmp = rhs[src_i++];
1477 tmp -= rhs[src_i++];
1483 [[nodiscard]]
friend constexpr numeric_array operator-(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1485 if (!std::is_constant_evaluated()) {
1486 if constexpr (x86_64_v2_5 and lhs.is_f32x8 and rhs.is_f32x8) {
1487 return numeric_array{_mm256_sub_ps(lhs.reg(), rhs.reg())};
1491 auto r = numeric_array{};
1492 for (
size_t i = 0; i != N; ++i) {
1493 r.v[i] = lhs.v[i] - rhs.v[i];
1498 [[nodiscard]]
friend constexpr numeric_array operator-(numeric_array
const &lhs, T
const &rhs)
noexcept
1500 return lhs - broadcast(rhs);
1503 [[nodiscard]]
friend constexpr numeric_array operator-(T
const &lhs, numeric_array
const &rhs)
noexcept
1505 return broadcast(lhs) - rhs;
// Element-wise add-or-subtract, selected per element by the compile-time bit
// mask `Mask` (default: all bits set).
// NOTE(review): the function signature is not visible in this excerpt; the
// body reads two arrays named `lhs` and `rhs` and writes an array `r`.
1512 template<
size_t Mask = ~size_t{0}>
// Fast path, skipped during constant evaluation.
1515 if (!std::is_constant_evaluated()) {
1516 if constexpr (is_f32x4 && x86_64_v2) {
// Only the low four bits of Mask are forwarded to the f32x4 helper.
1517 return numeric_array{f32x4_x64v2_addsub<Mask & 0xf>(lhs.v, rhs.v)};
// Scalar path: bit i of Mask selects the operation applied to element i.
1522 for (
size_t i = 0; i != N; ++i) {
1523 if (
static_cast<bool>((Mask >> i) & 1)) {
// Mask bit set: add.
1524 r.v[i] = lhs.v[i] + rhs.v[i];
// Presumably the else-branch (mask bit clear): subtract.
1526 r.v[i] = lhs.v[i] - rhs.v[i];
// Element-wise multiplication of `lhs` and `rhs`.
// NOTE(review): the signature and the body of the f32x8 branch are not visible
// in this excerpt.
// Fast path, skipped during constant evaluation.
1534 if (!std::is_constant_evaluated()) {
// NOTE(review): the trait is read through the reference parameters here,
// whereas hadd()/hsub() name the trait directly — consider `is_f32x8`.
1535 if constexpr (x86_64_v2_5 and lhs.is_f32x8 and rhs.is_f32x8) {
// Scalar path: multiply element by element.
1540 auto r = numeric_array{};
1541 for (
size_t i = 0; i != N; ++i) {
1542 r.v[i] = lhs.v[i] * rhs.v[i];
1547 [[nodiscard]]
friend constexpr numeric_array operator*(numeric_array
const &lhs, T
const &rhs)
noexcept
1549 return lhs * broadcast(rhs);
1552 [[nodiscard]]
friend constexpr numeric_array operator*(T
const &lhs, numeric_array
const &rhs)
noexcept
1554 return broadcast(lhs) * rhs;
1557 [[nodiscard]]
friend constexpr numeric_array operator/(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1559 if (!std::is_constant_evaluated()) {
1560 if constexpr (x86_64_v2_5 and lhs.is_f32x8 and rhs.is_f32x8) {
1561 return numeric_array{_mm256_div_ps(lhs.reg(), rhs.reg())};
1565 auto r = numeric_array{};
1566 for (
size_t i = 0; i != N; ++i) {
1567 r.v[i] = lhs.v[i] / rhs.v[i];
1572 [[nodiscard]]
friend constexpr numeric_array operator/(numeric_array
const &lhs, T
const &rhs)
noexcept
1574 return lhs / broadcast(rhs);
1577 [[nodiscard]]
friend constexpr numeric_array operator/(T
const &lhs, numeric_array
const &rhs)
noexcept
1579 return broadcast(lhs) / rhs;
1582 [[nodiscard]]
friend constexpr numeric_array operator%(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1584 auto r = numeric_array{};
1585 for (
size_t i = 0; i != N; ++i) {
1586 r.v[i] = lhs.v[i] % rhs.v[i];
1591 [[nodiscard]]
friend constexpr numeric_array operator%(numeric_array
const &lhs, T
const &rhs)
noexcept
1593 return lhs % broadcast(rhs);
1596 [[nodiscard]]
friend constexpr numeric_array operator%(T
const &lhs, numeric_array
const &rhs)
noexcept
1598 return broadcast(lhs) % rhs;
1601 [[nodiscard]]
friend constexpr numeric_array min(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1603 auto r = numeric_array{};
1604 for (
size_t i = 0; i != N; ++i) {
1606 r.v[i] = lhs.v[i] < rhs.v[i] ? lhs.v[i] : rhs.v[i];
1611 [[nodiscard]]
friend constexpr numeric_array max(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
1613 auto r = numeric_array{};
1614 for (
size_t i = 0; i != N; ++i) {
1616 r.v[i] = lhs.v[i] > rhs.v[i] ? lhs.v[i] : rhs.v[i];
1621 [[nodiscard]]
friend constexpr numeric_array
1622 clamp(numeric_array
const &lhs, numeric_array
const &low, numeric_array
const &high)
noexcept
1624 auto r = numeric_array{};
1625 for (
size_t i = 0; i != N; ++i) {
1627 r.v[i] = lhs.v[i] < low.v[i] ? low.v[i] : lhs.v[i] > high.v[i] ? high.v[i] : lhs.v[i];
// Precondition of a function whose signature and return are not visible in
// this excerpt: `rhs` must have a zero z element and satisfy is_vector().
1636 tt_axiom(rhs.z() == 0.0f && rhs.is_vector());
// Returns cross_2D(rhs) passed through normalize<0b0011>().
// NOTE(review): the enclosing signature is not visible here; the mask 0b0011
// presumably selects the x and y elements — confirm against normalize().
1644 return normalize<0b0011>(
cross_2D(rhs));
// Two-operand 2D cross product (signature not visible in this excerpt).
// At run time on f32x4 with x86_64_v2 the work is delegated to an intrinsic
// helper.
// NOTE(review): this is a plain `if`, not `if constexpr`, so the helper call
// must compile for every T/N this function is instantiated with — confirm
// that is intended.
1651 if (is_f32x4 && x86_64_v2 && !std::is_constant_evaluated()) {
1652 return f32x4_x64v2_viktor_cross(lhs.v, rhs.v);
// Generic path: x1*y2 - y1*x2.
1655 return lhs.x() * rhs.y() - lhs.y() * rhs.x();
// 3D cross product of `lhs` and `rhs` (signature and the body of the f32x4
// branch are not visible in this excerpt).
// Fast path, skipped during constant evaluation.
1665 if (!std::is_constant_evaluated()) {
1666 if constexpr (is_f32x4 && x86_64_v2) {
// Generic path: the three components of lhs x rhs, in x, y, z order.
// NOTE(review): any further initializer after the third component is not
// visible here.
1671 return numeric_array{
1672 lhs.y() * rhs.z() - lhs.z() * rhs.y(),
1673 lhs.z() * rhs.x() - lhs.x() * rhs.z(),
1674 lhs.x() * rhs.y() - lhs.y() * rhs.x(),
// Computes a 4-element product of `lhs` and `rhs` from four column terms.
// Only available when D == 4 (the declaration of D is not visible in this
// excerpt). Unlike its neighbours this function is not declared constexpr.
// NOTE(review): judging by the name this is the quaternion (Hamilton)
// product; the final combination in the return statement is not visible here,
// so confirm before relying on that.
1685 requires(D == 4) [[nodiscard]]
friend numeric_array
1686 hamilton_cross(numeric_array
const &lhs, numeric_array
const &rhs)
noexcept
// Each column is one element of lhs replicated to every lane, multiplied by a
// swizzled copy of rhs.
1688 ttlet col0 = lhs.wwww() * rhs;
1689 ttlet col1 = lhs.xxxx() * rhs.wzyx();
1690 ttlet col2 = lhs.yyyy() * rhs.zwxy();
1691 ttlet col3 = lhs.zzzz() * rhs.yxwz();
// Combine the first two columns with addsub() using its default mask.
1693 ttlet col01 =
addsub(col0, col1);
// xzyw() reorders the lanes before addsub() and the same swizzle is applied
// again to the result.
1694 ttlet col012 =
addsub(col01.xzyw(), col2.xzyw()).xzyw();
1696 return numeric_array{
// Element shift toward higher indices (signature not visible in this excerpt):
// result element i is taken from lhs[i - rhs] whenever that source index is
// not negative. What is stored for the remaining elements is not visible here.
1707 for (
ssize_t i = 0; i != N; ++i) {
1708 if ((i - rhs) >= 0) {
1709 r[i] = lhs[i - rhs];
// Element shift toward lower indices (signature not visible in this excerpt):
// result element i is taken from lhs[i + rhs] whenever that source index is
// below N. What is stored for the remaining elements is not visible here.
1723 for (
ssize_t i = 0; i != N; ++i) {
1724 if ((i + rhs) < N) {
1725 r[i] = lhs[i + rhs];
// Per-element selection between `a` and `b` driven by `mask` (signature not
// visible in this excerpt).
// Fast path, skipped during constant evaluation.
// NOTE(review): the visible guard is only x86_64_v2; the element-type
// restriction needed for _mm_blendv_epi8 is presumably on the signature —
// confirm.
1736 if (!std::is_constant_evaluated()) {
1737 if constexpr (x86_64_v2) {
1738 return numeric_array{_mm_blendv_epi8(a.reg(), b.reg(), mask.reg())};
1742 auto r = numeric_array{};
// Scalar path: a non-negative mask element picks a[i], a negative one b[i].
1744 for (
size_t i = 0; i != N; ++i) {
1745 r[i] = mask[i] >= 0 ? a[i] : b[i];
1751 [[nodiscard]]
static constexpr numeric_array byte_srl_shuffle_indices(
unsigned int rhs)
requires(is_i8x16)
1753 static_assert(std::endian::native == std::endian::little);
1755 auto r = numeric_array{};
1756 for (
auto i = 0; i != 16; ++i) {
1757 if ((i + rhs) < 16) {
1758 r[i] = narrow_cast<int8_t>(i + rhs);
1767 [[nodiscard]]
static constexpr numeric_array byte_sll_shuffle_indices(
unsigned int rhs)
requires(is_i8x16)
1769 static_assert(std::endian::native == std::endian::little);
1771 auto r = numeric_array{};
1772 for (
auto i = 0; i != 16; ++i) {
1773 if ((i - rhs) >= 0) {
1774 r[i] = narrow_cast<int8_t>(i - rhs);
// Byte shuffle of `lhs` using the indices in `rhs` (signature not visible in
// this excerpt).
// Fast path, skipped during constant evaluation.
1787 if (!std::is_constant_evaluated()) {
1788 if constexpr (x86_64_v2) {
1789 return numeric_array{_mm_shuffle_epi8(lhs.reg(), rhs.reg())};
// Scalar path: only the low four bits of each index select the source byte.
// NOTE(review): whether a guard for negative indices surrounds this
// assignment is not visible here — confirm it matches the intrinsic.
1795 for (
size_t i = 0; i != N; ++i) {
1797 r[i] = lhs[rhs[i] & 0xf];
// Returns the point halfway between p1 and p2 (signature not visible in this
// excerpt). Both arguments must satisfy is_point().
1810 tt_axiom(p1.is_point());
1811 tt_axiom(p2.is_point());
1812 return (p1 + p2) * 0.5f;
// Returns `p` mirrored through `anchor`, i.e. anchor - (p - anchor)
// (signature not visible in this excerpt). Both arguments must satisfy
// is_point().
1819 tt_axiom(p.is_point());
1820 tt_axiom(anchor.is_point());
1821 return anchor - (p - anchor);
// Transposes a square matrix passed as N column arrays (signature not visible
// in this excerpt).
1824 template<
typename... Columns>
// Exactly N columns are required.
1827 static_assert(
sizeof...(Columns) == N,
"Can only transpose square matrices");
// Run-time f32x4 path: the helper's results are wrapped one by one.
// NOTE(review): this is a plain `if`, not `if constexpr`, so the helper call
// must compile for every instantiation — confirm that is intended.
1831 if (is_f32x4 && x86_64_v2 && !std::is_constant_evaluated()) {
1832 auto tmp = f32x4_x64v2_transpose(columns.v...);
1833 for (
int i = 0; i != N; ++i) {
1834 r[i] = numeric_array{tmp[i]};
// Generic path: delegate to the recursive helper, starting at column 0.
1838 transpose_detail<0, Columns...>(columns..., r);
// Combines the 4-element value `over` with the 4-element value `under`, using
// the w element of each as a weight. Only available for N == 4 with a
// floating-point T.
// NOTE(review): the names suggest RGBA alpha compositing with alpha in w —
// confirm against callers.
1844 [[nodiscard]]
constexpr friend numeric_array composit(numeric_array
const &under, numeric_array
const &over)
noexcept
1845 requires(N == 4 && std::is_floating_point_v<T>)
// Special cases; the statements inside these two branches are not visible in
// this excerpt.
1847 if (over.is_transparent()) {
1850 if (over.is_opaque()) {
// The w element of each input replicated to every lane.
1854 ttlet over_alpha = over.wwww();
1855 ttlet under_alpha = under.wwww();
// x, y, z of each input with the fourth lane replaced by one.
1857 ttlet over_color = over.xyz1();
1858 ttlet under_color = under.xyz1();
// over weighted by its own alpha, plus under weighted by its alpha and by
// what over leaves uncovered.
1860 ttlet output_color = over_color * over_alpha + under_color * under_alpha * (T{1} - over_alpha);
// Divide x, y, z by the resulting w; the fourth lane is divided by one.
1862 return output_color / output_color.www1();
// Builds a textual representation of `rhs` by appending each element formatted
// with std::format("{}").
// NOTE(review): any prefix, separator or suffix text is not visible in this
// excerpt.
1865 [[nodiscard]]
friend std::string to_string(numeric_array
const &rhs)
noexcept
1870 for (
size_t i = 0; i != N; ++i) {
1874 r += std::format(
"{}", rhs[i]);
// Stream insertion (signature not visible in this excerpt): writes the
// to_string() representation of `rhs` to `lhs`.
1882 return lhs << to_string(rhs);
// Copies element FromElement of `rhs` into element ToElement of the result,
// with ZeroMask naming elements that get separate treatment (signature not
// visible in this excerpt).
1889 template<
size_t FromElement,
size_t ToElement,
size_t ZeroMask = 0>
// Fast paths for f32x4 and u64x2, skipped during constant evaluation.
1894 if (!std::is_constant_evaluated()) {
1895 if constexpr (is_f32x4 && x86_64_v2) {
1896 return numeric_array{f32x4_x64v2_insert<FromElement, ToElement, ZeroMask>(lhs.v, rhs.v)};
1897 }
else if constexpr (is_u64x2 and x86_64_v2) {
1898 return numeric_array{u64x2_x64v2_insert<FromElement, ToElement, ZeroMask>(lhs.v, rhs.v)};
// Scalar path. The ZeroMask test comes first, so a set mask bit takes
// precedence over the insertion at ToElement.
// NOTE(review): the body of the ZeroMask branch and the final branch are not
// visible here; presumably they store zero and lhs[i] respectively — confirm.
1902 for (
size_t i = 0; i != N; ++i) {
1903 if ((ZeroMask >> i) & 1) {
1905 }
else if (i == ToElement) {
1906 r[i] = rhs[FromElement];
// Reorders elements according to the `Elements` parameter pack (signature not
// visible in this excerpt).
// At most N selectors may be given.
1925 static_assert(
sizeof...(Elements) <= N);
// Fast paths for f32x4, i32x4, u32x4 and u64x2, skipped during constant
// evaluation. Their bodies are not visible in this excerpt.
1927 if (!std::is_constant_evaluated()) {
1928 if constexpr (is_f32x4 && x86_64_v2) {
1930 }
else if constexpr (is_i32x4 && x86_64_v2) {
1932 }
else if constexpr (is_u32x4 && x86_64_v2) {
1934 }
else if constexpr (is_u64x2 and x86_64_v2) {
// Generic path: fill `r` one element at a time, starting at index 0.
1940 swizzle_detail<0, Elements...>(r);
// Generator macros for the named swizzle accessors.
//
// SWIZZLE(swizzle_name, D, ...) declares a const member `swizzle_name()` that
// is only available when D == N and returns swizzle<...>() with the given
// selectors. Selectors are element indices or `get_zero` / `get_one`.
//
// Each *_GENn macro appends one more selector character to the accessor name
// (0, 1, x, y, z and, for 4D, w) together with the matching selector
// (get_zero, get_one, 0, 1, 2, 3), and expands the next lower generator, or
// SWIZZLE itself at the last level.
1944#define SWIZZLE(swizzle_name, D, ...) \
1945 [[nodiscard]] constexpr numeric_array swizzle_name() const noexcept requires(D == N) \
1947 return swizzle<__VA_ARGS__>(); \
1950#define SWIZZLE_4D_GEN1(name, ...) \
1951 SWIZZLE(name##0, 4, __VA_ARGS__, get_zero) \
1952 SWIZZLE(name##1, 4, __VA_ARGS__, get_one) \
1953 SWIZZLE(name##x, 4, __VA_ARGS__, 0) \
1954 SWIZZLE(name##y, 4, __VA_ARGS__, 1) \
1955 SWIZZLE(name##z, 4, __VA_ARGS__, 2) \
1956 SWIZZLE(name##w, 4, __VA_ARGS__, 3)
// Third selector of a 4D accessor.
1958#define SWIZZLE_4D_GEN2(name, ...) \
1959 SWIZZLE_4D_GEN1(name##0, __VA_ARGS__, get_zero) \
1960 SWIZZLE_4D_GEN1(name##1, __VA_ARGS__, get_one) \
1961 SWIZZLE_4D_GEN1(name##x, __VA_ARGS__, 0) \
1962 SWIZZLE_4D_GEN1(name##y, __VA_ARGS__, 1) \
1963 SWIZZLE_4D_GEN1(name##z, __VA_ARGS__, 2) \
1964 SWIZZLE_4D_GEN1(name##w, __VA_ARGS__, 3)
// Second selector of a 4D accessor.
1966#define SWIZZLE_4D_GEN3(name, ...) \
1967 SWIZZLE_4D_GEN2(name##0, __VA_ARGS__, get_zero) \
1968 SWIZZLE_4D_GEN2(name##1, __VA_ARGS__, get_one) \
1969 SWIZZLE_4D_GEN2(name##x, __VA_ARGS__, 0) \
1970 SWIZZLE_4D_GEN2(name##y, __VA_ARGS__, 1) \
1971 SWIZZLE_4D_GEN2(name##z, __VA_ARGS__, 2) \
1972 SWIZZLE_4D_GEN2(name##w, __VA_ARGS__, 3)
// Expand every 4-selector accessor, one line per possible first selector.
// A leading digit is spelled with an underscore prefix (_0, _1).
1974 SWIZZLE_4D_GEN3(_0, get_zero)
1975 SWIZZLE_4D_GEN3(_1, get_one)
1976 SWIZZLE_4D_GEN3(x, 0)
1977 SWIZZLE_4D_GEN3(y, 1)
1978 SWIZZLE_4D_GEN3(z, 2)
1979 SWIZZLE_4D_GEN3(w, 3)
// Last selector of a 3D accessor.
1981#define SWIZZLE_3D_GEN1(name, ...) \
1982 SWIZZLE(name##0, 3, __VA_ARGS__, get_zero) \
1983 SWIZZLE(name##1, 3, __VA_ARGS__, get_one) \
1984 SWIZZLE(name##x, 3, __VA_ARGS__, 0) \
1985 SWIZZLE(name##y, 3, __VA_ARGS__, 1) \
1986 SWIZZLE(name##z, 3, __VA_ARGS__, 2)
// Second selector of a 3D accessor.
1988#define SWIZZLE_3D_GEN2(name, ...) \
1989 SWIZZLE_3D_GEN1(name##0, __VA_ARGS__, get_zero) \
1990 SWIZZLE_3D_GEN1(name##1, __VA_ARGS__, get_one) \
1991 SWIZZLE_3D_GEN1(name##x, __VA_ARGS__, 0) \
1992 SWIZZLE_3D_GEN1(name##y, __VA_ARGS__, 1) \
1993 SWIZZLE_3D_GEN1(name##z, __VA_ARGS__, 2)
// Expand every 3-selector accessor.
1995 SWIZZLE_3D_GEN2(_0, get_zero)
1996 SWIZZLE_3D_GEN2(_1, get_one)
1997 SWIZZLE_3D_GEN2(x, 0)
1998 SWIZZLE_3D_GEN2(y, 1)
1999 SWIZZLE_3D_GEN2(z, 2)
// Last selector of a 2D accessor.
2001#define SWIZZLE_2D_GEN1(name, ...) \
2002 SWIZZLE(name##0, 2, __VA_ARGS__, get_zero) \
2003 SWIZZLE(name##1, 2, __VA_ARGS__, get_one) \
2004 SWIZZLE(name##x, 2, __VA_ARGS__, 0) \
2005 SWIZZLE(name##y, 2, __VA_ARGS__, 1)
// Expand every 2-selector accessor.
2007 SWIZZLE_2D_GEN1(_0, get_zero)
2008 SWIZZLE_2D_GEN1(_1, get_one)
2009 SWIZZLE_2D_GEN1(x, 0)
2010 SWIZZLE_2D_GEN1(y, 1)
// The generator macros are removed again once the accessors are declared.
// NOTE(review): an #undef for SWIZZLE itself is not visible in this excerpt —
// confirm it exists so the macro does not leak out of the header.
2013#undef SWIZZLE_4D_GEN1
2014#undef SWIZZLE_4D_GEN2
2015#undef SWIZZLE_4D_GEN3
2016#undef SWIZZLE_3D_GEN1
2017#undef SWIZZLE_3D_GEN2
2018#undef SWIZZLE_2D_GEN1
// Recursive helper used by transpose(): handles the column with index I and
// then recurses for the remaining columns (signature and loop body are not
// visible in this excerpt).
2023 template<
int I,
typename First,
typename... Rest>
// Visits every element index j of the current column.
2026 for (
size_t j = 0; j != N; ++j) {
// Recurse while there are columns left; the pack shrinks by one each step.
2030 if constexpr (
sizeof...(Rest) != 0) {
2031 transpose_detail<I + 1, Rest...>(rest..., r);
// Recursive helper used by swizzle(): writes destination element I of `r` from
// the element of *this selected by FirstElement, then recurses with I + 1 for
// the remaining selectors.
2035 template<ssize_t I, ssize_t FirstElement, ssize_t... RestElements>
2036 constexpr void swizzle_detail(numeric_array &r)
const noexcept
// The destination index must lie inside the array.
2038 static_assert(I < narrow_cast<ssize_t>(N));
// Besides real indices, -2 and -1 are accepted; presumably these are the
// get_zero / get_one selectors — TODO confirm against their definitions.
2039 static_assert(FirstElement >= -2 && FirstElement < narrow_cast<ssize_t>(N),
"Index out of bounds");
2041 get<I>(r) = get<FirstElement>(*
this);
// Stop once the selector pack is exhausted.
2042 if constexpr (
sizeof...(RestElements) != 0) {
2043 swizzle_detail<I + 1, RestElements...>(r);