56 using value_type = typename container_type::value_type;
57 using size_type = typename container_type::size_type;
58 using difference_type = typename container_type::difference_type;
59 using reference = typename container_type::reference;
60 using const_reference = typename container_type::const_reference;
61 using pointer = typename container_type::pointer;
62 using const_pointer = typename container_type::const_pointer;
63 using iterator = typename container_type::iterator;
64 using const_iterator = typename container_type::const_iterator;
66 constexpr static bool is_i8x1 = std::is_same_v<T, int8_t> && N == 1;
67 constexpr static bool is_i8x2 = std::is_same_v<T, int8_t> && N == 2;
68 constexpr static bool is_i8x4 = std::is_same_v<T, int8_t> && N == 4;
69 constexpr static bool is_i8x8 = std::is_same_v<T, int8_t> && N == 8;
70 constexpr static bool is_i8x16 = std::is_same_v<T, int8_t> && N == 16;
71 constexpr static bool is_i8x32 = std::is_same_v<T, int8_t> && N == 32;
72 constexpr static bool is_i8x64 = std::is_same_v<T, int8_t> && N == 64;
73 constexpr static bool is_u8x1 = std::is_same_v<T, uint8_t> && N == 1;
74 constexpr static bool is_u8x2 = std::is_same_v<T, uint8_t> && N == 2;
75 constexpr static bool is_u8x4 = std::is_same_v<T, uint8_t> && N == 4;
76 constexpr static bool is_u8x8 = std::is_same_v<T, uint8_t> && N == 8;
77 constexpr static bool is_u8x16 = std::is_same_v<T, uint8_t> && N == 16;
78 constexpr static bool is_u8x32 = std::is_same_v<T, uint8_t> && N == 32;
79 constexpr static bool is_u8x64 = std::is_same_v<T, uint8_t> && N == 64;
81 constexpr static bool is_i16x1 = std::is_same_v<T, int16_t> && N == 1;
82 constexpr static bool is_i16x2 = std::is_same_v<T, int16_t> && N == 2;
83 constexpr static bool is_i16x4 = std::is_same_v<T, int16_t> && N == 4;
84 constexpr static bool is_i16x8 = std::is_same_v<T, int16_t> && N == 8;
85 constexpr static bool is_i16x16 = std::is_same_v<T, int16_t> && N == 16;
86 constexpr static bool is_i16x32 = std::is_same_v<T, int16_t> && N == 32;
87 constexpr static bool is_u16x1 = std::is_same_v<T, uint16_t> && N == 1;
88 constexpr static bool is_u16x2 = std::is_same_v<T, uint16_t> && N == 2;
89 constexpr static bool is_u16x4 = std::is_same_v<T, uint16_t> && N == 4;
90 constexpr static bool is_u16x8 = std::is_same_v<T, uint16_t> && N == 8;
91 constexpr static bool is_u16x16 = std::is_same_v<T, uint16_t> && N == 16;
92 constexpr static bool is_u16x32 = std::is_same_v<T, uint16_t> && N == 32;
93 constexpr static bool is_f16x4 = std::is_same_v<T, float16> && N == 4;
95 constexpr static bool is_i32x1 = std::is_same_v<T, int32_t> && N == 1;
96 constexpr static bool is_i32x2 = std::is_same_v<T, int32_t> && N == 2;
97 constexpr static bool is_i32x4 = std::is_same_v<T, int32_t> && N == 4;
98 constexpr static bool is_i32x8 = std::is_same_v<T, int32_t> && N == 8;
99 constexpr static bool is_i32x16 = std::is_same_v<T, int32_t> && N == 16;
100 constexpr static bool is_u32x1 = std::is_same_v<T, uint32_t> && N == 1;
101 constexpr static bool is_u32x2 = std::is_same_v<T, uint32_t> && N == 2;
102 constexpr static bool is_u32x4 = std::is_same_v<T, uint32_t> && N == 4;
103 constexpr static bool is_u32x8 = std::is_same_v<T, uint32_t> && N == 8;
104 constexpr static bool is_u32x16 = std::is_same_v<T, uint32_t> && N == 16;
105 constexpr static bool is_f32x1 = std::is_same_v<T, float> && N == 1;
106 constexpr static bool is_f32x2 = std::is_same_v<T, float> && N == 2;
107 constexpr static bool is_f32x4 = std::is_same_v<T, float> && N == 4;
108 constexpr static bool is_f32x8 = std::is_same_v<T, float> && N == 8;
109 constexpr static bool is_f32x16 = std::is_same_v<T, float> && N == 16;
111 constexpr static bool is_i64x1 = std::is_same_v<T, int64_t> && N == 1;
112 constexpr static bool is_i64x2 = std::is_same_v<T, int64_t> && N == 2;
113 constexpr static bool is_i64x4 = std::is_same_v<T, int64_t> && N == 4;
114 constexpr static bool is_i64x8 = std::is_same_v<T, int64_t> && N == 8;
115 constexpr static bool is_u64x1 = std::is_same_v<T, uint64_t> && N == 1;
116 constexpr static bool is_u64x2 = std::is_same_v<T, uint64_t> && N == 2;
117 constexpr static bool is_u64x4 = std::is_same_v<T, uint64_t> && N == 4;
118 constexpr static bool is_u64x8 = std::is_same_v<T, uint64_t> && N == 8;
119 constexpr static bool is_f64x1 = std::is_same_v<T, double> && N == 1;
120 constexpr static bool is_f64x2 = std::is_same_v<T, double> && N == 2;
121 constexpr static bool is_f64x4 = std::is_same_v<T, double> && N == 4;
122 constexpr static bool is_f64x8 = std::is_same_v<T, double> && N == 8;
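// Illustrative note (not part of the original header): the is_* flags above are what the
// members below test with `if constexpr` to pick a SIMD code path at compile time, falling
// back to a scalar loop otherwise. A minimal sketch of that pattern, mirroring the
// zero-initializing constructor whose body follows:
//
//     if (not std::is_constant_evaluated()) {
// #if defined(HI_HAS_SSE)
//         if constexpr (is_f32x4) {
//             _mm_storeu_ps(v.data(), _mm_setzero_ps()); // one 128-bit store for 4 floats
//             return;
//         }
// #endif
//     }
//     for (auto i = 0_uz; i != N; ++i) {
//         v[i] = T{}; // portable, constexpr-safe fallback
//     }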
128 if (not std::is_constant_evaluated()) {
129 #if defined(HI_HAS_AVX)
130 if constexpr (is_i64x4 or is_u64x4 or is_i32x8 or is_u32x8 or is_i16x16 or is_u16x16 or is_i8x32 or is_u8x32) {
131 _mm256_storeu_si256(reinterpret_cast<__m256i *>(v.data()), _mm256_setzero_si256());
133 } else if constexpr (is_f64x4) {
134 _mm256_storeu_pd(reinterpret_cast<__m256d *>(v.data()), _mm256_setzero_pd());
136 } else if constexpr (is_f32x8) {
137 _mm256_storeu_ps(v.data(), _mm256_setzero_ps());
141 #if defined(HI_HAS_SSE2)
142 if constexpr (is_i64x2 or is_u64x2 or is_i32x4 or is_u32x4 or is_i16x8 or is_u16x8 or is_i8x16 or is_u8x16) {
143 _mm_storeu_si128(reinterpret_cast<__m128i *>(v.data()), _mm_setzero_si128());
145 } else if constexpr (is_f64x2) {
146 _mm_storeu_pd(reinterpret_cast<__m128d *>(v.data()), _mm_setzero_pd());
150 #if defined(HI_HAS_SSE)
151 if constexpr (is_f32x4) {
152 _mm_storeu_ps(v.data(), _mm_setzero_ps());
158 for (auto i = 0_uz; i != N; ++i) {
168 template<numeric_limited U, std::size_t M>
171 if (!std::is_constant_evaluated()) {
172 #if defined(HI_HAS_AVX)
173 if constexpr (is_f64x4 and other.is_f32x4) {
176 } else if constexpr (is_f64x4 and other.is_i32x4) {
179 } else if constexpr (is_f32x4 and other.is_f64x4) {
182 } else if constexpr (is_i32x4 and other.is_f64x4) {
185 } else if constexpr (is_i32x8 and other.is_f32x8) {
188 } else if constexpr (is_f32x8 and other.is_i32x8) {
193 #if defined(HI_HAS_SSE4_1)
194 if constexpr (is_u8x4 and other.is_f32x4) {
195 hilet i32_4 = _mm_cvtps_epi32(other.reg());
196 hilet i16_8 = _mm_packs_epi32(i32_4, _mm_setzero_si128());
197 hilet u8_16 = _mm_packus_epi16(i16_8, _mm_setzero_si128());
200 } else if constexpr (is_i64x4 and other.is_i32x4) {
203 } else if constexpr (is_i64x4 and other.is_i16x8) {
206 } else if constexpr (is_i32x4 and other.is_i16x8) {
209 } else if constexpr (is_i64x2 and other.is_i8x16) {
212 } else if constexpr (is_i32x4 and other.is_i8x16) {
215 } else if constexpr (is_i16x8 and other.is_i8x16) {
218 } else if constexpr (is_f16x4 and other.is_f32x4) {
221 } else if constexpr (is_f32x4 and other.is_f16x4) {
227 #if defined(HI_HAS_SSE2)
228 if constexpr (is_f64x2 and other.is_i32x4) {
231 } else if constexpr (is_f32x4 and other.is_i32x4) {
234 } else if constexpr (is_i32x4 and other.is_f32x4) {
243 if constexpr (std::is_integral_v<T> and std::is_floating_point_v<U>) {
245 v[i] = static_cast<value_type>(std::round(other[i]));
247 v[i] = static_cast<value_type>(other[i]);
255 template<numeric_limited U, std::size_t M>
260 if (!std::is_constant_evaluated()) {
261 #if defined(HI_HAS_AVX)
262 if constexpr (is_f64x4 and other1.is_f64x2 and other2.is_f64x2) {
263 v = numeric_array{_mm256_set_m128d(other2.reg(), other1.reg())};
264 } else if constexpr (is_f32x8 and other1.is_f32x4 and other2.is_f32x4) {
265 v = numeric_array{_mm256_set_m128(other2.reg(), other1.reg())};
266 } else if constexpr (
267 std::is_integral_v<T> and std::is_integral_v<U> and (sizeof(T) * N == 32) and (sizeof(U) * M == 16)) {
268 v = numeric_array{_mm256_set_m128i(other2.reg(), other1.reg())};
271 #if defined(HI_HAS_SSE4_1)
272 if constexpr (is_u16x8 and other1.is_u32x4 and other2.is_u32x4) {
273 v = numeric_array{_mm_packus_epi32(other2.reg(), other1.reg())};
276 #if defined(HI_HAS_SSE2)
277 if constexpr (is_i16x8 and other1.is_i32x4 and other2.is_i32x4) {
278 v = numeric_array{_mm_packs_epi32(other2.reg(), other1.reg())};
279 } else if constexpr (is_i8x16 and other1.is_i16x8 and other2.is_i16x8) {
280 v = numeric_array{_mm_packs_epi16(other2.reg(), other1.reg())};
281 } else if constexpr (is_u8x16 and other1.is_u16x8 and other2.is_u16x8) {
282 v = numeric_array{_mm_packus_epi16(other2.reg(), other1.reg())};
289 if constexpr (std::is_integral_v<T> and std::is_floating_point_v<U>) {
291 v[i] = static_cast<value_type>(std::round(other1[i]));
293 v[i] = static_cast<value_type>(other1[i]);
295 } else if (i < M * 2) {
296 if constexpr (std::is_integral_v<T> and std::is_floating_point_v<U>) {
298 v[i] = static_cast<value_type>(std::round(other2[i - M]));
300 v[i] = static_cast<value_type>(other2[i - M]);
308 [[nodiscard]] constexpr explicit numeric_array(T const &x) noexcept : v()
310 if (not std::is_constant_evaluated()) {
311 #if defined(HI_HAS_SSE)
312 if constexpr (is_f32x4) {
321 [[nodiscard]] constexpr explicit numeric_array(T const &x, T const &y) noexcept requires(N >= 2) : v()
323 if (not std::is_constant_evaluated()) {
324 #if defined(HI_HAS_SSE2)
325 if constexpr (is_i32x4) {
335 [[nodiscard]] constexpr explicit numeric_array(T const &x, T const &y, T const &z) noexcept requires(N >= 3) : v()
337 if (not std::is_constant_evaluated()) {
338 #if defined(HI_HAS_SSE2)
339 if constexpr (is_i32x4) {
350 [[nodiscard]] constexpr explicit numeric_array(T const &x, T const &y, T const &z, T const &w) noexcept requires(N >= 4) : v()
352 if (not std::is_constant_evaluated()) {
353 #if defined(HI_HAS_SSE2)
354 if constexpr (is_i32x4) {
366 [[nodiscard]] static constexpr numeric_array broadcast(T rhs) noexcept
368 if (not std::is_constant_evaluated()) {
369 #if defined(HI_HAS_AVX)
370 if constexpr (is_f64x4) {
372 } else if constexpr (is_f32x8) {
374 } else if constexpr (is_i64x4) {
376 } else if constexpr (is_i32x8) {
378 } else if constexpr (is_i16x16) {
380 } else if constexpr (is_i8x32) {
384 #if defined(HI_HAS_SSE2)
385 if constexpr (is_f64x2) {
387 } else if constexpr (is_i64x2) {
389 } else if constexpr (is_i32x4) {
391 } else if constexpr (is_i16x8) {
393 } else if constexpr (is_i8x16) {
397 #if defined(HI_HAS_SSE)
398 if constexpr (is_f32x4) {
410 [[nodiscard]] static constexpr numeric_array epsilon() noexcept
412 if constexpr (std::is_floating_point_v<T>) {
415 return broadcast(T{0});
432 #if defined(HI_HAS_SSE2)
433 [[nodiscard]] __m128i reg() const noexcept requires(std::is_integral_v<T> and sizeof(T) * N == 16)
435 return _mm_loadu_si128(reinterpret_cast<__m128i const *>(v.data()));
438 [[nodiscard]] __m128i reg() const noexcept requires(is_f16x4)
440 return _mm_set_epi16(0, 0, 0, 0, get<3>(v).get(), get<2>(v).get(), get<1>(v).get(), get<0>(v).get());
444 #if defined(HI_HAS_SSE2)
445 [[nodiscard]] __m128 reg() const noexcept requires(is_f32x4)
447 return _mm_loadu_ps(v.data());
451 #if defined(HI_HAS_SSE2)
452 [[nodiscard]] __m128d reg() const noexcept requires(is_f64x2)
454 return _mm_loadu_pd(v.data());
458 #if defined(HI_HAS_SSE2)
459 [[nodiscard]] explicit numeric_array(__m128i const &rhs) noexcept requires(std::is_integral_v<T> and sizeof(T) * N == 16)
461 _mm_storeu_si128(reinterpret_cast<__m128i *>(v.data()), rhs);
465 #if defined(HI_HAS_SSE4_1)
466 [[nodiscard]] explicit numeric_array(__m128i const &rhs) noexcept requires(is_f16x4) :
467 v(std::bit_cast<decltype(v)>(_mm_extract_epi64(rhs, 0)))
472 #if defined(HI_HAS_SSE4_1)
473 [[nodiscard]] explicit numeric_array(__m128i const &rhs) noexcept requires(is_u8x4) :
474 v(std::bit_cast<decltype(v)>(_mm_extract_epi32(rhs, 0)))
479 #if defined(HI_HAS_SSE2)
480 [[nodiscard]] explicit numeric_array(__m128 const &rhs) noexcept requires(is_f32x4)
482 _mm_storeu_ps(v.data(), rhs);
486 #if defined(HI_HAS_SSE2)
487 [[nodiscard]] explicit numeric_array(__m128d const &rhs) noexcept requires(is_f64x2)
489 _mm_storeu_pd(v.data(), rhs);
493 #if defined(HI_HAS_SSE2)
494 numeric_array &operator=(__m128i const &rhs) noexcept requires(std::is_integral_v<T> and sizeof(T) * N == 16)
496 _mm_storeu_si128(reinterpret_cast<__m128i *>(v.data()), rhs);
501 #if defined(HI_HAS_SSE2)
502 numeric_array &operator=(__m128 const &rhs) noexcept requires(is_f32x4)
504 _mm_storeu_ps(v.data(), rhs);
509 #if defined(HI_HAS_SSE2)
510 numeric_array &operator=(__m128d const &rhs) noexcept requires(is_f64x2)
512 _mm_storeu_pd(v.data(), rhs);
517 #if defined(HI_HAS_AVX)
518 [[nodiscard]] __m256i reg() const noexcept requires(std::is_integral_v<T> and sizeof(T) * N == 32)
520 return _mm256_loadu_si256(reinterpret_cast<__m256i const *>(v.data()));
524 #if defined(HI_HAS_AVX)
525 [[nodiscard]] __m256 reg() const noexcept requires(is_f32x8)
527 return _mm256_loadu_ps(v.data());
531 #if defined(HI_HAS_AVX)
532 [[nodiscard]] __m256d reg() const noexcept requires(is_f64x4)
534 return _mm256_loadu_pd(v.data());
538 #if defined(HI_HAS_AVX)
539 [[nodiscard]] explicit numeric_array(__m256i const &rhs) noexcept requires(std::is_integral_v<T> and sizeof(T) * N == 32)
541 _mm256_storeu_si256(reinterpret_cast<__m256i *>(v.data()), rhs);
545 #if defined(HI_HAS_AVX)
546 [[nodiscard]] explicit numeric_array(__m256 const &rhs) noexcept requires(is_f32x8)
548 _mm256_storeu_ps(v.data(), rhs);
552 #if defined(HI_HAS_AVX)
553 [[nodiscard]] explicit numeric_array(__m256d const &rhs) noexcept requires(is_f64x4)
555 _mm256_storeu_pd(v.data(), rhs);
559 #if defined(HI_HAS_AVX)
560 numeric_array &operator=(__m256i const &rhs) noexcept requires(std::is_integral_v<T> and sizeof(T) * N == 32)
562 _mm256_storeu_si256(reinterpret_cast<__m256i *>(v.data()), rhs);
567 #if defined(HI_HAS_AVX)
568 numeric_array &operator=(__m256 const &rhs) noexcept requires(is_f32x8)
570 _mm256_storeu_ps(v.data(), rhs);
575 #if defined(HI_HAS_AVX)
576 numeric_array &operator=(__m256d const &rhs) noexcept requires(is_f64x4)
578 _mm256_storeu_pd(v.data(), rhs);
583 template<typename Other>
584 [[nodiscard]] constexpr friend Other bit_cast(numeric_array const &rhs) noexcept
587 if (not std::is_constant_evaluated()) {
588 #if defined(HI_HAS_SSE2)
589 if constexpr (Other::is_f32x4 and std::is_integral_v<T>) {
590 return Other{_mm_castsi128_ps(rhs.reg())};
591 } else if constexpr (Other::is_f32x4 and is_f64x2) {
592 return Other{_mm_castpd_ps(rhs.reg())};
593 } else if constexpr (Other::is_f64x2 and std::is_integral_v<T>) {
594 return Other{_mm_castsi128_pd(rhs.reg())};
595 } else if constexpr (Other::is_f64x2 and is_f32x4) {
596 return Other{_mm_castps_pd(rhs.reg())};
597 } else if constexpr (std::is_integral_v<Other::value_type> and is_f32x4) {
598 return Other{_mm_castps_si128(rhs.reg())};
599 } else if constexpr (std::is_integral_v<Other::value_type> and is_f64x2) {
600 return Other{_mm_castpd_si128(rhs.reg())};
601 } else if constexpr (std::is_integral_v<Other::value_type> and std::is_integral_v<T>) {
602 return Other{rhs.reg()};
606 return std::bit_cast<Other>(rhs);
613 if (not std::is_constant_evaluated()) {
614 #if defined(HI_HAS_SSE2)
615 if constexpr (is_f64x2) {
617 } else if constexpr (is_i64x2 or is_u64x2) {
619 } else if constexpr (is_i32x4 or is_u32x4) {
621 } else if constexpr (is_i16x8 or is_u16x8) {
623 } else if constexpr (is_i8x16 or is_u8x16) {
627 #if defined(HI_HAS_SSE)
628 if constexpr (is_f32x4) {
636 r[i] = (i % 2 == 0) ? a[i / 2] : b[i / 2];
645 template<std::size_t S>
675 template<std::size_t S>
676 constexpr void store(std::byte *ptr) const noexcept
684 constexpr void store(std::byte *ptr) const noexcept
686 store<sizeof(*this)>(ptr);
692 constexpr explicit operator bool() const noexcept
694 if constexpr (std::is_floating_point_v<T>) {
695 hilet ep = epsilon();
697 return static_cast<bool>(gt(-ep, *this) | gt(*this, ep));
699 return static_cast<bool>(ne(*this, T{0}));
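// Illustrative note (not part of the original header): the truthiness rule above, shown
// with a hypothetical alias f32x4 = numeric_array<float, 4>:
//
//     auto zero = f32x4{};                          // all lanes zero
//     auto unit = f32x4{1.0f, 0.0f, 0.0f, 0.0f};
//     static_cast<bool>(zero);   // false: no lane lies outside +/- epsilon()
//     static_cast<bool>(unit);   // true: at least one lane is non-zero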
703 [[nodiscard]] constexpr T const &operator[](std::size_t i) const noexcept
705 static_assert(std::endian::native == std::endian::little, "Indices need to be reversed on big endian machines");
710 [[nodiscard]] constexpr T &operator[](std::size_t i) noexcept
712 static_assert(std::endian::native == std::endian::little, "Indices need to be reversed on big endian machines");
717 [[nodiscard]] constexpr reference front() noexcept
722 [[nodiscard]] constexpr const_reference front() const noexcept
727 [[nodiscard]] constexpr reference back() noexcept
732 [[nodiscard]] constexpr const_reference back() const noexcept
737 [[nodiscard]] constexpr pointer data() noexcept
742 [[nodiscard]] constexpr const_pointer data() const noexcept
747 [[nodiscard]] constexpr iterator begin() noexcept
752 [[nodiscard]] constexpr const_iterator begin() const noexcept
757 [[nodiscard]] constexpr const_iterator cbegin() const noexcept
762 [[nodiscard]] constexpr iterator end() noexcept
767 [[nodiscard]] constexpr const_iterator end() const noexcept
772 [[nodiscard]] constexpr const_iterator cend() const noexcept
777 [[nodiscard]] constexpr bool empty() const noexcept
782 [[nodiscard]] constexpr size_type size() const noexcept
787 [[nodiscard]] constexpr size_type max_size() const noexcept
792 constexpr bool is_point() const noexcept
794 return v.back() != T{};
797 constexpr bool is_vector() const noexcept
799 return v.back() == T{};
802 constexpr bool is_opaque() const noexcept
807 constexpr bool is_transparent() const noexcept
812 [[nodiscard]] constexpr T const &x() const noexcept requires(N >= 1)
814 return std::get<0>(v);
817 [[nodiscard]] constexpr T const &y() const noexcept requires(N >= 2)
819 return std::get<1>(v);
822 [[nodiscard]] constexpr T const &z() const noexcept requires(N >= 3)
824 return std::get<2>(v);
827 [[nodiscard]] constexpr T const &w() const noexcept requires(N >= 4)
829 return std::get<3>(v);
832 [[nodiscard]] constexpr T &x() noexcept requires(N >= 1)
834 return std::get<0>(v);
837 [[nodiscard]] constexpr T &y() noexcept requires(N >= 2)
839 return std::get<1>(v);
842 [[nodiscard]] constexpr T &z() noexcept requires(N >= 3)
844 return std::get<2>(v);
847 [[nodiscard]] constexpr T &w() noexcept requires(N >= 4)
849 return std::get<3>(v);
852 [[nodiscard]] constexpr T const &r() const noexcept requires(N >= 1)
854 return std::get<0>(v);
857 [[nodiscard]] constexpr T const &g() const noexcept requires(N >= 2)
859 return std::get<1>(v);
862 [[nodiscard]] constexpr T const &b() const noexcept requires(N >= 3)
864 return std::get<2>(v);
867 [[nodiscard]] constexpr T const &a() const noexcept requires(N >= 4)
869 return std::get<3>(v);
872 [[nodiscard]] constexpr T &r() noexcept requires(N >= 1)
874 return std::get<0>(v);
877 [[nodiscard]] constexpr T &g() noexcept requires(N >= 2)
879 return std::get<1>(v);
882 [[nodiscard]] constexpr T &b() noexcept requires(N >= 3)
884 return std::get<2>(v);
887 [[nodiscard]] constexpr T &a() noexcept requires(N >= 4)
889 return std::get<3>(v);
892 [[nodiscard]] constexpr T const &width() const noexcept requires(N >= 1)
894 return std::get<0>(v);
897 [[nodiscard]] constexpr T const &height() const noexcept requires(N >= 2)
899 return std::get<1>(v);
902 [[nodiscard]] constexpr T const &depth() const noexcept requires(N >= 3)
904 return std::get<2>(v);
907 [[nodiscard]] constexpr T &width() noexcept requires(N >= 1)
909 return std::get<0>(v);
912 [[nodiscard]] constexpr T &height() noexcept requires(N >= 2)
914 return std::get<1>(v);
917 [[nodiscard]] constexpr T &depth() noexcept requires(N >= 3)
919 return std::get<2>(v);
922 constexpr numeric_array &operator<<=(unsigned int rhs) noexcept
924 return *this = *this << rhs;
927 constexpr numeric_array &operator>>=(unsigned int rhs) noexcept
929 return *this = *this >> rhs;
932 constexpr numeric_array &operator|=(numeric_array const &rhs) noexcept
934 return *this = *this | rhs;
937 constexpr numeric_array &operator|=(T const &rhs) noexcept
939 return *this = *this | rhs;
942 constexpr numeric_array &operator&=(numeric_array const &rhs) noexcept
944 return *this = *this & rhs;
947 constexpr numeric_array &operator&=(T const &rhs) noexcept
949 return *this = *this & rhs;
952 constexpr numeric_array &operator^=(numeric_array const &rhs) noexcept
954 return *this = *this ^ rhs;
957 constexpr numeric_array &operator^=(T const &rhs) noexcept
959 return *this = *this ^ rhs;
962 constexpr numeric_array &operator+=(numeric_array const &rhs) noexcept
964 return *this = *this + rhs;
967 constexpr numeric_array &operator+=(T const &rhs) noexcept
969 return *this = *this + rhs;
972 constexpr numeric_array &operator-=(numeric_array const &rhs) noexcept
974 return *this = *this - rhs;
977 constexpr numeric_array &operator-=(T const &rhs) noexcept
979 return *this = *this - rhs;
982 constexpr numeric_array &operator*=(numeric_array const &rhs) noexcept
984 return *this = *this * rhs;
987 constexpr numeric_array &operator*=(T const &rhs) noexcept
989 return *this = *this * rhs;
992 constexpr numeric_array &operator/=(numeric_array const &rhs) noexcept
994 return *this = *this / rhs;
997 constexpr numeric_array &operator/=(T const &rhs) noexcept
999 return *this = *this / rhs;
1002 constexpr numeric_array &operator%=(numeric_array const &rhs) noexcept
1004 return *this = *this % rhs;
1007 constexpr numeric_array &operator%=(T const &rhs) noexcept
1009 return *this = *this % rhs;
1012 constexpr static ssize_t get_zero = -1;
1013 constexpr static ssize_t get_one = -2;
1019 template<std::size_t I>
1022 static_assert(I < N, "Index out of bounds");
1023 return std::get<I>(rhs.v);
1034 static_assert(std::endian::native == std::endian::little, "Indices need to be reversed on big endian machines");
1035 static_assert(I >= -2 && I < narrow_cast<ssize_t>(N), "Index out of bounds");
1036 if constexpr (I == get_zero) {
1038 } else if constexpr (I == get_one) {
1041 return std::get<I>(rhs.v);
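// Illustrative note (not part of the original header): get_zero (-1) and get_one (-2)
// are sentinel "indices" for the swizzle-style get<I>() overload above; a negative I
// selects a constant rather than an element, so (presumably, the returns are elided here)
// get<get_zero>(p) yields T{0} and get<get_one>(p) yields T{1} regardless of p.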
1051 template<std::size_t I>
1054 static_assert(I < N);
1056 if (not std::is_constant_evaluated()) {
1057 #if defined(HI_HAS_AVX2)
1058 if constexpr (is_i16x16 or is_u16x16) {
1059 return static_cast<T>(_mm256_extract_epi16(rhs.v.reg(), I));
1060 } else if constexpr (is_i8x32 or is_u8x32) {
1061 return static_cast<T>(_mm256_extract_epi8(rhs.v.reg(), I));
1064 #if defined(HI_HAS_AVX)
1065 if constexpr (is_f64x4) {
1066 return bit_cast<T>(_mm256_extract_epi64(_mm256_castpd_si256(rhs.v.reg()), I));
1067 } else if constexpr (is_f32x8) {
1068 return bit_cast<T>(_mm256_extract_epi32(_mm256_castps_si256(rhs.v.reg()), I));
1069 } else if constexpr (is_i64x4 or is_u64x4) {
1070 return static_cast<T>(_mm256_extract_epi64(rhs.v.reg(), I));
1071 } else if constexpr (is_i32x8 or is_u32x8) {
1072 return static_cast<T>(_mm256_extract_epi32(rhs.v.reg(), I));
1075 #if defined(HI_HAS_SSE4_1)
1076 if constexpr (is_f64x2) {
1077 return bit_cast<T>(_mm_extract_epi64(_mm_castpd_si128(rhs.v.reg()), I));
1078 } else if constexpr (is_f32x4) {
1079 return std::bit_cast<T>(_mm_extract_ps(rhs.v.reg(), I));
1080 } else if constexpr (is_i64x2 or is_u64x2) {
1081 return static_cast<T>(_mm_extract_epi64(rhs.v.reg(), I));
1082 } else if constexpr (is_i32x4 or is_u32x4) {
1083 return static_cast<T>(_mm_extract_epi32(rhs.v.reg(), I));
1084 } else if constexpr (is_i8x16 or is_u8x16) {
1085 return static_cast<T>(_mm_extract_epi8(rhs.v.reg(), I));
1088 #if defined(HI_HAS_SSE2)
1089 if constexpr (is_i16x8 or is_u16x8) {
1090 return static_cast<T>(_mm_extract_epi16(rhs.v.reg(), I));
1106 template<std::size_t I, std::size_t ZeroMask = 0>
1108 requires(is_f32x4 or is_i32x4 or is_u32x4)
1110 static_assert(I < N);
1111 static_assert(ZeroMask <= ((1 << N) - 1));
1113 if (not std::is_constant_evaluated()) {
1114 #if defined(HI_HAS_SSE4_1)
1115 if constexpr (is_f32x4) {
1116 constexpr int imm8 = (I << 4) | ZeroMask;
1117 return numeric_array{_mm_insert_ps(lhs.reg(), _mm_set_ss(rhs), imm8)};
1118 } else if constexpr (is_i32x4 or is_u32x4) {
1119 constexpr int imm8 = (I << 4) | ZeroMask;
1121 _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(lhs.reg()), _mm_castsi128_ps(_mm_set1_epi32(rhs)), imm8))};
1127 std::get<I>(r.v) = rhs;
1129 if ((ZeroMask >> i) & 1) {
1144 static_assert(std::endian::native == std::endian::little, "Indices need to be reversed on big endian machines");
1145 static_assert(I >= -2 && I < narrow_cast<ssize_t>(N), "Index out of bounds");
1146 if constexpr (I == get_zero) {
1148 } else if constexpr (I == get_one) {
1151 return std::get<I>(rhs.v);
1162 if (not std::is_constant_evaluated()) {
1163#if defined(HI_HAS_SSE4_1)
1164 if constexpr (is_f32x4) {
1165 return numeric_array{_mm_insert_ps(rhs.reg(), rhs.reg(), Mask)};
1166 }
else if constexpr (is_i32x4 or is_u32x4) {
1168 _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(rhs.reg()), _mm_castsi128_ps(rhs.reg()), Mask))};
1175 if (static_cast<bool>((Mask >> i) & 1)) {
1184 template<std::size_t Mask>
1187 if (not std::is_constant_evaluated()) {
1188#if defined(HI_HAS_AVX2)
1189 if constexpr (is_i32x8) {
1190 return numeric_array{_mm256_blend_epi32(lhs.reg(), rhs.reg(), Mask)};
1191 }
else if constexpr (is_i32x4 or is_u32x4) {
1192 return numeric_array{_mm_blend_epi32(lhs.reg(), rhs.reg(), Mask)};
1193 }
else if constexpr (is_i16x16 or is_u16x16) {
1194 return numeric_array{_mm256_blend_epi16(lhs.reg(), rhs.reg(), Mask)};
1197#if defined(HI_HAS_AVX)
1198 if constexpr (is_f64x4) {
1199 return numeric_array{_mm256_blend_pd(lhs.reg(), rhs.reg(), Mask)};
1200 }
else if constexpr (is_f32x8) {
1201 return numeric_array{_mm256_blend_ps(lhs.reg(), rhs.reg(), Mask)};
1202 }
else if constexpr (is_i64x4 or is_u64x4) {
1203 return numeric_array{
1204 _mm256_castpd_si256(_mm256_blend_pd(_mm256_castsi256_pd(lhs.reg()), _mm256_castsi256_pd(rhs.reg()), Mask))};
1205 }
else if constexpr (is_i32x8 or is_u32x8) {
1206 return numeric_array{
1207 _mm256_castps_si256(_mm256_blend_ps(_mm256_castsi256_ps(lhs.reg()), _mm256_castsi256_ps(rhs.reg()), Mask))};
1210#if defined(HI_HAS_SSE4_1)
1211 if constexpr (is_f64x2) {
1212 return numeric_array{_mm_blend_pd(lhs.reg(), rhs.reg(), Mask)};
1213 }
else if constexpr (is_f32x4) {
1214 return numeric_array{_mm_blend_ps(lhs.reg(), rhs.reg(), Mask)};
1215 }
else if constexpr (is_i64x2 or is_u64x2) {
1216 return numeric_array{
1217 _mm_castpd_si128(_mm_blend_pd(_mm_castsi128_pd(lhs.reg()), _mm_castsi128_pd(rhs.reg()), Mask))};
1218 }
else if constexpr (is_i32x4 or is_u32x4) {
1219 return numeric_array{
1220 _mm_castps_si128(_mm_blend_ps(_mm_castsi128_ps(lhs.reg()), _mm_castsi128_ps(rhs.reg()), Mask))};
1221 }
else if constexpr (is_i16x8 or is_u16x8) {
1222 return numeric_array{_mm_blend_epi16(lhs.reg(), rhs.reg(), Mask)};
1227 auto r = numeric_array{};
1229 r[i] = static_cast<bool>((Mask >> i) & 1) ? rhs[i] : lhs[i];
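// Illustrative note (not part of the original header): in blend<Mask>(lhs, rhs) bit i of
// Mask selects the source of lane i, exactly as the scalar fallback above spells out.
// Sketch with a hypothetical alias i32x4 = numeric_array<int32_t, 4>:
//
//     auto a = i32x4{1, 2, 3, 4};
//     auto b = i32x4{10, 20, 30, 40};
//     auto c = blend<0b0101>(a, b);   // lanes 0 and 2 taken from b -> {10, 2, 30, 4}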
1238 if (not std::is_constant_evaluated()) {
1239#if defined(HI_HAS_AVX2)
1240 if constexpr (is_i8x32 or is_u8x32) {
1241 return numeric_array{_mm256_blendv_epi8(a.reg(), b.reg(), mask.reg())};
1244#if defined(HI_HAS_AVX)
1245 if constexpr (is_f64x4) {
1246 return numeric_array{_mm256_blendv_pd(a.reg(), b.reg(), mask.reg())};
1247 }
else if constexpr (is_f32x8) {
1248 return numeric_array{_mm256_blendv_ps(a.reg(), b.reg(), mask.reg())};
1249 }
else if constexpr (is_i64x4 or is_u64x4) {
1251 _mm256_castsi256_pd(a.reg()), _mm256_castsi256_pd(b.reg()), _mm256_castsi256_pd(mask.reg())))};
1252 }
else if constexpr (is_i32x8 or is_u32x8) {
1254 _mm256_castsi256_ps(a.reg()), _mm256_castsi256_ps(b.reg()), _mm256_castsi256_ps(mask.reg())))};
1257#if defined(HI_HAS_SSE4_1)
1258 if constexpr (is_f64x2) {
1259 return numeric_array{_mm_blendv_pd(a.reg(), b.reg(), mask.reg())};
1260 }
else if constexpr (is_f32x4) {
1261 return numeric_array{_mm_blendv_ps(a.reg(), b.reg(), mask.reg())};
1262 }
else if constexpr (is_i64x2 or is_u64x2) {
1264 _mm_blendv_pd(_mm_castsi128_pd(a.reg()), _mm_castsi128_pd(b.reg()), _mm_castsi128_pd(mask.reg())))};
1265 }
else if constexpr (is_i32x4 or is_u32x4) {
1267 _mm_blendv_ps(_mm_castsi128_ps(a.reg()), _mm_castsi128_ps(b.reg()), _mm_castsi128_ps(mask.reg())))};
1268 }
else if constexpr (is_i8x16 or is_u8x16) {
1269 return numeric_array{_mm_blendv_epi8(a.reg(), b.reg(), mask.reg())};
1276 r[i] = mask[i] != T{0} ? b[i] : a[i];
1285 template<std::size_t Mask>
1288 return blend<Mask>(rhs, -rhs);
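// Illustrative note (not part of the original header): blend<Mask>(rhs, -rhs) as used
// above is a per-lane conditional negate: lanes whose Mask bit is set come from -rhs,
// the rest stay rhs. With Mask = 0b0011 and rhs = {1, 2, 3, 4} the result is {-1, -2, 3, 4}.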
1296 [[nodiscard]] friend constexpr numeric_array abs(numeric_array const &rhs) noexcept
1298 if (not std::is_constant_evaluated()) {
1299#if defined(HI_HAS_AVX2)
1300 if constexpr (is_i32x8) {
1301 return numeric_array{_mm256_abs_epi32(rhs.reg())};
1302 }
else if constexpr (is_i16x16) {
1303 return numeric_array{_mm256_abs_epi16(rhs.reg())};
1304 }
else if constexpr (is_i8x32) {
1305 return numeric_array{_mm256_abs_epi8(rhs.reg())};
1308#if defined(HI_HAS_SSSE3)
1309 if constexpr (is_i32x4) {
1310 return numeric_array{_mm_abs_epi32(rhs.reg())};
1311 }
else if constexpr (is_i16x8) {
1312 return numeric_array{_mm_abs_epi16(rhs.reg())};
1313 }
else if constexpr (is_i8x16) {
1314 return numeric_array{_mm_abs_epi8(rhs.reg())};
1317#if defined(HI_HAS_SSE2)
1318 if constexpr (is_f64x2) {
1319 return numeric_array{_mm_castsi128_ps(_mm_srli_epi64(_mm_slli_epi64(_mm_castpd_si128(rhs.reg()), 1), 1))};
1320 }
else if constexpr (is_f32x4) {
1321 return numeric_array{_mm_castsi128_ps(_mm_srli_epi32(_mm_slli_epi32(_mm_castps_si128(rhs.reg()), 1), 1))};
1326 return max(rhs, -rhs);
1329 [[nodiscard]] friend constexpr numeric_array rcp(numeric_array const &rhs) noexcept
1331 if (not std::is_constant_evaluated()) {
1332#if defined(HI_HAS_AVX)
1333 if constexpr (is_f32x8) {
1334 return numeric_array{_mm256_rcp_ps(rhs.reg())};
1337#if defined(HI_HAS_SSE)
1338 if constexpr (is_f32x4) {
1339 return numeric_array{_mm_rcp_ps(rhs.reg())};
1347 [[nodiscard]] friend constexpr numeric_array sqrt(numeric_array const &rhs) noexcept
1349 if (not std::is_constant_evaluated()) {
1350#if defined(HI_HAS_AVX)
1351 if constexpr (is_f64x4) {
1352 return numeric_array{_mm256_sqrt_pd(rhs.reg())};
1353 }
else if constexpr (is_f32x8) {
1354 return numeric_array{_mm256_sqrt_ps(rhs.reg())};
1357#if defined(HI_HAS_SSE2)
1358 if constexpr (is_f64x2) {
1359 return numeric_array{_mm_sqrt_pd(rhs.reg())};
1362#if defined(HI_HAS_SSE)
1363 if constexpr (is_f32x4) {
1364 return numeric_array{_mm_sqrt_ps(rhs.reg())};
1369 auto r = numeric_array{};
1376 [[nodiscard]] friend constexpr numeric_array rcp_sqrt(numeric_array const &rhs) noexcept
1378 if (not std::is_constant_evaluated()) {
1379#if defined(HI_HAS_AVX)
1380 if constexpr (is_f32x8) {
1381 return numeric_array{_mm256_rsqrt_ps(rhs.reg())};
1384#if defined(HI_HAS_SSE)
1385 if constexpr (is_f32x4) {
1386 return numeric_array{_mm_rsqrt_ps(rhs.reg())};
1391 return rcp(sqrt(rhs));
1394 [[nodiscard]] friend constexpr numeric_array floor(numeric_array const &rhs) noexcept
1395 requires(std::is_floating_point_v<value_type>)
1397 if (not std::is_constant_evaluated()) {
1398#if defined(HI_HAS_AVX)
1399 if constexpr (is_f64x4) {
1400 return numeric_array{_mm256_floor_pd(rhs.reg())};
1401 }
else if constexpr (is_f32x8) {
1402 return numeric_array{_mm256_floor_ps(rhs.reg())};
1405#if defined(HI_HAS_SSE4_1)
1406 if constexpr (is_f64x2) {
1407 return numeric_array{_mm_floor_pd(rhs.reg())};
1408 }
else if constexpr (is_f32x4) {
1409 return numeric_array{_mm_floor_ps(rhs.reg())};
1414 auto r = numeric_array{};
1421 [[nodiscard]] friend constexpr numeric_array ceil(numeric_array const &rhs) noexcept
1422 requires(std::is_floating_point_v<value_type>)
1424 if (not std::is_constant_evaluated()) {
1425#if defined(HI_HAS_AVX)
1426 if constexpr (is_f64x4) {
1427 return numeric_array{_mm256_ceil_pd(rhs.reg())};
1428 }
else if constexpr (is_f32x8) {
1429 return numeric_array{_mm256_ceil_ps(rhs.reg())};
1432#if defined(HI_HAS_SSE4_1)
1433 if constexpr (is_f64x2) {
1434 return numeric_array{_mm_ceil_pd(rhs.reg())};
1435 }
else if constexpr (is_f32x4) {
1436 return numeric_array{_mm_ceil_ps(rhs.reg())};
1441 auto r = numeric_array{};
1448 [[nodiscard]] friend constexpr numeric_array round(numeric_array const &rhs) noexcept
1449 requires(std::is_floating_point_v<value_type>)
1451 if (not std::is_constant_evaluated()) {
1452#if defined(HI_HAS_AVX)
1453 if constexpr (is_f64x4) {
1454 return numeric_array{_mm256_round_pd(rhs.reg(), _MM_FROUND_CUR_DIRECTION)};
1455 }
else if constexpr (is_f32x8) {
1456 return numeric_array{_mm256_round_ps(rhs.reg(), _MM_FROUND_CUR_DIRECTION)};
1459#if defined(HI_HAS_SSE4_1)
1460 if constexpr (is_f64x2) {
1461 return numeric_array{_mm_round_pd(rhs.reg(), _MM_FROUND_CUR_DIRECTION)};
1462 }
else if constexpr (is_f32x4) {
1463 return numeric_array{_mm_round_ps(rhs.reg(), _MM_FROUND_CUR_DIRECTION)};
1468 auto r = numeric_array{};
1482 template<std::size_t Mask>
1485 if (not std::is_constant_evaluated()) {
1486#if defined(HI_HAS_SSE4_1)
1487 if constexpr (is_f64x2) {
1488 return std::bit_cast<double>(_mm_extract_epi64(_mm_dp_pd(lhs.reg(), rhs.reg(), (Mask << 4) | 0xf), 0));
1489 }
else if constexpr (is_f32x4) {
1490 return std::bit_cast<float>(_mm_extract_ps(_mm_dp_ps(lhs.reg(), rhs.reg(), (Mask << 4) | 0xf), 0));
1497 if (static_cast<bool>(Mask & (1_uz << i))) {
1498 r += lhs.v[i] * rhs.v[i];
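// Illustrative note (not part of the original header): Mask selects which lanes take part
// in the dot product, as the fallback loop above shows (the SSE path passes it as the high
// nibble of the _mm_dp_ps immediate). Sketch with a hypothetical f32x4 alias:
//
//     auto a = f32x4{1.0f, 2.0f, 3.0f, 4.0f};
//     auto b = f32x4{5.0f, 6.0f, 7.0f, 8.0f};
//     dot<0b0111>(a, b);   // 1*5 + 2*6 + 3*7 = 38.0f, lane 3 excluded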
1511 template<std::size_t Mask>
1524 template<std::size_t Mask>
1527 return dot<Mask>(rhs, rhs);
1536 template<std::size_t Mask>
1539 if (not std::is_constant_evaluated()) {
1540#if defined(HI_HAS_SSE4_1)
1541 if constexpr (is_f32x4) {
1542 return std::bit_cast<float>(_mm_extract_ps(_mm_rsqrt_ps(_mm_dp_ps(rhs.reg(), rhs.reg(), (Mask << 4) | 0xf)), 0));
1547 return 1.0f / hypot<Mask>(rhs);
1558 template<std::size_t Mask>
1561 hi_axiom(rhs.is_vector());
1563 if (not std::is_constant_evaluated()) {
1564#if defined(HI_HAS_SSE4_1)
1565 if constexpr (is_f32x4) {
1566 hilet rhs_ = rhs.reg();
1567 hilet tmp = _mm_mul_ps(_mm_rsqrt_ps(_mm_dp_ps(rhs_, rhs_, (Mask << 4) | 0xf)), rhs_);
1573 hilet rcp_hypot_ = rcp_hypot<Mask>(rhs);
1577 if (static_cast<bool>(Mask & (1_uz << i))) {
1578 r.v[i] = rhs.v[i] * rcp_hypot_;
1587 if (not std::is_constant_evaluated()) {
1588#if defined(HI_HAS_AVX2)
1589 if constexpr (is_i64x4 or is_u64x4) {
1590 return static_cast<std::size_t>(_mm256_movemask_pd(_mm256_castsi256_pd(_mm256_cmpeq_epi64(lhs.reg(), rhs.reg()))));
1591 }
else if constexpr (is_i32x8 or is_u32x8) {
1592 return static_cast<std::size_t>(_mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpeq_epi32(lhs.reg(), rhs.reg()))));
1593 }
else if constexpr (is_i8x32 or is_u8x32) {
1594 return static_cast<std::size_t>(_mm256_movemask_epi8(_mm256_cmpeq_epi8(lhs.reg(), rhs.reg())));
1597#if defined(HI_HAS_AVX)
1598 if constexpr (is_f64x4) {
1599 return static_cast<std::size_t>(_mm256_movemask_pd(_mm256_cmp_pd(lhs.reg(), rhs.reg(), _CMP_EQ_OQ)));
1600 }
else if constexpr (is_f32x8) {
1601 return static_cast<std::size_t>(_mm256_movemask_ps(_mm256_cmp_ps(lhs.reg(), rhs.reg(), _CMP_EQ_OQ)));
1604#if defined(HI_HAS_SSE4_1)
1605 if constexpr (is_i64x2 or is_u64x2) {
1606 return static_cast<std::size_t>(_mm_movemask_pd(_mm_castsi128_pd(_mm_cmpeq_epi64(lhs.reg(), rhs.reg()))));
1609#if defined(HI_HAS_SSE2)
1610 if constexpr (is_f64x2) {
1611 return static_cast<std::size_t>(_mm_movemask_pd(_mm_cmpeq_pd(lhs.reg(), rhs.reg())));
1612 }
else if constexpr (is_i32x4 or is_u32x4) {
1613 return static_cast<std::size_t>(_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(lhs.reg(), rhs.reg()))));
1614 }
else if constexpr (is_i8x16 or is_u8x16) {
1615 return static_cast<std::size_t>(_mm_movemask_epi8(_mm_cmpeq_epi8(lhs.reg(), rhs.reg())));
1618#if defined(HI_HAS_SSE)
1619 if constexpr (is_f32x4) {
1620 return static_cast<std::size_t>(_mm_movemask_ps(_mm_cmpeq_ps(lhs.reg(), rhs.reg())));
1627 r |= static_cast<std::size_t>(lhs.v[i] == rhs.v[i]) << i;
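// Illustrative note (not part of the original header): eq() and the comparisons below
// (ne/gt/lt/ge/le) return a per-lane bit mask packed into a std::size_t, bit i for lane i,
// just as the fallback loop above builds it. Sketch with a hypothetical i32x4 alias:
//
//     auto a = i32x4{1, 2, 3, 4};
//     auto b = i32x4{1, 0, 3, 0};
//     eq(a, b);   // 0b0101: lanes 0 and 2 compare equal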
1632 [[nodiscard]] friend constexpr std::size_t ne(numeric_array const &lhs, numeric_array const &rhs) noexcept
1635 if (not std::is_constant_evaluated()) {
1636#if defined(HI_HAS_AVX)
1637 if constexpr (is_f64x4) {
1638 return static_cast<std::size_t>(_mm256_movemask_pd(_mm256_cmp_pd(lhs.reg(), rhs.reg(), _CMP_NEQ_OQ)));
1639 }
else if constexpr (is_f32x8) {
1640 return static_cast<std::size_t>(_mm256_movemask_ps(_mm256_cmp_ps(lhs.reg(), rhs.reg(), _CMP_NEQ_OQ)));
1643#if defined(HI_HAS_SSE2)
1644 if constexpr (is_f64x2) {
1645 return static_cast<std::size_t>(_mm_movemask_pd(_mm_cmpneq_pd(lhs.reg(), rhs.reg())));
1648#if defined(HI_HAS_SSE)
1649 if constexpr (is_f32x4) {
1650 return static_cast<std::size_t>(_mm_movemask_ps(_mm_cmpneq_ps(lhs.reg(), rhs.reg())));
1656 return eq(lhs, rhs) ^ not_mask;
1659 [[nodiscard]] friend constexpr std::size_t gt(numeric_array const &lhs, numeric_array const &rhs) noexcept
1662 if (not std::is_constant_evaluated()) {
1663#if defined(HI_HAS_AVX2)
1664 if constexpr (is_i64x4) {
1665 return static_cast<std::size_t>(_mm256_movemask_pd(_mm256_castsi256_pd(_mm256_cmpgt_epi64(lhs.reg(), rhs.reg()))));
1666 }
else if constexpr (is_i32x8) {
1667 return static_cast<std::size_t>(_mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(lhs.reg(), rhs.reg()))));
1668 }
else if constexpr (is_i8x32) {
1669 return static_cast<std::size_t>(_mm256_movemask_epi8(_mm256_cmpgt_epi8(lhs.reg(), rhs.reg())));
1672#if defined(HI_HAS_AVX)
1673 if constexpr (is_f64x4) {
1674 return static_cast<std::size_t>(_mm256_movemask_pd(_mm256_cmp_pd(lhs.reg(), rhs.reg(), _CMP_GT_OQ)));
1675 }
else if constexpr (is_f32x8) {
1676 return static_cast<std::size_t>(_mm256_movemask_ps(_mm256_cmp_ps(lhs.reg(), rhs.reg(), _CMP_GT_OQ)));
1679#if defined(HI_HAS_SSE4_1)
1680 if constexpr (is_i64x2) {
1681 return static_cast<std::size_t>(_mm_movemask_pd(_mm_castsi128_pd(_mm_cmpgt_epi64(lhs.reg(), rhs.reg()))));
1684#if defined(HI_HAS_SSE2)
1685 if constexpr (is_f64x2) {
1686 return static_cast<std::size_t>(_mm_movemask_pd(_mm_cmpgt_pd(lhs.reg(), rhs.reg())));
1687 }
else if constexpr (is_i32x4) {
1688 return static_cast<std::size_t>(_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(lhs.reg(), rhs.reg()))));
1689 }
else if constexpr (is_i8x16) {
1690 return static_cast<std::size_t>(_mm_movemask_epi8(_mm_cmpgt_epi8(lhs.reg(), rhs.reg())));
1693#if defined(HI_HAS_SSE)
1694 if constexpr (is_f32x4) {
1695 return static_cast<std::size_t>(_mm_movemask_ps(_mm_cmpgt_ps(lhs.reg(), rhs.reg())));
1702 r |= static_cast<std::size_t>(lhs.v[i] > rhs.v[i]) << i;
1707 [[nodiscard]] friend constexpr std::size_t lt(numeric_array const &lhs, numeric_array const &rhs) noexcept
1710 if (not std::is_constant_evaluated()) {
1711#if defined(HI_HAS_AVX)
1712 if constexpr (is_f64x4) {
1713 return static_cast<std::size_t>(_mm256_movemask_pd(_mm256_cmp_pd(lhs.reg(), rhs.reg(), _CMP_LT_OQ)));
1714 }
else if constexpr (is_f32x8) {
1715 return static_cast<std::size_t>(_mm256_movemask_ps(_mm256_cmp_ps(lhs.reg(), rhs.reg(), _CMP_LT_OQ)));
1718#if defined(HI_HAS_SSE2)
1719 if constexpr (is_f64x2) {
1720 return static_cast<std::size_t>(_mm_movemask_pd(_mm_cmplt_pd(lhs.reg(), rhs.reg())));
1721 }
else if constexpr (is_i32x4) {
1722 return static_cast<std::size_t>(_mm_movemask_ps(_mm_castsi128_ps(_mm_cmplt_epi32(lhs.reg(), rhs.reg()))));
1723 }
else if constexpr (is_i8x16) {
1724 return static_cast<std::size_t>(_mm_movemask_epi8(_mm_cmplt_epi8(lhs.reg(), rhs.reg())));
1727#if defined(HI_HAS_SSE)
1728 if constexpr (is_f32x4) {
1729 return static_cast<std::size_t>(_mm_movemask_ps(_mm_cmplt_ps(lhs.reg(), rhs.reg())));
1735 return gt(rhs, lhs);
1738 [[nodiscard]] friend constexpr std::size_t ge(numeric_array const &lhs, numeric_array const &rhs) noexcept
1741 if (not std::is_constant_evaluated()) {
1742#if defined(HI_HAS_AVX)
1743 if constexpr (is_f64x4) {
1744 return static_cast<std::size_t>(_mm256_movemask_pd(_mm256_cmp_pd(lhs.reg(), rhs.reg(), _CMP_GE_OQ)));
1745 }
else if constexpr (is_f32x8) {
1746 return static_cast<std::size_t>(_mm256_movemask_ps(_mm256_cmp_ps(lhs.reg(), rhs.reg(), _CMP_GE_OQ)));
1749#if defined(HI_HAS_SSE2)
1750 if constexpr (is_f64x2) {
1751 return static_cast<std::size_t>(_mm_movemask_pd(_mm_cmpge_pd(lhs.reg(), rhs.reg())));
1754#if defined(HI_HAS_SSE)
1755 if constexpr (is_f32x4) {
1756 return static_cast<std::size_t>(_mm_movemask_ps(_mm_cmpge_ps(lhs.reg(), rhs.reg())));
1762 return gt(lhs, rhs) | eq(lhs, rhs);
1765 [[nodiscard]] friend constexpr std::size_t le(numeric_array const &lhs, numeric_array const &rhs) noexcept
1768 if (not std::is_constant_evaluated()) {
1769#if defined(HI_HAS_AVX)
1770 if constexpr (is_f64x4) {
1771 return static_cast<std::size_t>(_mm256_movemask_pd(_mm256_cmp_pd(lhs.reg(), rhs.reg(), _CMP_LE_OQ)));
1772 }
else if constexpr (is_f32x8) {
1773 return static_cast<std::size_t>(_mm256_movemask_ps(_mm256_cmp_ps(lhs.reg(), rhs.reg(), _CMP_LE_OQ)));
1776#if defined(HI_HAS_SSE2)
1777 if constexpr (is_f64x2) {
1778 return static_cast<std::size_t>(_mm_movemask_pd(_mm_cmple_pd(lhs.reg(), rhs.reg())));
1781#if defined(HI_HAS_SSE)
1782 if constexpr (is_f32x4) {
1783 return static_cast<std::size_t>(_mm_movemask_ps(_mm_cmple_ps(lhs.reg(), rhs.reg())));
1789 return gt(rhs, lhs) | eq(rhs, lhs);
1792 [[nodiscard]] friend constexpr numeric_array gt_mask(numeric_array const &lhs, numeric_array const &rhs) noexcept
1794 if (not std::is_constant_evaluated()) {
1795#if defined(HI_HAS_SSE4_2)
1796 if constexpr (is_i64x2) {
1797 return numeric_array{_mm_cmpgt_epi64(lhs.reg(), rhs.reg())};
1800#if defined(HI_HAS_SSE2)
1801 if constexpr (is_i32x4) {
1802 return numeric_array{_mm_cmpgt_epi32(lhs.reg(), rhs.reg())};
1803 }
else if constexpr (is_i16x8) {
1804 return numeric_array{_mm_cmpgt_epi16(lhs.reg(), rhs.reg())};
1805 }
else if constexpr (is_i8x16) {
1806 return numeric_array{_mm_cmpgt_epi8(lhs.reg(), rhs.reg())};
1809#if defined(HI_HAS_SSE)
1810 if constexpr (is_f32x4) {
1811 return numeric_array{_mm_cmpgt_ps(lhs.reg(), rhs.reg())};
1816 using uint_type = make_uintxx_t<sizeof(T) * CHAR_BIT>;
1817 constexpr auto ones = std::bit_cast<T>(~uint_type{0});
1819 auto r = numeric_array{};
1821 r[i] = lhs.v[i] > rhs.v[i] ? ones : T{0};
1826 [[nodiscard]] friend constexpr bool operator==(numeric_array const &lhs, numeric_array const &rhs) noexcept
1828 return not ne(lhs, rhs);
1831 [[nodiscard]] friend constexpr numeric_array operator<<(numeric_array const &lhs, unsigned int rhs) noexcept
1833 if (not std::is_constant_evaluated()) {
1834#if defined(HI_HAS_AVX2)
1835 if constexpr (is_f64x4) {
1836 return numeric_array{_mm256_castsi256_pd(_mm256_slli_epi64(_mm256_castpd_si256(lhs.reg()), rhs))};
1837 }
else if constexpr (is_f32x8) {
1838 return numeric_array{_mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(lhs.reg()), rhs))};
1839 }
else if constexpr (is_i64x4 or is_u64x4) {
1840 return numeric_array{_mm256_slli_epi64(lhs.reg(), rhs)};
1841 }
else if constexpr (is_i32x8 or is_u32x8) {
1842 return numeric_array{_mm256_slli_epi32(lhs.reg(), rhs)};
1843 }
else if constexpr (is_i16x16 or is_u16x16) {
1844 return numeric_array{_mm256_slli_epi16(lhs.reg(), rhs)};
1847#if defined(HI_HAS_SSE2)
1848 if constexpr (is_f64x2) {
1849 return numeric_array{_mm_castsi128_pd(_mm_slli_epi64(_mm_castpd_si128(lhs.reg()), rhs))};
1850 }
else if constexpr (is_f32x4) {
1851 return numeric_array{_mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(lhs.reg()), rhs))};
1852 }
else if constexpr (is_i64x2 or is_u64x2) {
1853 return numeric_array{_mm_slli_epi64(lhs.reg(), rhs)};
1854 }
else if constexpr (is_i32x4 or is_u32x4) {
1855 return numeric_array{_mm_slli_epi32(lhs.reg(), rhs)};
1856 }
else if constexpr (is_i16x8 or is_u16x8) {
1857 return numeric_array{_mm_slli_epi16(lhs.reg(), rhs)};
1862 auto r = numeric_array{};
1864 r.v[i] = lhs.v[i] << rhs;
1869 [[nodiscard]] friend constexpr numeric_array operator>>(numeric_array const &lhs, unsigned int rhs) noexcept
1871 if (not std::is_constant_evaluated()) {
1872#if defined(HI_HAS_AVX2)
1873 if constexpr (is_f64x4) {
1874 return numeric_array{_mm256_castsi256_pd(_mm256_srli_epi64(_mm256_castpd_si256(lhs.reg()), rhs))};
1875 }
else if constexpr (is_f32x8) {
1876 return numeric_array{_mm256_castsi256_ps(_mm256_srli_epi32(_mm256_castps_si256(lhs.reg()), rhs))};
1877 }
else if constexpr (is_u64x4) {
1878 return numeric_array{_mm256_srli_epi64(lhs.reg(), rhs)};
1879 }
else if constexpr (is_i32x8) {
1880 return numeric_array{_mm256_srai_epi32(lhs.reg(), rhs)};
1881 }
else if constexpr (is_u32x8) {
1882 return numeric_array{_mm256_srli_epi32(lhs.reg(), rhs)};
1883 }
else if constexpr (is_i16x16) {
1884 return numeric_array{_mm256_srai_epi16(lhs.reg(), rhs)};
1885 }
else if constexpr (is_u16x16) {
1886 return numeric_array{_mm256_srli_epi16(lhs.reg(), rhs)};
1889#if defined(HI_HAS_SSE2)
1890 if constexpr (is_f64x2) {
1891 return numeric_array{_mm_castsi128_pd(_mm_srli_epi64(_mm_castpd_si128(lhs.reg()), rhs))};
1892 }
else if constexpr (is_f32x4) {
1893 return numeric_array{_mm_castsi128_ps(_mm_srli_epi32(_mm_castps_si128(lhs.reg()), rhs))};
1894 }
else if constexpr (is_u64x2) {
1895 return numeric_array{_mm_srli_epi64(lhs.reg(), rhs)};
1896 }
else if constexpr (is_i32x4) {
1897 return numeric_array{_mm_srai_epi32(lhs.reg(), rhs)};
1898 }
else if constexpr (is_u32x4) {
1899 return numeric_array{_mm_srli_epi32(lhs.reg(), rhs)};
1900 }
else if constexpr (is_i16x8) {
1901 return numeric_array{_mm_srai_epi16(lhs.reg(), rhs)};
1902 }
else if constexpr (is_u16x8) {
1903 return numeric_array{_mm_srli_epi16(lhs.reg(), rhs)};
1908 auto r = numeric_array{};
1910 r.v[i] = lhs.v[i] >> rhs;
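// Illustrative note (not part of the original header): operator>> above uses an arithmetic
// shift (_mm_srai_* / _mm256_srai_*) for signed lane types and a logical shift
// (_mm_srli_* / _mm256_srli_*) for unsigned ones, matching the element-wise
// `lhs.v[i] >> rhs` of the fallback: int32_t(-8) >> 1 == -4, while
// uint32_t(0xFFFFFFF8u) >> 1 == 0x7FFFFFFCu.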
1915 [[nodiscard]] friend constexpr numeric_array operator|(numeric_array const &lhs, numeric_array const &rhs) noexcept
1917 if (not std::is_constant_evaluated()) {
1918#if defined(HI_HAS_AVX2)
1919 if constexpr (is_i64x4 or is_u64x4 or is_i32x8 or is_u32x8 or is_i16x16 or is_u16x16 or is_i8x32 or is_u8x32) {
1920 return numeric_array{_mm256_or_si256(lhs.reg(), rhs.reg())};
1923#if defined(HI_HAS_AVX)
1924 if constexpr (is_f64x4) {
1925 return numeric_array{_mm256_or_pd(lhs.reg(), rhs.reg())};
1926 }
else if constexpr (is_f32x8) {
1927 return numeric_array{_mm256_or_ps(lhs.reg(), rhs.reg())};
1928 }
else if constexpr (is_i64x4 or is_u64x4 or is_i32x8 or is_u32x8 or is_i16x16 or is_u16x16 or is_i8x32 or is_u8x32) {
1929 return numeric_array{
1930 _mm256_castps_si256(_mm256_or_ps(_mm256_castsi256_ps(lhs.reg()), _mm256_castsi256_ps(rhs.reg())))};
1933#if defined(HI_HAS_SSE2)
1934 if constexpr (is_f64x2) {
1935 return numeric_array{_mm_or_pd(lhs.reg(), rhs.reg())};
1936 }
else if constexpr (is_i64x2 or is_u64x2 or is_i32x4 or is_u32x4 or is_i16x8 or is_u16x8 or is_i8x16 or is_u8x16) {
1937 return numeric_array{_mm_or_si128(lhs.reg(), rhs.reg())};
1940#if defined(HI_HAS_SSE)
1941 if constexpr (is_f64x2) {
1942 return numeric_array{_mm_castps_pd(_mm_or_ps(_mm_castpd_ps(lhs.reg()), _mm_castpd_ps(rhs.reg())))};
1944 }
else if constexpr (is_f32x4) {
1945 return numeric_array{_mm_or_ps(lhs.reg(), rhs.reg())};
1947 }
else if constexpr (is_i64x2 or is_u64x2 or is_i32x4 or is_u32x4 or is_i16x8 or is_u16x8 or is_i8x16 or is_u8x16) {
1948 return numeric_array{_mm_castps_si128(_mm_or_ps(_mm_castsi128_ps(lhs.reg()), _mm_castsi128_ps(rhs.reg())))};
1953 using uint_type = make_uintxx_t<sizeof(T) * CHAR_BIT>;
1955 auto r = numeric_array{};
1958 std::bit_cast<T>(static_cast<uint_type>(std::bit_cast<uint_type>(lhs.v[i]) | std::bit_cast<uint_type>(rhs.v[i])));
1963 [[nodiscard]] friend constexpr numeric_array operator|(numeric_array const &lhs, T const &rhs) noexcept
1965 return lhs | broadcast(rhs);
1968 [[nodiscard]] friend constexpr numeric_array operator|(T const &lhs, numeric_array const &rhs) noexcept
1970 return broadcast(lhs) | rhs;
1973 [[nodiscard]] friend constexpr numeric_array operator&(numeric_array const &lhs, numeric_array const &rhs) noexcept
1975 if (not std::is_constant_evaluated()) {
1976#if defined(HI_HAS_AVX2)
1977 if constexpr (is_i64x4 or is_u64x4 or is_i32x8 or is_u32x8 or is_i16x16 or is_u16x16 or is_i8x32 or is_u8x32) {
1978 return numeric_array{_mm256_and_si256(lhs.reg(), rhs.reg())};
1981#if defined(HI_HAS_AVX)
1982 if constexpr (is_f64x4) {
1983 return numeric_array{_mm256_and_pd(lhs.reg(), rhs.reg())};
1984 }
else if constexpr (is_f32x8) {
1985 return numeric_array{_mm256_and_ps(lhs.reg(), rhs.reg())};
1986 }
else if constexpr (is_i64x4 or is_u64x4 or is_i32x8 or is_u32x8 or is_i16x16 or is_u16x16 or is_i8x32 or is_u8x32) {
1987 return numeric_array{
1988 _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(lhs.reg()), _mm256_castsi256_ps(rhs.reg())))};
1991#if defined(HI_HAS_SSE2)
1992 if constexpr (is_f64x2) {
1993 return numeric_array{_mm_and_pd(lhs.reg(), rhs.reg())};
1994 }
else if constexpr (is_i64x2 or is_u64x2 or is_i32x4 or is_u32x4 or is_i16x8 or is_u16x8 or is_i8x16 or is_u8x16) {
1995 return numeric_array{_mm_and_si128(lhs.reg(), rhs.reg())};
1998#if defined(HI_HAS_SSE)
1999 if constexpr (is_f64x2) {
2000 return numeric_array{_mm_castps_pd(_mm_and_ps(_mm_castpd_ps(lhs.reg()), _mm_castpd_ps(rhs.reg())))};
2002 }
else if constexpr (is_f32x4) {
2003 return numeric_array{_mm_and_ps(lhs.reg(), rhs.reg())};
2005 }
else if constexpr (is_i64x2 or is_u64x2 or is_i32x4 or is_u32x4 or is_i16x8 or is_u16x8 or is_i8x16 or is_u8x16) {
2006 return numeric_array{_mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(lhs.reg()), _mm_castsi128_ps(rhs.reg())))};
2011 auto r = numeric_array{};
2013 r.v[i] = lhs.v[i] & rhs.v[i];
2018 [[nodiscard]] friend constexpr numeric_array operator&(numeric_array const &lhs, T const &rhs) noexcept
2020 return lhs & broadcast(rhs);
2023 [[nodiscard]] friend constexpr numeric_array operator&(T const &lhs, numeric_array const &rhs) noexcept
2025 return broadcast(lhs) & rhs;
2028 [[nodiscard]] friend constexpr numeric_array operator^(numeric_array const &lhs, numeric_array const &rhs) noexcept
2030 if (not std::is_constant_evaluated()) {
2031#if defined(HI_HAS_AVX2)
2032 if constexpr (is_i64x4 or is_u64x4 or is_i32x8 or is_u32x8 or is_i16x16 or is_u16x16 or is_i8x32 or is_u8x32) {
2033 return numeric_array{_mm256_xor_si256(lhs.reg(), rhs.reg())};
2036#if defined(HI_HAS_AVX)
2037 if constexpr (is_f64x4) {
2038 return numeric_array{_mm256_xor_pd(lhs.reg(), rhs.reg())};
2039 }
else if constexpr (is_f32x8) {
2040 return numeric_array{_mm256_xor_ps(lhs.reg(), rhs.reg())};
2041 }
else if constexpr (is_i64x4 or is_u64x4 or is_i32x8 or is_u32x8 or is_i16x16 or is_u16x16 or is_i8x32 or is_u8x32) {
2042 return numeric_array{
2043 _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(lhs.reg()), _mm256_castsi256_ps(rhs.reg())))};
2046#if defined(HI_HAS_SSE2)
2047 if constexpr (is_f64x2) {
2048 return numeric_array{_mm_xor_pd(lhs.reg(), rhs.reg())};
2049 }
else if constexpr (is_i64x2 or is_u64x2 or is_i32x4 or is_u32x4 or is_i16x8 or is_u16x8 or is_i8x16 or is_u8x16) {
2050 return numeric_array{_mm_xor_si128(lhs.reg(), rhs.reg())};
2053#if defined(HI_HAS_SSE)
2054 if constexpr (is_f64x2) {
2055 return numeric_array{_mm_castps_pd(_mm_xor_ps(_mm_castpd_ps(lhs.reg()), _mm_castpd_ps(rhs.reg())))};
2057 }
else if constexpr (is_f32x4) {
2058 return numeric_array{_mm_xor_ps(lhs.reg(), rhs.reg())};
2060 }
else if constexpr (is_i64x2 or is_u64x2 or is_i32x4 or is_u32x4 or is_i16x8 or is_u16x8 or is_i8x16 or is_u8x16) {
2061 return numeric_array{_mm_castps_si128(_mm_xor_ps(_mm_castsi128_ps(lhs.reg()), _mm_castsi128_ps(rhs.reg())))};
2066 auto r = numeric_array{};
2068 r.v[i] = lhs.v[i] ^ rhs.v[i];
2073 [[nodiscard]] friend constexpr numeric_array operator^(numeric_array const &lhs, T const &rhs) noexcept
2075 return lhs ^ broadcast(rhs);
2078 [[nodiscard]] friend constexpr numeric_array operator^(T const &lhs, numeric_array const &rhs) noexcept
2080 return broadcast(lhs) ^ rhs;
2083 [[nodiscard]] friend constexpr numeric_array operator+(numeric_array const &lhs, numeric_array const &rhs) noexcept
2085 if (not std::is_constant_evaluated()) {
2086#if defined(HI_HAS_AVX2)
2087 if constexpr (is_i64x4 or is_u64x4) {
2088 return numeric_array{_mm256_add_epi64(lhs.reg(), rhs.reg())};
2089 }
else if constexpr (is_i32x8 or is_u32x8) {
2090 return numeric_array{_mm256_add_epi32(lhs.reg(), rhs.reg())};
2091 }
else if constexpr (is_i16x16 or is_u16x16) {
2092 return numeric_array{_mm256_add_epi16(lhs.reg(), rhs.reg())};
2093 }
else if constexpr (is_i8x32 or is_u8x32) {
2094 return numeric_array{_mm256_add_epi8(lhs.reg(), rhs.reg())};
2097#if defined(HI_HAS_AVX)
2098 if constexpr (is_f64x4) {
2099 return numeric_array{_mm256_add_pd(lhs.reg(), rhs.reg())};
2100 }
else if constexpr (is_f32x8) {
2101 return numeric_array{_mm256_add_ps(lhs.reg(), rhs.reg())};
2104#if defined(HI_HAS_SSE2)
2105 if constexpr (is_f64x2) {
2106 return numeric_array{_mm_add_pd(lhs.reg(), rhs.reg())};
2107 }
else if constexpr (is_i64x2 or is_u64x2) {
2108 return numeric_array{_mm_add_epi64(lhs.reg(), rhs.reg())};
2109 }
else if constexpr (is_i32x4 or is_u32x4) {
2110 return numeric_array{_mm_add_epi32(lhs.reg(), rhs.reg())};
2111 }
else if constexpr (is_i16x8 or is_u16x8) {
2112 return numeric_array{_mm_add_epi16(lhs.reg(), rhs.reg())};
2113 }
else if constexpr (is_i8x16 or is_u8x16) {
2114 return numeric_array{_mm_add_epi8(lhs.reg(), rhs.reg())};
2117#if defined(HI_HAS_SSE)
2118 if constexpr (is_f32x4) {
2119 return numeric_array{_mm_add_ps(lhs.reg(), rhs.reg())};
2124 auto r = numeric_array{};
2126 r.v[i] = lhs.v[i] + rhs.v[i];
2131 [[nodiscard]] friend constexpr numeric_array operator+(numeric_array const &lhs, T const &rhs) noexcept
2133 return lhs + broadcast(rhs);
2136 [[nodiscard]] friend constexpr numeric_array operator+(T const &lhs, numeric_array const &rhs) noexcept
2138 return broadcast(lhs) + rhs;
2141 [[nodiscard]] friend constexpr numeric_array operator-(numeric_array const &lhs, numeric_array const &rhs) noexcept
2143 if (not std::is_constant_evaluated()) {
2144#if defined(HI_HAS_AVX2)
2145 if constexpr (is_i64x4 or is_u64x4) {
2146 return numeric_array{_mm256_sub_epi64(lhs.reg(), rhs.reg())};
2147 }
else if constexpr (is_i32x8 or is_u32x8) {
2148 return numeric_array{_mm256_sub_epi32(lhs.reg(), rhs.reg())};
2149 }
else if constexpr (is_i16x16 or is_u16x16) {
2150 return numeric_array{_mm256_sub_epi16(lhs.reg(), rhs.reg())};
2151 }
else if constexpr (is_i8x32 or is_u8x32) {
2152 return numeric_array{_mm256_sub_epi8(lhs.reg(), rhs.reg())};
2155#if defined(HI_HAS_AVX)
2156 if constexpr (is_f64x4) {
2157 return numeric_array{_mm256_sub_pd(lhs.reg(), rhs.reg())};
2158 }
else if constexpr (is_f32x8) {
2159 return numeric_array{_mm256_sub_ps(lhs.reg(), rhs.reg())};
2162#if defined(HI_HAS_SSE2)
2163 if constexpr (is_f64x2) {
2164 return numeric_array{_mm_sub_pd(lhs.reg(), rhs.reg())};
2165 }
else if constexpr (is_i64x2 or is_u64x2) {
2166 return numeric_array{_mm_sub_epi64(lhs.reg(), rhs.reg())};
2167 }
else if constexpr (is_i32x4 or is_u32x4) {
2168 return numeric_array{_mm_sub_epi32(lhs.reg(), rhs.reg())};
2169 }
else if constexpr (is_i16x8 or is_u16x8) {
2170 return numeric_array{_mm_sub_epi16(lhs.reg(), rhs.reg())};
2171 }
else if constexpr (is_i8x16 or is_u8x16) {
2172 return numeric_array{_mm_sub_epi8(lhs.reg(), rhs.reg())};
2175#if defined(HI_HAS_SSE)
2176 if constexpr (is_f32x4) {
2177 return numeric_array{_mm_sub_ps(lhs.reg(), rhs.reg())};
2182 auto r = numeric_array{};
2184 r.v[i] = lhs.v[i] - rhs.v[i];
2189 [[nodiscard]] friend constexpr numeric_array operator-(numeric_array const &lhs, T const &rhs) noexcept
2191 return lhs - broadcast(rhs);
2194 [[nodiscard]] friend constexpr numeric_array operator-(T const &lhs, numeric_array const &rhs) noexcept
2196 return broadcast(lhs) - rhs;
2199 [[nodiscard]] friend constexpr numeric_array operator*(numeric_array const &lhs, numeric_array const &rhs) noexcept
2201 if (not std::is_constant_evaluated()) {
2202#if defined(HI_HAS_AVX2)
2203 if constexpr (is_i32x8) {
2204 return numeric_array{_mm256_mul_epi32(lhs.reg(), rhs.reg())};
2205 }
else if constexpr (is_u32x8) {
2206 return numeric_array{_mm256_mul_epu32(lhs.reg(), rhs.reg())};
2209#if defined(HI_HAS_AVX)
2210 if constexpr (is_f64x4) {
2211 return numeric_array{_mm256_mul_pd(lhs.reg(), rhs.reg())};
2212 }
else if constexpr (is_f32x8) {
2213 return numeric_array{_mm256_mul_ps(lhs.reg(), rhs.reg())};
2216#if defined(HI_HAS_SSE4_1)
2217 if constexpr (is_i32x4) {
2218 return numeric_array{_mm_mul_epi32(lhs.reg(), rhs.reg())};
2219 }
else if constexpr (is_f16x4) {
2220 return numeric_array{numeric_array<float, 4>{lhs} * numeric_array<float, 4>{rhs}};
2223#if defined(HI_HAS_SSE2)
2224 if constexpr (is_f64x2) {
2225 return numeric_array{_mm_mul_pd(lhs.reg(), rhs.reg())};
2228#if defined(HI_HAS_SSE)
2229 if constexpr (is_f32x4) {
2230 return numeric_array{_mm_mul_ps(lhs.reg(), rhs.reg())};
2235 auto r = numeric_array{};
2237 r.v[i] = lhs.v[i] * rhs.v[i];
2242 [[nodiscard]] friend constexpr numeric_array operator*(numeric_array const &lhs, T const &rhs) noexcept
2244 return lhs * broadcast(rhs);
2247 [[nodiscard]] friend constexpr numeric_array operator*(T const &lhs, numeric_array const &rhs) noexcept
2249 return broadcast(lhs) * rhs;
2252 [[nodiscard]] friend constexpr numeric_array operator/(numeric_array const &lhs, numeric_array const &rhs) noexcept
2254 if (not std::is_constant_evaluated()) {
2255#if defined(HI_HAS_AVX)
2256 if constexpr (is_f64x4) {
2257 return numeric_array{_mm256_div_pd(lhs.reg(), rhs.reg())};
2258 }
else if constexpr (is_f32x8) {
2259 return numeric_array{_mm256_div_ps(lhs.reg(), rhs.reg())};
2262#if defined(HI_HAS_SSE2)
2263 if constexpr (is_f64x2) {
2264 return numeric_array{_mm_div_pd(lhs.reg(), rhs.reg())};
2267#if defined(HI_HAS_SSE)
2268 if constexpr (is_f32x4) {
2269 return numeric_array{_mm_div_ps(lhs.reg(), rhs.reg())};
2274 auto r = numeric_array{};
2276 r.v[i] = lhs.v[i] / rhs.v[i];
    [[nodiscard]] friend constexpr numeric_array operator/(numeric_array const& lhs, T const& rhs) noexcept
    {
        return lhs / broadcast(rhs);
    }

    [[nodiscard]] friend constexpr numeric_array operator/(T const& lhs, numeric_array const& rhs) noexcept
    {
        return broadcast(lhs) / rhs;
    }
    [[nodiscard]] friend constexpr numeric_array operator%(numeric_array const& lhs, numeric_array const& rhs) noexcept
    {
        // Assumption: the elided quotient is the floored element-wise division, giving floor-modulo semantics.
        hilet div_result = floor(lhs / rhs);
        return lhs - (div_result * rhs);
    }
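    // With the floored quotient assumed above, operator% behaves like the mathematical modulo; e.g. for
    // numeric_array<float, 4>: {5, -5, 5, -5} % {3, 3, -3, -3} == {2, 1, -1, -2}, since each element is
    // lhs - floor(lhs / rhs) * rhs.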
    [[nodiscard]] friend constexpr numeric_array operator%(numeric_array const& lhs, T const& rhs) noexcept
    {
        return lhs % broadcast(rhs);
    }

    [[nodiscard]] friend constexpr numeric_array operator%(T const& lhs, numeric_array const& rhs) noexcept
    {
        return broadcast(lhs) % rhs;
    }
    [[nodiscard]] friend constexpr numeric_array min(numeric_array const& lhs, numeric_array const& rhs) noexcept
    {
        if (not std::is_constant_evaluated()) {
#if defined(HI_HAS_AVX2)
            if constexpr (is_i32x8) {
                return numeric_array{_mm256_min_epi32(lhs.reg(), rhs.reg())};
            } else if constexpr (is_u32x8) {
                return numeric_array{_mm256_min_epu32(lhs.reg(), rhs.reg())};
            } else if constexpr (is_i16x16) {
                return numeric_array{_mm256_min_epi16(lhs.reg(), rhs.reg())};
            } else if constexpr (is_u16x16) {
                return numeric_array{_mm256_min_epu16(lhs.reg(), rhs.reg())};
            } else if constexpr (is_i8x32) {
                return numeric_array{_mm256_min_epi8(lhs.reg(), rhs.reg())};
            } else if constexpr (is_u8x32) {
                return numeric_array{_mm256_min_epu8(lhs.reg(), rhs.reg())};
            }
#endif
#if defined(HI_HAS_AVX)
            if constexpr (is_f64x4) {
                return numeric_array{_mm256_min_pd(lhs.reg(), rhs.reg())};
            } else if constexpr (is_f32x8) {
                return numeric_array{_mm256_min_ps(lhs.reg(), rhs.reg())};
            }
#endif
#if defined(HI_HAS_SSE4_1)
            if constexpr (is_i32x4) {
                return numeric_array{_mm_min_epi32(lhs.reg(), rhs.reg())};
            } else if constexpr (is_u32x4) {
                return numeric_array{_mm_min_epu32(lhs.reg(), rhs.reg())};
            } else if constexpr (is_u16x8) {
                return numeric_array{_mm_min_epu16(lhs.reg(), rhs.reg())};
            } else if constexpr (is_i8x16) {
                return numeric_array{_mm_min_epi8(lhs.reg(), rhs.reg())};
            }
#endif
#if defined(HI_HAS_SSE2)
            if constexpr (is_f64x2) {
                return numeric_array{_mm_min_pd(lhs.reg(), rhs.reg())};
            } else if constexpr (is_i16x8) {
                return numeric_array{_mm_min_epi16(lhs.reg(), rhs.reg())};
            } else if constexpr (is_u8x16) {
                return numeric_array{_mm_min_epu8(lhs.reg(), rhs.reg())};
            }
#endif
#if defined(HI_HAS_SSE)
            if constexpr (is_f32x4) {
                return numeric_array{_mm_min_ps(lhs.reg(), rhs.reg())};
            }
#endif
        }

        // Scalar fallback: element-wise minimum.
        auto r = numeric_array{};
        for (std::size_t i = 0; i != N; ++i) {
            r.v[i] = std::min(lhs.v[i], rhs.v[i]);
        }
        return r;
    }
    [[nodiscard]] friend constexpr numeric_array max(numeric_array const& lhs, numeric_array const& rhs) noexcept
    {
        if (not std::is_constant_evaluated()) {
#if defined(HI_HAS_AVX2)
            if constexpr (is_i32x8) {
                return numeric_array{_mm256_max_epi32(lhs.reg(), rhs.reg())};
            } else if constexpr (is_u32x8) {
                return numeric_array{_mm256_max_epu32(lhs.reg(), rhs.reg())};
            } else if constexpr (is_i16x16) {
                return numeric_array{_mm256_max_epi16(lhs.reg(), rhs.reg())};
            } else if constexpr (is_u16x16) {
                return numeric_array{_mm256_max_epu16(lhs.reg(), rhs.reg())};
            } else if constexpr (is_i8x32) {
                return numeric_array{_mm256_max_epi8(lhs.reg(), rhs.reg())};
            } else if constexpr (is_u8x32) {
                return numeric_array{_mm256_max_epu8(lhs.reg(), rhs.reg())};
            }
#endif
#if defined(HI_HAS_AVX)
            if constexpr (is_f64x4) {
                return numeric_array{_mm256_max_pd(lhs.reg(), rhs.reg())};
            } else if constexpr (is_f32x8) {
                return numeric_array{_mm256_max_ps(lhs.reg(), rhs.reg())};
            }
#endif
#if defined(HI_HAS_SSE4_1)
            if constexpr (is_i32x4) {
                return numeric_array{_mm_max_epi32(lhs.reg(), rhs.reg())};
            } else if constexpr (is_u32x4) {
                return numeric_array{_mm_max_epu32(lhs.reg(), rhs.reg())};
            } else if constexpr (is_u16x8) {
                return numeric_array{_mm_max_epu16(lhs.reg(), rhs.reg())};
            } else if constexpr (is_i8x16) {
                return numeric_array{_mm_max_epi8(lhs.reg(), rhs.reg())};
            }
#endif
#if defined(HI_HAS_SSE2)
            if constexpr (is_f64x2) {
                return numeric_array{_mm_max_pd(lhs.reg(), rhs.reg())};
            } else if constexpr (is_i16x8) {
                return numeric_array{_mm_max_epi16(lhs.reg(), rhs.reg())};
            } else if constexpr (is_u8x16) {
                return numeric_array{_mm_max_epu8(lhs.reg(), rhs.reg())};
            }
#endif
#if defined(HI_HAS_SSE)
            if constexpr (is_f32x4) {
                return numeric_array{_mm_max_ps(lhs.reg(), rhs.reg())};
            }
#endif
        }

        // Scalar fallback: element-wise maximum.
        auto r = numeric_array{};
        for (std::size_t i = 0; i != N; ++i) {
            r.v[i] = std::max(lhs.v[i], rhs.v[i]);
        }
        return r;
    }
    [[nodiscard]] friend constexpr numeric_array
    clamp(numeric_array const& lhs, numeric_array const& low, numeric_array const& high) noexcept
    {
        return min(max(lhs, low), high);
    }
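    // clamp() above is element-wise: clamp(v, low, high)[i] == std::clamp(v[i], low[i], high[i]) for
    // every i, provided low[i] <= high[i].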
    [[nodiscard]] friend constexpr numeric_array hadd(numeric_array const& lhs, numeric_array const& rhs) noexcept
    {
        if (not std::is_constant_evaluated()) {
#if defined(HI_HAS_AVX2)
            if constexpr (is_i32x8 or is_u32x8) {
                return numeric_array{_mm256_hadd_epi32(lhs.reg(), rhs.reg())};
            } else if constexpr (is_i16x16 or is_u16x16) {
                return numeric_array{_mm256_hadd_epi16(lhs.reg(), rhs.reg())};
            }
#endif
#if defined(HI_HAS_AVX)
            if constexpr (is_f64x4) {
                return numeric_array{_mm256_hadd_pd(lhs.reg(), rhs.reg())};
            } else if constexpr (is_f32x8) {
                return numeric_array{_mm256_hadd_ps(lhs.reg(), rhs.reg())};
            }
#endif
#if defined(HI_HAS_SSSE3)
            if constexpr (is_i32x4 or is_u32x4) {
                return numeric_array{_mm_hadd_epi32(lhs.reg(), rhs.reg())};
            } else if constexpr (is_i16x8 or is_u16x8) {
                return numeric_array{_mm_hadd_epi16(lhs.reg(), rhs.reg())};
            }
#endif
#if defined(HI_HAS_SSE3)
            if constexpr (is_f64x2) {
                return numeric_array{_mm_hadd_pd(lhs.reg(), rhs.reg())};
            } else if constexpr (is_f32x4) {
                return numeric_array{_mm_hadd_ps(lhs.reg(), rhs.reg())};
            }
#endif
        }

        hi_axiom(N % 2 == 0);

        // Scalar fallback: sum adjacent pairs of lhs into the lower half of the result,
        // then adjacent pairs of rhs into the upper half.
        auto r = numeric_array{};
        std::size_t dst_i = 0;

        std::size_t src_i = 0;
        while (src_i != N) {
            auto tmp = lhs[src_i++];
            tmp += lhs[src_i++];
            r[dst_i++] = tmp;
        }

        src_i = 0;
        while (src_i != N) {
            auto tmp = rhs[src_i++];
            tmp += rhs[src_i++];
            r[dst_i++] = tmp;
        }

        return r;
    }
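    // For N == 4, hadd(a, b) above yields {a[0]+a[1], a[2]+a[3], b[0]+b[1], b[2]+b[3]}, the same
    // pairwise layout produced by the _mm_hadd_* intrinsics.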
    [[nodiscard]] friend constexpr numeric_array hsub(numeric_array const& lhs, numeric_array const& rhs) noexcept
    {
        if (not std::is_constant_evaluated()) {
#if defined(HI_HAS_AVX2)
            if constexpr (is_i32x8 or is_u32x8) {
                return numeric_array{_mm256_hsub_epi32(lhs.reg(), rhs.reg())};
            } else if constexpr (is_i16x16 or is_u16x16) {
                return numeric_array{_mm256_hsub_epi16(lhs.reg(), rhs.reg())};
            }
#endif
#if defined(HI_HAS_AVX)
            if constexpr (is_f64x4) {
                return numeric_array{_mm256_hsub_pd(lhs.reg(), rhs.reg())};
            } else if constexpr (is_f32x8) {
                return numeric_array{_mm256_hsub_ps(lhs.reg(), rhs.reg())};
            }
#endif
#if defined(HI_HAS_SSSE3)
            if constexpr (is_i32x4 or is_u32x4) {
                return numeric_array{_mm_hsub_epi32(lhs.reg(), rhs.reg())};
            } else if constexpr (is_i16x8 or is_u16x8) {
                return numeric_array{_mm_hsub_epi16(lhs.reg(), rhs.reg())};
            }
#endif
#if defined(HI_HAS_SSE3)
            if constexpr (is_f64x2) {
                return numeric_array{_mm_hsub_pd(lhs.reg(), rhs.reg())};
            } else if constexpr (is_f32x4) {
                return numeric_array{_mm_hsub_ps(lhs.reg(), rhs.reg())};
            }
#endif
        }

        hi_axiom(N % 2 == 0);

        // Scalar fallback: subtract adjacent pairs of lhs into the lower half of the result,
        // then adjacent pairs of rhs into the upper half.
        auto r = numeric_array{};
        std::size_t dst_i = 0;

        std::size_t src_i = 0;
        while (src_i != N) {
            auto tmp = lhs[src_i++];
            tmp -= lhs[src_i++];
            r[dst_i++] = tmp;
        }

        src_i = 0;
        while (src_i != N) {
            auto tmp = rhs[src_i++];
            tmp -= rhs[src_i++];
            r[dst_i++] = tmp;
        }

        return r;
    }
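    // Likewise, for N == 4, hsub(a, b) above yields {a[0]-a[1], a[2]-a[3], b[0]-b[1], b[2]-b[3]}.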
    template<std::size_t Mask>
    [[nodiscard]] friend constexpr numeric_array addsub(numeric_array const& lhs, numeric_array const& rhs) noexcept
    {
        // The name addsub and not_mask as the all-elements mask are assumptions; the returned expression
        // is from the source. Assuming neg<> negates the elements selected by its mask, elements whose
        // Mask bit is set are added and the remaining elements are subtracted.
        constexpr std::size_t not_mask = (std::size_t{1} << N) - 1;
        return lhs + neg<Mask ^ not_mask>(rhs);
    }
        hi_axiom(rhs.z() == 0.0f && rhs.is_vector());

        return normalize<0b0011>(cross_2D(rhs));
    // Signatures assumed for the two cross products below; the bodies are from the source.
    // 2D cross product: returns the scalar lhs.x()*rhs.y() - lhs.y()*rhs.x().
    [[nodiscard]] friend constexpr T cross_2D(numeric_array const& lhs, numeric_array const& rhs) noexcept
    {
        hilet tmp1 = rhs.yxwz();
        hilet tmp2 = lhs * tmp1;
        hilet tmp3 = hsub(tmp2, tmp2);
        return get<0>(tmp3);
    }

    // 3D cross product of homogeneous vectors; the w lane cancels to zero.
    [[nodiscard]] friend constexpr numeric_array cross_3D(numeric_array const& lhs, numeric_array const& rhs) noexcept
    {
        hilet a_left = lhs.yzxw();
        hilet b_left = rhs.zxyw();
        hilet left = a_left * b_left;

        hilet a_right = lhs.zxyw();
        hilet b_right = rhs.yzxw();
        hilet right = a_right * b_right;
        return left - right;
    }
    [[nodiscard]] static constexpr numeric_array byte_srl_shuffle_indices(unsigned int rhs) requires(is_i8x16)
    {
        static_assert(std::endian::native == std::endian::little);

        auto r = numeric_array{};
        for (auto i = 0; i != 16; ++i) {
            if ((i + rhs) < 16) {
                r[i] = narrow_cast<int8_t>(i + rhs);
            } else {
                // Assumption: out-of-range indices are set to -1 so that a byte-shuffle writes zero there.
                r[i] = -1;
            }
        }
        return r;
    }
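    // Example (assuming the -1 fill above): byte_srl_shuffle_indices(3) is {3, 4, ..., 15, -1, -1, -1};
    // used as a table for _mm_shuffle_epi8 it shifts a 16-byte register right by 3 bytes, with the -1
    // entries producing zero bytes.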
    [[nodiscard]] static constexpr numeric_array byte_sll_shuffle_indices(unsigned int rhs) requires(is_i8x16)
    {
        static_assert(std::endian::native == std::endian::little);

        auto r = numeric_array{};
        for (auto i = 0; i != 16; ++i) {
            // Compare as signed; subtracting the unsigned rhs directly would wrap instead of going negative.
            if (i >= narrow_cast<int>(rhs)) {
                r[i] = narrow_cast<int8_t>(i - rhs);
            } else {
                // Assumption: out-of-range indices are set to -1 so that a byte-shuffle writes zero there.
                r[i] = -1;
            }
        }
        return r;
    }
    // Name and signature assumed: gather the elements of lhs selected by the low index bits of rhs.
    [[nodiscard]] friend constexpr numeric_array shuffle(numeric_array const& lhs, numeric_array const& rhs) noexcept
        requires(std::is_integral_v<value_type>)
    {
        if (!std::is_constant_evaluated()) {
#if defined(HI_HAS_SSSE3)
            if constexpr (is_i8x16 or is_u8x16) {
                return numeric_array{_mm_shuffle_epi8(lhs.reg(), rhs.reg())};
            }
#endif
        }

        // Scalar fallback: each result element picks lhs[rhs[i] & 0xf].
        auto r = numeric_array{};
        for (std::size_t i = 0; i != N; ++i) {
            r[i] = lhs[rhs[i] & 0xf];
        }
        return r;
    }
    // Signatures assumed for the two point helpers below; the bodies are from the source.
    [[nodiscard]] friend constexpr numeric_array midpoint(numeric_array const& p1, numeric_array const& p2) noexcept
    {
        hi_axiom(p1.is_point());
        hi_axiom(p2.is_point());
        return (p1 + p2) * 0.5f;
    }

    // Reflect the point p through the anchor point.
    [[nodiscard]] friend constexpr numeric_array reflect_point(numeric_array const& p, numeric_array const& anchor) noexcept
    {
        hi_axiom(p.is_point());
        hi_axiom(anchor.is_point());
        return anchor - (p - anchor);
    }
    // The return type, static linkage and the tmp/r declarations below are assumed; the static_assert,
    // the SSE fast path and the transpose_detail dispatch are from the source.
    template<typename... Columns>
    [[nodiscard]] constexpr static std::array<numeric_array, N> transpose(Columns const&... columns) noexcept
    {
        static_assert(sizeof...(Columns) == N, "Can only transpose square matrices");

        if (not std::is_constant_evaluated()) {
#if defined(HI_HAS_SSE)
            if constexpr (is_f32x4 and sizeof...(Columns) == 4) {
                auto tmp = std::array<__m128, 4>{columns.reg()...};
                _MM_TRANSPOSE4_PS(std::get<0>(tmp), std::get<1>(tmp), std::get<2>(tmp), std::get<3>(tmp));
                return {
                    numeric_array{get<0>(tmp)},
                    numeric_array{get<1>(tmp)},
                    numeric_array{get<2>(tmp)},
                    numeric_array{get<3>(tmp)}};
            }
#endif
        }

        auto r = std::array<numeric_array, N>{};
        transpose_detail<0, Columns...>(columns..., r);
        return r;
    }
    [[nodiscard]] constexpr friend numeric_array composit(numeric_array const& under, numeric_array const& over) noexcept
        requires(N == 4 && std::is_floating_point_v<T>)
    {
        if (over.is_transparent()) {
            return under;
        }
        if (over.is_opaque()) {
            return over;
        }

        hilet over_alpha = over.wwww();
        hilet under_alpha = under.wwww();

        hilet over_color = over.xyz1();
        hilet under_color = under.xyz1();

        hilet output_color = over_color * over_alpha + under_color * under_alpha * (T{1} - over_alpha);

        return output_color / output_color.www1();
    }
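    // The function above is straight-alpha "over" compositing: both colours are weighted by their alpha,
    // the sum carries the resulting alpha in w (via xyz1()/www1()), and the colour is divided by that
    // alpha again so the result is once more a straight-alpha colour.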
    // Assumption: this overload is constrained to the half-float specialization and composits via float.
    [[nodiscard]] constexpr friend numeric_array composit(numeric_array const& under, numeric_array const& over) noexcept
        requires(is_f16x4)
    {
        return numeric_array{composit(
            static_cast<numeric_array<float, 4>>(under),
            static_cast<numeric_array<float, 4>>(over))};
    }
        r += std::format("{}", rhs[i]);
    // Name and signature assumed: copy element FromElement of rhs into element ToElement of lhs.
    template<std::size_t FromElement, std::size_t ToElement>
    [[nodiscard]] friend constexpr numeric_array insert(numeric_array const& lhs, numeric_array const& rhs) noexcept
    {
        if (!std::is_constant_evaluated()) {
#if defined(HI_HAS_SSE4_1)
            if constexpr (is_f32x4) {
                constexpr uint8_t insert_mask = static_cast<uint8_t>((FromElement << 6) | (ToElement << 4));
                return numeric_array{_mm_insert_ps(lhs.reg(), rhs.reg(), insert_mask)};

            } else if constexpr (is_i32x4 or is_u32x4) {
                constexpr uint8_t insert_mask = static_cast<uint8_t>((FromElement << 6) | (ToElement << 4));
                return numeric_array{
                    _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(lhs.reg()), _mm_castsi128_ps(rhs.reg()), insert_mask))};
            }
#endif
#if defined(HI_HAS_SSE2)
            if constexpr (is_f64x2) {
                if constexpr (FromElement == 0 and ToElement == 0) {
                    return numeric_array{_mm_shuffle_pd(rhs.reg(), lhs.reg(), 0b10)};
                } else if constexpr (FromElement == 1 and ToElement == 0) {
                    return numeric_array{_mm_shuffle_pd(rhs.reg(), lhs.reg(), 0b11)};
                } else if constexpr (FromElement == 0 and ToElement == 1) {
                    return numeric_array{_mm_shuffle_pd(lhs.reg(), rhs.reg(), 0b00)};
                } else {
                    return numeric_array{_mm_shuffle_pd(lhs.reg(), rhs.reg(), 0b10)};
                }

            } else if constexpr (is_i64x2 or is_u64x2) {
                hilet lhs_ = _mm_castsi128_pd(lhs.reg());
                hilet rhs_ = _mm_castsi128_pd(rhs.reg());

                if constexpr (FromElement == 0 and ToElement == 0) {
                    return numeric_array{_mm_castpd_si128(_mm_shuffle_pd(rhs_, lhs_, 0b10))};
                } else if constexpr (FromElement == 1 and ToElement == 0) {
                    return numeric_array{_mm_castpd_si128(_mm_shuffle_pd(rhs_, lhs_, 0b11))};
                } else if constexpr (FromElement == 0 and ToElement == 1) {
                    return numeric_array{_mm_castpd_si128(_mm_shuffle_pd(lhs_, rhs_, 0b00))};
                } else {
                    return numeric_array{_mm_castpd_si128(_mm_shuffle_pd(lhs_, rhs_, 0b10))};
                }
            }
#endif
        }

        // Scalar fallback.
        auto r = numeric_array{};
        for (std::size_t i = 0; i != N; ++i) {
            r[i] = (i == ToElement) ? rhs[FromElement] : lhs[i];
        }
        return r;
    }
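    // Example: insert<1, 3>(a, b) returns a copy of a whose element 3 has been replaced by b[1]; all
    // other elements of a are unchanged.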
    // Template header and signature assumed; the f64x2 and f32x4 branches presumably call the matching
    // _mm_swizzle_pd / _mm_swizzle_ps helpers, by analogy with the integer branches from the source.
    template<ssize_t... Elements>
    [[nodiscard]] constexpr numeric_array swizzle() const noexcept
    {
        static_assert(sizeof...(Elements) <= N);

        if (!std::is_constant_evaluated()) {
#if defined(HI_HAS_AVX)
            if constexpr (is_f64x2) {
                return numeric_array{_mm_swizzle_pd<Elements...>(reg())};
            } else if constexpr (is_f32x4) {
                return numeric_array{_mm_swizzle_ps<Elements...>(reg())};
            } else if constexpr (is_i64x2 or is_u64x2) {
                return numeric_array{_mm_swizzle_epi64<Elements...>(reg())};
            } else if constexpr (is_i32x4 or is_u32x4) {
                return numeric_array{_mm_swizzle_epi32<Elements...>(reg())};
            }
#endif
        }

        auto r = numeric_array{};
        swizzle_detail<0, Elements...>(r);
        return r;
    }
#define SWIZZLE(swizzle_name, D, ...) \
    [[nodiscard]] constexpr numeric_array swizzle_name() const noexcept requires(D == N) \
    { \
        return swizzle<__VA_ARGS__>(); \
    }

#define SWIZZLE_4D_GEN1(name, ...) \
    SWIZZLE(name##0, 4, __VA_ARGS__, get_zero) \
    SWIZZLE(name##1, 4, __VA_ARGS__, get_one) \
    SWIZZLE(name##x, 4, __VA_ARGS__, 0) \
    SWIZZLE(name##y, 4, __VA_ARGS__, 1) \
    SWIZZLE(name##z, 4, __VA_ARGS__, 2) \
    SWIZZLE(name##w, 4, __VA_ARGS__, 3)

#define SWIZZLE_4D_GEN2(name, ...) \
    SWIZZLE_4D_GEN1(name##0, __VA_ARGS__, get_zero) \
    SWIZZLE_4D_GEN1(name##1, __VA_ARGS__, get_one) \
    SWIZZLE_4D_GEN1(name##x, __VA_ARGS__, 0) \
    SWIZZLE_4D_GEN1(name##y, __VA_ARGS__, 1) \
    SWIZZLE_4D_GEN1(name##z, __VA_ARGS__, 2) \
    SWIZZLE_4D_GEN1(name##w, __VA_ARGS__, 3)

#define SWIZZLE_4D_GEN3(name, ...) \
    SWIZZLE_4D_GEN2(name##0, __VA_ARGS__, get_zero) \
    SWIZZLE_4D_GEN2(name##1, __VA_ARGS__, get_one) \
    SWIZZLE_4D_GEN2(name##x, __VA_ARGS__, 0) \
    SWIZZLE_4D_GEN2(name##y, __VA_ARGS__, 1) \
    SWIZZLE_4D_GEN2(name##z, __VA_ARGS__, 2) \
    SWIZZLE_4D_GEN2(name##w, __VA_ARGS__, 3)

    SWIZZLE_4D_GEN3(_0, get_zero)
    SWIZZLE_4D_GEN3(_1, get_one)
    SWIZZLE_4D_GEN3(x, 0)
    SWIZZLE_4D_GEN3(y, 1)
    SWIZZLE_4D_GEN3(z, 2)
    SWIZZLE_4D_GEN3(w, 3)

#define SWIZZLE_3D_GEN1(name, ...) \
    SWIZZLE(name##0, 3, __VA_ARGS__, get_zero) \
    SWIZZLE(name##1, 3, __VA_ARGS__, get_one) \
    SWIZZLE(name##x, 3, __VA_ARGS__, 0) \
    SWIZZLE(name##y, 3, __VA_ARGS__, 1) \
    SWIZZLE(name##z, 3, __VA_ARGS__, 2)

#define SWIZZLE_3D_GEN2(name, ...) \
    SWIZZLE_3D_GEN1(name##0, __VA_ARGS__, get_zero) \
    SWIZZLE_3D_GEN1(name##1, __VA_ARGS__, get_one) \
    SWIZZLE_3D_GEN1(name##x, __VA_ARGS__, 0) \
    SWIZZLE_3D_GEN1(name##y, __VA_ARGS__, 1) \
    SWIZZLE_3D_GEN1(name##z, __VA_ARGS__, 2)

    SWIZZLE_3D_GEN2(_0, get_zero)
    SWIZZLE_3D_GEN2(_1, get_one)
    SWIZZLE_3D_GEN2(x, 0)
    SWIZZLE_3D_GEN2(y, 1)
    SWIZZLE_3D_GEN2(z, 2)

#define SWIZZLE_2D_GEN1(name, ...) \
    SWIZZLE(name##0, 2, __VA_ARGS__, get_zero) \
    SWIZZLE(name##1, 2, __VA_ARGS__, get_one) \
    SWIZZLE(name##x, 2, __VA_ARGS__, 0) \
    SWIZZLE(name##y, 2, __VA_ARGS__, 1)

    SWIZZLE_2D_GEN1(_0, get_zero)
    SWIZZLE_2D_GEN1(_1, get_one)
    SWIZZLE_2D_GEN1(x, 0)
    SWIZZLE_2D_GEN1(y, 1)

#undef SWIZZLE
#undef SWIZZLE_4D_GEN1
#undef SWIZZLE_4D_GEN2
#undef SWIZZLE_4D_GEN3
#undef SWIZZLE_3D_GEN1
#undef SWIZZLE_3D_GEN2
#undef SWIZZLE_2D_GEN1
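    // The macros above generate the named swizzle accessors used throughout this class; e.g. v.yxwz()
    // reorders a 4-element array, v.wwww() broadcasts element 3, and v.xyz1() keeps x, y and z while
    // forcing the last element to one (the '0' and '1' characters select the get_zero / get_one indices).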
    // Signature and the per-column copy assumed; the recursion over the remaining columns is from the source.
    template<int I, typename First, typename... Rest>
    constexpr static void transpose_detail(First const& first, Rest const&... rest, std::array<numeric_array, N>& r) noexcept
    {
        for (std::size_t j = 0; j != N; ++j) {
            r[j][I] = first[j];
        }
        if constexpr (sizeof...(Rest) != 0) {
            transpose_detail<I + 1, Rest...>(rest..., r);
        }
    }
    // Template header assumed from the uses of I, FirstElement and RestElements below.
    template<ssize_t I, ssize_t FirstElement, ssize_t... RestElements>
    constexpr void swizzle_detail(numeric_array& r) const noexcept
    {
        static_assert(I < narrow_cast<ssize_t>(N));
        static_assert(FirstElement >= -2 && FirstElement < narrow_cast<ssize_t>(N), "Index out of bounds");

        get<I>(r) = get<FirstElement>(*this);
        if constexpr (sizeof...(RestElements) != 0) {
            swizzle_detail<I + 1, RestElements...>(r);