// Member type aliases forwarded from the underlying std::array-like container.
// NOTE(review): this span is extraction-damaged — declarations are split across
// lines and the original file's own line numbers (61, 62, ...) are fused into
// the text. Content kept byte-identical below.
61 using value_type =
typename container_type::value_type;
62 using size_type =
typename container_type::size_type;
63 using difference_type =
typename container_type::difference_type;
64 using reference =
typename container_type::reference;
65 using const_reference =
typename container_type::const_reference;
66 using pointer =
typename container_type::pointer;
67 using const_pointer =
typename container_type::const_pointer;
68 using iterator =
typename container_type::iterator;
69 using const_iterator =
typename container_type::const_iterator;
// Compile-time lane-configuration tags: each flag is true when this
// instantiation is exactly "element type T x lane count N". They are used in
// `if constexpr` chains throughout the class to pick matching SSE/AVX intrinsics.
// 8-bit integer lanes.
71 constexpr static bool is_i8x1 = std::is_same_v<T, int8_t> && N == 1;
72 constexpr static bool is_i8x2 = std::is_same_v<T, int8_t> && N == 2;
73 constexpr static bool is_i8x4 = std::is_same_v<T, int8_t> && N == 4;
74 constexpr static bool is_i8x8 = std::is_same_v<T, int8_t> && N == 8;
75 constexpr static bool is_i8x16 = std::is_same_v<T, int8_t> && N == 16;
76 constexpr static bool is_i8x32 = std::is_same_v<T, int8_t> && N == 32;
77 constexpr static bool is_i8x64 = std::is_same_v<T, int8_t> && N == 64;
78 constexpr static bool is_u8x1 = std::is_same_v<T, uint8_t> && N == 1;
79 constexpr static bool is_u8x2 = std::is_same_v<T, uint8_t> && N == 2;
80 constexpr static bool is_u8x4 = std::is_same_v<T, uint8_t> && N == 4;
81 constexpr static bool is_u8x8 = std::is_same_v<T, uint8_t> && N == 8;
82 constexpr static bool is_u8x16 = std::is_same_v<T, uint8_t> && N == 16;
83 constexpr static bool is_u8x32 = std::is_same_v<T, uint8_t> && N == 32;
84 constexpr static bool is_u8x64 = std::is_same_v<T, uint8_t> && N == 64;
// 16-bit integer lanes, plus the half-float (float16) x4 case.
86 constexpr static bool is_i16x1 = std::is_same_v<T, int16_t> && N == 1;
87 constexpr static bool is_i16x2 = std::is_same_v<T, int16_t> && N == 2;
88 constexpr static bool is_i16x4 = std::is_same_v<T, int16_t> && N == 4;
89 constexpr static bool is_i16x8 = std::is_same_v<T, int16_t> && N == 8;
90 constexpr static bool is_i16x16 = std::is_same_v<T, int16_t> && N == 16;
91 constexpr static bool is_i16x32 = std::is_same_v<T, int16_t> && N == 32;
92 constexpr static bool is_u16x1 = std::is_same_v<T, uint16_t> && N == 1;
93 constexpr static bool is_u16x2 = std::is_same_v<T, uint16_t> && N == 2;
94 constexpr static bool is_u16x4 = std::is_same_v<T, uint16_t> && N == 4;
95 constexpr static bool is_u16x8 = std::is_same_v<T, uint16_t> && N == 8;
96 constexpr static bool is_u16x16 = std::is_same_v<T, uint16_t> && N == 16;
97 constexpr static bool is_u16x32 = std::is_same_v<T, uint16_t> && N == 32;
98 constexpr static bool is_f16x4 = std::is_same_v<T, float16> && N == 4;
// 32-bit integer and single-precision float lanes.
100 constexpr static bool is_i32x1 = std::is_same_v<T, int32_t> && N == 1;
101 constexpr static bool is_i32x2 = std::is_same_v<T, int32_t> && N == 2;
102 constexpr static bool is_i32x4 = std::is_same_v<T, int32_t> && N == 4;
103 constexpr static bool is_i32x8 = std::is_same_v<T, int32_t> && N == 8;
104 constexpr static bool is_i32x16 = std::is_same_v<T, int32_t> && N == 16;
105 constexpr static bool is_u32x1 = std::is_same_v<T, uint32_t> && N == 1;
106 constexpr static bool is_u32x2 = std::is_same_v<T, uint32_t> && N == 2;
107 constexpr static bool is_u32x4 = std::is_same_v<T, uint32_t> && N == 4;
108 constexpr static bool is_u32x8 = std::is_same_v<T, uint32_t> && N == 8;
109 constexpr static bool is_u32x16 = std::is_same_v<T, uint32_t> && N == 16;
110 constexpr static bool is_f32x1 = std::is_same_v<T, float> && N == 1;
111 constexpr static bool is_f32x2 = std::is_same_v<T, float> && N == 2;
112 constexpr static bool is_f32x4 = std::is_same_v<T, float> && N == 4;
113 constexpr static bool is_f32x8 = std::is_same_v<T, float> && N == 8;
114 constexpr static bool is_f32x16 = std::is_same_v<T, float> && N == 16;
// 64-bit integer and double-precision float lanes.
116 constexpr static bool is_i64x1 = std::is_same_v<T, int64_t> && N == 1;
117 constexpr static bool is_i64x2 = std::is_same_v<T, int64_t> && N == 2;
118 constexpr static bool is_i64x4 = std::is_same_v<T, int64_t> && N == 4;
119 constexpr static bool is_i64x8 = std::is_same_v<T, int64_t> && N == 8;
120 constexpr static bool is_u64x1 = std::is_same_v<T, uint64_t> && N == 1;
121 constexpr static bool is_u64x2 = std::is_same_v<T, uint64_t> && N == 2;
122 constexpr static bool is_u64x4 = std::is_same_v<T, uint64_t> && N == 4;
123 constexpr static bool is_u64x8 = std::is_same_v<T, uint64_t> && N == 8;
124 constexpr static bool is_f64x1 = std::is_same_v<T, double> && N == 1;
125 constexpr static bool is_f64x2 = std::is_same_v<T, double> && N == 2;
126 constexpr static bool is_f64x4 = std::is_same_v<T, double> && N == 4;
127 constexpr static bool is_f64x8 = std::is_same_v<T, double> && N == 8;
// Body fragment of what appears to be the zero-initializing path (presumably
// the default constructor) — the enclosing signature is not visible here.
// At runtime it zeroes the storage with the widest available store intrinsic
// (AVX 256-bit, else SSE2/SSE 128-bit); at constant-evaluation time (and for
// unmatched T/N shapes) it falls back to the element loop at the bottom.
// NOTE(review): extraction-damaged — several interior lines (closing braces,
// #endif, the loop body) are missing from this view. Kept byte-identical.
133 if (not std::is_constant_evaluated()) {
134#if defined(HI_HAS_AVX)
135 if constexpr (is_i64x4 or is_u64x4 or is_i32x8 or is_u32x8 or is_i16x16 or is_u16x16 or is_i8x32 or is_u8x32) {
136 _mm256_storeu_si256(
reinterpret_cast<__m256i *
>(v.
data()), _mm256_setzero_si256());
138 }
else if constexpr (is_f64x4) {
139 _mm256_storeu_pd(
reinterpret_cast<__m256d *
>(v.
data()), _mm256_setzero_pd());
141 }
else if constexpr (is_f32x8) {
142 _mm256_storeu_ps(v.
data(), _mm256_setzero_ps());
146#if defined(HI_HAS_SSE2)
147 if constexpr (is_i64x2 or is_u64x2 or is_i32x4 or is_u32x4 or is_i16x8 or is_u16x8 or is_i8x16 or is_u8x16) {
148 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(v.
data()), _mm_setzero_si128());
150 }
else if constexpr (is_f64x2) {
151 _mm_storeu_pd(
reinterpret_cast<__m128d *
>(v.
data()), _mm_setzero_pd());
155#if defined(HI_HAS_SSE)
156 if constexpr (is_f32x4) {
157 _mm_storeu_ps(v.
data(), _mm_setzero_ps());
// Scalar constexpr-safe fallback; the loop body is missing from this view.
163 for (
auto i = 0_uz; i != N; ++i) {
// Two converting-constructor fragments.
// 1) numeric_array(numeric_array<U, M> const& other): converts element type
//    U -> T, using AVX/SSE conversion and pack intrinsics where the lane
//    shapes match; the scalar fallback rounds when converting float -> int.
// 2) numeric_array(other1, other2): concatenates two half-width arrays into
//    one, using set_m128*/pack intrinsics for matching shapes.
// NOTE(review): extraction-damaged — most intrinsic branch bodies, braces and
// #endif lines are missing from this view; the remaining lines are kept
// byte-identical. Do not infer behavior of the elided branches from here.
173 template<numeric_limited U, std::
size_t M>
176 if (!std::is_constant_evaluated()) {
177#if defined(HI_HAS_AVX)
178 if constexpr (is_f64x4 and other.is_f32x4) {
181 }
else if constexpr (is_f64x4 and other.is_i32x4) {
184 }
else if constexpr (is_f32x4 and other.is_f64x4) {
187 }
else if constexpr (is_i32x4 and other.is_f64x4) {
190 }
else if constexpr (is_i32x8 and other.is_f32x8) {
193 }
else if constexpr (is_f32x8 and other.is_i32x8) {
198#if defined(HI_HAS_SSE4_1)
199 if constexpr (is_u8x4 and other.is_f32x4) {
// float32x4 -> u8x4: convert to i32, then saturating-pack down to 16-bit and
// unsigned-saturating-pack to 8-bit lanes.
200 hilet i32_4 = _mm_cvtps_epi32(other.reg())
201 hilet i16_8 = _mm_packs_epi32(i32_4, _mm_setzero_si128());
202 hilet u8_16 = _mm_packus_epi16(i16_8, _mm_setzero_si128());
205 }
else if constexpr (is_i64x4 and other.is_i32x4) {
208 }
else if constexpr (is_i64x4 and other.is_i16x8) {
211 }
else if constexpr (is_i32x4 and other.is_i16x8) {
214 }
else if constexpr (is_i64x2 and other.is_i8x16) {
217 }
else if constexpr (is_i32x4 and other.is_i8x16) {
220 }
else if constexpr (is_i16x8 and other.is_i8x16) {
223 }
else if constexpr (is_f16x4 and other.is_f32x4) {
226 }
else if constexpr (is_f32x4 and other.is_f16x4) {
232#if defined(HI_HAS_SSE2)
233 if constexpr (is_f64x2 and other.is_i32x4) {
236 }
else if constexpr (is_f32x4 and other.is_i32x4) {
239 }
else if constexpr (is_i32x4 and other.is_f32x4) {
// Scalar fallback: round float sources before narrowing to an integral T.
248 if constexpr (std::is_integral_v<T> and std::is_floating_point_v<U>) {
250 v[i] =
static_cast<value_type
>(
std::round(other[i]));
252 v[i] =
static_cast<value_type
>(other[i]);
// Concatenating constructor (other1 = low half, other2 = high half).
260 template<numeric_limited U, std::
size_t M>
265 if (!std::is_constant_evaluated()) {
266#if defined(HI_HAS_AVX)
267 if constexpr (is_f64x4 and other1.is_f64x2 and other2.is_f64x2) {
268 v =
numeric_array{_mm256_set_m128d(other2.reg(), other1.reg())};
269 }
else if constexpr (is_f32x8 and other1.is_f32x4 and other2.is_f32x4) {
270 v =
numeric_array{_mm256_set_m128(other2.reg(), other1.reg())};
271 }
else if constexpr (
272 std::is_integral_v<T> and std::is_integral_v<U> and (
sizeof(T) * N == 32) and (
sizeof(U) * M == 16)) {
273 v =
numeric_array{_mm256_set_m128i(other2.reg(), other1.reg())};
276#if defined(HI_HAS_SSE4_1)
277 if constexpr (is_u16x8 and other1.is_u32x4 and other2.is_u32x4) {
278 v =
numeric_array{_mm_packus_epu32(other2.reg(), other1.reg())};
281#if defined(HI_HAS_SSE2)
282 if constexpr (is_i16x8 and other1.is_i32x4 and other2.is_i32x4) {
283 v =
numeric_array{_mm_packs_epi32(other2.reg(), other1.reg())};
284 }
else if constexpr (is_i8x16 and other1.is_i16x8 and other2.is_i16x8) {
285 v =
numeric_array{_mm_packs_epi16(other2.reg(), other1.reg())};
286 }
else if constexpr (is_u8x16 and other1.is_u16x8 and other2.is_u16x8) {
287 v =
numeric_array{_mm_packus_epu16(other2.reg(), other1.reg())};
// Scalar fallback: first M elements come from other1, next M from other2;
// float sources are rounded before narrowing to an integral T.
294 if constexpr (std::is_integral_v<T> and std::is_floating_point_v<U>) {
296 v[i] =
static_cast<value_type
>(
std::round(other1[i]));
298 v[i] =
static_cast<value_type
>(other1[i]);
300 }
else if (i < M * 2) {
301 if constexpr (std::is_integral_v<T> and std::is_floating_point_v<U>) {
303 v[i] =
static_cast<value_type
>(
std::round(other2[i - M]));
305 v[i] =
static_cast<value_type
>(other2[i - M]);
// Component constructors (x), (x, y), (x, y, z), (x, y, z, w), each gated by a
// minimum lane count; the static broadcast() factory (splat one scalar to all
// lanes via set1 intrinsics); and epsilon() (an all-epsilon array for floats,
// all-zero otherwise).
// NOTE(review): extraction-damaged — nearly all intrinsic branch bodies,
// braces and #endif lines are missing from this view. Kept byte-identical;
// the elided branches presumably contain _mm*_set* calls — TODO confirm
// against the original file.
313 [[nodiscard]]
constexpr explicit numeric_array(T
const& x) noexcept : v()
315 if (not std::is_constant_evaluated()) {
316#if defined(HI_HAS_SSE)
317 if constexpr (is_f32x4) {
326 [[nodiscard]]
constexpr explicit numeric_array(T
const& x, T
const& y)
noexcept requires(N >= 2) : v()
328 if (not std::is_constant_evaluated()) {
329#if defined(HI_HAS_SSE2)
330 if constexpr (is_i32x4) {
340 [[nodiscard]]
constexpr explicit numeric_array(T
const& x, T
const& y, T
const& z)
noexcept requires(N >= 3) : v()
342 if (not std::is_constant_evaluated()) {
343#if defined(HI_HAS_SSE2)
344 if constexpr (is_i32x4) {
355 [[nodiscard]]
constexpr explicit numeric_array(T
const& x, T
const& y, T
const& z, T
const& w)
noexcept requires(N >= 4) : v()
357 if (not std::is_constant_evaluated()) {
358#if defined(HI_HAS_SSE2)
359 if constexpr (is_i32x4) {
// Splat a single scalar into every lane.
371 [[nodiscard]]
static constexpr numeric_array broadcast(T rhs)
noexcept
373 if (not std::is_constant_evaluated()) {
374#if defined(HI_HAS_AVX)
375 if constexpr (is_f64x4) {
377 }
else if constexpr (is_f32x8) {
379 }
else if constexpr (is_i64x4) {
381 }
else if constexpr (is_i32x8) {
383 }
else if constexpr (is_i16x16) {
385 }
else if constexpr (is_i8x32) {
389#if defined(HI_HAS_SSE2)
390 if constexpr (is_f64x2) {
392 }
else if constexpr (is_i64x2) {
394 }
else if constexpr (is_i32x4) {
396 }
else if constexpr (is_i16x8) {
398 }
else if constexpr (is_i8x16) {
402#if defined(HI_HAS_SSE)
403 if constexpr (is_f32x4) {
// epsilon(): for floating-point T, presumably broadcasts the machine epsilon
// (the true-branch body is missing here — TODO confirm); integral types get zero.
415 [[nodiscard]]
static constexpr numeric_array epsilon()
noexcept
417 if constexpr (std::is_floating_point_v<T>) {
420 return broadcast(T{0});
// SSE/AVX register interop: reg() loads the array's storage into a raw
// __m128i/__m128/__m128d/__m256i/__m256/__m256d register; the explicit
// constructors and assignment operators store such a register back into the
// array via unaligned store intrinsics. The f16x4 and u8x4 variants pack/unpack
// through integer extract/set intrinsics instead of memory loads.
// NOTE(review): extraction-damaged — function braces and #endif lines are
// missing throughout this span; each `#if defined(...)` line marks the start
// of one overload. Kept byte-identical.
437#if defined(HI_HAS_SSE2)
438 [[nodiscard]] __m128i reg()
const noexcept requires(std::is_integral_v<T> and
sizeof(T) * N == 16)
440 return _mm_loadu_si128(
reinterpret_cast<__m128i
const *
>(v.
data()));
443 [[nodiscard]] __m128i reg()
const noexcept requires(is_f16x4)
445 return _mm_set_epi16(0, 0, 0, 0, get<3>(v).get(), get<2>(v).get(), get<1>(v).get(), get<0>(v).get());
449#if defined(HI_HAS_SSE2)
450 [[nodiscard]] __m128 reg()
const noexcept requires(is_f32x4)
452 return _mm_loadu_ps(v.
data());
456#if defined(HI_HAS_SSE2)
457 [[nodiscard]] __m128d reg()
const noexcept requires(is_f64x2)
459 return _mm_loadu_pd(v.
data());
463#if defined(HI_HAS_SSE2)
464 [[nodiscard]]
explicit numeric_array(__m128i
const& rhs)
noexcept requires(std::is_integral_v<T> and
sizeof(T) * N == 16)
466 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(v.
data()), rhs);
470#if defined(HI_HAS_SSE4_1)
471 [[nodiscard]]
explicit numeric_array(__m128i
const& rhs)
noexcept requires(is_f16x4) :
472 v(std::bit_cast<
decltype(v)>(_mm_extract_epi64(rhs, 0)))
477#if defined(HI_HAS_SSE4_1)
478 [[nodiscard]]
explicit numeric_array(__m128i
const& rhs)
noexcept requires(is_u8x4) :
479 v(std::bit_cast<
decltype(v)>(_mm_extract_epi32(rhs, 0)))
484#if defined(HI_HAS_SSE2)
485 [[nodiscard]]
explicit numeric_array(__m128
const& rhs)
noexcept requires(is_f32x4)
487 _mm_storeu_ps(v.
data(), rhs);
491#if defined(HI_HAS_SSE2)
492 [[nodiscard]]
explicit numeric_array(__m128d
const& rhs)
noexcept requires(is_f64x2)
494 _mm_storeu_pd(v.
data(), rhs);
498#if defined(HI_HAS_SSE2)
499 numeric_array& operator=(__m128i
const& rhs)
noexcept requires(std::is_integral_v<T> and
sizeof(T) * N == 16)
501 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(v.
data()), rhs);
506#if defined(HI_HAS_SSE2)
507 numeric_array& operator=(__m128
const& rhs)
noexcept requires(is_f32x4)
509 _mm_storeu_ps(v.
data(), rhs);
514#if defined(HI_HAS_SSE2)
515 numeric_array& operator=(__m128d
const& rhs)
noexcept requires(is_f64x2)
517 _mm_storeu_pd(v.
data(), rhs);
522#if defined(HI_HAS_AVX)
523 [[nodiscard]] __m256i reg()
const noexcept requires(std::is_integral_v<T> and
sizeof(T) * N == 32)
525 return _mm256_loadu_si256(
reinterpret_cast<__m256i
const *
>(v.
data()));
529#if defined(HI_HAS_AVX)
530 [[nodiscard]] __m256 reg()
const noexcept requires(is_f32x8)
532 return _mm256_loadu_ps(v.
data());
536#if defined(HI_HAS_AVX)
537 [[nodiscard]] __m256d reg()
const noexcept requires(is_f64x4)
539 return _mm256_loadu_pd(v.
data());
543#if defined(HI_HAS_AVX)
544 [[nodiscard]]
explicit numeric_array(__m256i
const& rhs)
noexcept requires(std::is_integral_v<T> and
sizeof(T) * N == 32)
546 _mm256_storeu_si256(
reinterpret_cast<__m256i *
>(v.
data()), rhs);
550#if defined(HI_HAS_AVX)
551 [[nodiscard]]
explicit numeric_array(__m256
const& rhs)
noexcept requires(is_f32x8)
553 _mm256_storeu_ps(v.
data(), rhs);
557#if defined(HI_HAS_AVX)
558 [[nodiscard]]
explicit numeric_array(__m256d
const& rhs)
noexcept requires(is_f64x4)
560 _mm256_storeu_pd(v.
data(), rhs);
564#if defined(HI_HAS_AVX)
565 numeric_array& operator=(__m256i
const& rhs)
noexcept requires(std::is_integral_v<T> and
sizeof(T) * N == 32)
567 _mm256_storeu_si256(
reinterpret_cast<__m256i *
>(v.
data()), rhs);
572#if defined(HI_HAS_AVX)
573 numeric_array& operator=(__m256
const& rhs)
noexcept requires(is_f32x8)
575 _mm256_storeu_ps(v.
data(), rhs);
580#if defined(HI_HAS_AVX)
581 numeric_array& operator=(__m256d
const& rhs)
noexcept requires(is_f64x4)
583 _mm256_storeu_pd(v.
data(), rhs);
588 template<
typename Other>
589 [[nodiscard]]
constexpr friend Other bit_cast(
numeric_array const& rhs)
noexcept
592 if (not std::is_constant_evaluated()) {
593#if defined(HI_HAS_SSE2)
594 if constexpr (Other::is_f32x4 and std::is_integral_v<T>) {
595 return Other{_mm_castsi128_ps(rhs.reg())};
596 }
else if constexpr (Other::is_f32x4 and is_f64x2) {
597 return Other{_mm_castpd_ps(rhs.reg())};
598 }
else if constexpr (Other::is_f64x2 and std::is_integral_v<T>) {
599 return Other{_mm_castsi128_pd(rhs.reg())};
600 }
else if constexpr (Other::is_f64x2 and is_f32x4) {
601 return Other{_mm_castps_pd(rhs.reg())};
602 }
else if constexpr (std::is_integral_v<typename Other::value_type> and is_f32x4) {
603 return Other{_mm_castps_si128(rhs.reg())};
604 }
else if constexpr (std::is_integral_v<typename Other::value_type> and is_f64x2) {
605 return Other{_mm_castpd_si128(rhs.reg())};
606 }
else if constexpr (std::is_integral_v<typename Other::value_type> and std::is_integral_v<T>) {
607 return Other{rhs.reg()};
611 return std::bit_cast<Other>(rhs);
// Fragments of an interleave-style helper (enclosing signature not visible:
// the fallback line r[i] = (i % 2 == 0) ? a[i / 2] : b[i / 2] suggests it
// interleaves lanes of a and b — TODO confirm against the original file) and
// of the store() members that write the array to a byte buffer.
// NOTE(review): extraction-damaged — the intrinsic branch bodies and most of
// the store() bodies are missing from this view. Kept byte-identical.
618 if (not std::is_constant_evaluated()) {
619#if defined(HI_HAS_SSE2)
620 if constexpr (is_f64x2) {
622 }
else if constexpr (is_i64x2 or is_u64x2) {
624 }
else if constexpr (is_i32x4 or is_u32x4) {
626 }
else if constexpr (is_i16x8 or is_u16x8) {
628 }
else if constexpr (is_i8x16 or is_u8x16) {
632#if defined(HI_HAS_SSE)
633 if constexpr (is_f32x4) {
// Scalar fallback: even result lanes from a, odd result lanes from b.
641 r[i] = (i % 2 == 0) ? a[i / 2] : b[i / 2];
650 template<std::
size_t S>
680 template<std::
size_t S>
681 constexpr void store(std::byte *ptr)
const noexcept
// Convenience overload: stores the whole object.
689 constexpr void store(std::byte *ptr)
const noexcept
691 store<sizeof(*this)>(ptr);
697 constexpr explicit operator bool() const noexcept
699 if constexpr (std::is_floating_point_v<T>) {
700 hilet ep = epsilon();
702 return to_bool(gt(-ep, *
this) | gt(*
this, ep));
704 return to_bool(ne(*
this, T{0}));
// Element access: operator[] (const and mutable), container-style accessors
// (front/back/data/begin/end/size/...), geometric predicates (is_point /
// is_vector test the homogeneous w coordinate in the last lane), and the
// named component accessors x/y/z/w, r/g/b/a, width/height/depth, which are
// aliases for lanes 0..3 gated on the lane count N.
// NOTE(review): extraction-damaged — most one-line bodies (e.g. `return v[i];`,
// `return v.front();`) and braces are missing from this view; only signatures
// and some return statements survive. Kept byte-identical.
708 [[nodiscard]]
constexpr T
const& operator[](
std::size_t i)
const noexcept
// Lane order is defined for little-endian; big-endian would need reversal.
710 static_assert(std::endian::native == std::endian::little,
"Indices need to be reversed on big endian machines");
715 [[nodiscard]]
constexpr T& operator[](
std::size_t i)
noexcept
717 static_assert(std::endian::native == std::endian::little,
"Indices need to be reversed on big endian machines");
722 [[nodiscard]]
constexpr reference front() noexcept
727 [[nodiscard]]
constexpr const_reference front() const noexcept
732 [[nodiscard]]
constexpr reference back() noexcept
737 [[nodiscard]]
constexpr const_reference back() const noexcept
742 [[nodiscard]]
constexpr pointer data() noexcept
747 [[nodiscard]]
constexpr const_pointer data() const noexcept
752 [[nodiscard]]
constexpr iterator
begin() noexcept
757 [[nodiscard]]
constexpr const_iterator
begin() const noexcept
762 [[nodiscard]]
constexpr const_iterator cbegin() const noexcept
767 [[nodiscard]]
constexpr iterator
end() noexcept
772 [[nodiscard]]
constexpr const_iterator
end() const noexcept
777 [[nodiscard]]
constexpr const_iterator cend() const noexcept
782 [[nodiscard]]
constexpr bool empty() const noexcept
787 [[nodiscard]]
constexpr size_type size() const noexcept
792 [[nodiscard]]
constexpr size_type max_size() const noexcept
// Homogeneous-coordinate predicates: a point has a non-zero last lane (w),
// a vector has w == 0.
797 constexpr bool is_point() const noexcept
799 return v.
back() != T{};
802 constexpr bool is_vector() const noexcept
804 return v.
back() == T{};
807 constexpr bool is_opaque() const noexcept
812 constexpr bool is_transparent() const noexcept
// Named lane accessors: x/y/z/w (geometry view of lanes 0-3).
817 [[nodiscard]]
constexpr T
const& x() const noexcept requires(N >= 1)
819 return std::get<0>(v);
822 [[nodiscard]]
constexpr T
const& y() const noexcept requires(N >= 2)
824 return std::get<1>(v);
827 [[nodiscard]]
constexpr T
const& z() const noexcept requires(N >= 3)
829 return std::get<2>(v);
832 [[nodiscard]]
constexpr T
const& w() const noexcept requires(N >= 4)
834 return std::get<3>(v);
837 [[nodiscard]]
constexpr T& x() noexcept requires(N >= 1)
839 return std::get<0>(v);
842 [[nodiscard]]
constexpr T& y() noexcept requires(N >= 2)
844 return std::get<1>(v);
847 [[nodiscard]]
constexpr T& z() noexcept requires(N >= 3)
849 return std::get<2>(v);
852 [[nodiscard]]
constexpr T& w() noexcept requires(N >= 4)
854 return std::get<3>(v);
// r/g/b/a (color view of the same lanes 0-3).
857 [[nodiscard]]
constexpr T
const& r() const noexcept requires(N >= 1)
859 return std::get<0>(v);
862 [[nodiscard]]
constexpr T
const& g() const noexcept requires(N >= 2)
864 return std::get<1>(v);
867 [[nodiscard]]
constexpr T
const& b() const noexcept requires(N >= 3)
869 return std::get<2>(v);
872 [[nodiscard]]
constexpr T
const& a() const noexcept requires(N >= 4)
874 return std::get<3>(v);
877 [[nodiscard]]
constexpr T& r() noexcept requires(N >= 1)
879 return std::get<0>(v);
882 [[nodiscard]]
constexpr T& g() noexcept requires(N >= 2)
884 return std::get<1>(v);
887 [[nodiscard]]
constexpr T& b() noexcept requires(N >= 3)
889 return std::get<2>(v);
892 [[nodiscard]]
constexpr T& a() noexcept requires(N >= 4)
894 return std::get<3>(v);
// width/height/depth (extent view of lanes 0-2).
897 [[nodiscard]]
constexpr T
const& width() const noexcept requires(N >= 1)
899 return std::get<0>(v);
902 [[nodiscard]]
constexpr T
const& height() const noexcept requires(N >= 2)
904 return std::get<1>(v);
907 [[nodiscard]]
constexpr T
const& depth() const noexcept requires(N >= 3)
909 return std::get<2>(v);
912 [[nodiscard]]
constexpr T& width() noexcept requires(N >= 1)
914 return std::get<0>(v);
917 [[nodiscard]]
constexpr T& height() noexcept requires(N >= 2)
919 return std::get<1>(v);
922 [[nodiscard]]
constexpr T& depth() noexcept requires(N >= 3)
924 return std::get<2>(v);
927 constexpr numeric_array& operator<<=(
unsigned int rhs)
noexcept
929 return *
this = *
this << rhs;
932 constexpr numeric_array& operator>>=(
unsigned int rhs)
noexcept
934 return *
this = *
this >> rhs;
937 constexpr numeric_array& operator|=(numeric_array
const& rhs)
noexcept
939 return *
this = *
this | rhs;
942 constexpr numeric_array& operator|=(T
const& rhs)
noexcept
944 return *
this = *
this | rhs;
947 constexpr numeric_array& operator&=(numeric_array
const& rhs)
noexcept
949 return *
this = *
this & rhs;
952 constexpr numeric_array& operator&=(T
const& rhs)
noexcept
954 return *
this = *
this & rhs;
957 constexpr numeric_array& operator^=(numeric_array
const& rhs)
noexcept
959 return *
this = *
this ^ rhs;
962 constexpr numeric_array& operator^=(T
const& rhs)
noexcept
964 return *
this = *
this ^ rhs;
967 constexpr numeric_array& operator+=(numeric_array
const& rhs)
noexcept
969 return *
this = *
this + rhs;
972 constexpr numeric_array& operator+=(T
const& rhs)
noexcept
974 return *
this = *
this + rhs;
977 constexpr numeric_array& operator-=(numeric_array
const& rhs)
noexcept
979 return *
this = *
this - rhs;
982 constexpr numeric_array& operator-=(T
const& rhs)
noexcept
984 return *
this = *
this - rhs;
987 constexpr numeric_array& operator*=(numeric_array
const& rhs)
noexcept
989 return *
this = *
this * rhs;
992 constexpr numeric_array& operator*=(T
const& rhs)
noexcept
994 return *
this = *
this * rhs;
997 constexpr numeric_array& operator/=(numeric_array
const& rhs)
noexcept
999 return *
this = *
this / rhs;
1002 constexpr numeric_array& operator/=(T
const& rhs)
noexcept
1004 return *
this = *
this / rhs;
1007 constexpr numeric_array& operator%=(numeric_array
const& rhs)
noexcept
1009 return *
this = *
this % rhs;
1012 constexpr numeric_array& operator%=(T
const& rhs)
noexcept
1014 return *
this = *
this % rhs;
// Lane get/extract/insert/set helpers. get_zero/get_one are sentinel indices
// used by the swizzle-style get<I>() overloads to mean "literal 0" / "literal 1"
// instead of a real lane. The SIMD extract path uses _mm*_extract_* per lane
// shape; insert<I, ZeroMask>() overwrites one lane (and optionally zeroes
// others) via _mm_insert_ps.
// NOTE(review): extraction-damaged — several signatures, branch bodies, braces
// and #endif lines are missing from this view. Kept byte-identical.
1017 constexpr static ssize_t get_zero = -1;
1018 constexpr static ssize_t get_one = -2;
1024 template<std::
size_t I>
1027 static_assert(I < N,
"Index out of bounds");
1028 return std::get<I>(rhs.v);
// Signed-index overload: accepts get_zero/get_one sentinels as well as lanes.
1039 static_assert(std::endian::native == std::endian::little,
"Indices need to be reversed on big endian machines");
1040 static_assert(I >= -2 && I < narrow_cast<ssize_t>(N),
"Index out of bounds");
1041 if constexpr (I == get_zero) {
1043 }
else if constexpr (I == get_one) {
1046 return std::get<I>(rhs.v);
// SIMD single-lane extraction, widest instruction set first.
1056 template<std::
size_t I>
1059 static_assert(I < N);
1061 if (not std::is_constant_evaluated()) {
1062#if defined(HI_HAS_AVX2)
1063 if constexpr (is_i16x16 or is_u16x16) {
1064 return static_cast<T
>(_mm256_extract_epi16(rhs.v.reg(), I));
1065 }
else if constexpr (is_i8x32 or is_u8x32) {
1066 return static_cast<T
>(_mm256_extract_epi8(rhs.v.reg(), I));
1069#if defined(HI_HAS_AVX)
1070 if constexpr (is_f64x4) {
// Floats are extracted through the integer domain and bit_cast back to T.
1071 return bit_cast<T>(_mm256_extract_epi64(_mm256_castpd_si256(rhs.v.reg()), I));
1072 }
else if constexpr (is_f32x8) {
1073 return bit_cast<T>(_mm256_extract_epi32(_mm256_castps_si256(rhs.v.reg()), I));
1074 }
else if constexpr (is_i64x4 or is_u64x4) {
1075 return static_cast<T
>(_mm256_extract_epi64(rhs.v.reg(), I));
1076 }
else if constexpr (is_i32x8 or is_u32x8) {
1077 return static_cast<T
>(_mm256_extract_epi32(rhs.v.reg(), I));
1080#if defined(HI_HAS_SSE4_1)
1081 if constexpr (is_f64x2) {
1082 return bit_cast<T>(_mm_extract_epi64(_mm_castpd_si128(rhs.v.reg()), I));
1083 }
else if constexpr (is_f32x4) {
1084 return std::bit_cast<T>(_mm_extract_ps(rhs.v.reg(), I));
1085 }
else if constexpr (is_i64x2 or is_u64x2) {
1086 return static_cast<T
>(_mm_extract_epi64(rhs.v.reg(), I));
1087 }
else if constexpr (is_i32x4 or is_u32x4) {
1088 return static_cast<T
>(_mm_extract_epi32(rhs.v.reg(), I));
1089 }
else if constexpr (is_i8x16 or is_u8x16) {
1090 return static_cast<T
>(_mm_extract_epi8(rhs.v.reg(), I));
1093#if defined(HI_HAS_SSE2)
1094 if constexpr (is_i16x8 or is_u16x8) {
1095 return static_cast<T
>(_mm_extract_epi16(rhs.v.reg(), I));
// insert<I, ZeroMask>(): replace lane I with rhs; ZeroMask bits select lanes
// to clear, matching the _mm_insert_ps imm8 encoding.
1111 template<std::
size_t I, std::
size_t ZeroMask = 0>
1113 requires(is_f32x4 or is_i32x4 or is_u32x4)
1115 static_assert(I < N);
1116 static_assert(ZeroMask <= ((1 << N) - 1));
1118 if (not std::is_constant_evaluated()) {
1119#if defined(HI_HAS_SSE4_1)
1120 if constexpr (is_f32x4) {
1121 constexpr int imm8 = (I << 4) | ZeroMask;
1122 return numeric_array{_mm_insert_ps(lhs.reg(), _mm_set_ss(rhs), imm8)};
1123 }
else if constexpr (is_i32x4 or is_u32x4) {
1124 constexpr int imm8 = (I << 4) | ZeroMask;
1126 _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(lhs.reg()), _mm_castsi128_ps(_mm_set1_epi32(rhs)), imm8))};
// Scalar fallback: assign the lane, then apply ZeroMask bit by bit.
1132 std::get<I>(r.v) = rhs;
1134 if ((ZeroMask >> i) & 1) {
// set<I>() with get_zero/get_one sentinel support (mirrors get<I> above).
1149 static_assert(std::endian::native == std::endian::little,
"Indices need to be reversed on big endian machines");
1150 static_assert(I >= -2 && I < narrow_cast<ssize_t>(N),
"Index out of bounds");
1151 if constexpr (I == get_zero) {
1153 }
else if constexpr (I == get_one) {
1156 return std::get<I>(rhs.v);
// Mask-driven lane operations: a Mask-templated per-lane zeroing/selection
// helper (enclosing signature not visible), the compile-time blend<Mask>()
// (lane i comes from rhs when Mask bit i is set, else lhs), the runtime
// blendv-style blend(a, b, mask) selecting per lane where mask is non-zero,
// and neg<Mask>() which negates the lanes selected by Mask via blend.
// NOTE(review): extraction-damaged — braces, #endif lines, several return
// statements and the enclosing signatures are missing from this view. Kept
// byte-identical.
1167 if (not std::is_constant_evaluated()) {
1168#if defined(HI_HAS_SSE4_1)
1169 if constexpr (is_f32x4) {
1170 return numeric_array{_mm_insert_ps(rhs.reg(), rhs.reg(), Mask)};
1171 }
else if constexpr (is_i32x4 or is_u32x4) {
1173 _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(rhs.reg()), _mm_castsi128_ps(rhs.reg()), Mask))};
1180 if (to_bool((Mask >> i) & 1)) {
// Compile-time blend: Mask bit i selects rhs[i] over lhs[i].
1196 template<std::
size_t Mask>
1199 if (not std::is_constant_evaluated()) {
1200#if defined(HI_HAS_AVX2)
1201 if constexpr (is_i32x8) {
1202 return numeric_array{_mm256_blend_epi32(lhs.reg(), rhs.reg(), Mask)};
1203 }
else if constexpr (is_i64x2 or is_u64x2) {
// 64-bit lanes emulated with 32-bit blend: each Mask bit expands to two.
1204 constexpr auto mask_x2 = ((Mask & 1) ? 0b0011 : 0) | ((Mask & 2) ? 0b1100 : 0);
1205 return numeric_array{_mm_blend_epi32(lhs.reg(), rhs.reg(), mask_x2)};
1206 }
else if constexpr (is_i32x4 or is_u32x4) {
1207 return numeric_array{_mm_blend_epi32(lhs.reg(), rhs.reg(), Mask)};
1208 }
else if constexpr (is_i16x16 or is_u16x16) {
1209 return numeric_array{_mm256_blend_epi16(lhs.reg(), rhs.reg(), Mask)};
1212#if defined(HI_HAS_AVX)
1213 if constexpr (is_f64x4) {
1214 return numeric_array{_mm256_blend_pd(lhs.reg(), rhs.reg(), Mask)};
1215 }
else if constexpr (is_f32x8) {
1216 return numeric_array{_mm256_blend_ps(lhs.reg(), rhs.reg(), Mask)};
1217 }
else if constexpr (is_i64x4 or is_u64x4) {
1219 _mm256_castpd_si256(_mm256_blend_pd(_mm256_castsi256_pd(lhs.reg()), _mm256_castsi256_pd(rhs.reg()), Mask))};
1220 }
else if constexpr (is_i32x8 or is_u32x8) {
1222 _mm256_castps_si256(_mm256_blend_ps(_mm256_castsi256_ps(lhs.reg()), _mm256_castsi256_ps(rhs.reg()), Mask))};
1225#if defined(HI_HAS_SSE4_1)
1226 if constexpr (is_f64x2) {
1227 return numeric_array{_mm_blend_pd(lhs.reg(), rhs.reg(), Mask)};
1228 }
else if constexpr (is_f32x4) {
1229 return numeric_array{_mm_blend_ps(lhs.reg(), rhs.reg(), Mask)};
1230 }
else if constexpr (is_i64x2 or is_u64x2) {
1232 _mm_castpd_si128(_mm_blend_pd(_mm_castsi128_pd(lhs.reg()), _mm_castsi128_pd(rhs.reg()), Mask))};
1233 }
else if constexpr (is_i32x4 or is_u32x4) {
1235 _mm_castps_si128(_mm_blend_ps(_mm_castsi128_ps(lhs.reg()), _mm_castsi128_ps(rhs.reg()), Mask))};
1236 }
else if constexpr (is_i16x8 or is_u16x8) {
1237 return numeric_array{_mm_blend_epi16(lhs.reg(), rhs.reg(), Mask)};
// Scalar fallback for blend<Mask>.
1244 r[i] = to_bool((Mask >> i) & 1) ? rhs[i] : lhs[i];
// Runtime blend: lane i comes from b where mask[i] is non-zero, else from a.
1253 if (not std::is_constant_evaluated()) {
1254#if defined(HI_HAS_AVX2)
1255 if constexpr (is_i8x32 or is_u8x32) {
1256 return numeric_array{_mm256_blendv_epi8(a.reg(), b.reg(), mask.reg())};
1259#if defined(HI_HAS_AVX)
1260 if constexpr (is_f64x4) {
1261 return numeric_array{_mm256_blendv_pd(a.reg(), b.reg(), mask.reg())};
1262 }
else if constexpr (is_f32x8) {
1263 return numeric_array{_mm256_blendv_ps(a.reg(), b.reg(), mask.reg())};
1264 }
else if constexpr (is_i64x4 or is_u64x4) {
1266 _mm256_castsi256_pd(a.reg()), _mm256_castsi256_pd(b.reg()), _mm256_castsi256_pd(mask.reg())))};
1267 }
else if constexpr (is_i32x8 or is_u32x8) {
1269 _mm256_castsi256_ps(a.reg()), _mm256_castsi256_ps(b.reg()), _mm256_castsi256_ps(mask.reg())))};
1272#if defined(HI_HAS_SSE4_1)
1273 if constexpr (is_f64x2) {
1274 return numeric_array{_mm_blendv_pd(a.reg(), b.reg(), mask.reg())};
1275 }
else if constexpr (is_f32x4) {
1276 return numeric_array{_mm_blendv_ps(a.reg(), b.reg(), mask.reg())};
1277 }
else if constexpr (is_i64x2 or is_u64x2) {
1279 _mm_blendv_pd(_mm_castsi128_pd(a.reg()), _mm_castsi128_pd(b.reg()), _mm_castsi128_pd(mask.reg())))};
1280 }
else if constexpr (is_i32x4 or is_u32x4) {
1282 _mm_blendv_ps(_mm_castsi128_ps(a.reg()), _mm_castsi128_ps(b.reg()), _mm_castsi128_ps(mask.reg())))};
1283 }
else if constexpr (is_i8x16 or is_u8x16) {
1284 return numeric_array{_mm_blendv_epi8(a.reg(), b.reg(), mask.reg())};
// Scalar fallback for runtime blend.
1291 r[i] = mask[i] != T{0} ? b[i] : a[i];
// neg<Mask>(): negate only the lanes whose Mask bit is set.
1300 template<std::
size_t Mask>
1303 return blend<Mask>(rhs, -rhs);
1311 [[nodiscard]]
friend constexpr numeric_array abs(numeric_array
const& rhs)
noexcept
1313 if (not std::is_constant_evaluated()) {
1314#if defined(HI_HAS_AVX2)
1315 if constexpr (is_i32x8) {
1316 return numeric_array{_mm256_abs_epi32(rhs.reg())};
1317 }
else if constexpr (is_i16x16) {
1318 return numeric_array{_mm256_abs_epi16(rhs.reg())};
1319 }
else if constexpr (is_i8x32) {
1320 return numeric_array{_mm256_abs_epi8(rhs.reg())};
1323#if defined(HI_HAS_SSSE3)
1324 if constexpr (is_i32x4) {
1325 return numeric_array{_mm_abs_epi32(rhs.reg())};
1326 }
else if constexpr (is_i16x8) {
1327 return numeric_array{_mm_abs_epi16(rhs.reg())};
1328 }
else if constexpr (is_i8x16) {
1329 return numeric_array{_mm_abs_epi8(rhs.reg())};
1332#if defined(HI_HAS_SSE2)
1333 if constexpr (is_f64x2) {
1334 return numeric_array{_mm_castsi128_ps(_mm_srli_epi64(_mm_slli_epi64(_mm_castpd_si128(rhs.reg()), 1), 1))};
1335 }
else if constexpr (is_f32x4) {
1336 return numeric_array{_mm_castsi128_ps(_mm_srli_epi32(_mm_slli_epi32(_mm_castps_si128(rhs.reg()), 1), 1))};
1341 return max(rhs, -rhs);
// Element-wise math friends: rcp (approximate reciprocal via _mm*_rcp_ps),
// sqrt, rcp_sqrt (approximate reciprocal square root), and floor/ceil/round
// (float-only, via the SSE4.1/AVX rounding intrinsics; round uses the current
// rounding mode, _MM_FROUND_CUR_DIRECTION).
// NOTE(review): extraction-damaged — braces, #endif lines and the scalar
// fallback loop bodies (after each `auto r = numeric_array{};`) are missing
// from this view. Kept byte-identical.
1344 [[nodiscard]]
friend constexpr numeric_array rcp(numeric_array
const& rhs)
noexcept
1346 if (not std::is_constant_evaluated()) {
1347#if defined(HI_HAS_AVX)
1348 if constexpr (is_f32x8) {
1349 return numeric_array{_mm256_rcp_ps(rhs.reg())};
1352#if defined(HI_HAS_SSE)
1353 if constexpr (is_f32x4) {
1354 return numeric_array{_mm_rcp_ps(rhs.reg())};
1362 [[nodiscard]]
friend constexpr numeric_array
sqrt(numeric_array
const& rhs)
noexcept
1364 if (not std::is_constant_evaluated()) {
1365#if defined(HI_HAS_AVX)
1366 if constexpr (is_f64x4) {
1367 return numeric_array{_mm256_sqrt_pd(rhs.reg())};
1368 }
else if constexpr (is_f32x8) {
1369 return numeric_array{_mm256_sqrt_ps(rhs.reg())};
1372#if defined(HI_HAS_SSE2)
1373 if constexpr (is_f64x2) {
1374 return numeric_array{_mm_sqrt_pd(rhs.reg())};
1377#if defined(HI_HAS_SSE)
1378 if constexpr (is_f32x4) {
1379 return numeric_array{_mm_sqrt_ps(rhs.reg())};
1384 auto r = numeric_array{};
1391 [[nodiscard]]
friend constexpr numeric_array rcp_sqrt(numeric_array
const& rhs)
noexcept
1393 if (not std::is_constant_evaluated()) {
1394#if defined(HI_HAS_AVX)
1395 if constexpr (is_f32x8) {
1396 return numeric_array{_mm256_rsqrt_ps(rhs.reg())};
1399#if defined(HI_HAS_SSE)
1400 if constexpr (is_f32x4) {
1401 return numeric_array{_mm_rsqrt_ps(rhs.reg())};
1406 return rcp(
sqrt(rhs));
1409 [[nodiscard]]
friend constexpr numeric_array
floor(numeric_array
const& rhs)
noexcept
1410 requires(std::is_floating_point_v<value_type>)
1412 if (not std::is_constant_evaluated()) {
1413#if defined(HI_HAS_AVX)
1414 if constexpr (is_f64x4) {
1415 return numeric_array{_mm256_floor_pd(rhs.reg())};
1416 }
else if constexpr (is_f32x8) {
1417 return numeric_array{_mm256_floor_ps(rhs.reg())};
1420#if defined(HI_HAS_SSE4_1)
1421 if constexpr (is_f64x2) {
1422 return numeric_array{_mm_floor_pd(rhs.reg())};
1423 }
else if constexpr (is_f32x4) {
1424 return numeric_array{_mm_floor_ps(rhs.reg())};
1429 auto r = numeric_array{};
1436 [[nodiscard]]
friend constexpr numeric_array
ceil(numeric_array
const& rhs)
noexcept
1437 requires(std::is_floating_point_v<value_type>)
1439 if (not std::is_constant_evaluated()) {
1440#if defined(HI_HAS_AVX)
1441 if constexpr (is_f64x4) {
1442 return numeric_array{_mm256_ceil_pd(rhs.reg())};
1443 }
else if constexpr (is_f32x8) {
1444 return numeric_array{_mm256_ceil_ps(rhs.reg())};
1447#if defined(HI_HAS_SSE4_1)
1448 if constexpr (is_f64x2) {
1449 return numeric_array{_mm_ceil_pd(rhs.reg())};
1450 }
else if constexpr (is_f32x4) {
1451 return numeric_array{_mm_ceil_ps(rhs.reg())};
1456 auto r = numeric_array{};
1463 [[nodiscard]]
friend constexpr numeric_array
round(numeric_array
const& rhs)
noexcept
1464 requires(std::is_floating_point_v<value_type>)
1466 if (not std::is_constant_evaluated()) {
1467#if defined(HI_HAS_AVX)
1468 if constexpr (is_f64x4) {
1469 return numeric_array{_mm256_round_pd(rhs.reg(), _MM_FROUND_CUR_DIRECTION)};
1470 }
else if constexpr (is_f32x8) {
1471 return numeric_array{_mm256_round_ps(rhs.reg(), _MM_FROUND_CUR_DIRECTION)};
1474#if defined(HI_HAS_SSE4_1)
1475 if constexpr (is_f64x2) {
1476 return numeric_array{_mm_round_pd(rhs.reg(), _MM_FROUND_CUR_DIRECTION)};
1477 }
else if constexpr (is_f32x4) {
1478 return numeric_array{_mm_round_ps(rhs.reg(), _MM_FROUND_CUR_DIRECTION)};
1483 auto r = numeric_array{};
1497 template<std::
size_t Mask>
1500 if (not std::is_constant_evaluated()) {
1501#if defined(HI_HAS_SSE4_1)
1502 if constexpr (is_f64x2) {
1503 return std::bit_cast<double>(_mm_extract_epi64(_mm_dp_pd(lhs.reg(), rhs.reg(), (Mask << 4) | 0xf), 0));
1504 }
else if constexpr (is_f32x4) {
1505 return std::bit_cast<float>(_mm_extract_ps(_mm_dp_ps(lhs.reg(), rhs.reg(), (Mask << 4) | 0xf), 0));
1512 if (to_bool(Mask & (1_uz << i))) {
1513 r += lhs.v[i] * rhs.v[i];
1526 template<std::
size_t Mask>
1539 template<std::
size_t Mask>
1542 return dot<Mask>(rhs, rhs);
1551 template<std::
size_t Mask>
1554 if (not std::is_constant_evaluated()) {
1555#if defined(HI_HAS_SSE4_1)
1556 if constexpr (is_f32x4) {
1557 return std::bit_cast<float>(_mm_extract_ps(_mm_rsqrt_ps(_mm_dp_ps(rhs.reg(), rhs.reg(), (Mask << 4) | 0xf)), 0));
1562 return 1.0f / hypot<Mask>(rhs);
1573 template<std::
size_t Mask>
1576 hi_axiom(rhs.is_vector());
1578 if (not std::is_constant_evaluated()) {
1579#if defined(HI_HAS_SSE4_1)
1580 if constexpr (is_f32x4) {
1581 hilet rhs_ = rhs.reg();
1582 hilet tmp = _mm_mul_ps(_mm_rsqrt_ps(_mm_dp_ps(rhs_, rhs_, (Mask << 4) | 0xf)), rhs_);
1588 hilet rcp_hypot_ = rcp_hypot<Mask>(rhs);
1592 if (to_bool(Mask & (1_uz << i))) {
1593 r.v[i] = rhs.v[i] * rcp_hypot_;
1602 if (not std::is_constant_evaluated()) {
1603#if defined(HI_HAS_AVX2)
1604 if constexpr (is_i64x4 or is_u64x4) {
1606 _mm256_movemask_pd(_mm256_castsi256_pd(_mm256_cmpeq_epi64(lhs.reg(), rhs.reg()))));
1607 }
else if constexpr (is_i32x8 or is_u32x8) {
1609 _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpeq_epi32(lhs.reg(), rhs.reg()))));
1610 }
else if constexpr (is_i8x32 or is_u8x32) {
1611 return static_cast<std::size_t>(_mm256_movemask_epi8(_mm256_cmpeq_epi8(lhs.reg(), rhs.reg())));
1614#if defined(HI_HAS_AVX)
1615 if constexpr (is_f64x4) {
1616 return static_cast<std::size_t>(_mm256_movemask_pd(_mm256_cmp_pd(lhs.reg(), rhs.reg(), _CMP_EQ_OQ)));
1617 }
else if constexpr (is_f32x8) {
1618 return static_cast<std::size_t>(_mm256_movemask_ps(_mm256_cmp_ps(lhs.reg(), rhs.reg(), _CMP_EQ_OQ)));
1621#if defined(HI_HAS_SSE4_1)
1622 if constexpr (is_i64x2 or is_u64x2) {
1623 return static_cast<std::size_t>(_mm_movemask_pd(_mm_castsi128_pd(_mm_cmpeq_epi64(lhs.reg(), rhs.reg()))));
1626#if defined(HI_HAS_SSE2)
1627 if constexpr (is_f64x2) {
1628 return static_cast<std::size_t>(_mm_movemask_pd(_mm_cmpeq_pd(lhs.reg(), rhs.reg())));
1629 }
else if constexpr (is_i32x4 or is_u32x4) {
1630 return static_cast<std::size_t>(_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(lhs.reg(), rhs.reg()))));
1631 }
else if constexpr (is_i8x16 or is_u8x16) {
1632 return static_cast<std::size_t>(_mm_movemask_epi8(_mm_cmpeq_epi8(lhs.reg(), rhs.reg())));
1635#if defined(HI_HAS_SSE)
1636 if constexpr (is_f32x4) {
1637 return static_cast<std::size_t>(_mm_movemask_ps(_mm_cmpeq_ps(lhs.reg(), rhs.reg())));
1644 r |=
static_cast<std::size_t>(lhs.v[i] == rhs.v[i]) << i;
1649 [[nodiscard]]
friend constexpr std::size_t ne(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
1652 if (not std::is_constant_evaluated()) {
1653#if defined(HI_HAS_AVX)
1654 if constexpr (is_f64x4) {
1655 return static_cast<std::size_t>(_mm256_movemask_pd(_mm256_cmp_pd(lhs.reg(), rhs.reg(), _CMP_NEQ_OQ)));
1656 }
else if constexpr (is_f32x8) {
1657 return static_cast<std::size_t>(_mm256_movemask_ps(_mm256_cmp_ps(lhs.reg(), rhs.reg(), _CMP_NEQ_OQ)));
1660#if defined(HI_HAS_SSE2)
1661 if constexpr (is_f64x2) {
1662 return static_cast<std::size_t>(_mm_movemask_pd(_mm_cmpneq_pd(lhs.reg(), rhs.reg())));
1665#if defined(HI_HAS_SSE)
1666 if constexpr (is_f32x4) {
1667 return static_cast<std::size_t>(_mm_movemask_ps(_mm_cmpneq_ps(lhs.reg(), rhs.reg())));
1673 return eq(lhs, rhs) ^ not_mask;
1676 [[nodiscard]]
friend constexpr std::size_t gt(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
1679 if (not std::is_constant_evaluated()) {
1680#if defined(HI_HAS_AVX2)
1681 if constexpr (is_i64x4) {
1683 _mm256_movemask_pd(_mm256_castsi256_pd(_mm256_cmpgt_epi64(lhs.reg(), rhs.reg()))));
1684 }
else if constexpr (is_i32x8) {
1686 _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(lhs.reg(), rhs.reg()))));
1687 }
else if constexpr (is_i8x32) {
1688 return static_cast<std::size_t>(_mm256_movemask_epi8(_mm256_cmpgt_epi8(lhs.reg(), rhs.reg())));
1691#if defined(HI_HAS_AVX)
1692 if constexpr (is_f64x4) {
1693 return static_cast<std::size_t>(_mm256_movemask_pd(_mm256_cmp_pd(lhs.reg(), rhs.reg(), _CMP_GT_OQ)));
1694 }
else if constexpr (is_f32x8) {
1695 return static_cast<std::size_t>(_mm256_movemask_ps(_mm256_cmp_ps(lhs.reg(), rhs.reg(), _CMP_GT_OQ)));
1698#if defined(HI_HAS_SSE4_1)
1699 if constexpr (is_i64x2) {
1700 return static_cast<std::size_t>(_mm_movemask_pd(_mm_castsi128_pd(_mm_cmpgt_epi64(lhs.reg(), rhs.reg()))));
1703#if defined(HI_HAS_SSE2)
1704 if constexpr (is_f64x2) {
1705 return static_cast<std::size_t>(_mm_movemask_pd(_mm_cmpgt_pd(lhs.reg(), rhs.reg())));
1706 }
else if constexpr (is_i32x4) {
1707 return static_cast<std::size_t>(_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(lhs.reg(), rhs.reg()))));
1708 }
else if constexpr (is_i8x16) {
1709 return static_cast<std::size_t>(_mm_movemask_epi8(_mm_cmpgt_epi8(lhs.reg(), rhs.reg())));
1712#if defined(HI_HAS_SSE)
1713 if constexpr (is_f32x4) {
1714 return static_cast<std::size_t>(_mm_movemask_ps(_mm_cmpgt_ps(lhs.reg(), rhs.reg())));
1721 r |=
static_cast<std::size_t>(lhs.v[i] > rhs.v[i]) << i;
1726 [[nodiscard]]
friend constexpr std::size_t lt(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
1729 if (not std::is_constant_evaluated()) {
1730#if defined(HI_HAS_AVX)
1731 if constexpr (is_f64x4) {
1732 return static_cast<std::size_t>(_mm256_movemask_pd(_mm256_cmp_pd(lhs.reg(), rhs.reg(), _CMP_LT_OQ)));
1733 }
else if constexpr (is_f32x8) {
1734 return static_cast<std::size_t>(_mm256_movemask_ps(_mm256_cmp_ps(lhs.reg(), rhs.reg(), _CMP_LT_OQ)));
1737#if defined(HI_HAS_SSE2)
1738 if constexpr (is_f64x2) {
1739 return static_cast<std::size_t>(_mm_movemask_pd(_mm_cmplt_pd(lhs.reg(), rhs.reg())));
1740 }
else if constexpr (is_i32x4) {
1741 return static_cast<std::size_t>(_mm_movemask_ps(_mm_castsi128_ps(_mm_cmplt_epi32(lhs.reg(), rhs.reg()))));
1742 }
else if constexpr (is_i8x16) {
1743 return static_cast<std::size_t>(_mm_movemask_epi8(_mm_cmplt_epi8(lhs.reg(), rhs.reg())));
1746#if defined(HI_HAS_SSE)
1747 if constexpr (is_f32x4) {
1748 return static_cast<std::size_t>(_mm_movemask_ps(_mm_cmplt_ps(lhs.reg(), rhs.reg())));
1754 return gt(rhs, lhs);
1757 [[nodiscard]]
friend constexpr std::size_t ge(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
1760 if (not std::is_constant_evaluated()) {
1761#if defined(HI_HAS_AVX)
1762 if constexpr (is_f64x4) {
1763 return static_cast<std::size_t>(_mm256_movemask_pd(_mm256_cmp_pd(lhs.reg(), rhs.reg(), _CMP_GE_OQ)));
1764 }
else if constexpr (is_f32x8) {
1765 return static_cast<std::size_t>(_mm256_movemask_ps(_mm256_cmp_ps(lhs.reg(), rhs.reg(), _CMP_GE_OQ)));
1768#if defined(HI_HAS_SSE2)
1769 if constexpr (is_f64x2) {
1770 return static_cast<std::size_t>(_mm_movemask_pd(_mm_cmpge_pd(lhs.reg(), rhs.reg())));
1773#if defined(HI_HAS_SSE)
1774 if constexpr (is_f32x4) {
1775 return static_cast<std::size_t>(_mm_movemask_ps(_mm_cmpge_ps(lhs.reg(), rhs.reg())));
1781 return gt(lhs, rhs) | eq(lhs, rhs);
1784 [[nodiscard]]
friend constexpr std::size_t le(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
1787 if (not std::is_constant_evaluated()) {
1788#if defined(HI_HAS_AVX)
1789 if constexpr (is_f64x4) {
1790 return static_cast<std::size_t>(_mm256_movemask_pd(_mm256_cmp_pd(lhs.reg(), rhs.reg(), _CMP_LE_OQ)));
1791 }
else if constexpr (is_f32x8) {
1792 return static_cast<std::size_t>(_mm256_movemask_ps(_mm256_cmp_ps(lhs.reg(), rhs.reg(), _CMP_LE_OQ)));
1795#if defined(HI_HAS_SSE2)
1796 if constexpr (is_f64x2) {
1797 return static_cast<std::size_t>(_mm_movemask_pd(_mm_cmple_pd(lhs.reg(), rhs.reg())));
1800#if defined(HI_HAS_SSE)
1801 if constexpr (is_f32x4) {
1802 return static_cast<std::size_t>(_mm_movemask_ps(_mm_cmple_ps(lhs.reg(), rhs.reg())));
1808 return gt(rhs, lhs) | eq(rhs, lhs);
1811 [[nodiscard]]
friend constexpr numeric_array gt_mask(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
1813 if (not std::is_constant_evaluated()) {
1814#if defined(HI_HAS_SSE4_2)
1815 if constexpr (is_i64x2) {
1816 return numeric_array{_mm_cmpgt_epi64(lhs.reg(), rhs.reg())};
1819#if defined(HI_HAS_SSE2)
1820 if constexpr (is_i32x4) {
1821 return numeric_array{_mm_cmpgt_epi32(lhs.reg(), rhs.reg())};
1822 }
else if constexpr (is_i16x8) {
1823 return numeric_array{_mm_cmpgt_epi16(lhs.reg(), rhs.reg())};
1824 }
else if constexpr (is_i8x16) {
1825 return numeric_array{_mm_cmpgt_epi8(lhs.reg(), rhs.reg())};
1828#if defined(HI_HAS_SSE)
1829 if constexpr (is_f32x4) {
1830 return numeric_array{_mm_cmpgt_ps(lhs.reg(), rhs.reg())};
1835 using uint_type = make_uintxx_t<
sizeof(T) * CHAR_BIT>;
1836 constexpr auto ones = std::bit_cast<T>(~uint_type{0});
1838 auto r = numeric_array{};
1840 r[i] = lhs.v[i] > rhs.v[i] ? ones : T{0};
1845 [[nodiscard]]
friend constexpr bool operator==(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
1847 return not ne(lhs, rhs);
1850 [[nodiscard]]
friend constexpr numeric_array operator<<(numeric_array
const& lhs,
unsigned int rhs)
noexcept
1852 if (not std::is_constant_evaluated()) {
1853#if defined(HI_HAS_AVX2)
1854 if constexpr (is_f64x4) {
1855 return numeric_array{_mm256_castsi256_pd(_mm256_slli_epi64(_mm256_castpd_si256(lhs.reg()), rhs))};
1856 }
else if constexpr (is_f32x8) {
1857 return numeric_array{_mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(lhs.reg()), rhs))};
1858 }
else if constexpr (is_i64x4 or is_u64x4) {
1859 return numeric_array{_mm256_slli_epi64(lhs.reg(), rhs)};
1860 }
else if constexpr (is_i32x8 or is_u32x8) {
1861 return numeric_array{_mm256_slli_epi32(lhs.reg(), rhs)};
1862 }
else if constexpr (is_i16x16 or is_u16x16) {
1863 return numeric_array{_mm256_slli_epi16(lhs.reg(), rhs)};
1866#if defined(HI_HAS_SSE2)
1867 if constexpr (is_f64x2) {
1868 return numeric_array{_mm_castsi128_pd(_mm_slli_epi64(_mm_castpd_si128(lhs.reg()), rhs))};
1869 }
else if constexpr (is_f32x4) {
1870 return numeric_array{_mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(lhs.reg()), rhs))};
1871 }
else if constexpr (is_i64x2 or is_u64x2) {
1872 return numeric_array{_mm_slli_epi64(lhs.reg(), rhs)};
1873 }
else if constexpr (is_i32x4 or is_u32x4) {
1874 return numeric_array{_mm_slli_epi32(lhs.reg(), rhs)};
1875 }
else if constexpr (is_i16x8 or is_u16x8) {
1876 return numeric_array{_mm_slli_epi16(lhs.reg(), rhs)};
1881 auto r = numeric_array{};
1883 r.v[i] = lhs.v[i] << rhs;
1888 [[nodiscard]]
friend constexpr numeric_array operator>>(numeric_array
const& lhs,
unsigned int rhs)
noexcept
1890 if (not std::is_constant_evaluated()) {
1891#if defined(HI_HAS_AVX2)
1892 if constexpr (is_f64x4) {
1893 return numeric_array{_mm256_castsi256_pd(_mm256_srli_epi64(_mm256_castpd_si256(lhs.reg()), rhs))};
1894 }
else if constexpr (is_f32x8) {
1895 return numeric_array{_mm256_castsi256_ps(_mm256_srli_epi32(_mm256_castps_si256(lhs.reg()), rhs))};
1896 }
else if constexpr (is_u64x4) {
1897 return numeric_array{_mm256_srli_epi64(lhs.reg(), rhs)};
1898 }
else if constexpr (is_i32x8) {
1899 return numeric_array{_mm256_srai_epi32(lhs.reg(), rhs)};
1900 }
else if constexpr (is_u32x8) {
1901 return numeric_array{_mm256_srli_epi32(lhs.reg(), rhs)};
1902 }
else if constexpr (is_i16x16) {
1903 return numeric_array{_mm256_srai_epi16(lhs.reg(), rhs)};
1904 }
else if constexpr (is_u16x16) {
1905 return numeric_array{_mm256_srli_epi16(lhs.reg(), rhs)};
1908#if defined(HI_HAS_SSE2)
1909 if constexpr (is_f64x2) {
1910 return numeric_array{_mm_castsi128_pd(_mm_srli_epi64(_mm_castpd_si128(lhs.reg()), rhs))};
1911 }
else if constexpr (is_f32x4) {
1912 return numeric_array{_mm_castsi128_ps(_mm_srli_epi32(_mm_castps_si128(lhs.reg()), rhs))};
1913 }
else if constexpr (is_u64x2) {
1914 return numeric_array{_mm_srli_epi64(lhs.reg(), rhs)};
1915 }
else if constexpr (is_i32x4) {
1916 return numeric_array{_mm_srai_epi32(lhs.reg(), rhs)};
1917 }
else if constexpr (is_u32x4) {
1918 return numeric_array{_mm_srli_epi32(lhs.reg(), rhs)};
1919 }
else if constexpr (is_i16x8) {
1920 return numeric_array{_mm_srai_epi16(lhs.reg(), rhs)};
1921 }
else if constexpr (is_u16x8) {
1922 return numeric_array{_mm_srli_epi16(lhs.reg(), rhs)};
1927 auto r = numeric_array{};
1929 r.v[i] = lhs.v[i] >> rhs;
1940 hi_axiom(rhs > 0 and rhs <
sizeof(value_type) * CHAR_BIT);
1942 hilet remainder = narrow<unsigned int>(
sizeof(value_type) * CHAR_BIT - rhs);
1944 return (lhs << rhs) | (lhs >> remainder);
1953 hi_axiom(rhs > 0 and rhs <
sizeof(value_type) * CHAR_BIT);
1955 hilet remainder = narrow<unsigned int>(
sizeof(value_type) * CHAR_BIT - rhs);
1957 return (lhs >> rhs) | (lhs << remainder);
1962 if (not std::is_constant_evaluated()) {
1963#if defined(HI_HAS_AVX2)
1964 if constexpr (is_i64x4 or is_u64x4 or is_i32x8 or is_u32x8 or is_i16x8 or is_u16x8 or is_i8x32 or is_u8x32) {
1965 return numeric_array{_mm256_or_si256(lhs.reg(), rhs.reg())};
1968#if defined(HI_HAS_AVX)
1969 if constexpr (is_f64x4) {
1970 return numeric_array{_mm256_or_pd(lhs.reg(), rhs.reg())};
1971 }
else if constexpr (is_f32x8) {
1972 return numeric_array{_mm256_or_ps(lhs.reg(), rhs.reg())};
1973 }
else if constexpr (is_i64x4 or is_u64x4 or is_i32x8 or is_u32x8 or is_i16x8 or is_u16x8 or is_i8x32 or is_u8x32) {
1974 return numeric_array{
1975 _mm256_castps_si256(_mm256_or_ps(_mm256_castsi256_ps(lhs.reg()), _mm256_castsi256_ps(rhs.reg())))};
1978#if defined(HI_HAS_SSE2)
1979 if constexpr (is_f64x2) {
1980 return numeric_array{_mm_or_pd(lhs.reg(), rhs.reg())};
1981 }
else if constexpr (is_i64x2 or is_u64x2 or is_i32x4 or is_u32x4 or is_i16x8 or is_u16x8 or is_i8x16 or is_i8x16) {
1982 return numeric_array{_mm_or_si128(lhs.reg(), rhs.reg())};
1985#if defined(HI_HAS_SSE)
1986 if constexpr (is_f64x2) {
1987 return numeric_array{_mm_castps_pd(_mm_or_ps(_mm_castps_ps(lhs.reg()), _mm_castps_ps(rhs.reg())))};
1989 }
else if constexpr (is_f32x4) {
1990 return numeric_array{_mm_or_ps(lhs.reg(), rhs.reg())};
1992 }
else if constexpr (is_i64x2 or is_u64x2 or is_i32x4 or is_u32x4 or is_i16x8 or is_u16x8 or is_i8x16 or is_i8x16) {
1993 return numeric_array{_mm_castps_si128(_mm_or_ps(_mm_castsi128_ps(lhs.reg()), _mm_castsi128_ps(rhs.reg())))};
1998 using uint_type = make_uintxx_t<
sizeof(T) * CHAR_BIT>;
2000 auto r = numeric_array{};
2003 std::bit_cast<T>(
static_cast<uint_type
>(std::bit_cast<uint_type>(lhs.v[i]) | std::bit_cast<uint_type>(rhs.v[i])));
2008 [[nodiscard]]
friend constexpr numeric_array
operator|(numeric_array
const& lhs, T
const& rhs)
noexcept
2010 return lhs | broadcast(rhs);
2013 [[nodiscard]]
friend constexpr numeric_array
operator|(T
const& lhs, numeric_array
const& rhs)
noexcept
2015 return broadcast(lhs) | rhs;
2018 [[nodiscard]]
friend constexpr numeric_array operator&(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
2020 if (not std::is_constant_evaluated()) {
2021#if defined(HI_HAS_AVX2)
2022 if constexpr (is_i64x4 or is_u64x4 or is_i32x8 or is_u32x8 or is_i16x8 or is_u16x8 or is_i8x32 or is_u8x32) {
2023 return numeric_array{_mm256_and_si256(lhs.reg(), rhs.reg())};
2026#if defined(HI_HAS_AVX)
2027 if constexpr (is_f64x4) {
2028 return numeric_array{_mm256_and_pd(lhs.reg(), rhs.reg())};
2029 }
else if constexpr (is_f32x8) {
2030 return numeric_array{_mm256_and_ps(lhs.reg(), rhs.reg())};
2031 }
else if constexpr (is_i64x4 or is_u64x4 or is_i32x8 or is_u32x8 or is_i16x8 or is_u16x8 or is_i8x32 or is_u8x32) {
2032 return numeric_array{
2033 _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(lhs.reg()), _mm256_castsi256_ps(rhs.reg())))};
2036#if defined(HI_HAS_SSE2)
2037 if constexpr (is_f64x2) {
2038 return numeric_array{_mm_and_pd(lhs.reg(), rhs.reg())};
2039 }
else if constexpr (is_i64x2 or is_u64x2 or is_i32x4 or is_u32x4 or is_i16x8 or is_u16x8 or is_i8x16 or is_i8x16) {
2040 return numeric_array{_mm_and_si128(lhs.reg(), rhs.reg())};
2043#if defined(HI_HAS_SSE)
2044 if constexpr (is_f64x2) {
2045 return numeric_array{_mm_castps_pd(_mm_and_ps(_mm_castps_ps(lhs.reg()), _mm_castps_ps(rhs.reg())))};
2047 }
else if constexpr (is_f32x4) {
2048 return numeric_array{_mm_and_ps(lhs.reg(), rhs.reg())};
2050 }
else if constexpr (is_i64x2 or is_u64x2 or is_i32x4 or is_u32x4 or is_i16x8 or is_u16x8 or is_i8x16 or is_i8x16) {
2051 return numeric_array{_mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(lhs.reg()), _mm_castsi128_ps(rhs.reg())))};
2056 auto r = numeric_array{};
2058 r.v[i] = lhs.v[i] & rhs.v[i];
2063 [[nodiscard]]
friend constexpr numeric_array operator&(numeric_array
const& lhs, T
const& rhs)
noexcept
2065 return lhs & broadcast(rhs);
2068 [[nodiscard]]
friend constexpr numeric_array operator&(T
const& lhs, numeric_array
const& rhs)
noexcept
2070 return broadcast(lhs) & rhs;
2073 [[nodiscard]]
friend constexpr numeric_array operator^(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
2075 if (not std::is_constant_evaluated()) {
2076#if defined(HI_HAS_AVX2)
2077 if constexpr (is_i64x4 or is_u64x4 or is_i32x8 or is_u32x8 or is_i16x8 or is_u16x8 or is_i8x32 or is_u8x32) {
2078 return numeric_array{_mm256_xor_si256(lhs.reg(), rhs.reg())};
2081#if defined(HI_HAS_AVX)
2082 if constexpr (is_f64x4) {
2083 return numeric_array{_mm256_xor_pd(lhs.reg(), rhs.reg())};
2084 }
else if constexpr (is_f32x8) {
2085 return numeric_array{_mm256_xor_ps(lhs.reg(), rhs.reg())};
2086 }
else if constexpr (is_i64x4 or is_u64x4 or is_i32x8 or is_u32x8 or is_i16x8 or is_u16x8 or is_i8x32 or is_u8x32) {
2087 return numeric_array{
2088 _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(lhs.reg()), _mm256_castsi256_ps(rhs.reg())))};
2091#if defined(HI_HAS_SSE2)
2092 if constexpr (is_f64x2) {
2093 return numeric_array{_mm_xor_pd(lhs.reg(), rhs.reg())};
2094 }
else if constexpr (is_i64x2 or is_u64x2 or is_i32x4 or is_u32x4 or is_i16x8 or is_u16x8 or is_i8x16 or is_i8x16) {
2095 return numeric_array{_mm_xor_si128(lhs.reg(), rhs.reg())};
2098#if defined(HI_HAS_SSE)
2099 if constexpr (is_f64x2) {
2100 return numeric_array{_mm_castps_pd(_mm_xor_ps(_mm_castps_ps(lhs.reg()), _mm_castps_ps(rhs.reg())))};
2102 }
else if constexpr (is_f32x4) {
2103 return numeric_array{_mm_xor_ps(lhs.reg(), rhs.reg())};
2105 }
else if constexpr (is_i64x2 or is_u64x2 or is_i32x4 or is_u32x4 or is_i16x8 or is_u16x8 or is_i8x16 or is_i8x16) {
2106 return numeric_array{_mm_castps_si128(_mm_xor_ps(_mm_castsi128_ps(lhs.reg()), _mm_castsi128_ps(rhs.reg())))};
2111 auto r = numeric_array{};
2113 r.v[i] = lhs.v[i] ^ rhs.v[i];
2118 [[nodiscard]]
friend constexpr numeric_array operator^(numeric_array
const& lhs, T
const& rhs)
noexcept
2120 return lhs ^ broadcast(rhs);
2123 [[nodiscard]]
friend constexpr numeric_array operator^(T
const& lhs, numeric_array
const& rhs)
noexcept
2125 return broadcast(lhs) ^ rhs;
2128 [[nodiscard]]
friend constexpr numeric_array operator+(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
2130 if (not std::is_constant_evaluated()) {
2131#if defined(HI_HAS_AVX2)
2132 if constexpr (is_i64x4 or is_u64x4) {
2133 return numeric_array{_mm256_add_epi64(lhs.reg(), rhs.reg())};
2134 }
else if constexpr (is_i32x8 or is_u32x8) {
2135 return numeric_array{_mm256_add_epi32(lhs.reg(), rhs.reg())};
2136 }
else if constexpr (is_i16x16 or is_u16x16) {
2137 return numeric_array{_mm256_add_epi16(lhs.reg(), rhs.reg())};
2138 }
else if constexpr (is_i8x32 or is_u8x32) {
2139 return numeric_array{_mm256_add_epi8(lhs.reg(), rhs.reg())};
2142#if defined(HI_HAS_AVX)
2143 if constexpr (is_f64x4) {
2144 return numeric_array{_mm256_add_pd(lhs.reg(), rhs.reg())};
2145 }
else if constexpr (is_f32x8) {
2146 return numeric_array{_mm256_add_ps(lhs.reg(), rhs.reg())};
2149#if defined(HI_HAS_SSE2)
2150 if constexpr (is_f64x2) {
2151 return numeric_array{_mm_add_pd(lhs.reg(), rhs.reg())};
2152 }
else if constexpr (is_i64x2 or is_u64x2) {
2153 return numeric_array{_mm_add_epi64(lhs.reg(), rhs.reg())};
2154 }
else if constexpr (is_i32x4 or is_u32x4) {
2155 return numeric_array{_mm_add_epi32(lhs.reg(), rhs.reg())};
2156 }
else if constexpr (is_i16x8 or is_u16x8) {
2157 return numeric_array{_mm_add_epi16(lhs.reg(), rhs.reg())};
2158 }
else if constexpr (is_i8x16 or is_u8x16) {
2159 return numeric_array{_mm_add_epi8(lhs.reg(), rhs.reg())};
2162#if defined(HI_HAS_SSE)
2163 if constexpr (is_f32x4) {
2164 return numeric_array{_mm_add_ps(lhs.reg(), rhs.reg())};
2169 auto r = numeric_array{};
2171 r.v[i] = lhs.v[i] + rhs.v[i];
2176 [[nodiscard]]
friend constexpr numeric_array operator+(numeric_array
const& lhs, T
const& rhs)
noexcept
2178 return lhs + broadcast(rhs);
2181 [[nodiscard]]
friend constexpr numeric_array operator+(T
const& lhs, numeric_array
const& rhs)
noexcept
2183 return broadcast(lhs) + rhs;
2186 [[nodiscard]]
friend constexpr numeric_array operator-(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
2188 if (not std::is_constant_evaluated()) {
2189#if defined(HI_HAS_AVX2)
2190 if constexpr (is_i64x4 or is_u64x4) {
2191 return numeric_array{_mm256_sub_epi64(lhs.reg(), rhs.reg())};
2192 }
else if constexpr (is_i32x8 or is_u32x8) {
2193 return numeric_array{_mm256_sub_epi32(lhs.reg(), rhs.reg())};
2194 }
else if constexpr (is_i16x16 or is_u16x16) {
2195 return numeric_array{_mm256_sub_epi16(lhs.reg(), rhs.reg())};
2196 }
else if constexpr (is_i8x32 or is_u8x32) {
2197 return numeric_array{_mm256_sub_epi8(lhs.reg(), rhs.reg())};
2200#if defined(HI_HAS_AVX)
2201 if constexpr (is_f64x4) {
2202 return numeric_array{_mm256_sub_pd(lhs.reg(), rhs.reg())};
2203 }
else if constexpr (is_f32x8) {
2204 return numeric_array{_mm256_sub_ps(lhs.reg(), rhs.reg())};
2207#if defined(HI_HAS_SSE2)
2208 if constexpr (is_f64x2) {
2209 return numeric_array{_mm_sub_pd(lhs.reg(), rhs.reg())};
2210 }
else if constexpr (is_i64x2 or is_u64x2) {
2211 return numeric_array{_mm_sub_epi64(lhs.reg(), rhs.reg())};
2212 }
else if constexpr (is_i32x4 or is_u32x4) {
2213 return numeric_array{_mm_sub_epi32(lhs.reg(), rhs.reg())};
2214 }
else if constexpr (is_i16x8 or is_u16x8) {
2215 return numeric_array{_mm_sub_epi16(lhs.reg(), rhs.reg())};
2216 }
else if constexpr (is_i8x16 or is_u8x16) {
2217 return numeric_array{_mm_sub_epi8(lhs.reg(), rhs.reg())};
2220#if defined(HI_HAS_SSE)
2221 if constexpr (is_f32x4) {
2222 return numeric_array{_mm_sub_ps(lhs.reg(), rhs.reg())};
2227 auto r = numeric_array{};
2229 r.v[i] = lhs.v[i] - rhs.v[i];
2234 [[nodiscard]]
friend constexpr numeric_array operator-(numeric_array
const& lhs, T
const& rhs)
noexcept
2236 return lhs - broadcast(rhs);
2239 [[nodiscard]]
friend constexpr numeric_array operator-(T
const& lhs, numeric_array
const& rhs)
noexcept
2241 return broadcast(lhs) - rhs;
2244 [[nodiscard]]
friend constexpr numeric_array operator*(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
2246 if (not std::is_constant_evaluated()) {
2247#if defined(HI_HAS_AVX2)
2248 if constexpr (is_i32x8) {
2249 return numeric_array{_mm256_mul_epi32(lhs.reg(), rhs.reg())};
2250 }
else if constexpr (is_u32x8) {
2251 return numeric_array{_mm256_mul_epu32(lhs.reg(), rhs.reg())};
2254#if defined(HI_HAS_AVX)
2255 if constexpr (is_f64x4) {
2256 return numeric_array{_mm256_mul_pd(lhs.reg(), rhs.reg())};
2257 }
else if constexpr (is_f32x8) {
2258 return numeric_array{_mm256_mul_ps(lhs.reg(), rhs.reg())};
2261#if defined(HI_HAS_SSE4_1)
2262 if constexpr (is_i32x4) {
2263 return numeric_array{_mm_mul_epi32(lhs.reg(), rhs.reg())};
2264 }
else if constexpr (is_f16x4) {
2265 return numeric_array{numeric_array<float, 4>{lhs} * numeric_array<float, 4>{rhs}};
2268#if defined(HI_HAS_SSE2)
2269 if constexpr (is_f64x2) {
2270 return numeric_array{_mm_mul_pd(lhs.reg(), rhs.reg())};
2273#if defined(HI_HAS_SSE)
2274 if constexpr (is_f32x4) {
2275 return numeric_array{_mm_mul_ps(lhs.reg(), rhs.reg())};
2280 auto r = numeric_array{};
2282 r.v[i] = lhs.v[i] * rhs.v[i];
2287 [[nodiscard]]
friend constexpr numeric_array operator*(numeric_array
const& lhs, T
const& rhs)
noexcept
2289 return lhs * broadcast(rhs);
2292 [[nodiscard]]
friend constexpr numeric_array operator*(T
const& lhs, numeric_array
const& rhs)
noexcept
2294 return broadcast(lhs) * rhs;
2297 [[nodiscard]]
friend constexpr numeric_array operator/(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
2299 if (not std::is_constant_evaluated()) {
2300#if defined(HI_HAS_AVX)
2301 if constexpr (is_f64x4) {
2302 return numeric_array{_mm256_div_pd(lhs.reg(), rhs.reg())};
2303 }
else if constexpr (is_f32x8) {
2304 return numeric_array{_mm256_div_ps(lhs.reg(), rhs.reg())};
2307#if defined(HI_HAS_SSE2)
2308 if constexpr (is_f64x2) {
2309 return numeric_array{_mm_div_pd(lhs.reg(), rhs.reg())};
2312#if defined(HI_HAS_SSE)
2313 if constexpr (is_f32x4) {
2314 return numeric_array{_mm_div_ps(lhs.reg(), rhs.reg())};
2319 auto r = numeric_array{};
2321 r.v[i] = lhs.v[i] / rhs.v[i];
2326 [[nodiscard]]
friend constexpr numeric_array operator/(numeric_array
const& lhs, T
const& rhs)
noexcept
2328 return lhs / broadcast(rhs);
2331 [[nodiscard]]
friend constexpr numeric_array operator/(T
const& lhs, numeric_array
const& rhs)
noexcept
2333 return broadcast(lhs) / rhs;
2336 [[nodiscard]]
friend constexpr numeric_array operator%(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
2339 return lhs - (div_result * rhs);
2342 [[nodiscard]]
friend constexpr numeric_array operator%(numeric_array
const& lhs, T
const& rhs)
noexcept
2344 return lhs % broadcast(rhs);
2347 [[nodiscard]]
friend constexpr numeric_array operator%(T
const& lhs, numeric_array
const& rhs)
noexcept
2349 return broadcast(lhs) % rhs;
2352 [[nodiscard]]
friend constexpr numeric_array
min(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
2354 if (not std::is_constant_evaluated()) {
2355#if defined(HI_HAS_AVX2)
2356 if constexpr (is_i32x8) {
2357 return numeric_array{_mm256_min_epi32(lhs.reg(), rhs.reg())};
2358 }
else if constexpr (is_u32x8) {
2359 return numeric_array{_mm256_min_epu32(lhs.reg(), rhs.reg())};
2360 }
else if constexpr (is_i16x16) {
2361 return numeric_array{_mm256_min_epi16(lhs.reg(), rhs.reg())};
2362 }
else if constexpr (is_u16x16) {
2363 return numeric_array{_mm256_min_epu16(lhs.reg(), rhs.reg())};
2364 }
else if constexpr (is_i8x32) {
2365 return numeric_array{_mm256_min_epi8(lhs.reg(), rhs.reg())};
2366 }
else if constexpr (is_u8x32) {
2367 return numeric_array{_mm256_min_epu8(lhs.reg(), rhs.reg())};
2370#if defined(HI_HAS_AVX)
2371 if constexpr (is_f64x4) {
2372 return numeric_array{_mm256_min_pd(lhs.reg(), rhs.reg())};
2373 }
else if constexpr (is_f32x8) {
2374 return numeric_array{_mm256_min_ps(lhs.reg(), rhs.reg())};
2377#if defined(HI_HAS_SSE4_1)
2378 if constexpr (is_i32x4) {
2379 return numeric_array{_mm_min_epi32(lhs.reg(), rhs.reg())};
2380 }
else if constexpr (is_u32x4) {
2381 return numeric_array{_mm_min_epu32(lhs.reg(), rhs.reg())};
2382 }
else if constexpr (is_u16x8) {
2383 return numeric_array{_mm_min_epu16(lhs.reg(), rhs.reg())};
2384 }
else if constexpr (is_i8x16) {
2385 return numeric_array{_mm_min_epi8(lhs.reg(), rhs.reg())};
2388#if defined(HI_HAS_SSE2)
2389 if constexpr (is_f64x2) {
2390 return numeric_array{_mm_min_pd(lhs.reg(), rhs.reg())};
2391 }
else if constexpr (is_i16x8) {
2392 return numeric_array{_mm_min_epi16(lhs.reg(), rhs.reg())};
2393 }
else if constexpr (is_u8x16) {
2394 return numeric_array{_mm_min_epu8(lhs.reg(), rhs.reg())};
2397#if defined(HI_HAS_SSE)
2398 if constexpr (is_f32x4) {
2399 return numeric_array{_mm_min_ps(lhs.reg(), rhs.reg())};
2404 auto r = numeric_array{};
2406 r.v[i] =
std::min(lhs.v[i], rhs.v[i]);
2411 [[nodiscard]]
friend constexpr numeric_array
max(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
2413 if (not std::is_constant_evaluated()) {
2414#if defined(HI_HAS_AVX2)
2415 if constexpr (is_i32x8) {
2416 return numeric_array{_mm256_max_epi32(lhs.reg(), rhs.reg())};
2417 }
else if constexpr (is_u32x8) {
2418 return numeric_array{_mm256_max_epu32(lhs.reg(), rhs.reg())};
2419 }
else if constexpr (is_i16x16) {
2420 return numeric_array{_mm256_max_epi16(lhs.reg(), rhs.reg())};
2421 }
else if constexpr (is_u16x16) {
2422 return numeric_array{_mm256_max_epu16(lhs.reg(), rhs.reg())};
2423 }
else if constexpr (is_i8x32) {
2424 return numeric_array{_mm256_max_epi8(lhs.reg(), rhs.reg())};
2425 }
else if constexpr (is_u8x32) {
2426 return numeric_array{_mm256_max_epu8(lhs.reg(), rhs.reg())};
2429#if defined(HI_HAS_AVX)
2430 if constexpr (is_f64x4) {
2431 return numeric_array{_mm256_max_pd(lhs.reg(), rhs.reg())};
2432 }
else if constexpr (is_f32x8) {
2433 return numeric_array{_mm256_max_ps(lhs.reg(), rhs.reg())};
2436#if defined(HI_HAS_SSE4_1)
2437 if constexpr (is_i32x4) {
2438 return numeric_array{_mm_max_epi32(lhs.reg(), rhs.reg())};
2439 }
else if constexpr (is_u32x4) {
2440 return numeric_array{_mm_max_epu32(lhs.reg(), rhs.reg())};
2441 }
else if constexpr (is_u16x8) {
2442 return numeric_array{_mm_max_epu16(lhs.reg(), rhs.reg())};
2443 }
else if constexpr (is_i8x16) {
2444 return numeric_array{_mm_max_epi8(lhs.reg(), rhs.reg())};
2447#if defined(HI_HAS_SSE2)
2448 if constexpr (is_f64x2) {
2449 return numeric_array{_mm_max_pd(lhs.reg(), rhs.reg())};
2450 }
else if constexpr (is_i16x8) {
2451 return numeric_array{_mm_max_epi16(lhs.reg(), rhs.reg())};
2452 }
else if constexpr (is_u8x16) {
2453 return numeric_array{_mm_max_epu8(lhs.reg(), rhs.reg())};
2456#if defined(HI_HAS_SSE)
2457 if constexpr (is_f32x4) {
2458 return numeric_array{_mm_max_ps(lhs.reg(), rhs.reg())};
2463 auto r = numeric_array{};
2465 r.v[i] =
std::max(lhs.v[i], rhs.v[i]);
2470 [[nodiscard]]
friend constexpr numeric_array
2471 clamp(numeric_array
const& lhs, numeric_array
const& low, numeric_array
const& high)
noexcept
2473 return min(
max(lhs, low), high);
2476 [[nodiscard]]
friend constexpr numeric_array hadd(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
2478 if (not std::is_constant_evaluated()) {
2479#if defined(HI_HAS_AVX2)
2480 if constexpr (is_i32x8 or is_u32x8) {
2481 return numeric_array{_mm256_hadd_epi32(lhs.reg(), rhs.reg())};
2482 }
else if constexpr (is_i16x16 or is_u16x16) {
2483 return numeric_array{_mm256_hadd_epi16(lhs.reg(), rhs.reg())};
2486#if defined(HI_HAS_AVX)
2487 if constexpr (is_f64x4) {
2488 return numeric_array{_mm256_hadd_pd(lhs.reg(), rhs.reg())};
2489 }
else if constexpr (is_f32x8) {
2490 return numeric_array{_mm256_hadd_ps(lhs.reg(), rhs.reg())};
2493#if defined(HI_HAS_SSSE3)
2494 if constexpr (is_i32x4 or is_u32x4) {
2495 return numeric_array{_mm_hadd_epi32(lhs.reg(), rhs.reg())};
2496 }
else if constexpr (is_i16x8 or is_u16x8) {
2497 return numeric_array{_mm_hadd_epi16(lhs.reg(), rhs.reg())};
2500#if defined(HI_HAS_SSE3)
2501 if constexpr (is_f64x2) {
2502 return numeric_array{_mm_hadd_pd(lhs.reg(), rhs.reg())};
2503 }
else if constexpr (is_f32x4) {
2504 return numeric_array{_mm_hadd_ps(lhs.reg(), rhs.reg())};
2509 hi_axiom(N % 2 == 0);
2511 auto r = numeric_array{};
2515 while (src_i != N) {
2516 auto tmp = lhs[src_i++];
2517 tmp += lhs[src_i++];
2522 while (src_i != N) {
2523 auto tmp = rhs[src_i++];
2524 tmp += rhs[src_i++];
2530 [[nodiscard]]
friend constexpr numeric_array hsub(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
2532 if (not std::is_constant_evaluated()) {
2533#if defined(HI_HAS_AVX2)
2534 if constexpr (is_i32x8 or is_u32x8) {
2535 return numeric_array{_mm256_hsub_epi32(lhs.reg(), rhs.reg())};
2536 }
else if constexpr (is_i16x16 or is_u16x16) {
2537 return numeric_array{_mm256_hsub_epi16(lhs.reg(), rhs.reg())};
2540#if defined(HI_HAS_AVX)
2541 if constexpr (is_f64x4) {
2542 return numeric_array{_mm256_hsub_pd(lhs.reg(), rhs.reg())};
2543 }
else if constexpr (is_f32x8) {
2544 return numeric_array{_mm256_hsub_ps(lhs.reg(), rhs.reg())};
2547#if defined(HI_HAS_SSSE3)
2548 if constexpr (is_i32x4 or is_u32x4) {
2549 return numeric_array{_mm_hsub_epi32(lhs.reg(), rhs.reg())};
2550 }
else if constexpr (is_i16x8 or is_u16x8) {
2551 return numeric_array{_mm_hsub_epi16(lhs.reg(), rhs.reg())};
2554#if defined(HI_HAS_SSE3)
2555 if constexpr (is_f64x2) {
2556 return numeric_array{_mm_hsub_pd(lhs.reg(), rhs.reg())};
2557 }
else if constexpr (is_f32x4) {
2558 return numeric_array{_mm_hsub_ps(lhs.reg(), rhs.reg())};
2563 hi_axiom(N % 2 == 0);
2565 auto r = numeric_array{};
2569 while (src_i != N) {
2570 auto tmp = lhs[src_i++];
2571 tmp -= lhs[src_i++];
2576 while (src_i != N) {
2577 auto tmp = rhs[src_i++];
2578 tmp -= rhs[src_i++];
2588 template<std::
size_t Mask>
2592 return lhs + neg<Mask ^ not_mask>(rhs);
2599 hi_axiom(rhs.z() == 0.0f && rhs.is_vector());
2607 return normalize<0b0011>(cross_2D(rhs));
2615 hilet tmp1 = rhs.yxwz();
2616 hilet tmp2 = lhs * tmp1;
2617 hilet tmp3 = hsub(tmp2, tmp2);
2618 return get<0>(tmp3);
2628 hilet a_left = lhs.yzxw();
2629 hilet b_left = rhs.zxyw();
2630 hilet left = a_left * b_left;
2632 hilet a_right = lhs.zxyw();
2633 hilet b_right = rhs.yzxw();
2634 hilet right = a_right * b_right;
2635 return left - right;
2638 [[nodiscard]]
static constexpr numeric_array byte_srl_shuffle_indices(
unsigned int rhs)
requires(is_i8x16)
2640 static_assert(std::endian::native == std::endian::little);
2642 auto r = numeric_array{};
2643 for (
auto i = 0; i != 16; ++i) {
2644 if ((i + rhs) < 16) {
2645 r[i] = narrow_cast<int8_t>(i + rhs);
2654 [[nodiscard]]
static constexpr numeric_array byte_sll_shuffle_indices(
unsigned int rhs)
requires(is_i8x16)
2656 static_assert(std::endian::native == std::endian::little);
2658 auto r = numeric_array{};
2659 for (
auto i = 0; i != 16; ++i) {
2660 if ((i - rhs) >= 0) {
2661 r[i] = narrow_cast<int8_t>(i - rhs);
// Element permute of lhs by index vector rhs.
// NOTE(review): the function declarator (original lines ~2666-2672) and the
// scalar-loop header/footer are missing from this extraction.
2673 requires(std::is_integral_v<value_type>)
2675 if (!std::is_constant_evaluated()) {
2676#if defined(HI_HAS_SSSE3)
// SSSE3 fast path: one byte-shuffle instruction.
2677 if constexpr (is_i8x16 or is_u8x16) {
2678 return numeric_array{_mm_shuffle_epi8(lhs.reg(), rhs.reg())};
// Scalar fallback: the low nibble of each index selects the source lane.
// NOTE(review): unlike _mm_shuffle_epi8 this does not zero lanes whose index
// has the high bit set — confirm callers only pass indices 0..15.
2686 r[i] = lhs[rhs[i] & 0xf];
// Midpoint of two points: both operands must satisfy is_point() (presumably
// w == 1); returns their component-wise average.
// NOTE(review): the enclosing function's signature is missing from this
// extraction.
2699 hi_axiom(p1.is_point());
2700 hi_axiom(p2.is_point());
2701 return (p1 + p2) * 0.5f;
// Reflect point p through the anchor point; the result lies as far beyond
// anchor as p lies before it.
// NOTE(review): the enclosing function's signature is missing from this
// extraction.
2708 hi_axiom(p.is_point());
2709 hi_axiom(anchor.is_point());
2710 return anchor - (p - anchor);
// Suppress MSVC code-analysis warning 26494 (variable used before init):
// `tmp` below is written by the transpose macro before being read.
// NOTE(review): the declaration of `tmp` (original line ~2725) is missing
// from this extraction — confirm.
2716 hi_warning_ignore_msvc(26494);
// Transpose a square matrix supplied as one numeric_array per column.
// NOTE(review): the function declarator (original lines ~2718-2719) is
// missing from this extraction.
2717 template<
typename... Columns>
2720 static_assert(
sizeof...(Columns) == N,
"Can only transpose square matrices");
2722 if (not std::is_constant_evaluated()) {
2723#if defined(HI_HAS_SSE)
// 4x4 float fast path: _MM_TRANSPOSE4_PS transposes four __m128 registers
// in place.
2724 if constexpr (is_f32x4 and
sizeof...(Columns) == 4) {
2726 _MM_TRANSPOSE4_PS(std::get<0>(tmp), std::get<1>(tmp), std::get<2>(tmp), std::get<3>(tmp));
2728 numeric_array{get<0>(tmp)},
2729 numeric_array{get<1>(tmp)},
2730 numeric_array{get<2>(tmp)},
2731 numeric_array{get<3>(tmp)}};
// Generic fallback: a templated lambda over an index sequence copies the
// columns element-by-element into r (parts of the body are missing here).
2737 auto f = [&r, &columns... ]<
std::size_t... Ints>(std::index_sequence<Ints...>)
2739 auto tf = [&r](
auto i,
auto v) {
// Fold expression forces evaluation of tf for every column; the summed
// result is discarded.
2745 static_cast<void>((tf(Ints, columns) + ...));
2747 f(std::make_index_sequence<
sizeof...(columns)>{});
2752 [[nodiscard]]
constexpr friend numeric_array composit(numeric_array
const& under, numeric_array
const& over)
noexcept
2753 requires(N == 4 && std::is_floating_point_v<T>)
2755 if (over.is_transparent()) {
2758 if (over.is_opaque()) {
2762 hilet over_alpha = over.wwww();
2763 hilet under_alpha = under.wwww();
2765 hilet over_color = over.xyz1();
2766 hilet under_color = under.xyz1();
2768 hilet output_color = over_color * over_alpha + under_color * under_alpha * (T{1} - over_alpha);
2770 return output_color / output_color.www1();
// Composit overload for a non-float specialization (presumably a float16
// color type): widens both operands to numeric_array<float, 4>, composits in
// float, and narrows the result back to this type.
// NOTE(review): the requires-clause (original lines ~2774-2775) is missing
// from this extraction — confirm which T/N this overload is constrained to.
2773 [[nodiscard]]
constexpr friend numeric_array composit(numeric_array
const& under, numeric_array
const& over)
noexcept
2776 return numeric_array{composit(
static_cast<numeric_array<float, 4>
>(under),
static_cast<numeric_array<float, 4>
>(over))};
// Fragment of a string-formatting helper: appends each element of rhs,
// formatted with std::format, to the result string.
// NOTE(review): the enclosing function was lost in extraction.
2788 r += std::format(
"{}", rhs[i]);
// Insert: returns a copy of lhs with element ToElement replaced by element
// FromElement of rhs.
// NOTE(review): extraction artifact — original-file line numbers are fused
// into the text, and the function declarator (original lines ~2804-2807) plus
// several closing braces/#endif lines are missing; recover from the original
// file before editing.
2803 template<std::
size_t FromElement, std::
size_t ToElement>
2808 if (!std::is_constant_evaluated()) {
2809#if defined(HI_HAS_SSE4_1)
2810 if constexpr (is_f32x4) {
// _mm_insert_ps immediate: bits 7-6 select the source element of rhs,
// bits 5-4 the destination element, bits 3-0 are the zero mask (none here).
2811 constexpr uint8_t insert_mask =
static_cast<uint8_t
>((FromElement << 6) | (ToElement << 4));
2812 return numeric_array{_mm_insert_ps(lhs.reg(), rhs.reg(), insert_mask)};
2814 }
else if constexpr (is_i32x4 or is_u32x4) {
// 32-bit integer lanes are punned to float so the same insert_ps is usable;
// the casts preserve bit patterns.
2815 constexpr uint8_t insert_mask =
static_cast<uint8_t
>((FromElement << 6) | (ToElement << 4));
2817 _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(lhs.reg()), _mm_castsi128_ps(rhs.reg()), insert_mask))};
2820#if defined(HI_HAS_SSE2)
// f64x2: enumerate all four (FromElement, ToElement) cases with shuffles.
2821 if constexpr (is_f64x2) {
2822 if constexpr (FromElement == 0 and ToElement == 0) {
2823 return numeric_array{_mm_shuffle_pd(rhs.reg(), lhs.reg(), 0b10)};
2824 }
else if constexpr (FromElement == 1 and ToElement == 0) {
2825 return numeric_array{_mm_shuffle_pd(rhs.reg(), lhs.reg(), 0b11)};
2826 }
else if constexpr (FromElement == 0 and ToElement == 1) {
2827 return numeric_array{_mm_shuffle_pd(lhs.reg(), rhs.reg(), 0b00)};
2829 return numeric_array{_mm_shuffle_pd(lhs.reg(), rhs.reg(), 0b10)};
2832 }
else if constexpr (is_i64x2 or is_u64x2) {
// 64-bit integer lanes: pun to double, reuse the f64x2 shuffles, pun back.
2833 hilet lhs_ = _mm_castsi128_pd(lhs.reg());
2834 hilet rhs_ = _mm_castsi128_pd(rhs.reg());
2836 if constexpr (FromElement == 0 and ToElement == 0) {
2837 return numeric_array{_mm_castpd_si128(_mm_shuffle_pd(rhs_, lhs_, 0b10))};
2838 }
else if constexpr (FromElement == 1 and ToElement == 0) {
2839 return numeric_array{_mm_castpd_si128(_mm_shuffle_pd(rhs_, lhs_, 0b11))};
2840 }
else if constexpr (FromElement == 0 and ToElement == 1) {
2841 return numeric_array{_mm_castpd_si128(_mm_shuffle_pd(lhs_, rhs_, 0b00))};
2843 return numeric_array{_mm_castpd_si128(_mm_shuffle_pd(lhs_, rhs_, 0b10))};
// Scalar fallback: copy lhs, overwriting only the destination element.
2850 r[i] = (i == ToElement) ? rhs[FromElement] : lhs[i];
// Compile-time swizzle: Elements... name, per result lane, a source element
// index or the sentinels get_zero/get_one that produce constant 0/1 lanes.
// NOTE(review): the function declarator (original lines ~2860-2865) and the
// return statements of the f64x2/f32x4 branches (original lines 2871, 2873)
// are missing from this extraction.
2866 static_assert(
sizeof...(Elements) <= N);
2868 if (!std::is_constant_evaluated()) {
2869#if defined(HI_HAS_AVX)
2870 if constexpr (is_f64x2) {
2872 }
else if constexpr (is_f32x4) {
2874 }
else if constexpr (is_i64x2 or is_u64x2) {
2875 return numeric_array{_mm_swizzle_epi64<Elements...>(reg())};
2876 }
else if constexpr (is_i32x4 or is_u32x4) {
2877 return numeric_array{_mm_swizzle_epi32<Elements...>(reg())};
// Scalar fallback: swizzle_detail fills each destination lane recursively.
2883 swizzle_detail<0, Elements...>(r);
// The SWIZZLE macro family generates named swizzle accessors (xyzw-style,
// with '0'/'1' characters producing constant zero/one lanes): SWIZZLE
// defines one accessor forwarding to swizzle<...>(), and the GENn macros
// expand combinatorially to every 2-, 3- and 4-letter name for N == 2, 3, 4.
// NOTE(review): extraction artifact — the braces of the SWIZZLE body
// (original lines ~2889, 2891) and `#undef SWIZZLE` (original ~2954-2955)
// are missing; recover them from the original file before editing.
2887#define SWIZZLE(swizzle_name, D, ...) \
2888 [[nodiscard]] constexpr numeric_array swizzle_name() const noexcept requires(D == N) \
2890 return swizzle<__VA_ARGS__>(); \
2893#define SWIZZLE_4D_GEN1(name, ...) \
2894 SWIZZLE(name##0, 4, __VA_ARGS__, get_zero) \
2895 SWIZZLE(name##1, 4, __VA_ARGS__, get_one) \
2896 SWIZZLE(name##x, 4, __VA_ARGS__, 0) \
2897 SWIZZLE(name##y, 4, __VA_ARGS__, 1) \
2898 SWIZZLE(name##z, 4, __VA_ARGS__, 2) \
2899 SWIZZLE(name##w, 4, __VA_ARGS__, 3)
2901#define SWIZZLE_4D_GEN2(name, ...) \
2902 SWIZZLE_4D_GEN1(name##0, __VA_ARGS__, get_zero) \
2903 SWIZZLE_4D_GEN1(name##1, __VA_ARGS__, get_one) \
2904 SWIZZLE_4D_GEN1(name##x, __VA_ARGS__, 0) \
2905 SWIZZLE_4D_GEN1(name##y, __VA_ARGS__, 1) \
2906 SWIZZLE_4D_GEN1(name##z, __VA_ARGS__, 2) \
2907 SWIZZLE_4D_GEN1(name##w, __VA_ARGS__, 3)
2909#define SWIZZLE_4D_GEN3(name, ...) \
2910 SWIZZLE_4D_GEN2(name##0, __VA_ARGS__, get_zero) \
2911 SWIZZLE_4D_GEN2(name##1, __VA_ARGS__, get_one) \
2912 SWIZZLE_4D_GEN2(name##x, __VA_ARGS__, 0) \
2913 SWIZZLE_4D_GEN2(name##y, __VA_ARGS__, 1) \
2914 SWIZZLE_4D_GEN2(name##z, __VA_ARGS__, 2) \
2915 SWIZZLE_4D_GEN2(name##w, __VA_ARGS__, 3)
// Instantiate every 4D swizzle accessor (e.g. xyzw(), xy01(), wwww()).
2917 SWIZZLE_4D_GEN3(_0, get_zero)
2918 SWIZZLE_4D_GEN3(_1, get_one)
2919 SWIZZLE_4D_GEN3(x, 0)
2920 SWIZZLE_4D_GEN3(y, 1)
2921 SWIZZLE_4D_GEN3(z, 2)
2922 SWIZZLE_4D_GEN3(w, 3)
2924#define SWIZZLE_3D_GEN1(name, ...) \
2925 SWIZZLE(name##0, 3, __VA_ARGS__, get_zero) \
2926 SWIZZLE(name##1, 3, __VA_ARGS__, get_one) \
2927 SWIZZLE(name##x, 3, __VA_ARGS__, 0) \
2928 SWIZZLE(name##y, 3, __VA_ARGS__, 1) \
2929 SWIZZLE(name##z, 3, __VA_ARGS__, 2)
2931#define SWIZZLE_3D_GEN2(name, ...) \
2932 SWIZZLE_3D_GEN1(name##0, __VA_ARGS__, get_zero) \
2933 SWIZZLE_3D_GEN1(name##1, __VA_ARGS__, get_one) \
2934 SWIZZLE_3D_GEN1(name##x, __VA_ARGS__, 0) \
2935 SWIZZLE_3D_GEN1(name##y, __VA_ARGS__, 1) \
2936 SWIZZLE_3D_GEN1(name##z, __VA_ARGS__, 2)
// Instantiate every 3D swizzle accessor.
2938 SWIZZLE_3D_GEN2(_0, get_zero)
2939 SWIZZLE_3D_GEN2(_1, get_one)
2940 SWIZZLE_3D_GEN2(x, 0)
2941 SWIZZLE_3D_GEN2(y, 1)
2942 SWIZZLE_3D_GEN2(z, 2)
2944#define SWIZZLE_2D_GEN1(name, ...) \
2945 SWIZZLE(name##0, 2, __VA_ARGS__, get_zero) \
2946 SWIZZLE(name##1, 2, __VA_ARGS__, get_one) \
2947 SWIZZLE(name##x, 2, __VA_ARGS__, 0) \
2948 SWIZZLE(name##y, 2, __VA_ARGS__, 1)
// Instantiate every 2D swizzle accessor.
2950 SWIZZLE_2D_GEN1(_0, get_zero)
2951 SWIZZLE_2D_GEN1(_1, get_one)
2952 SWIZZLE_2D_GEN1(x, 0)
2953 SWIZZLE_2D_GEN1(y, 1)
// The helper macros are local to this class; remove them after expansion.
2956#undef SWIZZLE_4D_GEN1
2957#undef SWIZZLE_4D_GEN2
2958#undef SWIZZLE_4D_GEN3
2959#undef SWIZZLE_3D_GEN1
2960#undef SWIZZLE_3D_GEN2
2961#undef SWIZZLE_2D_GEN1
2964 constexpr void swizzle_detail(
numeric_array& r)
const noexcept
2966 static_assert(I < narrow_cast<ssize_t>(N));
2967 static_assert(FirstElement >= -2 && FirstElement < narrow_cast<ssize_t>(N),
"Index out of bounds");
2969 get<I>(r) = get<FirstElement>(*
this);
2970 if constexpr (
sizeof...(RestElements) != 0) {
2971 swizzle_detail<I + 1, RestElements...>(r);