61 using value_type =
typename container_type::value_type;
62 using size_type =
typename container_type::size_type;
63 using difference_type =
typename container_type::difference_type;
64 using reference =
typename container_type::reference;
65 using const_reference =
typename container_type::const_reference;
66 using pointer =
typename container_type::pointer;
67 using const_pointer =
typename container_type::const_pointer;
68 using iterator =
typename container_type::iterator;
69 using const_iterator =
typename container_type::const_iterator;
71 constexpr static bool is_i8x1 = std::is_same_v<T, int8_t> && N == 1;
72 constexpr static bool is_i8x2 = std::is_same_v<T, int8_t> && N == 2;
73 constexpr static bool is_i8x4 = std::is_same_v<T, int8_t> && N == 4;
74 constexpr static bool is_i8x8 = std::is_same_v<T, int8_t> && N == 8;
75 constexpr static bool is_i8x16 = std::is_same_v<T, int8_t> && N == 16;
76 constexpr static bool is_i8x32 = std::is_same_v<T, int8_t> && N == 32;
77 constexpr static bool is_i8x64 = std::is_same_v<T, int8_t> && N == 64;
78 constexpr static bool is_u8x1 = std::is_same_v<T, uint8_t> && N == 1;
79 constexpr static bool is_u8x2 = std::is_same_v<T, uint8_t> && N == 2;
80 constexpr static bool is_u8x4 = std::is_same_v<T, uint8_t> && N == 4;
81 constexpr static bool is_u8x8 = std::is_same_v<T, uint8_t> && N == 8;
82 constexpr static bool is_u8x16 = std::is_same_v<T, uint8_t> && N == 16;
83 constexpr static bool is_u8x32 = std::is_same_v<T, uint8_t> && N == 32;
84 constexpr static bool is_u8x64 = std::is_same_v<T, uint8_t> && N == 64;
86 constexpr static bool is_i16x1 = std::is_same_v<T, int16_t> && N == 1;
87 constexpr static bool is_i16x2 = std::is_same_v<T, int16_t> && N == 2;
88 constexpr static bool is_i16x4 = std::is_same_v<T, int16_t> && N == 4;
89 constexpr static bool is_i16x8 = std::is_same_v<T, int16_t> && N == 8;
90 constexpr static bool is_i16x16 = std::is_same_v<T, int16_t> && N == 16;
91 constexpr static bool is_i16x32 = std::is_same_v<T, int16_t> && N == 32;
92 constexpr static bool is_u16x1 = std::is_same_v<T, uint16_t> && N == 1;
93 constexpr static bool is_u16x2 = std::is_same_v<T, uint16_t> && N == 2;
94 constexpr static bool is_u16x4 = std::is_same_v<T, uint16_t> && N == 4;
95 constexpr static bool is_u16x8 = std::is_same_v<T, uint16_t> && N == 8;
96 constexpr static bool is_u16x16 = std::is_same_v<T, uint16_t> && N == 16;
97 constexpr static bool is_u16x32 = std::is_same_v<T, uint16_t> && N == 32;
98 constexpr static bool is_f16x4 = std::is_same_v<T, float16> && N == 4;
100 constexpr static bool is_i32x1 = std::is_same_v<T, int32_t> && N == 1;
101 constexpr static bool is_i32x2 = std::is_same_v<T, int32_t> && N == 2;
102 constexpr static bool is_i32x4 = std::is_same_v<T, int32_t> && N == 4;
103 constexpr static bool is_i32x8 = std::is_same_v<T, int32_t> && N == 8;
104 constexpr static bool is_i32x16 = std::is_same_v<T, int32_t> && N == 16;
105 constexpr static bool is_u32x1 = std::is_same_v<T, uint32_t> && N == 1;
106 constexpr static bool is_u32x2 = std::is_same_v<T, uint32_t> && N == 2;
107 constexpr static bool is_u32x4 = std::is_same_v<T, uint32_t> && N == 4;
108 constexpr static bool is_u32x8 = std::is_same_v<T, uint32_t> && N == 8;
109 constexpr static bool is_u32x16 = std::is_same_v<T, uint32_t> && N == 16;
110 constexpr static bool is_f32x1 = std::is_same_v<T, float> && N == 1;
111 constexpr static bool is_f32x2 = std::is_same_v<T, float> && N == 2;
112 constexpr static bool is_f32x4 = std::is_same_v<T, float> && N == 4;
113 constexpr static bool is_f32x8 = std::is_same_v<T, float> && N == 8;
114 constexpr static bool is_f32x16 = std::is_same_v<T, float> && N == 16;
116 constexpr static bool is_i64x1 = std::is_same_v<T, int64_t> && N == 1;
117 constexpr static bool is_i64x2 = std::is_same_v<T, int64_t> && N == 2;
118 constexpr static bool is_i64x4 = std::is_same_v<T, int64_t> && N == 4;
119 constexpr static bool is_i64x8 = std::is_same_v<T, int64_t> && N == 8;
120 constexpr static bool is_u64x1 = std::is_same_v<T, uint64_t> && N == 1;
121 constexpr static bool is_u64x2 = std::is_same_v<T, uint64_t> && N == 2;
122 constexpr static bool is_u64x4 = std::is_same_v<T, uint64_t> && N == 4;
123 constexpr static bool is_u64x8 = std::is_same_v<T, uint64_t> && N == 8;
124 constexpr static bool is_f64x1 = std::is_same_v<T, double> && N == 1;
125 constexpr static bool is_f64x2 = std::is_same_v<T, double> && N == 2;
126 constexpr static bool is_f64x4 = std::is_same_v<T, double> && N == 4;
127 constexpr static bool is_f64x8 = std::is_same_v<T, double> && N == 8;
133 if (not std::is_constant_evaluated()) {
134#if defined(HI_HAS_AVX)
135 if constexpr (is_i64x4 or is_u64x4 or is_i32x8 or is_u32x8 or is_i16x16 or is_u16x16 or is_i8x32 or is_u8x32) {
136 _mm256_storeu_si256(
reinterpret_cast<__m256i *
>(v.
data()), _mm256_setzero_si256());
138 }
else if constexpr (is_f64x4) {
139 _mm256_storeu_pd(
reinterpret_cast<__m256d *
>(v.
data()), _mm256_setzero_pd());
141 }
else if constexpr (is_f32x8) {
142 _mm256_storeu_ps(v.
data(), _mm256_setzero_ps());
146#if defined(HI_HAS_SSE2)
147 if constexpr (is_i64x2 or is_u64x2 or is_i32x4 or is_u32x4 or is_i16x8 or is_u16x8 or is_i8x16 or is_u8x16) {
148 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(v.
data()), _mm_setzero_si128());
150 }
else if constexpr (is_f64x2) {
151 _mm_storeu_pd(
reinterpret_cast<__m128d *
>(v.
data()), _mm_setzero_pd());
155#if defined(HI_HAS_SSE)
156 if constexpr (is_f32x4) {
157 _mm_storeu_ps(v.
data(), _mm_setzero_ps());
163 for (
auto i = 0_uz; i != N; ++i) {
173 template<numeric_limited U, std::
size_t M>
176 if (!std::is_constant_evaluated()) {
177#if defined(HI_HAS_AVX)
178 if constexpr (is_f64x4 and other.is_f32x4) {
181 }
else if constexpr (is_f64x4 and other.is_i32x4) {
184 }
else if constexpr (is_f32x4 and other.is_f64x4) {
187 }
else if constexpr (is_i32x4 and other.is_f64x4) {
190 }
else if constexpr (is_i32x8 and other.is_f32x8) {
193 }
else if constexpr (is_f32x8 and other.is_i32x8) {
198#if defined(HI_HAS_SSE4_1)
199 if constexpr (is_u8x4 and other.is_f32x4) {
200 hilet i32_4 = _mm_cvtps_epi32(other.reg());
201 hilet i16_8 = _mm_packs_epi32(i32_4, _mm_setzero_si128());
202 hilet u8_16 = _mm_packus_epi16(i16_8, _mm_setzero_si128());
205 }
else if constexpr (is_i64x4 and other.is_i32x4) {
208 }
else if constexpr (is_i64x4 and other.is_i16x8) {
211 }
else if constexpr (is_i32x4 and other.is_i16x8) {
214 }
else if constexpr (is_i64x2 and other.is_i8x16) {
217 }
else if constexpr (is_i32x4 and other.is_i8x16) {
220 }
else if constexpr (is_i16x8 and other.is_i8x16) {
223 }
else if constexpr (is_f16x4 and other.is_f32x4) {
226 }
else if constexpr (is_f32x4 and other.is_f16x4) {
232#if defined(HI_HAS_SSE2)
233 if constexpr (is_f64x2 and other.is_i32x4) {
236 }
else if constexpr (is_f32x4 and other.is_i32x4) {
239 }
else if constexpr (is_i32x4 and other.is_f32x4) {
248 if constexpr (std::is_integral_v<T> and std::is_floating_point_v<U>) {
250 v[i] =
static_cast<value_type
>(
std::round(other[i]));
252 v[i] =
static_cast<value_type
>(other[i]);
260 template<numeric_limited U, std::
size_t M>
265 if (!std::is_constant_evaluated()) {
266#if defined(HI_HAS_AVX)
267 if constexpr (is_f64x4 and other1.is_f64x2 and other2.is_f64x2) {
268 v =
numeric_array{_mm256_set_m128d(other2.reg(), other1.reg())};
269 }
else if constexpr (is_f32x8 and other1.is_f32x4 and other2.is_f32x4) {
270 v =
numeric_array{_mm256_set_m128(other2.reg(), other1.reg())};
271 }
else if constexpr (
272 std::is_integral_v<T> and std::is_integral_v<U> and (
sizeof(T) * N == 32) and (
sizeof(U) * M == 16)) {
273 v =
numeric_array{_mm256_set_m128i(other2.reg(), other1.reg())};
276#if defined(HI_HAS_SSE4_1)
277 if constexpr (is_u16x8 and other1.is_u32x4 and other2.is_u32x4) {
278 v =
numeric_array{_mm_packus_epu32(other2.reg(), other1.reg())};
281#if defined(HI_HAS_SSE2)
282 if constexpr (is_i16x8 and other1.is_i32x4 and other2.is_i32x4) {
283 v =
numeric_array{_mm_packs_epi32(other2.reg(), other1.reg())};
284 }
else if constexpr (is_i8x16 and other1.is_i16x8 and other2.is_i16x8) {
285 v =
numeric_array{_mm_packs_epi16(other2.reg(), other1.reg())};
286 }
else if constexpr (is_u8x16 and other1.is_u16x8 and other2.is_u16x8) {
287 v =
numeric_array{_mm_packus_epu16(other2.reg(), other1.reg())};
294 if constexpr (std::is_integral_v<T> and std::is_floating_point_v<U>) {
296 v[i] =
static_cast<value_type
>(
std::round(other1[i]));
298 v[i] =
static_cast<value_type
>(other1[i]);
300 }
else if (i < M * 2) {
301 if constexpr (std::is_integral_v<T> and std::is_floating_point_v<U>) {
303 v[i] =
static_cast<value_type
>(
std::round(other2[i - M]));
305 v[i] =
static_cast<value_type
>(other2[i - M]);
313 [[nodiscard]]
constexpr explicit numeric_array(T
const& x) noexcept : v()
315 if (not std::is_constant_evaluated()) {
316#if defined(HI_HAS_SSE)
317 if constexpr (is_f32x4) {
326 [[nodiscard]]
constexpr explicit numeric_array(T
const& x, T
const& y)
noexcept requires(N >= 2) : v()
328 if (not std::is_constant_evaluated()) {
329#if defined(HI_HAS_SSE2)
330 if constexpr (is_i32x4) {
340 [[nodiscard]]
constexpr explicit numeric_array(T
const& x, T
const& y, T
const& z)
noexcept requires(N >= 3) : v()
342 if (not std::is_constant_evaluated()) {
343#if defined(HI_HAS_SSE2)
344 if constexpr (is_i32x4) {
355 [[nodiscard]]
constexpr explicit numeric_array(T
const& x, T
const& y, T
const& z, T
const& w)
noexcept requires(N >= 4) : v()
357 if (not std::is_constant_evaluated()) {
358#if defined(HI_HAS_SSE2)
359 if constexpr (is_i32x4) {
371 [[nodiscard]]
static constexpr numeric_array broadcast(T rhs)
noexcept
373 if (not std::is_constant_evaluated()) {
374#if defined(HI_HAS_AVX)
375 if constexpr (is_f64x4) {
377 }
else if constexpr (is_f32x8) {
379 }
else if constexpr (is_i64x4) {
381 }
else if constexpr (is_i32x8) {
383 }
else if constexpr (is_i16x16) {
385 }
else if constexpr (is_i8x32) {
389#if defined(HI_HAS_SSE2)
390 if constexpr (is_f64x2) {
392 }
else if constexpr (is_i64x2) {
394 }
else if constexpr (is_i32x4) {
396 }
else if constexpr (is_i16x8) {
398 }
else if constexpr (is_i8x16) {
402#if defined(HI_HAS_SSE)
403 if constexpr (is_f32x4) {
415 [[nodiscard]]
static constexpr numeric_array epsilon()
noexcept
417 if constexpr (std::is_floating_point_v<T>) {
420 return broadcast(T{0});
#if defined(HI_HAS_SSE2)
    /** Load the array into a 128-bit SSE2 integer register (unaligned load). */
    [[nodiscard]] __m128i reg() const noexcept requires(std::is_integral_v<T> and sizeof(T) * N == 16)
    {
        return _mm_loadu_si128(reinterpret_cast<__m128i const *>(v.data()));
    }

    /** Load four half-floats into the low 64 bits of an integer register,
     * zero-filling the high lanes.
     */
    [[nodiscard]] __m128i reg() const noexcept requires(is_f16x4)
    {
        return _mm_set_epi16(0, 0, 0, 0, get<3>(v).get(), get<2>(v).get(), get<1>(v).get(), get<0>(v).get());
    }
#endif

#if defined(HI_HAS_SSE2)
    /** Load the four floats into an SSE register (unaligned load). */
    [[nodiscard]] __m128 reg() const noexcept requires(is_f32x4)
    {
        return _mm_loadu_ps(v.data());
    }
#endif

#if defined(HI_HAS_SSE2)
    /** Load the two doubles into an SSE2 register (unaligned load). */
    [[nodiscard]] __m128d reg() const noexcept requires(is_f64x2)
    {
        return _mm_loadu_pd(v.data());
    }
#endif
#if defined(HI_HAS_SSE2)
    /** Construct from a 128-bit SSE2 integer register (unaligned store into v). */
    [[nodiscard]] explicit numeric_array(__m128i const& rhs) noexcept requires(std::is_integral_v<T> and sizeof(T) * N == 16)
    {
        _mm_storeu_si128(reinterpret_cast<__m128i *>(v.data()), rhs);
    }
#endif

#if defined(HI_HAS_SSE4_1)
    /** Construct four half-floats from the low 64 bits of an integer register. */
    [[nodiscard]] explicit numeric_array(__m128i const& rhs) noexcept requires(is_f16x4) :
        v(std::bit_cast<decltype(v)>(_mm_extract_epi64(rhs, 0)))
    {
    }
#endif

#if defined(HI_HAS_SSE4_1)
    /** Construct four unsigned bytes from the low 32 bits of an integer register. */
    [[nodiscard]] explicit numeric_array(__m128i const& rhs) noexcept requires(is_u8x4) :
        v(std::bit_cast<decltype(v)>(_mm_extract_epi32(rhs, 0)))
    {
    }
#endif

#if defined(HI_HAS_SSE2)
    /** Construct four floats from an SSE register. */
    [[nodiscard]] explicit numeric_array(__m128 const& rhs) noexcept requires(is_f32x4)
    {
        _mm_storeu_ps(v.data(), rhs);
    }
#endif

#if defined(HI_HAS_SSE2)
    /** Construct two doubles from an SSE2 register. */
    [[nodiscard]] explicit numeric_array(__m128d const& rhs) noexcept requires(is_f64x2)
    {
        _mm_storeu_pd(v.data(), rhs);
    }
#endif

#if defined(HI_HAS_SSE2)
    /** Assign from a 128-bit SSE2 integer register. */
    numeric_array& operator=(__m128i const& rhs) noexcept requires(std::is_integral_v<T> and sizeof(T) * N == 16)
    {
        _mm_storeu_si128(reinterpret_cast<__m128i *>(v.data()), rhs);
        return *this;
    }
#endif

#if defined(HI_HAS_SSE2)
    /** Assign four floats from an SSE register. */
    numeric_array& operator=(__m128 const& rhs) noexcept requires(is_f32x4)
    {
        _mm_storeu_ps(v.data(), rhs);
        return *this;
    }
#endif

#if defined(HI_HAS_SSE2)
    /** Assign two doubles from an SSE2 register. */
    numeric_array& operator=(__m128d const& rhs) noexcept requires(is_f64x2)
    {
        _mm_storeu_pd(v.data(), rhs);
        return *this;
    }
#endif
#if defined(HI_HAS_AVX)
    /** Load the array into a 256-bit AVX integer register (unaligned load). */
    [[nodiscard]] __m256i reg() const noexcept requires(std::is_integral_v<T> and sizeof(T) * N == 32)
    {
        return _mm256_loadu_si256(reinterpret_cast<__m256i const *>(v.data()));
    }
#endif

#if defined(HI_HAS_AVX)
    /** Load the eight floats into an AVX register (unaligned load). */
    [[nodiscard]] __m256 reg() const noexcept requires(is_f32x8)
    {
        return _mm256_loadu_ps(v.data());
    }
#endif

#if defined(HI_HAS_AVX)
    /** Load the four doubles into an AVX register (unaligned load). */
    [[nodiscard]] __m256d reg() const noexcept requires(is_f64x4)
    {
        return _mm256_loadu_pd(v.data());
    }
#endif
#if defined(HI_HAS_AVX)
    /** Construct from a 256-bit AVX integer register (unaligned store into v). */
    [[nodiscard]] explicit numeric_array(__m256i const& rhs) noexcept requires(std::is_integral_v<T> and sizeof(T) * N == 32)
    {
        _mm256_storeu_si256(reinterpret_cast<__m256i *>(v.data()), rhs);
    }
#endif

#if defined(HI_HAS_AVX)
    /** Construct eight floats from an AVX register. */
    [[nodiscard]] explicit numeric_array(__m256 const& rhs) noexcept requires(is_f32x8)
    {
        _mm256_storeu_ps(v.data(), rhs);
    }
#endif

#if defined(HI_HAS_AVX)
    /** Construct four doubles from an AVX register. */
    [[nodiscard]] explicit numeric_array(__m256d const& rhs) noexcept requires(is_f64x4)
    {
        _mm256_storeu_pd(v.data(), rhs);
    }
#endif

#if defined(HI_HAS_AVX)
    /** Assign from a 256-bit AVX integer register. */
    numeric_array& operator=(__m256i const& rhs) noexcept requires(std::is_integral_v<T> and sizeof(T) * N == 32)
    {
        _mm256_storeu_si256(reinterpret_cast<__m256i *>(v.data()), rhs);
        return *this;
    }
#endif

#if defined(HI_HAS_AVX)
    /** Assign eight floats from an AVX register. */
    numeric_array& operator=(__m256 const& rhs) noexcept requires(is_f32x8)
    {
        _mm256_storeu_ps(v.data(), rhs);
        return *this;
    }
#endif

#if defined(HI_HAS_AVX)
    /** Assign four doubles from an AVX register. */
    numeric_array& operator=(__m256d const& rhs) noexcept requires(is_f64x4)
    {
        _mm256_storeu_pd(v.data(), rhs);
        return *this;
    }
#endif
588 template<
typename Other>
589 [[nodiscard]]
constexpr friend Other bit_cast(
numeric_array const& rhs)
noexcept
592 if (not std::is_constant_evaluated()) {
593#if defined(HI_HAS_SSE2)
594 if constexpr (Other::is_f32x4 and std::is_integral_v<T>) {
595 return Other{_mm_castsi128_ps(rhs.reg())};
596 }
else if constexpr (Other::is_f32x4 and is_f64x2) {
597 return Other{_mm_castpd_ps(rhs.reg())};
598 }
else if constexpr (Other::is_f64x2 and std::is_integral_v<T>) {
599 return Other{_mm_castsi128_pd(rhs.reg())};
600 }
else if constexpr (Other::is_f64x2 and is_f32x4) {
601 return Other{_mm_castps_pd(rhs.reg())};
602 }
else if constexpr (std::is_integral_v<typename Other::value_type> and is_f32x4) {
603 return Other{_mm_castps_si128(rhs.reg())};
604 }
else if constexpr (std::is_integral_v<typename Other::value_type> and is_f64x2) {
605 return Other{_mm_castpd_si128(rhs.reg())};
606 }
else if constexpr (std::is_integral_v<typename Other::value_type> and std::is_integral_v<T>) {
607 return Other{rhs.reg()};
611 return std::bit_cast<Other>(rhs);
618 if (not std::is_constant_evaluated()) {
619#if defined(HI_HAS_SSE2)
620 if constexpr (is_f64x2) {
622 }
else if constexpr (is_i64x2 or is_u64x2) {
624 }
else if constexpr (is_i32x4 or is_u32x4) {
626 }
else if constexpr (is_i16x8 or is_u16x8) {
628 }
else if constexpr (is_i8x16 or is_u8x16) {
632#if defined(HI_HAS_SSE)
633 if constexpr (is_f32x4) {
641 r[i] = (i % 2 == 0) ? a[i / 2] : b[i / 2];
650 template<std::
size_t S>
680 template<std::
size_t S>
681 constexpr void store(std::byte *ptr)
const noexcept
689 constexpr void store(std::byte *ptr)
const noexcept
691 store<sizeof(*this)>(ptr);
697 constexpr explicit operator bool() const noexcept
699 if constexpr (std::is_floating_point_v<T>) {
700 hilet ep = epsilon();
702 return to_bool(gt(-ep, *
this) | gt(*
this, ep));
704 return to_bool(ne(*
this, T{0}));
708 [[nodiscard]]
constexpr T
const& operator[](
std::size_t i)
const noexcept
710 static_assert(std::endian::native == std::endian::little,
"Indices need to be reversed on big endian machines");
715 [[nodiscard]]
constexpr T& operator[](
std::size_t i)
noexcept
717 static_assert(std::endian::native == std::endian::little,
"Indices need to be reversed on big endian machines");
722 [[nodiscard]]
constexpr reference front() noexcept
727 [[nodiscard]]
constexpr const_reference front() const noexcept
732 [[nodiscard]]
constexpr reference back() noexcept
737 [[nodiscard]]
constexpr const_reference back() const noexcept
742 [[nodiscard]]
constexpr pointer data() noexcept
747 [[nodiscard]]
constexpr const_pointer data() const noexcept
752 [[nodiscard]]
constexpr iterator
begin() noexcept
757 [[nodiscard]]
constexpr const_iterator
begin() const noexcept
762 [[nodiscard]]
constexpr const_iterator cbegin() const noexcept
767 [[nodiscard]]
constexpr iterator
end() noexcept
772 [[nodiscard]]
constexpr const_iterator
end() const noexcept
777 [[nodiscard]]
constexpr const_iterator cend() const noexcept
782 [[nodiscard]]
constexpr bool empty() const noexcept
787 [[nodiscard]]
constexpr size_type size() const noexcept
792 [[nodiscard]]
constexpr size_type max_size() const noexcept
797 constexpr bool is_point() const noexcept
799 return v.
back() != T{};
802 constexpr bool is_vector() const noexcept
804 return v.
back() == T{};
807 constexpr bool is_opaque() const noexcept
812 constexpr bool is_transparent() const noexcept
817 [[nodiscard]]
constexpr T
const& x() const noexcept requires(N >= 1)
819 return std::get<0>(v);
822 [[nodiscard]]
constexpr T
const& y() const noexcept requires(N >= 2)
824 return std::get<1>(v);
827 [[nodiscard]]
constexpr T
const& z() const noexcept requires(N >= 3)
829 return std::get<2>(v);
832 [[nodiscard]]
constexpr T
const& w() const noexcept requires(N >= 4)
834 return std::get<3>(v);
837 [[nodiscard]]
constexpr T& x() noexcept requires(N >= 1)
839 return std::get<0>(v);
842 [[nodiscard]]
constexpr T& y() noexcept requires(N >= 2)
844 return std::get<1>(v);
847 [[nodiscard]]
constexpr T& z() noexcept requires(N >= 3)
849 return std::get<2>(v);
852 [[nodiscard]]
constexpr T& w() noexcept requires(N >= 4)
854 return std::get<3>(v);
857 [[nodiscard]]
constexpr T
const& r() const noexcept requires(N >= 1)
859 return std::get<0>(v);
862 [[nodiscard]]
constexpr T
const& g() const noexcept requires(N >= 2)
864 return std::get<1>(v);
867 [[nodiscard]]
constexpr T
const& b() const noexcept requires(N >= 3)
869 return std::get<2>(v);
872 [[nodiscard]]
constexpr T
const& a() const noexcept requires(N >= 4)
874 return std::get<3>(v);
877 [[nodiscard]]
constexpr T& r() noexcept requires(N >= 1)
879 return std::get<0>(v);
882 [[nodiscard]]
constexpr T& g() noexcept requires(N >= 2)
884 return std::get<1>(v);
887 [[nodiscard]]
constexpr T& b() noexcept requires(N >= 3)
889 return std::get<2>(v);
892 [[nodiscard]]
constexpr T& a() noexcept requires(N >= 4)
894 return std::get<3>(v);
897 [[nodiscard]]
constexpr T
const& width() const noexcept requires(N >= 1)
899 return std::get<0>(v);
902 [[nodiscard]]
constexpr T
const& height() const noexcept requires(N >= 2)
904 return std::get<1>(v);
907 [[nodiscard]]
constexpr T
const& depth() const noexcept requires(N >= 3)
909 return std::get<2>(v);
912 [[nodiscard]]
constexpr T& width() noexcept requires(N >= 1)
914 return std::get<0>(v);
917 [[nodiscard]]
constexpr T& height() noexcept requires(N >= 2)
919 return std::get<1>(v);
922 [[nodiscard]]
constexpr T& depth() noexcept requires(N >= 3)
924 return std::get<2>(v);
927 constexpr numeric_array& operator<<=(
unsigned int rhs)
noexcept
929 return *
this = *
this << rhs;
932 constexpr numeric_array& operator>>=(
unsigned int rhs)
noexcept
934 return *
this = *
this >> rhs;
937 constexpr numeric_array& operator|=(numeric_array
const& rhs)
noexcept
939 return *
this = *
this | rhs;
942 constexpr numeric_array& operator|=(T
const& rhs)
noexcept
944 return *
this = *
this | rhs;
947 constexpr numeric_array& operator&=(numeric_array
const& rhs)
noexcept
949 return *
this = *
this & rhs;
952 constexpr numeric_array& operator&=(T
const& rhs)
noexcept
954 return *
this = *
this & rhs;
957 constexpr numeric_array& operator^=(numeric_array
const& rhs)
noexcept
959 return *
this = *
this ^ rhs;
962 constexpr numeric_array& operator^=(T
const& rhs)
noexcept
964 return *
this = *
this ^ rhs;
967 constexpr numeric_array& operator+=(numeric_array
const& rhs)
noexcept
969 return *
this = *
this + rhs;
972 constexpr numeric_array& operator+=(T
const& rhs)
noexcept
974 return *
this = *
this + rhs;
977 constexpr numeric_array& operator-=(numeric_array
const& rhs)
noexcept
979 return *
this = *
this - rhs;
982 constexpr numeric_array& operator-=(T
const& rhs)
noexcept
984 return *
this = *
this - rhs;
987 constexpr numeric_array& operator*=(numeric_array
const& rhs)
noexcept
989 return *
this = *
this * rhs;
992 constexpr numeric_array& operator*=(T
const& rhs)
noexcept
994 return *
this = *
this * rhs;
997 constexpr numeric_array& operator/=(numeric_array
const& rhs)
noexcept
999 return *
this = *
this / rhs;
1002 constexpr numeric_array& operator/=(T
const& rhs)
noexcept
1004 return *
this = *
this / rhs;
1007 constexpr numeric_array& operator%=(numeric_array
const& rhs)
noexcept
1009 return *
this = *
this % rhs;
1012 constexpr numeric_array& operator%=(T
const& rhs)
noexcept
1014 return *
this = *
this % rhs;
1017 constexpr static ssize_t get_zero = -1;
1018 constexpr static ssize_t get_one = -2;
1024 template<std::
size_t I>
1027 static_assert(I < N,
"Index out of bounds");
1028 return std::get<I>(rhs.v);
1039 static_assert(std::endian::native == std::endian::little,
"Indices need to be reversed on big endian machines");
1040 static_assert(I >= -2 && I < narrow_cast<ssize_t>(N),
"Index out of bounds");
1041 if constexpr (I == get_zero) {
1043 }
else if constexpr (I == get_one) {
1046 return std::get<I>(rhs.v);
1056 template<std::
size_t I>
1059 static_assert(I < N);
1061 if (not std::is_constant_evaluated()) {
1062#if defined(HI_HAS_AVX2)
1063 if constexpr (is_i16x16 or is_u16x16) {
1064 return static_cast<T
>(_mm256_extract_epi16(rhs.v.reg(), I));
1065 }
else if constexpr (is_i8x32 or is_u8x32) {
1066 return static_cast<T
>(_mm256_extract_epi8(rhs.v.reg(), I));
1069#if defined(HI_HAS_AVX)
1070 if constexpr (is_f64x4) {
1071 return bit_cast<T>(_mm256_extract_epi64(_mm256_castpd_si256(rhs.v.reg()), I));
1072 }
else if constexpr (is_f32x8) {
1073 return bit_cast<T>(_mm256_extract_epi32(_mm256_castps_si256(rhs.v.reg()), I));
1074 }
else if constexpr (is_i64x4 or is_u64x4) {
1075 return static_cast<T
>(_mm256_extract_epi64(rhs.v.reg(), I));
1076 }
else if constexpr (is_i32x8 or is_u32x8) {
1077 return static_cast<T
>(_mm256_extract_epi32(rhs.v.reg(), I));
1080#if defined(HI_HAS_SSE4_1)
1081 if constexpr (is_f64x2) {
1082 return bit_cast<T>(_mm_extract_epi64(_mm_castpd_si128(rhs.v.reg()), I));
1083 }
else if constexpr (is_f32x4) {
1084 return std::bit_cast<T>(_mm_extract_ps(rhs.v.reg(), I));
1085 }
else if constexpr (is_i64x2 or is_u64x2) {
1086 return static_cast<T
>(_mm_extract_epi64(rhs.v.reg(), I));
1087 }
else if constexpr (is_i32x4 or is_u32x4) {
1088 return static_cast<T
>(_mm_extract_epi32(rhs.v.reg(), I));
1089 }
else if constexpr (is_i8x16 or is_u8x16) {
1090 return static_cast<T
>(_mm_extract_epi8(rhs.v.reg(), I));
1093#if defined(HI_HAS_SSE2)
1094 if constexpr (is_i16x8 or is_u16x8) {
1095 return static_cast<T
>(_mm_extract_epi16(rhs.v.reg(), I));
1111 template<std::
size_t I, std::
size_t ZeroMask = 0>
1113 requires(is_f32x4 or is_i32x4 or is_u32x4)
1115 static_assert(I < N);
1116 static_assert(ZeroMask <= ((1 << N) - 1));
1118 if (not std::is_constant_evaluated()) {
1119#if defined(HI_HAS_SSE4_1)
1120 if constexpr (is_f32x4) {
1121 constexpr int imm8 = (I << 4) | ZeroMask;
1122 return numeric_array{_mm_insert_ps(lhs.reg(), _mm_set_ss(rhs), imm8)};
1123 }
else if constexpr (is_i32x4 or is_u32x4) {
1124 constexpr int imm8 = (I << 4) | ZeroMask;
1126 _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(lhs.reg()), _mm_castsi128_ps(_mm_set1_epi32(rhs)), imm8))};
1132 std::get<I>(r.v) = rhs;
1134 if ((ZeroMask >> i) & 1) {
1149 static_assert(std::endian::native == std::endian::little,
"Indices need to be reversed on big endian machines");
1150 static_assert(I >= -2 && I < narrow_cast<ssize_t>(N),
"Index out of bounds");
1151 if constexpr (I == get_zero) {
1153 }
else if constexpr (I == get_one) {
1156 return std::get<I>(rhs.v);
1167 if (not std::is_constant_evaluated()) {
1168#if defined(HI_HAS_SSE4_1)
1169 if constexpr (is_f32x4) {
1170 return numeric_array{_mm_insert_ps(rhs.reg(), rhs.reg(), Mask)};
1171 }
else if constexpr (is_i32x4 or is_u32x4) {
1173 _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(rhs.reg()), _mm_castsi128_ps(rhs.reg()), Mask))};
1180 if (to_bool((Mask >> i) & 1)) {
1196 template<std::
size_t Mask>
1199 if (not std::is_constant_evaluated()) {
1200#if defined(HI_HAS_AVX2)
1201 if constexpr (is_i32x8) {
1202 return numeric_array{_mm256_blend_epi32(lhs.reg(), rhs.reg(), Mask)};
1203 }
else if constexpr (is_i64x2 or is_u64x2) {
1204 constexpr auto mask_x2 = ((Mask & 1) ? 0b0011 : 0) | ((Mask & 2) ? 0b1100 : 0);
1205 return numeric_array{_mm_blend_epi32(lhs.reg(), rhs.reg(), mask_x2)};
1206 }
else if constexpr (is_i32x4 or is_u32x4) {
1207 return numeric_array{_mm_blend_epi32(lhs.reg(), rhs.reg(), Mask)};
1208 }
else if constexpr (is_i16x16 or is_u16x16) {
1209 return numeric_array{_mm256_blend_epi16(lhs.reg(), rhs.reg(), Mask)};
1212#if defined(HI_HAS_AVX)
1213 if constexpr (is_f64x4) {
1214 return numeric_array{_mm256_blend_pd(lhs.reg(), rhs.reg(), Mask)};
1215 }
else if constexpr (is_f32x8) {
1216 return numeric_array{_mm256_blend_ps(lhs.reg(), rhs.reg(), Mask)};
1217 }
else if constexpr (is_i64x4 or is_u64x4) {
1219 _mm256_castpd_si256(_mm256_blend_pd(_mm256_castsi256_pd(lhs.reg()), _mm256_castsi256_pd(rhs.reg()), Mask))};
1220 }
else if constexpr (is_i32x8 or is_u32x8) {
1222 _mm256_castps_si256(_mm256_blend_ps(_mm256_castsi256_ps(lhs.reg()), _mm256_castsi256_ps(rhs.reg()), Mask))};
1225#if defined(HI_HAS_SSE4_1)
1226 if constexpr (is_f64x2) {
1227 return numeric_array{_mm_blend_pd(lhs.reg(), rhs.reg(), Mask)};
1228 }
else if constexpr (is_f32x4) {
1229 return numeric_array{_mm_blend_ps(lhs.reg(), rhs.reg(), Mask)};
1230 }
else if constexpr (is_i64x2 or is_u64x2) {
1232 _mm_castpd_si128(_mm_blend_pd(_mm_castsi128_pd(lhs.reg()), _mm_castsi128_pd(rhs.reg()), Mask))};
1233 }
else if constexpr (is_i32x4 or is_u32x4) {
1235 _mm_castps_si128(_mm_blend_ps(_mm_castsi128_ps(lhs.reg()), _mm_castsi128_ps(rhs.reg()), Mask))};
1236 }
else if constexpr (is_i16x8 or is_u16x8) {
1237 return numeric_array{_mm_blend_epi16(lhs.reg(), rhs.reg(), Mask)};
1244 r[i] = to_bool((Mask >> i) & 1) ? rhs[i] : lhs[i];
1253 if (not std::is_constant_evaluated()) {
1254#if defined(HI_HAS_AVX2)
1255 if constexpr (is_i8x32 or is_u8x32) {
1256 return numeric_array{_mm256_blendv_epi8(a.reg(), b.reg(), mask.reg())};
1259#if defined(HI_HAS_AVX)
1260 if constexpr (is_f64x4) {
1261 return numeric_array{_mm256_blendv_pd(a.reg(), b.reg(), mask.reg())};
1262 }
else if constexpr (is_f32x8) {
1263 return numeric_array{_mm256_blendv_ps(a.reg(), b.reg(), mask.reg())};
1264 }
else if constexpr (is_i64x4 or is_u64x4) {
1266 _mm256_castsi256_pd(a.reg()), _mm256_castsi256_pd(b.reg()), _mm256_castsi256_pd(mask.reg())))};
1267 }
else if constexpr (is_i32x8 or is_u32x8) {
1269 _mm256_castsi256_ps(a.reg()), _mm256_castsi256_ps(b.reg()), _mm256_castsi256_ps(mask.reg())))};
1272#if defined(HI_HAS_SSE4_1)
1273 if constexpr (is_f64x2) {
1274 return numeric_array{_mm_blendv_pd(a.reg(), b.reg(), mask.reg())};
1275 }
else if constexpr (is_f32x4) {
1276 return numeric_array{_mm_blendv_ps(a.reg(), b.reg(), mask.reg())};
1277 }
else if constexpr (is_i64x2 or is_u64x2) {
1279 _mm_blendv_pd(_mm_castsi128_pd(a.reg()), _mm_castsi128_pd(b.reg()), _mm_castsi128_pd(mask.reg())))};
1280 }
else if constexpr (is_i32x4 or is_u32x4) {
1282 _mm_blendv_ps(_mm_castsi128_ps(a.reg()), _mm_castsi128_ps(b.reg()), _mm_castsi128_ps(mask.reg())))};
1283 }
else if constexpr (is_i8x16 or is_u8x16) {
1284 return numeric_array{_mm_blendv_epi8(a.reg(), b.reg(), mask.reg())};
1291 r[i] = mask[i] != T{0} ? b[i] : a[i];
1300 template<std::
size_t Mask>
1303 return blend<Mask>(rhs, -rhs);
1311 [[nodiscard]]
friend constexpr numeric_array abs(numeric_array
const& rhs)
noexcept
1313 if (not std::is_constant_evaluated()) {
1314#if defined(HI_HAS_AVX2)
1315 if constexpr (is_i32x8) {
1316 return numeric_array{_mm256_abs_epi32(rhs.reg())};
1317 }
else if constexpr (is_i16x16) {
1318 return numeric_array{_mm256_abs_epi16(rhs.reg())};
1319 }
else if constexpr (is_i8x32) {
1320 return numeric_array{_mm256_abs_epi8(rhs.reg())};
1323#if defined(HI_HAS_SSSE3)
1324 if constexpr (is_i32x4) {
1325 return numeric_array{_mm_abs_epi32(rhs.reg())};
1326 }
else if constexpr (is_i16x8) {
1327 return numeric_array{_mm_abs_epi16(rhs.reg())};
1328 }
else if constexpr (is_i8x16) {
1329 return numeric_array{_mm_abs_epi8(rhs.reg())};
1332#if defined(HI_HAS_SSE2)
1333 if constexpr (is_f64x2) {
1334 return numeric_array{_mm_castsi128_ps(_mm_srli_epi64(_mm_slli_epi64(_mm_castpd_si128(rhs.reg()), 1), 1))};
1335 }
else if constexpr (is_f32x4) {
1336 return numeric_array{_mm_castsi128_ps(_mm_srli_epi32(_mm_slli_epi32(_mm_castps_si128(rhs.reg()), 1), 1))};
1341 return max(rhs, -rhs);
1344 [[nodiscard]]
friend constexpr numeric_array rcp(numeric_array
const& rhs)
noexcept
1346 if (not std::is_constant_evaluated()) {
1347#if defined(HI_HAS_AVX)
1348 if constexpr (is_f32x8) {
1349 return numeric_array{_mm256_rcp_ps(rhs.reg())};
1352#if defined(HI_HAS_SSE)
1353 if constexpr (is_f32x4) {
1354 return numeric_array{_mm_rcp_ps(rhs.reg())};
1362 [[nodiscard]]
friend constexpr numeric_array
sqrt(numeric_array
const& rhs)
noexcept
1364 if (not std::is_constant_evaluated()) {
1365#if defined(HI_HAS_AVX)
1366 if constexpr (is_f64x4) {
1367 return numeric_array{_mm256_sqrt_pd(rhs.reg())};
1368 }
else if constexpr (is_f32x8) {
1369 return numeric_array{_mm256_sqrt_ps(rhs.reg())};
1372#if defined(HI_HAS_SSE2)
1373 if constexpr (is_f64x2) {
1374 return numeric_array{_mm_sqrt_pd(rhs.reg())};
1377#if defined(HI_HAS_SSE)
1378 if constexpr (is_f32x4) {
1379 return numeric_array{_mm_sqrt_ps(rhs.reg())};
1384 auto r = numeric_array{};
1391 [[nodiscard]]
friend constexpr numeric_array rcp_sqrt(numeric_array
const& rhs)
noexcept
1393 if (not std::is_constant_evaluated()) {
1394#if defined(HI_HAS_AVX)
1395 if constexpr (is_f32x8) {
1396 return numeric_array{_mm256_rsqrt_ps(rhs.reg())};
1399#if defined(HI_HAS_SSE)
1400 if constexpr (is_f32x4) {
1401 return numeric_array{_mm_rsqrt_ps(rhs.reg())};
1406 return rcp(
sqrt(rhs));
1409 [[nodiscard]]
friend constexpr numeric_array
floor(numeric_array
const& rhs)
noexcept
1410 requires(std::is_floating_point_v<value_type>)
1412 if (not std::is_constant_evaluated()) {
1413#if defined(HI_HAS_AVX)
1414 if constexpr (is_f64x4) {
1415 return numeric_array{_mm256_floor_pd(rhs.reg())};
1416 }
else if constexpr (is_f32x8) {
1417 return numeric_array{_mm256_floor_ps(rhs.reg())};
1420#if defined(HI_HAS_SSE4_1)
1421 if constexpr (is_f64x2) {
1422 return numeric_array{_mm_floor_pd(rhs.reg())};
1423 }
else if constexpr (is_f32x4) {
1424 return numeric_array{_mm_floor_ps(rhs.reg())};
1429 auto r = numeric_array{};
1436 [[nodiscard]]
friend constexpr numeric_array
ceil(numeric_array
const& rhs)
noexcept
1437 requires(std::is_floating_point_v<value_type>)
1439 if (not std::is_constant_evaluated()) {
1440#if defined(HI_HAS_AVX)
1441 if constexpr (is_f64x4) {
1442 return numeric_array{_mm256_ceil_pd(rhs.reg())};
1443 }
else if constexpr (is_f32x8) {
1444 return numeric_array{_mm256_ceil_ps(rhs.reg())};
1447#if defined(HI_HAS_SSE4_1)
1448 if constexpr (is_f64x2) {
1449 return numeric_array{_mm_ceil_pd(rhs.reg())};
1450 }
else if constexpr (is_f32x4) {
1451 return numeric_array{_mm_ceil_ps(rhs.reg())};
1456 auto r = numeric_array{};
// Element-wise rounding using the CURRENT rounding mode
// (_MM_FROUND_CUR_DIRECTION) — typically round-to-nearest-even,
// not round-half-away like std::round. Floating-point types only.
1463 [[nodiscard]]
friend constexpr numeric_array
round(numeric_array
const& rhs)
noexcept
1464 requires(std::is_floating_point_v<value_type>)
1466 if (not std::is_constant_evaluated()) {
1467#if defined(HI_HAS_AVX)
1468 if constexpr (is_f64x4) {
1469 return numeric_array{_mm256_round_pd(rhs.reg(), _MM_FROUND_CUR_DIRECTION)};
1470 }
else if constexpr (is_f32x8) {
1471 return numeric_array{_mm256_round_ps(rhs.reg(), _MM_FROUND_CUR_DIRECTION)};
1474#if defined(HI_HAS_SSE4_1)
1475 if constexpr (is_f64x2) {
1476 return numeric_array{_mm_round_pd(rhs.reg(), _MM_FROUND_CUR_DIRECTION)};
1477 }
else if constexpr (is_f32x4) {
1478 return numeric_array{_mm_round_ps(rhs.reg(), _MM_FROUND_CUR_DIRECTION)};
// Scalar fallback (loop body dropped by extraction).
1483 auto r = numeric_array{};
// Masked dot product: sums lhs[i]*rhs[i] for every bit i set in Mask.
// SSE4.1 dpps/dppd encodes the mask in the high nibble of the immediate.
1497 template<std::
size_t Mask>
1500 if (not std::is_constant_evaluated()) {
1501#if defined(HI_HAS_SSE4_1)
1502 if constexpr (is_f64x2) {
1503 return std::bit_cast<double>(_mm_extract_epi64(_mm_dp_pd(lhs.reg(), rhs.reg(), (Mask << 4) | 0xf), 0));
1504 }
else if constexpr (is_f32x4) {
1505 return std::bit_cast<float>(_mm_extract_ps(_mm_dp_ps(lhs.reg(), rhs.reg(), (Mask << 4) | 0xf), 0));
// Scalar fallback: accumulate only the lanes selected by Mask.
1512 if (to_bool(Mask & (1_uz << i))) {
1513 r += lhs.v[i] * rhs.v[i];
// Fragments of hypot<Mask>() and squared_hypot<Mask>() — bodies were
// dropped by extraction. squared_hypot is the masked dot of rhs with itself.
1525 template<std::
size_t Mask>
1537 template<std::
size_t Mask>
1540 return dot<Mask>(rhs, rhs);
// Fast approximate 1/hypot over the lanes selected by Mask
// (rsqrt of the masked dot product); exact fallback is 1/hypot.
1548 template<std::
size_t Mask>
1551 if (not std::is_constant_evaluated()) {
1552#if defined(HI_HAS_SSE4_1)
1553 if constexpr (is_f32x4) {
1554 return std::bit_cast<float>(_mm_extract_ps(_mm_rsqrt_ps(_mm_dp_ps(rhs.reg(), rhs.reg(), (Mask << 4) | 0xf)), 0));
1559 return 1.0f / hypot<Mask>(rhs);
// Normalize the lanes selected by Mask to unit length; requires a vector
// (w == 0). SIMD path multiplies by the approximate reciprocal square root.
1569 template<std::
size_t Mask>
1572 hi_axiom(rhs.is_vector());
1574 if (not std::is_constant_evaluated()) {
1575#if defined(HI_HAS_SSE4_1)
1576 if constexpr (is_f32x4) {
1577 hilet rhs_ = rhs.reg();
1578 hilet tmp = _mm_mul_ps(_mm_rsqrt_ps(_mm_dp_ps(rhs_, rhs_, (Mask << 4) | 0xf)), rhs_);
// Scalar fallback: scale each selected lane by 1/hypot.
1584 hilet rcp_hypot_ = rcp_hypot<Mask>(rhs);
1588 if (to_bool(Mask & (1_uz << i))) {
1589 r.v[i] = rhs.v[i] * rcp_hypot_;
// eq(): per-lane equality compare, packed into a std::size_t bit-mask
// (bit i set when lhs[i] == rhs[i]) via movemask.
1598 if (not std::is_constant_evaluated()) {
1599#if defined(HI_HAS_AVX2)
1600 if constexpr (is_i64x4 or is_u64x4) {
1602 _mm256_movemask_pd(_mm256_castsi256_pd(_mm256_cmpeq_epi64(lhs.reg(), rhs.reg()))));
1603 }
else if constexpr (is_i32x8 or is_u32x8) {
1605 _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpeq_epi32(lhs.reg(), rhs.reg()))));
1606 }
else if constexpr (is_i8x32 or is_u8x32) {
1607 return static_cast<std::size_t>(_mm256_movemask_epi8(_mm256_cmpeq_epi8(lhs.reg(), rhs.reg())));
1610#if defined(HI_HAS_AVX)
1611 if constexpr (is_f64x4) {
1612 return static_cast<std::size_t>(_mm256_movemask_pd(_mm256_cmp_pd(lhs.reg(), rhs.reg(), _CMP_EQ_OQ)));
1613 }
else if constexpr (is_f32x8) {
1614 return static_cast<std::size_t>(_mm256_movemask_ps(_mm256_cmp_ps(lhs.reg(), rhs.reg(), _CMP_EQ_OQ)));
1617#if defined(HI_HAS_SSE4_1)
1618 if constexpr (is_i64x2 or is_u64x2) {
1619 return static_cast<std::size_t>(_mm_movemask_pd(_mm_castsi128_pd(_mm_cmpeq_epi64(lhs.reg(), rhs.reg()))));
1622#if defined(HI_HAS_SSE2)
1623 if constexpr (is_f64x2) {
1624 return static_cast<std::size_t>(_mm_movemask_pd(_mm_cmpeq_pd(lhs.reg(), rhs.reg())));
1625 }
else if constexpr (is_i32x4 or is_u32x4) {
1626 return static_cast<std::size_t>(_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(lhs.reg(), rhs.reg()))));
1627 }
else if constexpr (is_i8x16 or is_u8x16) {
1628 return static_cast<std::size_t>(_mm_movemask_epi8(_mm_cmpeq_epi8(lhs.reg(), rhs.reg())));
1631#if defined(HI_HAS_SSE)
1632 if constexpr (is_f32x4) {
1633 return static_cast<std::size_t>(_mm_movemask_ps(_mm_cmpeq_ps(lhs.reg(), rhs.reg())));
// Scalar fallback: build the mask one lane at a time.
1640 r |=
static_cast<std::size_t>(lhs.v[i] == rhs.v[i]) << i;
// ne(): per-lane inequality bit-mask. Fallback inverts eq() with not_mask
// (presumably an all-lanes-set constant declared elsewhere in the class).
1645 [[nodiscard]]
friend constexpr std::size_t ne(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
1648 if (not std::is_constant_evaluated()) {
1649#if defined(HI_HAS_AVX)
1650 if constexpr (is_f64x4) {
1651 return static_cast<std::size_t>(_mm256_movemask_pd(_mm256_cmp_pd(lhs.reg(), rhs.reg(), _CMP_NEQ_OQ)));
1652 }
else if constexpr (is_f32x8) {
1653 return static_cast<std::size_t>(_mm256_movemask_ps(_mm256_cmp_ps(lhs.reg(), rhs.reg(), _CMP_NEQ_OQ)));
1656#if defined(HI_HAS_SSE2)
1657 if constexpr (is_f64x2) {
1658 return static_cast<std::size_t>(_mm_movemask_pd(_mm_cmpneq_pd(lhs.reg(), rhs.reg())));
1661#if defined(HI_HAS_SSE)
1662 if constexpr (is_f32x4) {
1663 return static_cast<std::size_t>(_mm_movemask_ps(_mm_cmpneq_ps(lhs.reg(), rhs.reg())));
1669 return eq(lhs, rhs) ^ not_mask;
// gt(): per-lane greater-than bit-mask. Integer cmpgt intrinsics are
// signed-only, which is why only the is_iNxM (not is_uNxM) branches exist.
1672 [[nodiscard]]
friend constexpr std::size_t gt(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
1675 if (not std::is_constant_evaluated()) {
1676#if defined(HI_HAS_AVX2)
1677 if constexpr (is_i64x4) {
1679 _mm256_movemask_pd(_mm256_castsi256_pd(_mm256_cmpgt_epi64(lhs.reg(), rhs.reg()))));
1680 }
else if constexpr (is_i32x8) {
1682 _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(lhs.reg(), rhs.reg()))));
1683 }
else if constexpr (is_i8x32) {
1684 return static_cast<std::size_t>(_mm256_movemask_epi8(_mm256_cmpgt_epi8(lhs.reg(), rhs.reg())));
1687#if defined(HI_HAS_AVX)
1688 if constexpr (is_f64x4) {
1689 return static_cast<std::size_t>(_mm256_movemask_pd(_mm256_cmp_pd(lhs.reg(), rhs.reg(), _CMP_GT_OQ)));
1690 }
else if constexpr (is_f32x8) {
1691 return static_cast<std::size_t>(_mm256_movemask_ps(_mm256_cmp_ps(lhs.reg(), rhs.reg(), _CMP_GT_OQ)));
1694#if defined(HI_HAS_SSE4_1)
1695 if constexpr (is_i64x2) {
1696 return static_cast<std::size_t>(_mm_movemask_pd(_mm_castsi128_pd(_mm_cmpgt_epi64(lhs.reg(), rhs.reg()))));
1699#if defined(HI_HAS_SSE2)
1700 if constexpr (is_f64x2) {
1701 return static_cast<std::size_t>(_mm_movemask_pd(_mm_cmpgt_pd(lhs.reg(), rhs.reg())));
1702 }
else if constexpr (is_i32x4) {
1703 return static_cast<std::size_t>(_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(lhs.reg(), rhs.reg()))));
1704 }
else if constexpr (is_i8x16) {
1705 return static_cast<std::size_t>(_mm_movemask_epi8(_mm_cmpgt_epi8(lhs.reg(), rhs.reg())));
1708#if defined(HI_HAS_SSE)
1709 if constexpr (is_f32x4) {
1710 return static_cast<std::size_t>(_mm_movemask_ps(_mm_cmpgt_ps(lhs.reg(), rhs.reg())));
// Scalar fallback: set bit i when lhs[i] > rhs[i].
1717 r |=
static_cast<std::size_t>(lhs.v[i] > rhs.v[i]) << i;
// lt(): per-lane less-than bit-mask; fallback reuses gt with swapped args.
1722 [[nodiscard]]
friend constexpr std::size_t lt(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
1725 if (not std::is_constant_evaluated()) {
1726#if defined(HI_HAS_AVX)
1727 if constexpr (is_f64x4) {
1728 return static_cast<std::size_t>(_mm256_movemask_pd(_mm256_cmp_pd(lhs.reg(), rhs.reg(), _CMP_LT_OQ)));
1729 }
else if constexpr (is_f32x8) {
1730 return static_cast<std::size_t>(_mm256_movemask_ps(_mm256_cmp_ps(lhs.reg(), rhs.reg(), _CMP_LT_OQ)));
1733#if defined(HI_HAS_SSE2)
1734 if constexpr (is_f64x2) {
1735 return static_cast<std::size_t>(_mm_movemask_pd(_mm_cmplt_pd(lhs.reg(), rhs.reg())));
1736 }
else if constexpr (is_i32x4) {
1737 return static_cast<std::size_t>(_mm_movemask_ps(_mm_castsi128_ps(_mm_cmplt_epi32(lhs.reg(), rhs.reg()))));
1738 }
else if constexpr (is_i8x16) {
1739 return static_cast<std::size_t>(_mm_movemask_epi8(_mm_cmplt_epi8(lhs.reg(), rhs.reg())));
1742#if defined(HI_HAS_SSE)
1743 if constexpr (is_f32x4) {
1744 return static_cast<std::size_t>(_mm_movemask_ps(_mm_cmplt_ps(lhs.reg(), rhs.reg())));
1750 return gt(rhs, lhs);
// ge(): per-lane greater-or-equal bit-mask; fallback is gt | eq.
1753 [[nodiscard]]
friend constexpr std::size_t ge(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
1756 if (not std::is_constant_evaluated()) {
1757#if defined(HI_HAS_AVX)
1758 if constexpr (is_f64x4) {
1759 return static_cast<std::size_t>(_mm256_movemask_pd(_mm256_cmp_pd(lhs.reg(), rhs.reg(), _CMP_GE_OQ)));
1760 }
else if constexpr (is_f32x8) {
1761 return static_cast<std::size_t>(_mm256_movemask_ps(_mm256_cmp_ps(lhs.reg(), rhs.reg(), _CMP_GE_OQ)));
1764#if defined(HI_HAS_SSE2)
1765 if constexpr (is_f64x2) {
1766 return static_cast<std::size_t>(_mm_movemask_pd(_mm_cmpge_pd(lhs.reg(), rhs.reg())));
1769#if defined(HI_HAS_SSE)
1770 if constexpr (is_f32x4) {
1771 return static_cast<std::size_t>(_mm_movemask_ps(_mm_cmpge_ps(lhs.reg(), rhs.reg())));
1777 return gt(lhs, rhs) | eq(lhs, rhs);
// le(): per-lane less-or-equal bit-mask; fallback is gt(rhs,lhs) | eq.
1780 [[nodiscard]]
friend constexpr std::size_t le(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
1783 if (not std::is_constant_evaluated()) {
1784#if defined(HI_HAS_AVX)
1785 if constexpr (is_f64x4) {
1786 return static_cast<std::size_t>(_mm256_movemask_pd(_mm256_cmp_pd(lhs.reg(), rhs.reg(), _CMP_LE_OQ)));
1787 }
else if constexpr (is_f32x8) {
1788 return static_cast<std::size_t>(_mm256_movemask_ps(_mm256_cmp_ps(lhs.reg(), rhs.reg(), _CMP_LE_OQ)));
1791#if defined(HI_HAS_SSE2)
1792 if constexpr (is_f64x2) {
1793 return static_cast<std::size_t>(_mm_movemask_pd(_mm_cmple_pd(lhs.reg(), rhs.reg())));
1796#if defined(HI_HAS_SSE)
1797 if constexpr (is_f32x4) {
1798 return static_cast<std::size_t>(_mm_movemask_ps(_mm_cmple_ps(lhs.reg(), rhs.reg())));
1804 return gt(rhs, lhs) | eq(rhs, lhs);
// gt_mask(): per-lane greater-than returning a lane mask array —
// each lane is all-ones (bit-pattern ~0) when lhs[i] > rhs[i], else 0.
1807 [[nodiscard]]
friend constexpr numeric_array gt_mask(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
1809 if (not std::is_constant_evaluated()) {
1810#if defined(HI_HAS_SSE4_2)
1811 if constexpr (is_i64x2) {
1812 return numeric_array{_mm_cmpgt_epi64(lhs.reg(), rhs.reg())};
1815#if defined(HI_HAS_SSE2)
1816 if constexpr (is_i32x4) {
1817 return numeric_array{_mm_cmpgt_epi32(lhs.reg(), rhs.reg())};
1818 }
else if constexpr (is_i16x8) {
1819 return numeric_array{_mm_cmpgt_epi16(lhs.reg(), rhs.reg())};
1820 }
else if constexpr (is_i8x16) {
1821 return numeric_array{_mm_cmpgt_epi8(lhs.reg(), rhs.reg())};
1824#if defined(HI_HAS_SSE)
1825 if constexpr (is_f32x4) {
1826 return numeric_array{_mm_cmpgt_ps(lhs.reg(), rhs.reg())};
// Scalar fallback: 'ones' is T with every bit set (bit_cast of ~0).
1831 using uint_type = make_uintxx_t<
sizeof(T) * CHAR_BIT>;
1832 constexpr auto ones = std::bit_cast<T>(~uint_type{0});
1834 auto r = numeric_array{};
1836 r[i] = lhs.v[i] > rhs.v[i] ? ones : T{0};
// operator==: all lanes equal, expressed as "no lane differs".
1841 [[nodiscard]]
friend constexpr bool operator==(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
1843 return not ne(lhs, rhs);
// operator<<: logical left shift of every lane by the same count.
// Float vectors are shifted by their bit representation (via casts).
1846 [[nodiscard]]
friend constexpr numeric_array operator<<(numeric_array
const& lhs,
unsigned int rhs)
noexcept
1848 if (not std::is_constant_evaluated()) {
1849#if defined(HI_HAS_AVX2)
1850 if constexpr (is_f64x4) {
1851 return numeric_array{_mm256_castsi256_pd(_mm256_slli_epi64(_mm256_castpd_si256(lhs.reg()), rhs))};
1852 }
else if constexpr (is_f32x8) {
1853 return numeric_array{_mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(lhs.reg()), rhs))};
1854 }
else if constexpr (is_i64x4 or is_u64x4) {
1855 return numeric_array{_mm256_slli_epi64(lhs.reg(), rhs)};
1856 }
else if constexpr (is_i32x8 or is_u32x8) {
1857 return numeric_array{_mm256_slli_epi32(lhs.reg(), rhs)};
1858 }
else if constexpr (is_i16x16 or is_u16x16) {
1859 return numeric_array{_mm256_slli_epi16(lhs.reg(), rhs)};
1862#if defined(HI_HAS_SSE2)
1863 if constexpr (is_f64x2) {
1864 return numeric_array{_mm_castsi128_pd(_mm_slli_epi64(_mm_castpd_si128(lhs.reg()), rhs))};
1865 }
else if constexpr (is_f32x4) {
1866 return numeric_array{_mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(lhs.reg()), rhs))};
1867 }
else if constexpr (is_i64x2 or is_u64x2) {
1868 return numeric_array{_mm_slli_epi64(lhs.reg(), rhs)};
1869 }
else if constexpr (is_i32x4 or is_u32x4) {
1870 return numeric_array{_mm_slli_epi32(lhs.reg(), rhs)};
1871 }
else if constexpr (is_i16x8 or is_u16x8) {
1872 return numeric_array{_mm_slli_epi16(lhs.reg(), rhs)};
// Scalar fallback: element-wise shift.
1877 auto r = numeric_array{};
1879 r.v[i] = lhs.v[i] << rhs;
// operator>>: right shift of every lane — arithmetic (srai) for signed
// integer lanes, logical (srli) for unsigned and for the bit pattern of
// floats. Note no 64-bit arithmetic shift intrinsic exists pre-AVX-512,
// hence only u64 branches.
1884 [[nodiscard]]
friend constexpr numeric_array operator>>(numeric_array
const& lhs,
unsigned int rhs)
noexcept
1886 if (not std::is_constant_evaluated()) {
1887#if defined(HI_HAS_AVX2)
1888 if constexpr (is_f64x4) {
1889 return numeric_array{_mm256_castsi256_pd(_mm256_srli_epi64(_mm256_castpd_si256(lhs.reg()), rhs))};
1890 }
else if constexpr (is_f32x8) {
1891 return numeric_array{_mm256_castsi256_ps(_mm256_srli_epi32(_mm256_castps_si256(lhs.reg()), rhs))};
1892 }
else if constexpr (is_u64x4) {
1893 return numeric_array{_mm256_srli_epi64(lhs.reg(), rhs)};
1894 }
else if constexpr (is_i32x8) {
1895 return numeric_array{_mm256_srai_epi32(lhs.reg(), rhs)};
1896 }
else if constexpr (is_u32x8) {
1897 return numeric_array{_mm256_srli_epi32(lhs.reg(), rhs)};
1898 }
else if constexpr (is_i16x16) {
1899 return numeric_array{_mm256_srai_epi16(lhs.reg(), rhs)};
1900 }
else if constexpr (is_u16x16) {
1901 return numeric_array{_mm256_srli_epi16(lhs.reg(), rhs)};
1904#if defined(HI_HAS_SSE2)
1905 if constexpr (is_f64x2) {
1906 return numeric_array{_mm_castsi128_pd(_mm_srli_epi64(_mm_castpd_si128(lhs.reg()), rhs))};
1907 }
else if constexpr (is_f32x4) {
1908 return numeric_array{_mm_castsi128_ps(_mm_srli_epi32(_mm_castps_si128(lhs.reg()), rhs))};
1909 }
else if constexpr (is_u64x2) {
1910 return numeric_array{_mm_srli_epi64(lhs.reg(), rhs)};
1911 }
else if constexpr (is_i32x4) {
1912 return numeric_array{_mm_srai_epi32(lhs.reg(), rhs)};
1913 }
else if constexpr (is_u32x4) {
1914 return numeric_array{_mm_srli_epi32(lhs.reg(), rhs)};
1915 }
else if constexpr (is_i16x8) {
1916 return numeric_array{_mm_srai_epi16(lhs.reg(), rhs)};
1917 }
else if constexpr (is_u16x8) {
1918 return numeric_array{_mm_srli_epi16(lhs.reg(), rhs)};
// Scalar fallback: >> on signed lanes is implementation/arch arithmetic shift.
1923 auto r = numeric_array{};
1925 r.v[i] = lhs.v[i] >> rhs;
// rotl: rotate each lane left by rhs bits (0 < rhs < bit-width required,
// avoiding the UB of a full-width shift).
1936 hi_axiom(rhs > 0 and rhs <
sizeof(value_type) * CHAR_BIT);
1938 hilet remainder = narrow<unsigned int>(
sizeof(value_type) * CHAR_BIT - rhs);
1940 return (lhs << rhs) | (lhs >> remainder);
// rotr: rotate each lane right by rhs bits, same precondition.
1949 hi_axiom(rhs > 0 and rhs <
sizeof(value_type) * CHAR_BIT);
1951 hilet remainder = narrow<unsigned int>(
sizeof(value_type) * CHAR_BIT - rhs);
1953 return (lhs >> rhs) | (lhs << remainder);
1958 if (not std::is_constant_evaluated()) {
1959#if defined(HI_HAS_AVX2)
1960 if constexpr (is_i64x4 or is_u64x4 or is_i32x8 or is_u32x8 or is_i16x8 or is_u16x8 or is_i8x32 or is_u8x32) {
1961 return numeric_array{_mm256_or_si256(lhs.reg(), rhs.reg())};
1964#if defined(HI_HAS_AVX)
1965 if constexpr (is_f64x4) {
1966 return numeric_array{_mm256_or_pd(lhs.reg(), rhs.reg())};
1967 }
else if constexpr (is_f32x8) {
1968 return numeric_array{_mm256_or_ps(lhs.reg(), rhs.reg())};
1969 }
else if constexpr (is_i64x4 or is_u64x4 or is_i32x8 or is_u32x8 or is_i16x8 or is_u16x8 or is_i8x32 or is_u8x32) {
1970 return numeric_array{
1971 _mm256_castps_si256(_mm256_or_ps(_mm256_castsi256_ps(lhs.reg()), _mm256_castsi256_ps(rhs.reg())))};
1974#if defined(HI_HAS_SSE2)
1975 if constexpr (is_f64x2) {
1976 return numeric_array{_mm_or_pd(lhs.reg(), rhs.reg())};
1977 }
else if constexpr (is_i64x2 or is_u64x2 or is_i32x4 or is_u32x4 or is_i16x8 or is_u16x8 or is_i8x16 or is_i8x16) {
1978 return numeric_array{_mm_or_si128(lhs.reg(), rhs.reg())};
1981#if defined(HI_HAS_SSE)
1982 if constexpr (is_f64x2) {
1983 return numeric_array{_mm_castps_pd(_mm_or_ps(_mm_castps_ps(lhs.reg()), _mm_castps_ps(rhs.reg())))};
1985 }
else if constexpr (is_f32x4) {
1986 return numeric_array{_mm_or_ps(lhs.reg(), rhs.reg())};
1988 }
else if constexpr (is_i64x2 or is_u64x2 or is_i32x4 or is_u32x4 or is_i16x8 or is_u16x8 or is_i8x16 or is_i8x16) {
1989 return numeric_array{_mm_castps_si128(_mm_or_ps(_mm_castsi128_ps(lhs.reg()), _mm_castsi128_ps(rhs.reg())))};
1994 using uint_type = make_uintxx_t<
sizeof(T) * CHAR_BIT>;
1996 auto r = numeric_array{};
1999 std::bit_cast<T>(
static_cast<uint_type
>(std::bit_cast<uint_type>(lhs.v[i]) | std::bit_cast<uint_type>(rhs.v[i])));
// Scalar | array and array | scalar: broadcast the scalar then forward
// to the array-array operator|.
2004 [[nodiscard]]
friend constexpr numeric_array
operator|(numeric_array
const& lhs, T
const& rhs)
noexcept
2006 return lhs | broadcast(rhs);
2009 [[nodiscard]]
friend constexpr numeric_array
operator|(T
const& lhs, numeric_array
const& rhs)
noexcept
2011 return broadcast(lhs) | rhs;
2014 [[nodiscard]]
friend constexpr numeric_array operator&(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
2016 if (not std::is_constant_evaluated()) {
2017#if defined(HI_HAS_AVX2)
2018 if constexpr (is_i64x4 or is_u64x4 or is_i32x8 or is_u32x8 or is_i16x8 or is_u16x8 or is_i8x32 or is_u8x32) {
2019 return numeric_array{_mm256_and_si256(lhs.reg(), rhs.reg())};
2022#if defined(HI_HAS_AVX)
2023 if constexpr (is_f64x4) {
2024 return numeric_array{_mm256_and_pd(lhs.reg(), rhs.reg())};
2025 }
else if constexpr (is_f32x8) {
2026 return numeric_array{_mm256_and_ps(lhs.reg(), rhs.reg())};
2027 }
else if constexpr (is_i64x4 or is_u64x4 or is_i32x8 or is_u32x8 or is_i16x8 or is_u16x8 or is_i8x32 or is_u8x32) {
2028 return numeric_array{
2029 _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(lhs.reg()), _mm256_castsi256_ps(rhs.reg())))};
2032#if defined(HI_HAS_SSE2)
2033 if constexpr (is_f64x2) {
2034 return numeric_array{_mm_and_pd(lhs.reg(), rhs.reg())};
2035 }
else if constexpr (is_i64x2 or is_u64x2 or is_i32x4 or is_u32x4 or is_i16x8 or is_u16x8 or is_i8x16 or is_i8x16) {
2036 return numeric_array{_mm_and_si128(lhs.reg(), rhs.reg())};
2039#if defined(HI_HAS_SSE)
2040 if constexpr (is_f64x2) {
2041 return numeric_array{_mm_castps_pd(_mm_and_ps(_mm_castps_ps(lhs.reg()), _mm_castps_ps(rhs.reg())))};
2043 }
else if constexpr (is_f32x4) {
2044 return numeric_array{_mm_and_ps(lhs.reg(), rhs.reg())};
2046 }
else if constexpr (is_i64x2 or is_u64x2 or is_i32x4 or is_u32x4 or is_i16x8 or is_u16x8 or is_i8x16 or is_i8x16) {
2047 return numeric_array{_mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(lhs.reg()), _mm_castsi128_ps(rhs.reg())))};
2052 auto r = numeric_array{};
2054 r.v[i] = lhs.v[i] & rhs.v[i];
// Scalar & array and array & scalar: broadcast then forward.
2059 [[nodiscard]]
friend constexpr numeric_array operator&(numeric_array
const& lhs, T
const& rhs)
noexcept
2061 return lhs & broadcast(rhs);
2064 [[nodiscard]]
friend constexpr numeric_array operator&(T
const& lhs, numeric_array
const& rhs)
noexcept
2066 return broadcast(lhs) & rhs;
2069 [[nodiscard]]
friend constexpr numeric_array operator^(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
2071 if (not std::is_constant_evaluated()) {
2072#if defined(HI_HAS_AVX2)
2073 if constexpr (is_i64x4 or is_u64x4 or is_i32x8 or is_u32x8 or is_i16x8 or is_u16x8 or is_i8x32 or is_u8x32) {
2074 return numeric_array{_mm256_xor_si256(lhs.reg(), rhs.reg())};
2077#if defined(HI_HAS_AVX)
2078 if constexpr (is_f64x4) {
2079 return numeric_array{_mm256_xor_pd(lhs.reg(), rhs.reg())};
2080 }
else if constexpr (is_f32x8) {
2081 return numeric_array{_mm256_xor_ps(lhs.reg(), rhs.reg())};
2082 }
else if constexpr (is_i64x4 or is_u64x4 or is_i32x8 or is_u32x8 or is_i16x8 or is_u16x8 or is_i8x32 or is_u8x32) {
2083 return numeric_array{
2084 _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(lhs.reg()), _mm256_castsi256_ps(rhs.reg())))};
2087#if defined(HI_HAS_SSE2)
2088 if constexpr (is_f64x2) {
2089 return numeric_array{_mm_xor_pd(lhs.reg(), rhs.reg())};
2090 }
else if constexpr (is_i64x2 or is_u64x2 or is_i32x4 or is_u32x4 or is_i16x8 or is_u16x8 or is_i8x16 or is_i8x16) {
2091 return numeric_array{_mm_xor_si128(lhs.reg(), rhs.reg())};
2094#if defined(HI_HAS_SSE)
2095 if constexpr (is_f64x2) {
2096 return numeric_array{_mm_castps_pd(_mm_xor_ps(_mm_castps_ps(lhs.reg()), _mm_castps_ps(rhs.reg())))};
2098 }
else if constexpr (is_f32x4) {
2099 return numeric_array{_mm_xor_ps(lhs.reg(), rhs.reg())};
2101 }
else if constexpr (is_i64x2 or is_u64x2 or is_i32x4 or is_u32x4 or is_i16x8 or is_u16x8 or is_i8x16 or is_i8x16) {
2102 return numeric_array{_mm_castps_si128(_mm_xor_ps(_mm_castsi128_ps(lhs.reg()), _mm_castsi128_ps(rhs.reg())))};
2107 auto r = numeric_array{};
2109 r.v[i] = lhs.v[i] ^ rhs.v[i];
// Scalar ^ array and array ^ scalar: broadcast then forward.
2114 [[nodiscard]]
friend constexpr numeric_array operator^(numeric_array
const& lhs, T
const& rhs)
noexcept
2116 return lhs ^ broadcast(rhs);
2119 [[nodiscard]]
friend constexpr numeric_array operator^(T
const& lhs, numeric_array
const& rhs)
noexcept
2121 return broadcast(lhs) ^ rhs;
// operator+: element-wise addition (wrapping for integer lanes, per the
// underlying paddX instructions).
2124 [[nodiscard]]
friend constexpr numeric_array operator+(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
2126 if (not std::is_constant_evaluated()) {
2127#if defined(HI_HAS_AVX2)
2128 if constexpr (is_i64x4 or is_u64x4) {
2129 return numeric_array{_mm256_add_epi64(lhs.reg(), rhs.reg())};
2130 }
else if constexpr (is_i32x8 or is_u32x8) {
2131 return numeric_array{_mm256_add_epi32(lhs.reg(), rhs.reg())};
2132 }
else if constexpr (is_i16x16 or is_u16x16) {
2133 return numeric_array{_mm256_add_epi16(lhs.reg(), rhs.reg())};
2134 }
else if constexpr (is_i8x32 or is_u8x32) {
2135 return numeric_array{_mm256_add_epi8(lhs.reg(), rhs.reg())};
2138#if defined(HI_HAS_AVX)
2139 if constexpr (is_f64x4) {
2140 return numeric_array{_mm256_add_pd(lhs.reg(), rhs.reg())};
2141 }
else if constexpr (is_f32x8) {
2142 return numeric_array{_mm256_add_ps(lhs.reg(), rhs.reg())};
2145#if defined(HI_HAS_SSE2)
2146 if constexpr (is_f64x2) {
2147 return numeric_array{_mm_add_pd(lhs.reg(), rhs.reg())};
2148 }
else if constexpr (is_i64x2 or is_u64x2) {
2149 return numeric_array{_mm_add_epi64(lhs.reg(), rhs.reg())};
2150 }
else if constexpr (is_i32x4 or is_u32x4) {
2151 return numeric_array{_mm_add_epi32(lhs.reg(), rhs.reg())};
2152 }
else if constexpr (is_i16x8 or is_u16x8) {
2153 return numeric_array{_mm_add_epi16(lhs.reg(), rhs.reg())};
2154 }
else if constexpr (is_i8x16 or is_u8x16) {
2155 return numeric_array{_mm_add_epi8(lhs.reg(), rhs.reg())};
2158#if defined(HI_HAS_SSE)
2159 if constexpr (is_f32x4) {
2160 return numeric_array{_mm_add_ps(lhs.reg(), rhs.reg())};
// Scalar fallback.
2165 auto r = numeric_array{};
2167 r.v[i] = lhs.v[i] + rhs.v[i];
// Scalar + array and array + scalar: broadcast then forward.
2172 [[nodiscard]]
friend constexpr numeric_array operator+(numeric_array
const& lhs, T
const& rhs)
noexcept
2174 return lhs + broadcast(rhs);
2177 [[nodiscard]]
friend constexpr numeric_array operator+(T
const& lhs, numeric_array
const& rhs)
noexcept
2179 return broadcast(lhs) + rhs;
// operator-: element-wise subtraction (wrapping for integer lanes).
2182 [[nodiscard]]
friend constexpr numeric_array operator-(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
2184 if (not std::is_constant_evaluated()) {
2185#if defined(HI_HAS_AVX2)
2186 if constexpr (is_i64x4 or is_u64x4) {
2187 return numeric_array{_mm256_sub_epi64(lhs.reg(), rhs.reg())};
2188 }
else if constexpr (is_i32x8 or is_u32x8) {
2189 return numeric_array{_mm256_sub_epi32(lhs.reg(), rhs.reg())};
2190 }
else if constexpr (is_i16x16 or is_u16x16) {
2191 return numeric_array{_mm256_sub_epi16(lhs.reg(), rhs.reg())};
2192 }
else if constexpr (is_i8x32 or is_u8x32) {
2193 return numeric_array{_mm256_sub_epi8(lhs.reg(), rhs.reg())};
2196#if defined(HI_HAS_AVX)
2197 if constexpr (is_f64x4) {
2198 return numeric_array{_mm256_sub_pd(lhs.reg(), rhs.reg())};
2199 }
else if constexpr (is_f32x8) {
2200 return numeric_array{_mm256_sub_ps(lhs.reg(), rhs.reg())};
2203#if defined(HI_HAS_SSE2)
2204 if constexpr (is_f64x2) {
2205 return numeric_array{_mm_sub_pd(lhs.reg(), rhs.reg())};
2206 }
else if constexpr (is_i64x2 or is_u64x2) {
2207 return numeric_array{_mm_sub_epi64(lhs.reg(), rhs.reg())};
2208 }
else if constexpr (is_i32x4 or is_u32x4) {
2209 return numeric_array{_mm_sub_epi32(lhs.reg(), rhs.reg())};
2210 }
else if constexpr (is_i16x8 or is_u16x8) {
2211 return numeric_array{_mm_sub_epi16(lhs.reg(), rhs.reg())};
2212 }
else if constexpr (is_i8x16 or is_u8x16) {
2213 return numeric_array{_mm_sub_epi8(lhs.reg(), rhs.reg())};
2216#if defined(HI_HAS_SSE)
2217 if constexpr (is_f32x4) {
2218 return numeric_array{_mm_sub_ps(lhs.reg(), rhs.reg())};
// Scalar fallback.
2223 auto r = numeric_array{};
2225 r.v[i] = lhs.v[i] - rhs.v[i];
// Scalar - array and array - scalar: broadcast then forward.
2230 [[nodiscard]]
friend constexpr numeric_array operator-(numeric_array
const& lhs, T
const& rhs)
noexcept
2232 return lhs - broadcast(rhs);
2235 [[nodiscard]]
friend constexpr numeric_array operator-(T
const& lhs, numeric_array
const& rhs)
noexcept
2237 return broadcast(lhs) - rhs;
2240 [[nodiscard]]
friend constexpr numeric_array operator*(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
2242 if (not std::is_constant_evaluated()) {
2243#if defined(HI_HAS_AVX2)
2244 if constexpr (is_i32x8) {
2245 return numeric_array{_mm256_mul_epi32(lhs.reg(), rhs.reg())};
2246 }
else if constexpr (is_u32x8) {
2247 return numeric_array{_mm256_mul_epu32(lhs.reg(), rhs.reg())};
2250#if defined(HI_HAS_AVX)
2251 if constexpr (is_f64x4) {
2252 return numeric_array{_mm256_mul_pd(lhs.reg(), rhs.reg())};
2253 }
else if constexpr (is_f32x8) {
2254 return numeric_array{_mm256_mul_ps(lhs.reg(), rhs.reg())};
2257#if defined(HI_HAS_SSE4_1)
2258 if constexpr (is_i32x4) {
2259 return numeric_array{_mm_mul_epi32(lhs.reg(), rhs.reg())};
2260 }
else if constexpr (is_f16x4) {
2261 return numeric_array{numeric_array<float, 4>{lhs} * numeric_array<float, 4>{rhs}};
2264#if defined(HI_HAS_SSE2)
2265 if constexpr (is_f64x2) {
2266 return numeric_array{_mm_mul_pd(lhs.reg(), rhs.reg())};
2269#if defined(HI_HAS_SSE)
2270 if constexpr (is_f32x4) {
2271 return numeric_array{_mm_mul_ps(lhs.reg(), rhs.reg())};
2276 auto r = numeric_array{};
2278 r.v[i] = lhs.v[i] * rhs.v[i];
// Scalar * array and array * scalar: broadcast then forward.
2283 [[nodiscard]]
friend constexpr numeric_array operator*(numeric_array
const& lhs, T
const& rhs)
noexcept
2285 return lhs * broadcast(rhs);
2288 [[nodiscard]]
friend constexpr numeric_array operator*(T
const& lhs, numeric_array
const& rhs)
noexcept
2290 return broadcast(lhs) * rhs;
// operator/: element-wise division; SIMD paths exist for floats only,
// integer division always takes the scalar loop.
2293 [[nodiscard]]
friend constexpr numeric_array operator/(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
2295 if (not std::is_constant_evaluated()) {
2296#if defined(HI_HAS_AVX)
2297 if constexpr (is_f64x4) {
2298 return numeric_array{_mm256_div_pd(lhs.reg(), rhs.reg())};
2299 }
else if constexpr (is_f32x8) {
2300 return numeric_array{_mm256_div_ps(lhs.reg(), rhs.reg())};
2303#if defined(HI_HAS_SSE2)
2304 if constexpr (is_f64x2) {
2305 return numeric_array{_mm_div_pd(lhs.reg(), rhs.reg())};
2308#if defined(HI_HAS_SSE)
2309 if constexpr (is_f32x4) {
2310 return numeric_array{_mm_div_ps(lhs.reg(), rhs.reg())};
// Scalar fallback; integer lanes divide-by-zero is UB as usual.
2315 auto r = numeric_array{};
2317 r.v[i] = lhs.v[i] / rhs.v[i];
// Scalar / array and array / scalar: broadcast then forward.
2322 [[nodiscard]]
friend constexpr numeric_array operator/(numeric_array
const& lhs, T
const& rhs)
noexcept
2324 return lhs / broadcast(rhs);
2327 [[nodiscard]]
friend constexpr numeric_array operator/(T
const& lhs, numeric_array
const& rhs)
noexcept
2329 return broadcast(lhs) / rhs;
// operator%: element-wise remainder computed as lhs - trunc/floor(lhs/rhs)*rhs.
// NOTE(review): the line defining div_result was dropped by extraction —
// presumably "hilet div_result = floor(lhs / rhs);"; confirm against upstream.
2332 [[nodiscard]]
friend constexpr numeric_array operator%(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
2335 return lhs - (div_result * rhs);
// Scalar % array and array % scalar: broadcast then forward.
2338 [[nodiscard]]
friend constexpr numeric_array operator%(numeric_array
const& lhs, T
const& rhs)
noexcept
2340 return lhs % broadcast(rhs);
2343 [[nodiscard]]
friend constexpr numeric_array operator%(T
const& lhs, numeric_array
const& rhs)
noexcept
2345 return broadcast(lhs) % rhs;
// min(): element-wise minimum. The SSE generation split reflects which
// min intrinsics exist where: SSE2 has epi16/epu8, SSE4.1 adds
// epi32/epu32/epu16/epi8.
2348 [[nodiscard]]
friend constexpr numeric_array
min(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
2350 if (not std::is_constant_evaluated()) {
2351#if defined(HI_HAS_AVX2)
2352 if constexpr (is_i32x8) {
2353 return numeric_array{_mm256_min_epi32(lhs.reg(), rhs.reg())};
2354 }
else if constexpr (is_u32x8) {
2355 return numeric_array{_mm256_min_epu32(lhs.reg(), rhs.reg())};
2356 }
else if constexpr (is_i16x16) {
2357 return numeric_array{_mm256_min_epi16(lhs.reg(), rhs.reg())};
2358 }
else if constexpr (is_u16x16) {
2359 return numeric_array{_mm256_min_epu16(lhs.reg(), rhs.reg())};
2360 }
else if constexpr (is_i8x32) {
2361 return numeric_array{_mm256_min_epi8(lhs.reg(), rhs.reg())};
2362 }
else if constexpr (is_u8x32) {
2363 return numeric_array{_mm256_min_epu8(lhs.reg(), rhs.reg())};
2366#if defined(HI_HAS_AVX)
2367 if constexpr (is_f64x4) {
2368 return numeric_array{_mm256_min_pd(lhs.reg(), rhs.reg())};
2369 }
else if constexpr (is_f32x8) {
2370 return numeric_array{_mm256_min_ps(lhs.reg(), rhs.reg())};
2373#if defined(HI_HAS_SSE4_1)
2374 if constexpr (is_i32x4) {
2375 return numeric_array{_mm_min_epi32(lhs.reg(), rhs.reg())};
2376 }
else if constexpr (is_u32x4) {
2377 return numeric_array{_mm_min_epu32(lhs.reg(), rhs.reg())};
2378 }
else if constexpr (is_u16x8) {
2379 return numeric_array{_mm_min_epu16(lhs.reg(), rhs.reg())};
2380 }
else if constexpr (is_i8x16) {
2381 return numeric_array{_mm_min_epi8(lhs.reg(), rhs.reg())};
2384#if defined(HI_HAS_SSE2)
2385 if constexpr (is_f64x2) {
2386 return numeric_array{_mm_min_pd(lhs.reg(), rhs.reg())};
2387 }
else if constexpr (is_i16x8) {
2388 return numeric_array{_mm_min_epi16(lhs.reg(), rhs.reg())};
2389 }
else if constexpr (is_u8x16) {
2390 return numeric_array{_mm_min_epu8(lhs.reg(), rhs.reg())};
2393#if defined(HI_HAS_SSE)
2394 if constexpr (is_f32x4) {
2395 return numeric_array{_mm_min_ps(lhs.reg(), rhs.reg())};
// Scalar fallback via std::min.
2400 auto r = numeric_array{};
2402 r.v[i] =
std::min(lhs.v[i], rhs.v[i]);
// max(): element-wise maximum, mirroring min() above.
2407 [[nodiscard]]
friend constexpr numeric_array
max(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
2409 if (not std::is_constant_evaluated()) {
2410#if defined(HI_HAS_AVX2)
2411 if constexpr (is_i32x8) {
2412 return numeric_array{_mm256_max_epi32(lhs.reg(), rhs.reg())};
2413 }
else if constexpr (is_u32x8) {
2414 return numeric_array{_mm256_max_epu32(lhs.reg(), rhs.reg())};
2415 }
else if constexpr (is_i16x16) {
2416 return numeric_array{_mm256_max_epi16(lhs.reg(), rhs.reg())};
2417 }
else if constexpr (is_u16x16) {
2418 return numeric_array{_mm256_max_epu16(lhs.reg(), rhs.reg())};
2419 }
else if constexpr (is_i8x32) {
2420 return numeric_array{_mm256_max_epi8(lhs.reg(), rhs.reg())};
2421 }
else if constexpr (is_u8x32) {
2422 return numeric_array{_mm256_max_epu8(lhs.reg(), rhs.reg())};
2425#if defined(HI_HAS_AVX)
2426 if constexpr (is_f64x4) {
2427 return numeric_array{_mm256_max_pd(lhs.reg(), rhs.reg())};
2428 }
else if constexpr (is_f32x8) {
2429 return numeric_array{_mm256_max_ps(lhs.reg(), rhs.reg())};
2432#if defined(HI_HAS_SSE4_1)
2433 if constexpr (is_i32x4) {
2434 return numeric_array{_mm_max_epi32(lhs.reg(), rhs.reg())};
2435 }
else if constexpr (is_u32x4) {
2436 return numeric_array{_mm_max_epu32(lhs.reg(), rhs.reg())};
2437 }
else if constexpr (is_u16x8) {
2438 return numeric_array{_mm_max_epu16(lhs.reg(), rhs.reg())};
2439 }
else if constexpr (is_i8x16) {
2440 return numeric_array{_mm_max_epi8(lhs.reg(), rhs.reg())};
2443#if defined(HI_HAS_SSE2)
2444 if constexpr (is_f64x2) {
2445 return numeric_array{_mm_max_pd(lhs.reg(), rhs.reg())};
2446 }
else if constexpr (is_i16x8) {
2447 return numeric_array{_mm_max_epi16(lhs.reg(), rhs.reg())};
2448 }
else if constexpr (is_u8x16) {
2449 return numeric_array{_mm_max_epu8(lhs.reg(), rhs.reg())};
2452#if defined(HI_HAS_SSE)
2453 if constexpr (is_f32x4) {
2454 return numeric_array{_mm_max_ps(lhs.reg(), rhs.reg())};
// Scalar fallback via std::max.
2459 auto r = numeric_array{};
2461 r.v[i] =
std::max(lhs.v[i], rhs.v[i]);
// clamp(): element-wise clamp to [low, high] as min(max(lhs, low), high).
// Note: if low > high in some lane the result is high (min wins last).
2466 [[nodiscard]]
friend constexpr numeric_array
2467 clamp(numeric_array
const& lhs, numeric_array
const& low, numeric_array
const& high)
noexcept
2469 return min(
max(lhs, low), high);
// hadd(): horizontal pairwise add — result lanes are sums of adjacent
// pairs, first from lhs then from rhs (hadd instruction lane order).
2472 [[nodiscard]]
friend constexpr numeric_array hadd(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
2474 if (not std::is_constant_evaluated()) {
2475#if defined(HI_HAS_AVX2)
2476 if constexpr (is_i32x8 or is_u32x8) {
2477 return numeric_array{_mm256_hadd_epi32(lhs.reg(), rhs.reg())};
2478 }
else if constexpr (is_i16x16 or is_u16x16) {
2479 return numeric_array{_mm256_hadd_epi16(lhs.reg(), rhs.reg())};
2482#if defined(HI_HAS_AVX)
2483 if constexpr (is_f64x4) {
2484 return numeric_array{_mm256_hadd_pd(lhs.reg(), rhs.reg())};
2485 }
else if constexpr (is_f32x8) {
2486 return numeric_array{_mm256_hadd_ps(lhs.reg(), rhs.reg())};
2489#if defined(HI_HAS_SSSE3)
2490 if constexpr (is_i32x4 or is_u32x4) {
2491 return numeric_array{_mm_hadd_epi32(lhs.reg(), rhs.reg())};
2492 }
else if constexpr (is_i16x8 or is_u16x8) {
2493 return numeric_array{_mm_hadd_epi16(lhs.reg(), rhs.reg())};
2496#if defined(HI_HAS_SSE3)
2497 if constexpr (is_f64x2) {
2498 return numeric_array{_mm_hadd_pd(lhs.reg(), rhs.reg())};
2499 }
else if constexpr (is_f32x4) {
2500 return numeric_array{_mm_hadd_ps(lhs.reg(), rhs.reg())};
// Scalar fallback: requires an even number of lanes; consume lhs pairs
// first, then rhs pairs.
2505 hi_axiom(N % 2 == 0);
2507 auto r = numeric_array{};
2511 while (src_i != N) {
2512 auto tmp = lhs[src_i++];
2513 tmp += lhs[src_i++];
2518 while (src_i != N) {
2519 auto tmp = rhs[src_i++];
2520 tmp += rhs[src_i++];
// hsub(): horizontal pairwise subtract (even lane minus odd lane),
// first from lhs then from rhs, mirroring hadd().
2526 [[nodiscard]]
friend constexpr numeric_array hsub(numeric_array
const& lhs, numeric_array
const& rhs)
noexcept
2528 if (not std::is_constant_evaluated()) {
2529#if defined(HI_HAS_AVX2)
2530 if constexpr (is_i32x8 or is_u32x8) {
2531 return numeric_array{_mm256_hsub_epi32(lhs.reg(), rhs.reg())};
2532 }
else if constexpr (is_i16x16 or is_u16x16) {
2533 return numeric_array{_mm256_hsub_epi16(lhs.reg(), rhs.reg())};
2536#if defined(HI_HAS_AVX)
2537 if constexpr (is_f64x4) {
2538 return numeric_array{_mm256_hsub_pd(lhs.reg(), rhs.reg())};
2539 }
else if constexpr (is_f32x8) {
2540 return numeric_array{_mm256_hsub_ps(lhs.reg(), rhs.reg())};
2543#if defined(HI_HAS_SSSE3)
2544 if constexpr (is_i32x4 or is_u32x4) {
2545 return numeric_array{_mm_hsub_epi32(lhs.reg(), rhs.reg())};
2546 }
else if constexpr (is_i16x8 or is_u16x8) {
2547 return numeric_array{_mm_hsub_epi16(lhs.reg(), rhs.reg())};
2550#if defined(HI_HAS_SSE3)
2551 if constexpr (is_f64x2) {
2552 return numeric_array{_mm_hsub_pd(lhs.reg(), rhs.reg())};
2553 }
else if constexpr (is_f32x4) {
2554 return numeric_array{_mm_hsub_ps(lhs.reg(), rhs.reg())};
// Scalar fallback: even number of lanes required.
2559 hi_axiom(N % 2 == 0);
2561 auto r = numeric_array{};
2565 while (src_i != N) {
2566 auto tmp = lhs[src_i++];
2567 tmp -= lhs[src_i++];
2572 while (src_i != N) {
2573 auto tmp = rhs[src_i++];
2574 tmp -= rhs[src_i++];
// addsub<Mask>: per-lane add/subtract selected by Mask, implemented by
// negating the lanes where the (inverted) mask bit is set.
2584 template<std::
size_t Mask>
2588 return lhs + neg<Mask ^ not_mask>(rhs);
// 2D cross helpers: require a 2D vector (z == 0, w == 0).
2595 hi_axiom(rhs.z() == 0.0f && rhs.is_vector());
// Normal of a 2D vector: normalized perpendicular (xy lanes only).
2603 return normalize<0b0011>(cross_2D(rhs));
// 2D cross product (scalar): lhs.x*rhs.y - lhs.y*rhs.x via shuffle + hsub.
2611 hilet tmp1 = rhs.yxwz();
2612 hilet tmp2 = lhs * tmp1;
2613 hilet tmp3 = hsub(tmp2, tmp2);
2614 return get<0>(tmp3);
// 3D cross product: (lhs.yzx * rhs.zxy) - (lhs.zxy * rhs.yzx), w passthrough.
2624 hilet a_left = lhs.yzxw();
2625 hilet b_left = rhs.zxyw();
2626 hilet left = a_left * b_left;
2628 hilet a_right = lhs.zxyw();
2629 hilet b_right = rhs.yzxw();
2630 hilet right = a_right * b_right;
2631 return left - right;
// Build pshufb indices that shift the whole 16-byte vector right by rhs
// bytes; out-of-range positions get index -1 (pshufb writes zero there —
// presumably set in the dropped else-branch).
2634 [[nodiscard]]
static constexpr numeric_array byte_srl_shuffle_indices(
unsigned int rhs)
requires(is_i8x16)
2636 static_assert(std::endian::native == std::endian::little);
2638 auto r = numeric_array{};
2639 for (
auto i = 0; i != 16; ++i) {
2640 if ((i + rhs) < 16) {
2641 r[i] = narrow_cast<int8_t>(i + rhs);
2650 [[nodiscard]]
static constexpr numeric_array byte_sll_shuffle_indices(
unsigned int rhs)
requires(is_i8x16)
2652 static_assert(std::endian::native == std::endian::little);
2654 auto r = numeric_array{};
2655 for (
auto i = 0; i != 16; ++i) {
2656 if ((i - rhs) >= 0) {
2657 r[i] = narrow_cast<int8_t>(i - rhs);
// Permute fragment: select lhs elements using the indices stored in
// rhs. Only integral element types are supported.
2669 requires(std::is_integral_v<value_type>)
2671 if (!std::is_constant_evaluated()) {
2672#if defined(HI_HAS_SSSE3)
// Runtime fast path: single-instruction byte shuffle for 16x8-bit.
2673 if constexpr (is_i8x16 or is_u8x16) {
2674 return numeric_array{_mm_shuffle_epi8(lhs.reg(), rhs.reg())};
// Scalar fallback: the low 4 bits of each index select the source element.
2682 r[i] = lhs[rhs[i] & 0xf];
// Midpoint of two points: both arguments must satisfy is_point();
// the result is the component-wise average.
2695 hi_axiom(p1.is_point());
2696 hi_axiom(p2.is_point());
2697 return (p1 + p2) * 0.5f;
// Reflect point p through the anchor point: anchor - (p - anchor),
// i.e. the point on the opposite side of anchor at equal distance.
2704 hi_axiom(p.is_point());
2705 hi_axiom(anchor.is_point());
2706 return anchor - (p - anchor);
// Transpose of a square matrix passed as one numeric_array per column.
// MSVC warning C26494 (uninitialized variable) is suppressed for the
// in-place SSE transpose macro used below.
2712 hi_warning_ignore_msvc(26494);
2713 template<
typename... Columns>
2716 static_assert(
sizeof...(Columns) == N,
"Can only transpose square matrices");
2718 if (not std::is_constant_evaluated()) {
2719#if defined(HI_HAS_SSE)
// Runtime fast path for 4x4 float: transpose four SSE registers in
// place with _MM_TRANSPOSE4_PS.
2720 if constexpr (is_f32x4 and
sizeof...(Columns) == 4) {
2722 _MM_TRANSPOSE4_PS(std::get<0>(tmp), std::get<1>(tmp), std::get<2>(tmp), std::get<3>(tmp));
2724 numeric_array{get<0>(tmp)},
2725 numeric_array{get<1>(tmp)},
2726 numeric_array{get<2>(tmp)},
2727 numeric_array{get<3>(tmp)}};
// Generic/constexpr fallback: an index-sequence lambda scatters each
// column's elements into the rows of the result.
2733 auto f = [&r, &columns... ]<
std::size_t... Ints>(std::index_sequence<Ints...>)
2735 auto tf = [&r](
auto i,
auto v) {
// Fold over all columns; the sum's value is discarded — tf() is
// invoked only for its side effect of writing into the result.
2741 static_cast<void>((tf(Ints, columns) + ...));
2743 f(std::make_index_sequence<
sizeof...(columns)>{});
2748 [[nodiscard]]
constexpr friend numeric_array
composit(numeric_array
const& under, numeric_array
const& over)
noexcept
2749 requires(N == 4 && std::is_floating_point_v<T>)
2751 if (over.is_transparent()) {
2754 if (over.is_opaque()) {
2758 hilet over_alpha = over.wwww();
2759 hilet under_alpha = under.wwww();
2761 hilet over_color = over.xyz1();
2762 hilet under_color = under.xyz1();
2764 hilet output_color = over_color * over_alpha + under_color * under_alpha * (T{1} - over_alpha);
2766 return output_color / output_color.www1();
// Composit overload for non-float element types (its requires-clause
// falls outside this excerpt): convert both operands to
// numeric_array<float, 4>, composit in float, then convert back.
2769 [[nodiscard]]
constexpr friend numeric_array
composit(numeric_array
const& under, numeric_array
const& over)
noexcept
2772 return numeric_array{
composit(
static_cast<numeric_array<float, 4>
>(under),
static_cast<numeric_array<float, 4>
>(over))};
// Fragment of the string conversion: append each element formatted
// with std::format's default "{}" representation.
2784 r += std::format(
"{}", rhs[i]);
// insert<FromElement, ToElement>(lhs, rhs): copy element FromElement of
// rhs into element ToElement of lhs; all other lhs elements pass through.
2799 template<std::
size_t FromElement, std::
size_t ToElement>
2804 if (!std::is_constant_evaluated()) {
2805#if defined(HI_HAS_SSE4_1)
// Runtime fast path for 4x32-bit: _mm_insert_ps encodes the source
// element in bits 7:6 and the destination element in bits 5:4 of the
// immediate operand.
2806 if constexpr (is_f32x4) {
2807 constexpr uint8_t insert_mask =
static_cast<uint8_t
>((FromElement << 6) | (ToElement << 4));
2808 return numeric_array{_mm_insert_ps(lhs.reg(), rhs.reg(), insert_mask)};
2810 }
// Integer 4x32-bit: reuse _mm_insert_ps through float-domain casts.
else if constexpr (is_i32x4 or is_u32x4) {
2811 constexpr uint8_t insert_mask =
static_cast<uint8_t
>((FromElement << 6) | (ToElement << 4));
2813 _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(lhs.reg()), _mm_castsi128_ps(rhs.reg()), insert_mask))};
2816#if defined(HI_HAS_SSE2)
// 2x64-bit: emulate the insert with one of the four possible
// two-element shuffles, selected at compile time.
2817 if constexpr (is_f64x2) {
2818 if constexpr (FromElement == 0 and ToElement == 0) {
2819 return numeric_array{_mm_shuffle_pd(rhs.reg(), lhs.reg(), 0b10)};
2820 }
else if constexpr (FromElement == 1 and ToElement == 0) {
2821 return numeric_array{_mm_shuffle_pd(rhs.reg(), lhs.reg(), 0b11)};
2822 }
else if constexpr (FromElement == 0 and ToElement == 1) {
2823 return numeric_array{_mm_shuffle_pd(lhs.reg(), rhs.reg(), 0b00)};
2825 return numeric_array{_mm_shuffle_pd(lhs.reg(), rhs.reg(), 0b10)};
2828 }
// Integer 2x64-bit: same shuffles, routed through the pd domain.
else if constexpr (is_i64x2 or is_u64x2) {
2829 hilet lhs_ = _mm_castsi128_pd(lhs.reg());
2830 hilet rhs_ = _mm_castsi128_pd(rhs.reg());
2832 if constexpr (FromElement == 0 and ToElement == 0) {
2833 return numeric_array{_mm_castpd_si128(_mm_shuffle_pd(rhs_, lhs_, 0b10))};
2834 }
else if constexpr (FromElement == 1 and ToElement == 0) {
2835 return numeric_array{_mm_castpd_si128(_mm_shuffle_pd(rhs_, lhs_, 0b11))};
2836 }
else if constexpr (FromElement == 0 and ToElement == 1) {
2837 return numeric_array{_mm_castpd_si128(_mm_shuffle_pd(lhs_, rhs_, 0b00))};
2839 return numeric_array{_mm_castpd_si128(_mm_shuffle_pd(lhs_, rhs_, 0b10))};
// Scalar fallback: copy-through, replacing only the ToElement slot.
2846 r[i] = (i == ToElement) ? rhs[FromElement] : lhs[i];
// swizzle<Elements...>(): reorder/replace the elements of *this. Each
// template argument selects a source element by index; presumably the
// special values get_zero / get_one (used by the SWIZZLE macros below)
// select constant 0 or 1 — confirm against the full source.
2859 template<ssize_t... Elements>
2862 static_assert(
sizeof...(Elements) <= N);
2864 if (!std::is_constant_evaluated()) {
2865#if defined(HI_HAS_AVX)
2866 if constexpr (is_f64x2) {
2868 }
else if constexpr (is_f32x4) {
2870 }
// Runtime fast paths: integer swizzles via project intrinsic wrappers.
else if constexpr (is_i64x2 or is_u64x2) {
2871 return numeric_array{_mm_swizzle_epi64<Elements...>(reg())};
2872 }
else if constexpr (is_i32x4 or is_u32x4) {
2873 return numeric_array{_mm_swizzle_epi32<Elements...>(reg())};
// Scalar/constexpr fallback: element-by-element recursive assignment.
2879 swizzle_detail<0, Elements...>(r);
// SWIZZLE(name, D, elems...) generates a named accessor (e.g. xyzw())
// that is only available when the array dimension equals D.
2883#define SWIZZLE(swizzle_name, D, ...) \
2884 [[nodiscard]] constexpr numeric_array swizzle_name() const noexcept requires(D == N) \
2886 return swizzle<__VA_ARGS__>(); \
2889#define SWIZZLE_4D_GEN1(name, ...) \
2890 SWIZZLE(name##0, 4, __VA_ARGS__, get_zero) \
2891 SWIZZLE(name##1, 4, __VA_ARGS__, get_one) \
2892 SWIZZLE(name##x, 4, __VA_ARGS__, 0) \
2893 SWIZZLE(name##y, 4, __VA_ARGS__, 1) \
2894 SWIZZLE(name##z, 4, __VA_ARGS__, 2) \
2895 SWIZZLE(name##w, 4, __VA_ARGS__, 3)
// GEN2/GEN3 expand the cartesian product of element selectors
// (0/1 constants and x/y/z/w indices) to cover every 4D swizzle name.
2897#define SWIZZLE_4D_GEN2(name, ...) \
2898 SWIZZLE_4D_GEN1(name##0, __VA_ARGS__, get_zero) \
2899 SWIZZLE_4D_GEN1(name##1, __VA_ARGS__, get_one) \
2900 SWIZZLE_4D_GEN1(name##x, __VA_ARGS__, 0) \
2901 SWIZZLE_4D_GEN1(name##y, __VA_ARGS__, 1) \
2902 SWIZZLE_4D_GEN1(name##z, __VA_ARGS__, 2) \
2903 SWIZZLE_4D_GEN1(name##w, __VA_ARGS__, 3)
2905#define SWIZZLE_4D_GEN3(name, ...) \
2906 SWIZZLE_4D_GEN2(name##0, __VA_ARGS__, get_zero) \
2907 SWIZZLE_4D_GEN2(name##1, __VA_ARGS__, get_one) \
2908 SWIZZLE_4D_GEN2(name##x, __VA_ARGS__, 0) \
2909 SWIZZLE_4D_GEN2(name##y, __VA_ARGS__, 1) \
2910 SWIZZLE_4D_GEN2(name##z, __VA_ARGS__, 2) \
2911 SWIZZLE_4D_GEN2(name##w, __VA_ARGS__, 3)
// Instantiate all four-element swizzle accessors.
2913 SWIZZLE_4D_GEN3(_0, get_zero)
2914 SWIZZLE_4D_GEN3(_1, get_one)
2915 SWIZZLE_4D_GEN3(x, 0)
2916 SWIZZLE_4D_GEN3(y, 1)
2917 SWIZZLE_4D_GEN3(z, 2)
2918 SWIZZLE_4D_GEN3(w, 3)
// Same scheme for three-element arrays (x/y/z and 0/1 selectors).
2920#define SWIZZLE_3D_GEN1(name, ...) \
2921 SWIZZLE(name##0, 3, __VA_ARGS__, get_zero) \
2922 SWIZZLE(name##1, 3, __VA_ARGS__, get_one) \
2923 SWIZZLE(name##x, 3, __VA_ARGS__, 0) \
2924 SWIZZLE(name##y, 3, __VA_ARGS__, 1) \
2925 SWIZZLE(name##z, 3, __VA_ARGS__, 2)
2927#define SWIZZLE_3D_GEN2(name, ...) \
2928 SWIZZLE_3D_GEN1(name##0, __VA_ARGS__, get_zero) \
2929 SWIZZLE_3D_GEN1(name##1, __VA_ARGS__, get_one) \
2930 SWIZZLE_3D_GEN1(name##x, __VA_ARGS__, 0) \
2931 SWIZZLE_3D_GEN1(name##y, __VA_ARGS__, 1) \
2932 SWIZZLE_3D_GEN1(name##z, __VA_ARGS__, 2)
// Instantiate all three-element swizzle accessors.
2934 SWIZZLE_3D_GEN2(_0, get_zero)
2935 SWIZZLE_3D_GEN2(_1, get_one)
2936 SWIZZLE_3D_GEN2(x, 0)
2937 SWIZZLE_3D_GEN2(y, 1)
2938 SWIZZLE_3D_GEN2(z, 2)
// Same scheme for two-element arrays (x/y and 0/1 selectors).
2940#define SWIZZLE_2D_GEN1(name, ...) \
2941 SWIZZLE(name##0, 2, __VA_ARGS__, get_zero) \
2942 SWIZZLE(name##1, 2, __VA_ARGS__, get_one) \
2943 SWIZZLE(name##x, 2, __VA_ARGS__, 0) \
2944 SWIZZLE(name##y, 2, __VA_ARGS__, 1)
// Instantiate all two-element swizzle accessors.
2946 SWIZZLE_2D_GEN1(_0, get_zero)
2947 SWIZZLE_2D_GEN1(_1, get_one)
2948 SWIZZLE_2D_GEN1(x, 0)
2949 SWIZZLE_2D_GEN1(y, 1)
// The helper macros are local to this class; remove them again.
2952#undef SWIZZLE_4D_GEN1
2953#undef SWIZZLE_4D_GEN2
2954#undef SWIZZLE_4D_GEN3
2955#undef SWIZZLE_3D_GEN1
2956#undef SWIZZLE_3D_GEN2
2957#undef SWIZZLE_2D_GEN1
2959 template<ssize_t I, ssize_t FirstElement, ssize_t... RestElements>
2960 constexpr void swizzle_detail(
numeric_array& r)
const noexcept
2962 static_assert(I < narrow_cast<ssize_t>(N));
2963 static_assert(FirstElement >= -2 && FirstElement < narrow_cast<ssize_t>(N),
"Index out of bounds");
2965 get<I>(r) = get<FirstElement>(*
this);
2966 if constexpr (
sizeof...(RestElements) != 0) {
2967 swizzle_detail<I + 1, RestElements...>(r);