// The element count N must be a power of two.
32 static_assert(std::has_single_bit(N));
// Selects the unsigned integer whose bit width equals that of T (8, 16, 32 or 64 bits);
// yields void for any other width.
// NOTE(review): presumably the right-hand side of the `mask_type` alias — confirm.
40 std::conditional_t<
sizeof(T) * CHAR_BIT == 8, uint8_t,
41 std::conditional_t<
sizeof(T) * CHAR_BIT == 16, uint16_t,
42 std::conditional_t<
sizeof(T) * CHAR_BIT == 32, uint32_t,
43 std::conditional_t<
sizeof(T) * CHAR_BIT == 64, uint64_t,
void>>>>;
// Signed counterpart of mask_type; used where the sign bit of an element matters.
45 using signed_mask_type = std::make_signed_t<mask_type>;
/// Reinterpret the bits of a single element as a mask_type (no numeric conversion).
47 [[nodiscard]] hi_force_inline
constexpr static mask_type to_mask(value_type a)
noexcept
49 return std::bit_cast<mask_type>(a);
/// Reinterpret the bits of a single element as a signed_mask_type (no numeric conversion).
52 [[nodiscard]] hi_force_inline
constexpr static signed_mask_type to_signed_mask(value_type a)
noexcept
54 return std::bit_cast<signed_mask_type>(a);
/// Convert any unsigned integer to mask_type (static_cast), then reinterpret those bits
/// as a value_type.
57 template<std::
unsigned_
integral M>
58 [[nodiscard]] hi_force_inline
constexpr static value_type to_value(M a)
noexcept
60 return std::bit_cast<value_type>(
static_cast<mask_type
>(a));
/// Convert any signed integer to signed_mask_type (static_cast), then reinterpret those
/// bits as a value_type.
63 template<std::
signed_
integral M>
64 [[nodiscard]] hi_force_inline
constexpr static value_type to_value(M a)
noexcept
66 return std::bit_cast<value_type>(
static_cast<signed_mask_type
>(a));
// Element whose bit pattern is all zeros; used as the "false" lane value by comparisons.
69 constexpr static value_type _zero_mask = to_value(mask_type{0});
// Element built from the complement of a zero mask; used as the "true" lane value by comparisons.
70 constexpr static value_type _ones_mask = to_value(~mask_type{0});
/// Forwards to intrinsic_type::undefined() when not constant-evaluated and that function exists.
/// NOTE(review): the generic fallback is outside this excerpt — confirm what it returns.
72 [[nodiscard]] hi_force_inline
static array_type undefined()
noexcept
74 if (not std::is_constant_evaluated()) {
75 if constexpr (
requires { intrinsic_type::undefined(); }) {
76 return intrinsic_type::undefined();
/// Build an array from exactly N values, one per element, in argument order.
/// Uses intrinsic_type::set(args...) when not constant-evaluated and available.
83 template<std::same_as<value_type>... Args>
84 [[nodiscard]] hi_force_inline
constexpr static array_type set(Args... args)
noexcept
85 requires(
sizeof...(Args) == N)
87 if (not std::is_constant_evaluated()) {
88 if constexpr (
requires { intrinsic_type::set(args...); }) {
89 return intrinsic_type::set(args...);
// Generic fallback: brace-initialize the array from the arguments.
92 return array_type{args...};
/// Build an array from a single scalar.
/// Uses intrinsic_type::set(arg) when not constant-evaluated and available.
95 [[nodiscard]] hi_force_inline
constexpr static array_type set(value_type arg)
noexcept
97 if (not std::is_constant_evaluated()) {
98 if constexpr (
requires { intrinsic_type::set(arg); }) {
99 return intrinsic_type::set(arg);
// Generic fallback: start from a value-initialized array and store arg in element 0 only.
102 auto r = array_type{};
103 std::get<0>(r) = arg;
/// Forwards to intrinsic_type::set_zero() when not constant-evaluated and that function exists.
107 [[nodiscard]] hi_force_inline
constexpr static array_type set_zero()
noexcept
109 if (not std::is_constant_evaluated()) {
110 if constexpr (
requires { intrinsic_type::set_zero(); }) {
111 return intrinsic_type::set_zero();
// Generic fallback starts from a value-initialized array.
114 auto r = array_type{};
/// Forwards to intrinsic_type::set_all_ones() when not constant-evaluated and that function exists.
/// NOTE(review): the fallback presumably fills every element with _ones_mask — confirm.
121 [[nodiscard]] hi_force_inline
constexpr static array_type set_all_ones()
noexcept
123 if (not std::is_constant_evaluated()) {
124 if constexpr (
requires { intrinsic_type::set_all_ones(); }) {
125 return intrinsic_type::set_all_ones();
128 auto r = array_type{};
/// Produce an array of numeric ones.
/// Uses intrinsic_type::set_one() when not constant-evaluated and available.
135 [[nodiscard]] hi_force_inline
constexpr static array_type set_one()
noexcept
137 if (not std::is_constant_evaluated()) {
138 if constexpr (
requires { intrinsic_type::set_one(); }) {
139 return intrinsic_type::set_one();
142 auto r = array_type{};
// Generic fallback: element i receives the numeric value 1 (not an all-ones bit pattern).
144 r[i] = value_type{1};
/// Forwards to intrinsic_type::broadcast(a) when not constant-evaluated and that function exists.
/// NOTE(review): the fallback presumably copies the scalar `a` into every element — confirm.
149 [[nodiscard]] hi_force_inline
constexpr static array_type broadcast(value_type a)
noexcept
151 if (not std::is_constant_evaluated()) {
152 if constexpr (
requires { intrinsic_type::broadcast(a); }) {
153 return intrinsic_type::broadcast(a);
156 auto r = array_type{};
/// Replicate the first element of `a` across the result.
/// Uses intrinsic_type::broadcast(a) when not constant-evaluated and available.
163 [[nodiscard]] hi_force_inline
constexpr static array_type broadcast(array_type a)
noexcept
165 if (not std::is_constant_evaluated()) {
166 if constexpr (
requires { intrinsic_type::broadcast(a); }) {
167 return intrinsic_type::broadcast(a);
170 auto r = array_type{};
// Generic fallback: element i of the result is element 0 of the input.
172 r[i] = std::get<0>(a);
/// Extract element I of `a` (I is a compile-time index).
/// Uses intrinsic_type::get<I>(a) when not constant-evaluated and available.
178 [[nodiscard]] hi_force_inline
constexpr static value_type get(array_type a)
noexcept
180 if (not std::is_constant_evaluated()) {
181 if constexpr (
requires { intrinsic_type::template get<I>(a); }) {
182 return intrinsic_type::template get<I>(a);
// Generic fallback: plain std::get on the array.
185 return std::get<I>(a);
// Prefer intrinsic_type::set_mask(mask) at run time when it exists.
194 if (not std::is_constant_evaluated()) {
195 if constexpr (
requires { intrinsic_type::set_mask(mask); }) {
196 return intrinsic_type::set_mask(mask);
// Generic fallback: the lowest bit of `mask` decides whether element i is all-ones or all-zeros.
// NOTE(review): presumably `mask` is shifted right once per iteration — confirm.
201 r[i] = mask & 1 ? _ones_mask : _zero_mask;
// Prefer intrinsic_type::get_mask(a) at run time when it exists.
213 if (not std::is_constant_evaluated()) {
214 if constexpr (
requires { intrinsic_type::get_mask(a); }) {
215 return intrinsic_type::get_mask(a);
// Generic fallback: tmp is 1 when the sign bit of element i is set (signed view is negative), else 0.
220 auto const tmp = to_signed_mask(a[i]) < 0 ?
size_t{1} :
size_t{0};
/// Convert an array of N elements of type O to this element type.
/// Uses intrinsic_type::convert(a) when not constant-evaluated and available.
226 template<array_
generic_convertible_to<value_type> O>
227 [[nodiscard]] hi_force_inline
constexpr static array_type convert(
std::array<O, N> a)
noexcept
229 if (not std::is_constant_evaluated()) {
230 if constexpr (
requires { intrinsic_type::convert(a); }) {
231 return intrinsic_type::convert(a);
235 auto r = array_type{};
// Generic fallback: numeric conversion of element i via static_cast.
237 r[i] =
static_cast<value_type
>(a[i]);
/// Forwards to intrinsic_type::neg(a) when not constant-evaluated and that function exists.
/// NOTE(review): the generic fallback presumably negates each element — confirm.
242 [[nodiscard]] hi_force_inline
constexpr static array_type neg(array_type a)
noexcept
244 if (not std::is_constant_evaluated()) {
245 if constexpr (
requires { intrinsic_type::neg(a); }) {
246 return intrinsic_type::neg(a);
/// Negate only the elements selected by the compile-time bit mask `Mask`.
/// Uses intrinsic_type::neg_mask<Mask>(a) when not constant-evaluated and available.
256 template<std::
size_t Mask>
257 [[nodiscard]] hi_force_inline
constexpr static array_type neg_mask(array_type a)
noexcept
259 if (not std::is_constant_evaluated()) {
260 if constexpr (
requires { intrinsic_type::template neg_mask<Mask>(a); }) {
261 return intrinsic_type::template neg_mask<Mask>(a);
// Generic fallback: blend between the original `a` and neg(a) according to Mask.
265 return blend<Mask>(a, neg(a));
/// Bitwise complement of every element.
/// Uses intrinsic_type::inv(a) when not constant-evaluated and available.
268 [[nodiscard]] hi_force_inline
constexpr static array_type inv(array_type a)
noexcept
270 if (not std::is_constant_evaluated()) {
271 if constexpr (
requires { intrinsic_type::inv(a); }) {
272 return intrinsic_type::inv(a);
// Generic fallback: complement the bit pattern of element i in place.
277 a[i] = to_value(~to_mask(a[i]));
/// Reciprocal of every element.
/// Uses intrinsic_type::rcp(a) when not constant-evaluated and available.
282 [[nodiscard]] hi_force_inline
constexpr static array_type rcp(array_type a)
noexcept
284 if (not std::is_constant_evaluated()) {
285 if constexpr (
requires { intrinsic_type::rcp(a); }) {
286 return intrinsic_type::rcp(a);
// Generic fallback: 1 / a[i], computed in value_type.
291 a[i] = value_type{1} / a[i];
/// Forwards to intrinsic_type::sqrt(a) when not constant-evaluated and that function exists.
/// Unlike most neighbouring functions, this one is not declared constexpr.
/// NOTE(review): the generic fallback presumably takes the square root per element — confirm.
296 [[nodiscard]] hi_force_inline
static array_type sqrt(array_type a)
noexcept
298 if (not std::is_constant_evaluated()) {
299 if constexpr (
requires { intrinsic_type::sqrt(a); }) {
300 return intrinsic_type::sqrt(a);
/// Forwards to intrinsic_type::rsqrt(a) when not constant-evaluated and that function exists.
/// Unlike most neighbouring functions, this one is not declared constexpr.
/// NOTE(review): the generic fallback presumably computes 1/sqrt per element — confirm.
310 [[nodiscard]] hi_force_inline
static array_type rsqrt(array_type a)
noexcept
312 if (not std::is_constant_evaluated()) {
313 if constexpr (
requires { intrinsic_type::rsqrt(a); }) {
314 return intrinsic_type::rsqrt(a);
/// Absolute value of every element.
/// Uses intrinsic_type::abs(a) when not constant-evaluated and available.
324 [[nodiscard]] hi_force_inline
constexpr static array_type abs(array_type a)
noexcept
326 if (not std::is_constant_evaluated()) {
327 if constexpr (
requires { intrinsic_type::abs(a); }) {
328 return intrinsic_type::abs(a);
// Generic fallback: std::abs applied to element i in place.
333 a[i] = std::abs(a[i]);
/// Forwards to intrinsic_type::round(a) when not constant-evaluated and that function exists.
/// NOTE(review): the generic fallback presumably rounds each element — confirm the rounding mode.
338 [[nodiscard]] hi_force_inline
constexpr static array_type
round(array_type a)
noexcept
340 if (not std::is_constant_evaluated()) {
341 if constexpr (
requires { intrinsic_type::round(a); }) {
342 return intrinsic_type::round(a);
/// Forwards to intrinsic_type::floor(a) when not constant-evaluated and that function exists.
/// NOTE(review): the generic fallback presumably floors each element — confirm.
352 [[nodiscard]] hi_force_inline
constexpr static array_type floor(array_type a)
noexcept
354 if (not std::is_constant_evaluated()) {
355 if constexpr (
requires { intrinsic_type::floor(a); }) {
356 return intrinsic_type::floor(a);
/// Forwards to intrinsic_type::ceil(a) when not constant-evaluated and that function exists.
/// NOTE(review): the generic fallback presumably takes the ceiling of each element — confirm.
366 [[nodiscard]] hi_force_inline
constexpr static array_type ceil(array_type a)
noexcept
368 if (not std::is_constant_evaluated()) {
369 if constexpr (
requires { intrinsic_type::ceil(a); }) {
370 return intrinsic_type::ceil(a);
/// Forwards to intrinsic_type::add(a, b) when not constant-evaluated and that function exists.
/// NOTE(review): the generic fallback presumably adds element-wise — confirm.
380 [[nodiscard]] hi_force_inline
constexpr static array_type add(array_type a, array_type b)
noexcept
382 if (not std::is_constant_evaluated()) {
383 if constexpr (
requires { intrinsic_type::add(a, b); }) {
384 return intrinsic_type::add(a, b);
/// Forwards to intrinsic_type::sub(a, b) when not constant-evaluated and that function exists.
/// NOTE(review): the generic fallback presumably subtracts element-wise — confirm.
394 [[nodiscard]] hi_force_inline
constexpr static array_type sub(array_type a, array_type b)
noexcept
396 if (not std::is_constant_evaluated()) {
397 if constexpr (
requires { intrinsic_type::sub(a, b); }) {
398 return intrinsic_type::sub(a, b);
// Mask is a compile-time bit mask choosing, per element, between a - b and a + b.
412 template<std::
size_t Mask>
// Prefer intrinsic_type::addsub_mask<Mask>(a, b) at run time when it exists.
415 if (not std::is_constant_evaluated()) {
416 if constexpr (
requires { intrinsic_type::template addsub_mask<Mask>(a, b); }) {
417 return intrinsic_type::template addsub_mask<Mask>(a, b);
// Generic fallback: compute both results and blend them according to Mask.
421 return blend<Mask>(sub(a, b), add(a, b));
/// Forwards to intrinsic_type::mul(a, b) when not constant-evaluated and that function exists.
/// NOTE(review): the generic fallback presumably multiplies element-wise — confirm.
424 [[nodiscard]] hi_force_inline
constexpr static array_type mul(array_type a, array_type b)
noexcept
426 if (not std::is_constant_evaluated()) {
427 if constexpr (
requires { intrinsic_type::mul(a, b); }) {
428 return intrinsic_type::mul(a, b);
/// Forwards to intrinsic_type::div(a, b) when not constant-evaluated and that function exists.
/// NOTE(review): the generic fallback presumably divides element-wise — confirm.
438 [[nodiscard]] hi_force_inline
constexpr static array_type div(array_type a, array_type b)
noexcept
440 if (not std::is_constant_evaluated()) {
441 if constexpr (
requires { intrinsic_type::div(a, b); }) {
442 return intrinsic_type::div(a, b);
/// Forwards to intrinsic_type::mod(a, b) when not constant-evaluated and that function exists.
452 [[nodiscard]] hi_force_inline
constexpr static array_type mod(array_type a, array_type b)
noexcept
454 if (not std::is_constant_evaluated()) {
455 if constexpr (
requires { intrinsic_type::mod(a, b); }) {
456 return intrinsic_type::mod(a, b);
// The generic fallback takes a separate compile-time path when T is a floating-point type.
461 if constexpr (std::floating_point<T>) {
/// Element-wise `a == b`, returned as a mask array.
/// Uses intrinsic_type::eq(a, b) when not constant-evaluated and available.
470 [[nodiscard]] hi_force_inline
constexpr static array_type eq(array_type a, array_type b)
noexcept
472 if (not std::is_constant_evaluated()) {
473 if constexpr (
requires { intrinsic_type::eq(a, b); }) {
474 return intrinsic_type::eq(a, b);
// Generic fallback: element i becomes _ones_mask when the comparison holds, else _zero_mask.
479 a[i] = a[i] == b[i] ? _ones_mask : _zero_mask;
/// Element-wise `a != b`, returned as a mask array.
/// Uses intrinsic_type::ne(a, b) when not constant-evaluated and available.
484 [[nodiscard]] hi_force_inline
constexpr static array_type ne(array_type a, array_type b)
noexcept
486 if (not std::is_constant_evaluated()) {
487 if constexpr (
requires { intrinsic_type::ne(a, b); }) {
488 return intrinsic_type::ne(a, b);
// Generic fallback: element i becomes _ones_mask when the comparison holds, else _zero_mask.
493 a[i] = a[i] != b[i] ? _ones_mask : _zero_mask;
/// Element-wise `a < b`, returned as a mask array.
/// Uses intrinsic_type::lt(a, b) when not constant-evaluated and available.
498 [[nodiscard]] hi_force_inline
constexpr static array_type lt(array_type a, array_type b)
noexcept
500 if (not std::is_constant_evaluated()) {
501 if constexpr (
requires { intrinsic_type::lt(a, b); }) {
502 return intrinsic_type::lt(a, b);
// Generic fallback: element i becomes _ones_mask when the comparison holds, else _zero_mask.
507 a[i] = a[i] < b[i] ? _ones_mask : _zero_mask;
/// Element-wise `a > b`, returned as a mask array.
/// Uses intrinsic_type::gt(a, b) when not constant-evaluated and available.
512 [[nodiscard]] hi_force_inline
constexpr static array_type gt(array_type a, array_type b)
noexcept
514 if (not std::is_constant_evaluated()) {
515 if constexpr (
requires { intrinsic_type::gt(a, b); }) {
516 return intrinsic_type::gt(a, b);
// Generic fallback: element i becomes _ones_mask when the comparison holds, else _zero_mask.
521 a[i] = a[i] > b[i] ? _ones_mask : _zero_mask;
/// Element-wise `a <= b`, returned as a mask array.
/// Uses intrinsic_type::le(a, b) when not constant-evaluated and available.
526 [[nodiscard]] hi_force_inline
constexpr static array_type le(array_type a, array_type b)
noexcept
528 if (not std::is_constant_evaluated()) {
529 if constexpr (
requires { intrinsic_type::le(a, b); }) {
530 return intrinsic_type::le(a, b);
// Generic fallback: element i becomes _ones_mask when the comparison holds, else _zero_mask.
535 a[i] = a[i] <= b[i] ? _ones_mask : _zero_mask;
/// Element-wise `a >= b`, returned as a mask array.
/// Uses intrinsic_type::ge(a, b) when not constant-evaluated and available.
540 [[nodiscard]] hi_force_inline
constexpr static array_type ge(array_type a, array_type b)
noexcept
542 if (not std::is_constant_evaluated()) {
543 if constexpr (
requires { intrinsic_type::ge(a, b); }) {
544 return intrinsic_type::ge(a, b);
// Generic fallback: element i becomes _ones_mask when the comparison holds, else _zero_mask.
549 a[i] = a[i] >= b[i] ? _ones_mask : _zero_mask;
// Prefer intrinsic_type::test(a, b) at run time when it exists.
561 if (not std::is_constant_evaluated()) {
562 if constexpr (
requires { intrinsic_type::test(a, b); }) {
563 return intrinsic_type::test(a, b);
// Generic fallback: OR together the bitwise AND of each pair of elements, so r stays
// zero only if no pair shares a set bit.
567 auto r = mask_type{0};
569 r |= to_mask(a[i]) & to_mask(b[i]);
// NOTE(review): the signature is not shown; presumably this is an "all elements equal" predicate — confirm.
// tmp holds a mask whose elements are non-zero exactly where a and b differ.
581 auto const tmp = ne(a, b);
// Testing tmp against itself reduces that mask to a single result.
583 return test(tmp, tmp);
/// Forwards to intrinsic_type::max(a, b) when not constant-evaluated and that function exists.
/// NOTE(review): the generic fallback presumably takes the element-wise maximum — confirm.
586 [[nodiscard]] hi_force_inline
constexpr static array_type max(array_type a, array_type b)
noexcept
588 if (not std::is_constant_evaluated()) {
589 if constexpr (
requires { intrinsic_type::max(a, b); }) {
590 return intrinsic_type::max(a, b);
/// Forwards to intrinsic_type::min(a, b) when not constant-evaluated and that function exists.
/// NOTE(review): the generic fallback presumably takes the element-wise minimum — confirm.
600 [[nodiscard]] hi_force_inline
constexpr static array_type min(array_type a, array_type b)
noexcept
602 if (not std::is_constant_evaluated()) {
603 if constexpr (
requires { intrinsic_type::min(a, b); }) {
604 return intrinsic_type::min(a, b);
/// Clamp every element of `v` to the range given by the matching elements of `lo` and `hi`.
/// Uses intrinsic_type::clamp(v, lo, hi) when not constant-evaluated and available.
614 [[nodiscard]] hi_force_inline
constexpr static array_type clamp(array_type v, array_type lo, array_type
hi)
noexcept
616 if (not std::is_constant_evaluated()) {
617 if constexpr (
requires { intrinsic_type::clamp(v, lo,
hi); }) {
618 return intrinsic_type::clamp(v, lo,
hi);
// Generic fallback: std::clamp per element, in place.
623 v[i] = std::clamp(v[i], lo[i],
hi[i]);
/// Bitwise OR of corresponding elements.
/// Uses intrinsic_type::_or(a, b) when not constant-evaluated and available.
628 [[nodiscard]] hi_force_inline
constexpr static array_type _or(array_type a, array_type b)
noexcept
630 if (not std::is_constant_evaluated()) {
631 if constexpr (
requires { intrinsic_type::_or(a, b); }) {
632 return intrinsic_type::_or(a, b);
// Generic fallback: operate on the bit patterns, then reinterpret back as value_type.
637 a[i] = to_value(to_mask(a[i]) | to_mask(b[i]));
/// Bitwise AND of corresponding elements.
/// Uses intrinsic_type::_and(a, b) when not constant-evaluated and available.
642 [[nodiscard]] hi_force_inline
constexpr static array_type _and(array_type a, array_type b)
noexcept
644 if (not std::is_constant_evaluated()) {
645 if constexpr (
requires { intrinsic_type::_and(a, b); }) {
646 return intrinsic_type::_and(a, b);
// Generic fallback: operate on the bit patterns, then reinterpret back as value_type.
651 a[i] = to_value(to_mask(a[i]) & to_mask(b[i]));
/// Bitwise XOR of corresponding elements.
/// Uses intrinsic_type::_xor(a, b) when not constant-evaluated and available.
656 [[nodiscard]] hi_force_inline
constexpr static array_type _xor(array_type a, array_type b)
noexcept
658 if (not std::is_constant_evaluated()) {
659 if constexpr (
requires { intrinsic_type::_xor(a, b); }) {
660 return intrinsic_type::_xor(a, b);
// Generic fallback: operate on the bit patterns, then reinterpret back as value_type.
665 a[i] = to_value(to_mask(a[i]) ^ to_mask(b[i]));
// Prefer intrinsic_type::andnot(a, b) at run time when it exists.
678 if (not std::is_constant_evaluated()) {
679 if constexpr (
requires { intrinsic_type::andnot(a, b); }) {
680 return intrinsic_type::andnot(a, b);
// Generic fallback: (NOT a) AND b on the bit patterns — note it is `a` that is complemented.
685 a[i] = to_value(~to_mask(a[i]) & to_mask(b[i]));
690 [[nodiscard]] hi_force_inline
constexpr static array_type sll(array_type a,
unsigned int b)
noexcept
692 if (not std::is_constant_evaluated()) {
693 if constexpr (
requires { intrinsic_type::sll(a, b); }) {
694 return intrinsic_type::sll(a, b);
698 if (b >=
sizeof(value_type) * CHAR_BIT) {
702 a[i] = to_value(to_mask(a[0]) << b);
708 [[nodiscard]] hi_force_inline
constexpr static array_type srl(array_type a,
unsigned int b)
noexcept
710 if (not std::is_constant_evaluated()) {
711 if constexpr (
requires { intrinsic_type::srl(a, b); }) {
712 return intrinsic_type::srl(a, b);
716 if (b >=
sizeof(value_type) * CHAR_BIT) {
720 a[i] = to_value(to_mask(a[0]) >> b);
726 [[nodiscard]] hi_force_inline
constexpr static array_type sra(array_type a,
unsigned int b)
noexcept
728 if (not std::is_constant_evaluated()) {
729 if constexpr (
requires { intrinsic_type::sra(a, b); }) {
730 return intrinsic_type::sra(a, b);
734 if (b >=
sizeof(value_type) * CHAR_BIT) {
735 b =
sizeof(value_type) * CHAR_BIT - 1;
739 a[i] = to_value(to_signed_mask(a[0]) >> b);
// Prefer intrinsic_type::hadd(a, b) at run time when it exists.
749 if (not std::is_constant_evaluated()) {
750 if constexpr (
requires { intrinsic_type::hadd(a, b); }) {
751 return intrinsic_type::hadd(a, b);
// Generic fallback: sums of adjacent pairs of `a` are written through dst_i ...
758 for (
std::size_t src_i = 0; src_i != N; src_i += 2) {
759 r[dst_i++] = a[src_i] + a[src_i + 1];
// ... followed by the sums of adjacent pairs of `b`.
762 for (
std::size_t src_i = 0; src_i != N; src_i += 2) {
763 r[dst_i++] = b[src_i] + b[src_i + 1];
/// Horizontal subtract: differences of adjacent element pairs of `a`, then of `b`.
/// Uses intrinsic_type::hsub(a, b) when not constant-evaluated and available.
768 [[nodiscard]] hi_force_inline
constexpr static array_type hsub(array_type a, array_type b)
noexcept
770 if (not std::is_constant_evaluated()) {
771 if constexpr (
requires { intrinsic_type::hsub(a, b); }) {
772 return intrinsic_type::hsub(a, b);
776 auto r = array_type{};
// Each pair contributes (even-indexed element) - (following odd-indexed element).
779 for (
std::size_t src_i = 0; src_i != N; src_i += 2) {
780 r[dst_i++] = a[src_i] - a[src_i + 1];
783 for (
std::size_t src_i = 0; src_i != N; src_i += 2) {
784 r[dst_i++] = b[src_i] - b[src_i + 1];
/// Recursive helper for shuffle(): fills element I of `r`, then recurses for the remaining indices.
/// @tparam I     Destination element handled by this step.
/// @tparam First Source index for element I; a negative value keeps a's element I.
/// @tparam Rest  Source indices for the following destination elements.
789 template<
size_t I,
int First,
int... Rest>
790 hi_force_inline
constexpr static void _shuffle(array_type& r, array_type a)
noexcept
// Source indices must be below N (negative values pass this check).
792 static_assert(std::cmp_less(First, N));
794 if constexpr (First < 0) {
795 std::get<I>(r) = std::get<I>(a);
797 std::get<I>(r) = std::get<First>(a);
// Continue with the next destination element while indices remain.
800 if constexpr (
sizeof...(Rest)) {
801 _shuffle<I + 1, Rest...>(r, a);
/// Compile-time check used by shuffle() to detect index lists that require actual work.
/// @tparam I     Destination element examined by this step.
/// @tparam First Requested source index for element I.
/// @tparam Rest  Requested source indices for the following elements.
805 template<
size_t I,
int First,
int... Rest>
806 [[nodiscard]]
constexpr static bool _have_to_shuffle() noexcept
808 static_assert(std::cmp_less(First, N));
// A non-negative index that differs from its own position moves data.
810 if constexpr (First >= 0 and First != I) {
// Otherwise the answer depends on the remaining indices.
814 if constexpr (
sizeof...(Rest)) {
815 return _have_to_shuffle<I + 1, Rest...>();
/// Reorder the elements of `a` according to N compile-time source indices.
/// @tparam Indices One source index per destination element.
821 template<
int... Indices>
822 [[nodiscard]] hi_force_inline
constexpr static array_type shuffle(array_type a)
noexcept
824 static_assert(
sizeof...(Indices) == N);
// Special-case index lists for which _have_to_shuffle reports that nothing moves.
826 if constexpr (not _have_to_shuffle<0, Indices...>()) {
831 if (not std::is_constant_evaluated()) {
832 if constexpr (
requires { intrinsic_type::template shuffle<Indices...>(a); }) {
833 return intrinsic_type::template shuffle<Indices...>(a);
// Generic fallback: fill a fresh array element by element.
837 auto r = array_type{};
838 _shuffle<0, Indices...>(r, a);
/// Select each element from `a` or `b` according to the compile-time bit mask `Mask`.
/// @tparam Mask Bit mask controlling the per-element choice between `a` and `b`.
842 template<
size_t Mask>
843 [[nodiscard]] hi_force_inline
constexpr static array_type blend(array_type a, array_type b)
noexcept
// Trivial masks (no bits set, or all N low bits set) are handled at compile time.
845 if constexpr (Mask == 0) {
848 }
else if constexpr (Mask == (1ULL << N) - 1) {
853 if (not std::is_constant_evaluated()) {
854 if constexpr (
requires { intrinsic_type::template blend<Mask>(a, b); }) {
855 return intrinsic_type::template blend<Mask>(a, b);
// Generic fallback: when the lowest bit of `mask` is set take b[i], otherwise keep a[i].
861 a[i] = mask & 1 ? b[i] : a[i];
// Suppress MSVC code-analysis warning 26494 for the code that follows.
870 hi_warning_ignore_msvc(26494);
// Takes exactly N column arguments, each derived from array_type.
871 template<std::derived_from<array_type>... Columns>
873 requires(
sizeof...(Columns) == N)
// Prefer intrinsic_type::transpose(columns...) at run time when it exists.
875 if (not std::is_constant_evaluated()) {
876 if constexpr (
requires { intrinsic_type::transpose(columns...); }) {
877 return intrinsic_type::transpose(columns...);
// Generic fallback: f expands an index sequence so every column is paired with its position.
882 auto f = [&r, &columns... ]<
std::size_t... Ints>(std::index_sequence<Ints...>)
// tf handles a single (position, column) pair and writes into r.
884 auto tf = [&r](
auto i,
auto v) {
// The fold calls tf once per column; the folded value itself is discarded.
890 static_cast<void>((tf(Ints, columns) + ...));
892 f(std::make_index_sequence<
sizeof...(columns)>{});
/// Recursive helper: walks the index list and updates `r` for positions whose index equals Value.
/// @tparam I     Position examined by this step.
/// @tparam Value Index value being searched for.
/// @tparam First Index at position I.
/// @tparam Rest  Indices at the following positions.
/// NOTE(review): presumably sets bit I of `r` on a match — confirm.
897 template<
size_t I,
int Value,
int First,
int... Rest>
898 constexpr static void _make_swizzle_blend_mask(
std::size_t& r)
noexcept
900 if constexpr (First == Value) {
// Continue with the next position while indices remain.
904 if constexpr (
sizeof...(Rest)) {
905 _make_swizzle_blend_mask<I + 1, Value, Rest...>(r);
/// Entry point: computes the mask for `Value` over the whole index list, starting at position 0.
/// @tparam Value   Index value being searched for.
/// @tparam Indices The swizzle index list.
909 template<
int Value,
int... Indices>
910 [[nodiscard]]
constexpr static std::size_t _make_swizzle_blend_mask() noexcept
913 _make_swizzle_blend_mask<0, Value, Indices...>(r);
// Indices may contain two sentinels besides source positions: -1 and -2.
927 template<
int... Indices>
// Masks of the positions whose index is -1 (zero_mask) and -2 (one_mask).
930 constexpr auto zero_mask = _make_swizzle_blend_mask<-1, Indices...>();
931 constexpr auto one_mask = _make_swizzle_blend_mask<-2, Indices...>();
// First reorder the elements, then patch in constants where requested.
933 auto tmp = shuffle<Indices...>(a);
934 if constexpr (zero_mask != 0) {
// Positions marked -1 take their value from set_zero().
935 tmp = blend<zero_mask>(tmp, set_zero());
937 if constexpr (one_mask != 0) {
// Positions marked -2 take their value from set_one().
938 tmp = blend<one_mask>(tmp, set_one());
/// Forwards to intrinsic_type::sum(a) when not constant-evaluated and that function exists.
/// The result is returned as an array_type, not a scalar.
/// NOTE(review): presumably the total is replicated into every element — confirm.
943 [[nodiscard]] hi_force_inline
constexpr static array_type sum(array_type a)
noexcept
945 if (not std::is_constant_evaluated()) {
946 if constexpr (
requires { intrinsic_type::sum(a); }) {
947 return intrinsic_type::sum(a);
// Generic fallback accumulates into a scalar that starts at zero.
951 auto r = value_type{0};
/// Dot product restricted to the elements selected by the compile-time bit mask `Mask`.
/// Uses intrinsic_type::dot<Mask>(a, b) when not constant-evaluated and available.
958 template<
size_t Mask>
959 [[nodiscard]] hi_force_inline
constexpr static array_type dot(array_type a, array_type b)
noexcept
961 if (not std::is_constant_evaluated()) {
962 if constexpr (
requires { intrinsic_type::template dot<Mask>(a, b); }) {
963 return intrinsic_type::template dot<Mask>(a, b);
// Generic fallback: multiply, then keep only the products chosen by Mask (others become zero).
967 auto const tmp1 = mul(a, b);
968 auto const tmp2 = blend<Mask>(set_zero(), tmp1);
/// Length of `a` over the elements selected by `Mask`.
/// Uses intrinsic_type::hypot<Mask>(a) when not constant-evaluated and available.
972 template<
size_t Mask>
973 [[nodiscard]] hi_force_inline
constexpr static array_type hypot(array_type a)
noexcept
975 if (not std::is_constant_evaluated()) {
976 if constexpr (
requires { intrinsic_type::template hypot<Mask>(a); }) {
977 return intrinsic_type::template hypot<Mask>(a);
// Generic fallback: square root of the masked dot product of `a` with itself.
981 return sqrt(dot<Mask>(a, a));
/// Reciprocal length of `a` over the elements selected by `Mask`.
/// Uses intrinsic_type::rhypot<Mask>(a) when not constant-evaluated and available.
984 template<
size_t Mask>
985 [[nodiscard]] hi_force_inline
constexpr static array_type rhypot(array_type a)
noexcept
987 if (not std::is_constant_evaluated()) {
988 if constexpr (
requires { intrinsic_type::template rhypot<Mask>(a); }) {
989 return intrinsic_type::template rhypot<Mask>(a);
// Generic fallback: rsqrt of the masked dot product of `a` with itself.
993 return rsqrt(dot<Mask>(a, a));
/// Scale `a` by the reciprocal of its length over the elements selected by `Mask`.
/// Uses intrinsic_type::normalize<Mask>(a) when not constant-evaluated and available.
996 template<
size_t Mask>
997 [[nodiscard]] hi_force_inline
constexpr static array_type normalize(array_type a)
noexcept
999 if (not std::is_constant_evaluated()) {
1000 if constexpr (
requires { intrinsic_type::template normalize<Mask>(a); }) {
1001 return intrinsic_type::template normalize<Mask>(a);
// Generic fallback: multiply `a` by rhypot<Mask>(a).
1005 return mul(rhypot<Mask>(a), a);