16namespace hi::inline v1 {
/** Build the 8-bit lane selector for permuting a register of four floats.
 *
 * Every template argument picks the source lane for one destination lane.
 * The selection is encoded in two bits per destination lane:
 *  - A: bits [1:0]
 *  - B: bits [3:2]
 *  - C: bits [5:4]
 *  - D: bits [7:6]
 *
 * Indices 0..3 select that source lane. The negative indices -1 and -2 do
 * not select a source; they are encoded as the lane's own position (A -> 0,
 * B -> 1, C -> 2, D -> 3), i.e. the identity selection for that lane.
 * An index of -3 passes the range check but matches no case label, so it
 * contributes no bits.
 *
 * @tparam A Source index for destination lane 0, in the range [-3, 3].
 * @tparam B Source index for destination lane 1, in the range [-3, 3].
 * @tparam C Source index for destination lane 2, in the range [-3, 3].
 * @tparam D Source index for destination lane 3, in the range [-3, 3].
 * @return The combined selector; 0b11'10'01'00 is the identity permutation.
 */
template<ssize_t A, ssize_t B, ssize_t C, ssize_t D>
[[nodiscard]] constexpr static int _mm_swizzle_ps_permute_mask() noexcept
{
    static_assert(A >= -3 && A < 4);
    static_assert(B >= -3 && B < 4);
    static_assert(C >= -3 && C < 4);
    static_assert(D >= -3 && D < 4);

    int r = 0;

    // Destination lane 0.
    switch (A) {
    case 0: r |= 0b00'00'00'00; break;
    case 1: r |= 0b00'00'00'01; break;
    case 2: r |= 0b00'00'00'10; break;
    case 3: r |= 0b00'00'00'11; break;
    case -1: r |= 0b00'00'00'00; break;
    case -2: r |= 0b00'00'00'00; break;
    }

    // Destination lane 1.
    switch (B) {
    case 0: r |= 0b00'00'00'00; break;
    case 1: r |= 0b00'00'01'00; break;
    case 2: r |= 0b00'00'10'00; break;
    case 3: r |= 0b00'00'11'00; break;
    case -1: r |= 0b00'00'01'00; break;
    case -2: r |= 0b00'00'01'00; break;
    }

    // Destination lane 2.
    switch (C) {
    case 0: r |= 0b00'00'00'00; break;
    case 1: r |= 0b00'01'00'00; break;
    case 2: r |= 0b00'10'00'00; break;
    case 3: r |= 0b00'11'00'00; break;
    case -1: r |= 0b00'10'00'00; break;
    case -2: r |= 0b00'10'00'00; break;
    }

    // Destination lane 3.
    switch (D) {
    case 0: r |= 0b00'00'00'00; break;
    case 1: r |= 0b01'00'00'00; break;
    case 2: r |= 0b10'00'00'00; break;
    case 3: r |= 0b11'00'00'00; break;
    case -1: r |= 0b11'00'00'00; break;
    case -2: r |= 0b11'00'00'00; break;
    }

    return r;
}
/** Build a 4-bit mask with a bit set for every lane whose index is not -2.
 *
 * Bit 0 belongs to A, bit 1 to B, bit 2 to C and bit 3 to D. A bit is
 * cleared only when the matching index equals -2; every other index
 * (including the source indices 0..3) sets the bit.
 *
 * @tparam A Index for lane 0, in the range [-3, 3].
 * @tparam B Index for lane 1, in the range [-3, 3].
 * @tparam C Index for lane 2, in the range [-3, 3].
 * @tparam D Index for lane 3, in the range [-3, 3].
 * @return The 4-bit mask; 0b0000 when all four indices are -2.
 */
template<ssize_t A, ssize_t B, ssize_t C, ssize_t D>
[[nodiscard]] constexpr static int _mm_swizzle_ps_not_one_mask() noexcept
{
    static_assert(A >= -3 && A < 4);
    static_assert(B >= -3 && B < 4);
    static_assert(C >= -3 && C < 4);
    static_assert(D >= -3 && D < 4);

    int r = 0;
    r |= (A == -2) ? 0 : 0b0001;
    r |= (B == -2) ? 0 : 0b0010;
    r |= (C == -2) ? 0 : 0b0100;
    r |= (D == -2) ? 0 : 0b1000;
    return r;
}
/** Build a 4-bit mask with a bit set for every lane whose index is negative.
 *
 * Bit 0 belongs to A, bit 1 to B, bit 2 to C and bit 3 to D. Non-negative
 * indices (0..3) leave the matching bit cleared.
 *
 * @tparam A Index for lane 0, in the range [-3, 3].
 * @tparam B Index for lane 1, in the range [-3, 3].
 * @tparam C Index for lane 2, in the range [-3, 3].
 * @tparam D Index for lane 3, in the range [-3, 3].
 * @return The 4-bit mask; 0b0000 when no index is negative.
 */
template<ssize_t A, ssize_t B, ssize_t C, ssize_t D>
[[nodiscard]] constexpr static int _mm_swizzle_ps_number_mask() noexcept
{
    static_assert(A >= -3 && A < 4);
    static_assert(B >= -3 && B < 4);
    static_assert(C >= -3 && C < 4);
    static_assert(D >= -3 && D < 4);

    int r = 0;
    r |= A < 0 ? 0b0001 : 0;
    r |= B < 0 ? 0b0010 : 0;
    r |= C < 0 ? 0b0100 : 0;
    r |= D < 0 ? 0b1000 : 0;
    return r;
}
94template<s
size_t A, s
size_t B, s
size_t C, s
size_t D>
95[[nodiscard]] __m128 _mm_swizzle_ps(__m128
const &value)
noexcept
97 static_assert(A >= -3 && A < 4);
98 static_assert(B >= -3 && B < 4);
99 static_assert(C >= -3 && C < 4);
100 static_assert(D >= -3 && D < 4);
102 constexpr int permute_mask = _mm_swizzle_ps_permute_mask<A, B, C, D>();
103 constexpr int not_one_mask = _mm_swizzle_ps_not_one_mask<A, B, C, D>();
104 constexpr int number_mask = _mm_swizzle_ps_number_mask<A, B, C, D>();
108 if constexpr (permute_mask != 0b11'10'01'00) {
109 swizzled = _mm_permute_ps(value, permute_mask);
115 if constexpr (not_one_mask == 0b0000) {
116 numbers = _mm_set_ps1(1.0f);
117 }
else if constexpr (not_one_mask == 0b1111) {
118 numbers = _mm_setzero_ps();
119 }
else if constexpr (not_one_mask == 0b1110) {
120 numbers = _mm_set_ss(1.0f);
122 hilet _1111 = _mm_set_ps1(1.0f);
123 numbers = _mm_insert_ps(_1111, _1111, not_one_mask);
127 if constexpr (number_mask == 0b0000) {
129 }
else if constexpr (number_mask == 0b1111) {
131 }
else if constexpr (((not_one_mask | ~number_mask) & 0b1111) == 0b1111) {
132 result = _mm_insert_ps(swizzled, swizzled, number_mask);
134 result = _mm_blend_ps(swizzled, numbers, number_mask);
139template<s
size_t A, s
size_t B, s
size_t C, s
size_t D>
140[[nodiscard]] __m128i _mm_swizzle_epi32(__m128i
const &value)
noexcept
142 return _mm_castps_si128(_mm_swizzle_ps<A, B, C, D>(_mm_castsi128_ps(value)));
146[[nodiscard]] __m128d _mm_swizzle_pd(__m128d
const &value)
noexcept
148 constexpr auto A1 = A >= 0 ? A * 2 : A;
149 constexpr auto A2 = A >= 0 ? A1 + 1 : A1;
150 constexpr auto B1 = B >= 0 ? B * 2 : B;
151 constexpr auto B2 = B >= 0 ? B1 + 1 : B1;
153 return _mm_castps_pd(_mm_swizzle_ps<A1, A2, B1, B2>(_mm_castpd_ps(value)));
157[[nodiscard]] __m128i _mm_swizzle_epi64(__m128i
const &value)
noexcept
159 return _mm_castpd_si128(_mm_swizzle_pd<A, B>(_mm_castsi128_pd(value)));
// Cross-reference notes:
//  - ssize_t (declared as std::ptrdiff_t): signed size/index into an array.
//    Definition: required.hpp:37
//  - hilet (macro): invariant should be the default for variables.
//    Definition: required.hpp:23
//  - Functions and macros for handling architectural differences between
//    compilers, CPUs and operating systems.