HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
native_i16x8_sse2.hpp
1
2
3#pragma once
4
5#include "native_simd_utility.hpp"
6
7namespace hi {
8inline namespace v1 {
9
10#ifdef HI_HAS_SSE2
11
12
29struct native_i16x8 {
30 using value_type = int16_t;
31 constexpr static size_t size = 8;
32 using register_type = __m128i;
33
34 register_type v;
35
36 native_i16x8(native_i16x8 const&) noexcept = default;
37 native_i16x8(native_i16x8 &&) noexcept = default;
38 native_i16x8 &operator=(native_i16x8 const &) noexcept = default;
39 native_i16x8 &operator=(native_i16x8 &&) noexcept = default;
40
43 native_i16x8() noexcept : v(_mm_setzero_si128()) {}
44
45 [[nodiscard]] explicit native_i16x8(register_type other) noexcept : v(other) {}
46
47 [[nodiscard]] explicit operator register_type () const noexcept {
48 return v;
49 }
50
58 [[nodiscard]] native_i16x8(value_type a, value_type b = value_type{0}, value_type c = value_type{0}, value_type d = value_type{0},
59 value_type e = value_type{0}, value_type f = value_type{0}, value_type g = value_type{0},
60 value_type h = value_type{0}) noexcept :
61 v(_mm_set_epi16(h, g, f, e, d, c, b, a)) {}
62
63 [[nodiscard]] explicit native_i16x8(value_type const *other) noexcept : v(_mm_loadu_si128(reinterpret_cast<register_type const *>(other))) {}
64
65 void store(value_type *out) const noexcept
66 {
68 _mm_storeu_si128(reinterpret_cast<register_type *>(out), v);
69 }
70
71 [[nodiscard]] explicit native_i16x8(void const *other) noexcept : v(_mm_loadu_si128(static_cast<register_type const *>(other))) {}
72
73 void store(void *out) const noexcept
74 {
76 _mm_storeu_si128(static_cast<register_type *>(out), v);
77 }
78
79 [[nodiscard]] explicit native_i16x8(std::span<value_type const> other) noexcept
80 {
81 hi_axiom(other.size() >= size);
82 v = _mm_loadu_si128(reinterpret_cast<register_type const *>(other.data()));
83 }
84
85 void store(std::span<value_type> out) const noexcept
86 {
87 hi_axiom(out.size() >= size);
88 _mm_storeu_si128(reinterpret_cast<register_type *>(out.data()), v);
89 }
90
91 template<size_t N>
92 [[nodiscard]] explicit native_i16x8(std::array<value_type, N> other) noexcept requires (N >= size) : v(_mm_loadu_si128(reinterpret_cast<register_type const *>(other.data()))) {}
93
94 template<size_t N>
95 [[nodiscard]] explicit operator std::array<value_type, N> () const noexcept requires (N >= size)
96 {
98 _mm_storeu_si128(reinterpret_cast<register_type *>(r.data()), v);
99 return r;
100 }
101
102
116 [[nodiscard]] static native_i16x8 broadcast(int16_t a) noexcept
117 {
118 return native_i16x8{_mm_set1_epi16(a)};
119 }
120
134// [[nodiscard]] static native_i16x8 broadcast(native_i16x8 a) noexcept
135// {
136//#ifdef HI_HAS_AVX2
137// return native_i16x8{_mm_broadcastw_epi16(a.v)};
138//#else
139// return permute<"xxxxxxxx">(a);
140//#endif
141// }
142
145 [[nodiscard]] static native_i16x8 from_mask(size_t mask) noexcept
146 {
147 hi_axiom(mask <= 0b1111'1111);
148
149 return native_i16x8{
150 mask & 0b0000'0001 ? 0 : truncate<value_type>(0xffff),
151 mask & 0b0000'0010 ? 0 : truncate<value_type>(0xffff),
152 mask & 0b0000'0100 ? 0 : truncate<value_type>(0xffff),
153 mask & 0b0000'1000 ? 0 : truncate<value_type>(0xffff),
154 mask & 0b0001'0000 ? 0 : truncate<value_type>(0xffff),
155 mask & 0b0010'0000 ? 0 : truncate<value_type>(0xffff),
156 mask & 0b0100'0000 ? 0 : truncate<value_type>(0xffff),
157 mask & 0b1000'0000 ? 0 : truncate<value_type>(0xffff)};
158 }
159
162 [[nodiscard]] size_t mask() const noexcept
163 {
164 auto tmp = _mm_movemask_epi8(v);
165 tmp &= 0b0101'0101;
166 tmp |= tmp >> 1;
167 tmp &= 0b0011'0011;
168 tmp |= tmp >> 2;
169 tmp &= 0b0000'1111;
170 return narrow_cast<size_t>(tmp);
171 }
172
173
174 [[nodiscard]] friend native_i16x8 operator==(native_i16x8 a, native_i16x8 b) noexcept
175 {
176 return native_i16x8{_mm_cmpeq_epi16(a.v, b.v)};
177 }
178
179 [[nodiscard]] friend native_i16x8 operator!=(native_i16x8 a, native_i16x8 b) noexcept
180 {
181 return ~(a == b);
182 }
183
184 [[nodiscard]] friend native_i16x8 operator<(native_i16x8 a, native_i16x8 b) noexcept
185 {
186 return native_i16x8{_mm_cmplt_epi16(a.v, b.v)};
187 }
188
189 [[nodiscard]] friend native_i16x8 operator>(native_i16x8 a, native_i16x8 b) noexcept
190 {
191 return native_i16x8{_mm_cmpgt_epi16(a.v, b.v)};
192 }
193
194 [[nodiscard]] friend native_i16x8 operator<=(native_i16x8 a, native_i16x8 b) noexcept
195 {
196 return ~(a > b);
197 }
198
199 [[nodiscard]] friend native_i16x8 operator>=(native_i16x8 a, native_i16x8 b) noexcept
200 {
201 return ~(a < b);
202 }
203
204 [[nodiscard]] friend native_i16x8 operator+(native_i16x8 a, native_i16x8 b) noexcept
205 {
206 return native_i16x8{_mm_add_epi16(a.v, b.v)};
207 }
208
209 [[nodiscard]] friend native_i16x8 operator-(native_i16x8 a, native_i16x8 b) noexcept
210 {
211 return native_i16x8{_mm_sub_epi16(a.v, b.v)};
212 }
213
214 [[nodiscard]] friend native_i16x8 operator-(native_i16x8 a) noexcept
215 {
216 return native_i16x8{} - a;
217 }
218
219 [[nodiscard]] friend native_i16x8 operator*(native_i16x8 a, native_i16x8 b) noexcept
220 {
221 return native_i16x8{_mm_mullo_epi16(a.v, b.v)};
222 }
223
224 [[nodiscard]] friend native_i16x8 operator&(native_i16x8 a, native_i16x8 b) noexcept
225 {
226 return native_i16x8{_mm_and_si128(a.v, b.v)};
227 }
228
229 [[nodiscard]] friend native_i16x8 operator|(native_i16x8 a, native_i16x8 b) noexcept
230 {
231 return native_i16x8{_mm_or_si128(a.v, b.v)};
232 }
233
234 [[nodiscard]] friend native_i16x8 operator^(native_i16x8 a, native_i16x8 b) noexcept
235 {
236 return native_i16x8{_mm_xor_si128(a.v, b.v)};
237 }
238
239 [[nodiscard]] friend native_i16x8 operator~(native_i16x8 a) noexcept
240 {
241 auto ones = _mm_undefined_si128();
242 ones = _mm_cmpeq_epi32(ones, ones);
243 return native_i16x8{_mm_andnot_si128(a.v, ones)};
244 }
245
246 [[nodiscard]] friend native_i16x8 operator<<(native_i16x8 a, int b) noexcept
247 {
248 return native_i16x8{_mm_slli_epi16(a.v, b)};
249 }
250
251 [[nodiscard]] friend native_i16x8 operator>>(native_i16x8 a, int b) noexcept
252 {
253 return native_i16x8{_mm_srai_epi16(a.v, b)};
254 }
255
256 [[nodiscard]] friend native_i16x8 min(native_i16x8 a, native_i16x8 b) noexcept
257 {
258 return native_i16x8{_mm_min_epi16(a.v, b.v)};
259 }
260
261 [[nodiscard]] friend native_i16x8 max(native_i16x8 a, native_i16x8 b) noexcept
262 {
263 return native_i16x8{_mm_max_epi16(a.v, b.v)};
264 }
265
266 [[nodiscard]] friend native_i16x8 abs(native_i16x8 a) noexcept
267 {
268 return native_i16x8{_mm_abs_epi16(a.v)};
269 }
270
277 template<size_t Mask>
278 [[nodiscard]] friend native_i16x8 set_zero(native_i16x8 a) noexcept
279 {
280 static_assert(Mask <= 0b1111'1111);
281 hilet mask = from_mask(Mask);
282 return not_and(mask, a);
283 }
284
292 template<size_t Index>
293 [[nodiscard]] friend native_i16x8 insert(native_i16x8 a, value_type b) noexcept
294 {
295 static_assert(Index < 4);
296 return native_i16x8{_mm_insert_epi16(a, b, narrow_cast<int>(Index))};
297 }
298
305 template<size_t Index>
306 [[nodiscard]] friend float extract(native_i16x8 a) noexcept
307 {
308 return std::bit_cast<float>(_mm_extract_epi16(a, Index));
309 }
310
319 template<size_t Mask>
320 [[nodiscard]] friend native_i16x8 blend(native_i16x8 a, native_i16x8 b) noexcept
321 {
322#ifdef HI_HAS_SSE4_1
323 return native_i16x8{_mm_blend_epi16(a, b, Mask)};
324#else
325 hilet mask = from_mask(Mask);
326 return not_and(mask, a) | (mask & b);
327#endif
328 }
329
342 //template<fixed_string SourceElements>
343 //[[nodiscard]] static native_i16x8 permute(native_i16x8 a) noexcept
344 //{
345 // constexpr auto order = detail::native_swizzle_to_packed_indices<SourceElements, size>();
346 //
347 // if constexpr (order == 0b111'110'101'100'011'010'001'000) {
348 // return a.v;
349 // } else {
350 // return native_i16x8{_mm_shuffle_epi16(a.v, order)};
351 // }
352 //}
353
370 template<fixed_string SourceElements>
371 [[nodiscard]] friend native_i16x8 swizzle(native_i16x8 a) noexcept
372 {
373 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
374 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
375 constexpr auto number_mask = one_mask | zero_mask;
376
377 if constexpr (number_mask == 0b1111) {
378 // Swizzle was /[01][01][01][01]/.
379 return swizzle_numbers<SourceElements>();
380
381 } else if constexpr (number_mask == 0b0000) {
382 // Swizzle was /[^01][^01][^01][^01]/.
383 return permute<SourceElements>(a);
384
385#ifdef HI_HAS_SSE4_1
386 } else if constexpr (number_mask == zero_mask) {
387 // Swizzle was /[^1][^1][^1][^1]/.
388 hilet ordered = permute<SourceElements>(a);
389 return set_zero<zero_mask>(ordered);
390#endif
391
392 } else {
393 hilet ordered = permute<SourceElements>(a);
394 hilet numbers = swizzle_numbers<SourceElements>();
395 return blend<number_mask>(ordered, numbers);
396 }
397 }
398
399#ifdef HI_HAS_SSE3
410 [[nodiscard]] friend native_i16x8 horizontal_add(native_i16x8 a, native_i16x8 b) noexcept
411 {
412 return native_i16x8{_mm_hadd_epi16(a.v, b.v)};
413 }
414#endif
415
416#ifdef HI_HAS_SSE3
427 [[nodiscard]] friend native_i16x8 horizontal_sub(native_i16x8 a, native_i16x8 b) noexcept
428 {
429 return native_i16x8{_mm_hsub_epi16(a.v, b.v)};
430 }
431#endif
432
439 //[[nodiscard]] friend native_i16x8 horizontal_sum(native_i16x8 a) noexcept
440 //{
441 // auto tmp = a + permute<"cdab">(a);
442 // return tmp + permute<"badc">(tmp);
443 //}
444
455 template<size_t SourceMask>
456 [[nodiscard]] friend native_i16x8 dot_product(native_i16x8 a, native_i16x8 b) noexcept
457 {
458 static_assert(SourceMask <= 0b1111);
459 return horizontal_sum(set_zero<~SourceMask & 0b1111>(a * b));
460 }
461
462
468 [[nodiscard]] friend native_i16x8 not_and(native_i16x8 a, native_i16x8 b) noexcept
469 {
470 return native_i16x8{_mm_andnot_si128(a.v, b.v)};
471 }
472
473 template<fixed_string SourceElements>
474 [[nodiscard]] static native_i16x8 swizzle_numbers() noexcept
475 {
476 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
477 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
478 constexpr auto number_mask = one_mask | zero_mask;
479 constexpr auto alpha_mask = ~number_mask & 0b1111;
480
481 if constexpr ((zero_mask | alpha_mask) == 0b1111) {
482 return native_i16x8{_mm_setzero_si128()};
483
484 } else if constexpr ((one_mask | alpha_mask)== 0b1111) {
485 return native_i16x8{_mm_set1_epi16(1)};
486
487 } else {
488 return native_i16x8{_mm_set_epi16(
489 to_bool(one_mask & 0b0001) ? 1 : 0,
490 to_bool(one_mask & 0b0010) ? 1 : 0,
491 to_bool(one_mask & 0b0100) ? 1 : 0,
492 to_bool(one_mask & 0b1000) ? 1 : 0
493 )};
494 }
495
496 }
497
498};
499
500#endif
501
502
503}}
504
#define hi_axiom(expression,...)
Specify an axiom; an expression that is true.
Definition assert.hpp:253
#define hi_axiom_not_null(expression,...)
Assert if an expression is not nullptr.
Definition assert.hpp:272
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
@ other
The gui_event does not have associated data.
DOXYGEN BUG.
Definition algorithm.hpp:13
geometry/margins.hpp
Definition cache.hpp:11
T max(T... args)
T min(T... args)
T operator!=(T... args)