HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
native_i16x8_sse2.hpp
1
2
3#pragma once
4
5#include "native_simd_utility.hpp"
6#include "../macros.hpp"
7
8namespace hi {
9inline namespace v1 {
10
11#ifdef HI_HAS_SSE2
12
13
30struct native_i16x8 {
31 using value_type = int16_t;
32 constexpr static size_t size = 8;
33 using register_type = __m128i;
34
36
/** Copy constructor; compiler-generated. */
37 native_i16x8(native_i16x8 const&) noexcept = default;
41
45
46 [[nodiscard]] explicit native_i16x8(register_type other) noexcept : v(other) {}
47
48 [[nodiscard]] explicit operator register_type () const noexcept {
49 return v;
50 }
51
59 [[nodiscard]] native_i16x8(value_type a, value_type b = value_type{0}, value_type c = value_type{0}, value_type d = value_type{0},
60 value_type e = value_type{0}, value_type f = value_type{0}, value_type g = value_type{0},
61 value_type h = value_type{0}) noexcept :
62 v(_mm_set_epi16(h, g, f, e, d, c, b, a)) {}
63
64 [[nodiscard]] explicit native_i16x8(value_type const *other) noexcept : v(_mm_loadu_si128(reinterpret_cast<register_type const *>(other))) {}
65
66 void store(value_type *out) const noexcept
67 {
68 hi_axiom_not_null(out);
69 _mm_storeu_si128(reinterpret_cast<register_type *>(out), v);
70 }
71
72 [[nodiscard]] explicit native_i16x8(void const *other) noexcept : v(_mm_loadu_si128(static_cast<register_type const *>(other))) {}
73
74 void store(void *out) const noexcept
75 {
76 hi_axiom_not_null(out);
77 _mm_storeu_si128(static_cast<register_type *>(out), v);
78 }
79
80 [[nodiscard]] explicit native_i16x8(std::span<value_type const> other) noexcept
81 {
82 hi_axiom(other.size() >= size);
83 v = _mm_loadu_si128(reinterpret_cast<register_type const *>(other.data()));
84 }
85
86 void store(std::span<value_type> out) const noexcept
87 {
88 hi_axiom(out.size() >= size);
89 _mm_storeu_si128(reinterpret_cast<register_type *>(out.data()), v);
90 }
91
92 template<size_t N>
93 [[nodiscard]] explicit native_i16x8(std::array<value_type, N> other) noexcept requires (N >= size) : v(_mm_loadu_si128(reinterpret_cast<register_type const *>(other.data()))) {}
94
95 template<size_t N>
96 [[nodiscard]] explicit operator std::array<value_type, N> () const noexcept requires (N >= size)
97 {
99 _mm_storeu_si128(reinterpret_cast<register_type *>(r.data()), v);
100 return r;
101 }
102
103
117 [[nodiscard]] static native_i16x8 broadcast(int16_t a) noexcept
118 {
119 return native_i16x8{_mm_set1_epi16(a)};
120 }
121
135// [[nodiscard]] static native_i16x8 broadcast(native_i16x8 a) noexcept
136// {
137//#ifdef HI_HAS_AVX2
138// return native_i16x8{_mm_broadcastw_epi16(a.v)};
139//#else
140// return permute<"xxxxxxxx">(a);
141//#endif
142// }
143
146 [[nodiscard]] static native_i16x8 from_mask(size_t mask) noexcept
147 {
148 hi_axiom(mask <= 0b1111'1111);
149
150 return native_i16x8{
151 truncate<value_type>(mask & 0b0000'0001 ? 0 : 0xffff),
152 truncate<value_type>(mask & 0b0000'0010 ? 0 : 0xffff),
153 truncate<value_type>(mask & 0b0000'0100 ? 0 : 0xffff),
154 truncate<value_type>(mask & 0b0000'1000 ? 0 : 0xffff),
155 truncate<value_type>(mask & 0b0001'0000 ? 0 : 0xffff),
156 truncate<value_type>(mask & 0b0010'0000 ? 0 : 0xffff),
157 truncate<value_type>(mask & 0b0100'0000 ? 0 : 0xffff),
158 truncate<value_type>(mask & 0b1000'0000 ? 0 : 0xffff)};
159 }
160
163 [[nodiscard]] size_t mask() const noexcept
164 {
165 auto tmp = _mm_movemask_epi8(v);
166 tmp &= 0b0101'0101;
167 tmp |= tmp >> 1;
168 tmp &= 0b0011'0011;
169 tmp |= tmp >> 2;
170 tmp &= 0b0000'1111;
171 return narrow_cast<size_t>(tmp);
172 }
173
174
175 [[nodiscard]] friend native_i16x8 operator==(native_i16x8 a, native_i16x8 b) noexcept
176 {
177 return native_i16x8{_mm_cmpeq_epi16(a.v, b.v)};
178 }
179
181 {
182 return ~(a == b);
183 }
184
185 [[nodiscard]] friend native_i16x8 operator<(native_i16x8 a, native_i16x8 b) noexcept
186 {
187 return native_i16x8{_mm_cmplt_epi16(a.v, b.v)};
188 }
189
191 {
192 return native_i16x8{_mm_cmpgt_epi16(a.v, b.v)};
193 }
194
196 {
197 return ~(a > b);
198 }
199
201 {
202 return ~(a < b);
203 }
204
205 [[nodiscard]] friend native_i16x8 operator+(native_i16x8 a, native_i16x8 b) noexcept
206 {
207 return native_i16x8{_mm_add_epi16(a.v, b.v)};
208 }
209
210 [[nodiscard]] friend native_i16x8 operator-(native_i16x8 a, native_i16x8 b) noexcept
211 {
212 return native_i16x8{_mm_sub_epi16(a.v, b.v)};
213 }
214
215 [[nodiscard]] friend native_i16x8 operator-(native_i16x8 a) noexcept
216 {
217 return native_i16x8{} - a;
218 }
219
220 [[nodiscard]] friend native_i16x8 operator*(native_i16x8 a, native_i16x8 b) noexcept
221 {
222 return native_i16x8{_mm_mullo_epi16(a.v, b.v)};
223 }
224
225 [[nodiscard]] friend native_i16x8 operator&(native_i16x8 a, native_i16x8 b) noexcept
226 {
227 return native_i16x8{_mm_and_si128(a.v, b.v)};
228 }
229
230 [[nodiscard]] friend native_i16x8 operator|(native_i16x8 a, native_i16x8 b) noexcept
231 {
232 return native_i16x8{_mm_or_si128(a.v, b.v)};
233 }
234
235 [[nodiscard]] friend native_i16x8 operator^(native_i16x8 a, native_i16x8 b) noexcept
236 {
237 return native_i16x8{_mm_xor_si128(a.v, b.v)};
238 }
239
240 [[nodiscard]] friend native_i16x8 operator~(native_i16x8 a) noexcept
241 {
242 auto ones = _mm_undefined_si128();
244 return native_i16x8{_mm_andnot_si128(a.v, ones)};
245 }
246
247 [[nodiscard]] friend native_i16x8 operator<<(native_i16x8 a, int b) noexcept
248 {
249 return native_i16x8{_mm_slli_epi16(a.v, b)};
250 }
251
252 [[nodiscard]] friend native_i16x8 operator>>(native_i16x8 a, int b) noexcept
253 {
254 return native_i16x8{_mm_srai_epi16(a.v, b)};
255 }
256
257 [[nodiscard]] friend native_i16x8 min(native_i16x8 a, native_i16x8 b) noexcept
258 {
259 return native_i16x8{_mm_min_epi16(a.v, b.v)};
260 }
261
262 [[nodiscard]] friend native_i16x8 max(native_i16x8 a, native_i16x8 b) noexcept
263 {
264 return native_i16x8{_mm_max_epi16(a.v, b.v)};
265 }
266
267 [[nodiscard]] friend native_i16x8 abs(native_i16x8 a) noexcept
268 {
269 return native_i16x8{_mm_abs_epi16(a.v)};
270 }
271
278 template<size_t Mask>
279 [[nodiscard]] friend native_i16x8 set_zero(native_i16x8 a) noexcept
280 {
281 static_assert(Mask <= 0b1111'1111);
282 hilet mask = from_mask(Mask);
283 return not_and(mask, a);
284 }
285
293 template<size_t Index>
294 [[nodiscard]] friend native_i16x8 insert(native_i16x8 a, value_type b) noexcept
295 {
296 static_assert(Index < 4);
298 }
299
306 template<size_t Index>
307 [[nodiscard]] friend float extract(native_i16x8 a) noexcept
308 {
309 return std::bit_cast<float>(_mm_extract_epi16(a, Index));
310 }
311
320 template<size_t Mask>
321 [[nodiscard]] friend native_i16x8 blend(native_i16x8 a, native_i16x8 b) noexcept
322 {
323#ifdef HI_HAS_SSE4_1
324 return native_i16x8{_mm_blend_epi16(a, b, Mask)};
325#else
326 hilet mask = from_mask(Mask);
327 return not_and(mask, a) | (mask & b);
328#endif
329 }
330
343 //template<fixed_string SourceElements>
344 //[[nodiscard]] static native_i16x8 permute(native_i16x8 a) noexcept
345 //{
346 // constexpr auto order = detail::native_swizzle_to_packed_indices<SourceElements, size>();
347 //
348 // if constexpr (order == 0b111'110'101'100'011'010'001'000) {
349 // return a.v;
350 // } else {
351 // return native_i16x8{_mm_shuffle_epi16(a.v, order)};
352 // }
353 //}
354
// Selects an implementation at compile time from the characters of SourceElements.
// one_mask / zero_mask are computed by detail::native_swizzle_to_mask for the
// characters '1' and '0'; number_mask is the union of both.
// NOTE(review): several statements of this body are not visible in this listing
// (three branches have no statements) — restore them from the real header.
// NOTE(review): the masks are compared against 4-bit constants (0b1111) although
// `size` is 8, and permute() only appears as commented-out code here — confirm.
371 template<fixed_string SourceElements>
372 [[nodiscard]] friend native_i16x8 swizzle(native_i16x8 a) noexcept
373 {
374 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
375 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
376 constexpr auto number_mask = one_mask | zero_mask;
377
378 if constexpr (number_mask == 0b1111) {
379 // Swizzle was /[01][01][01][01]/.
381
382 } else if constexpr (number_mask == 0b0000) {
383 // Swizzle was /[^01][^01][^01][^01]/.
// No numeric characters at all: delegate entirely to permute().
384 return permute<SourceElements>(a);
385
386#ifdef HI_HAS_SSE4_1
387 } else if constexpr (number_mask == zero_mask) {
388 // Swizzle was /[^1][^1][^1][^1]/.
391#endif
392
393 } else {
397 }
398 }
399
400#ifdef HI_HAS_SSE3
// NOTE(review): the declaration line of this function is not visible in this listing.
// NOTE(review): Intel documents _mm_hadd_epi16 as an SSSE3 intrinsic (<tmmintrin.h>);
// confirm that HI_HAS_SSE3 is the intended guard.
412 {
413 return native_i16x8{_mm_hadd_epi16(a.v, b.v)};
414 }
415#endif
416
417#ifdef HI_HAS_SSE3
// NOTE(review): the declaration line of this function is not visible in this listing.
// NOTE(review): Intel documents _mm_hsub_epi16 as an SSSE3 intrinsic (<tmmintrin.h>);
// confirm that HI_HAS_SSE3 is the intended guard.
429 {
430 return native_i16x8{_mm_hsub_epi16(a.v, b.v)};
431 }
432#endif
433
440 //[[nodiscard]] friend native_i16x8 horizontal_sum(native_i16x8 a) noexcept
441 //{
442 // auto tmp = a + permute<"cdab">(a);
443 // return tmp + permute<"badc">(tmp);
444 //}
445
// NOTE(review): the declaration line and the statements after the static_assert
// are not visible in this listing.
// NOTE(review): SourceMask is limited to 4 bits although `size` is 8 — confirm.
456 template<size_t SourceMask>
458 {
459 static_assert(SourceMask <= 0b1111);
461 }
462
463
469 [[nodiscard]] friend native_i16x8 not_and(native_i16x8 a, native_i16x8 b) noexcept
470 {
471 return native_i16x8{_mm_andnot_si128(a.v, b.v)};
472 }
473
// Builds a vector from the numeric characters ('0' and '1') of SourceElements.
// NOTE(review): the declaration line and two statements of this body are not
// visible in this listing — restore them from the real header.
// NOTE(review): all masks here are 4 bits wide (0b1111) and the last branch lists
// four values, although `size` is 8 — confirm.
474 template<fixed_string SourceElements>
476 {
477 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
478 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
479 constexpr auto number_mask = one_mask | zero_mask;
// Positions that are neither '0' nor '1'.
480 constexpr auto alpha_mask = ~number_mask & 0b1111;
481
482 if constexpr ((zero_mask | alpha_mask) == 0b1111) {
484
485 } else if constexpr ((one_mask | alpha_mask)== 0b1111) {
// No '0' characters: every lane becomes 1.
486 return native_i16x8{_mm_set1_epi16(1)};
487
488 } else {
490 to_bool(one_mask & 0b0001) ? 1 : 0,
491 to_bool(one_mask & 0b0010) ? 1 : 0,
492 to_bool(one_mask & 0b0100) ? 1 : 0,
493 to_bool(one_mask & 0b1000) ? 1 : 0
494 )};
495 }
496
497 }
498
499};
500
501#endif
502
503
504}}
505
@ truncate
After the file has been opened, truncate it.
@ other
The gui_event does not have associated data.
DOXYGEN BUG.
Definition algorithm.hpp:16
geometry/margins.hpp
Definition lookahead_iterator.hpp:5
constexpr Out narrow_cast(In const &rhs) noexcept
Cast numeric values without loss of precision.
Definition cast.hpp:377
T max(T... args)
T min(T... args)
T operator!=(T... args)