HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
native_f16x8_sse2.hpp
1
2
3#pragma once
4
5#include "native_simd_utility.hpp"
6#include "../utility/module.hpp"
7
8namespace hi {
9inline namespace v1 {
10
11#ifdef HI_HAS_SSE2
12
13
30template<>
31struct native_simd<float16,8> {
32 using value_type = float16;
33 constexpr static size_t size = 8;
34 using register_type = __m128i;
35
36 register_type v;
37
38 native_simd(native_simd const&) noexcept = default;
39 native_simd(native_simd &&) noexcept = default;
40 native_simd &operator=(native_simd const &) noexcept = default;
41 native_simd &operator=(native_simd &&) noexcept = default;
42
45 native_simd() noexcept : v(_mm_setzero_si128()) {}
46
47 explicit native_simd(native_simd<float,8> const &a) noexcept;
48 native_simd(native_simd<float,4> const &a, native_simd<float,4> const &b) noexcept;
49
50 [[nodiscard]] explicit native_simd(register_type other) noexcept : v(other) {}
51
52 [[nodiscard]] explicit operator register_type () const noexcept {
53 return v;
54 }
55
63 [[nodiscard]] native_simd(value_type a, value_type b = value_type{}, value_type c = value_type{}, value_type d = value_type{},
64 value_type e = value_type{}, value_type f = value_type{}, value_type g = value_type{},
65 value_type h = value_type{}) noexcept :
66 v(_mm_set_epi16(h.v, g.v, f.v, e.v, d.v, c.v, b.v, a.v)) {}
67
68 [[nodiscard]] explicit native_simd(value_type const *other) noexcept : v(_mm_loadu_si128(reinterpret_cast<register_type const *>(other))) {}
69
70 void store(value_type *out) const noexcept
71 {
73 _mm_storeu_si128(reinterpret_cast<register_type *>(out), v);
74 }
75
76 [[nodiscard]] explicit native_simd(void const *other) noexcept : v(_mm_loadu_si128(static_cast<register_type const *>(other))) {}
77
78 void store(void *out) const noexcept
79 {
81 _mm_storeu_si128(static_cast<register_type *>(out), v);
82 }
83
84 [[nodiscard]] explicit native_simd(std::span<value_type const> other) noexcept
85 {
86 hi_axiom(other.size() >= size);
87 v = _mm_loadu_si128(reinterpret_cast<register_type const *>(other.data()));
88 }
89
90 void store(std::span<value_type> out) const noexcept
91 {
92 hi_axiom(out.size() >= size);
93 _mm_storeu_si128(reinterpret_cast<register_type *>(out.data()), v);
94 }
95
96 template<size_t N>
97 [[nodiscard]] explicit native_simd(std::array<value_type, N> other) noexcept requires (N >= size) : v(_mm_loadu_si128(reinterpret_cast<register_type const *>(other.data()))) {}
98
99 template<size_t N>
100 [[nodiscard]] explicit operator std::array<value_type, N> () const noexcept requires (N >= size)
101 {
103 _mm_storeu_si128(reinterpret_cast<register_type *>(r.data()), v);
104 return r;
105 }
106
107
121 [[nodiscard]] static native_simd broadcast(int16_t a) noexcept
122 {
123 return native_simd{_mm_set1_epi16(a)};
124 }
125
139 [[nodiscard]] static native_simd broadcast(native_simd a) noexcept
140 {
141#ifdef HI_HAS_AVX2
142 return native_simd{_mm_broadcastw_epi16(a.v)};
143#else
144 // Create a mask for 1 word each dword, AND it with a.v.
145 auto tmp = _mm_undefined_si128();
146 tmp = _mm_cmpeq_epi32(tmp, tmp);
147 tmp = _mm_slli_epi32(tmp, 16);
148 tmp = _mm_and_si128(tmp, a.v);
149
150 // Broadcast the first word to all the bytes in the first dword.
151 tmp = _mm_or_si128(tmp, _mm_slli_epi32(tmp, 16));
152
153 // Broadcast the first dword to all 4 dwords.
154 tmp = _mm_shuffle_epi32(tmp, 0b00'00'00'00);
155 return native_simd{tmp};
156#endif
157 }
158
161 [[nodiscard]] static native_simd from_mask(size_t mask) noexcept
162 {
163 hi_axiom(mask <= 0b1111'1111);
164
165 return native_simd{
166 mask & 0b0000'0001 ? value_type{} : value_type::from_uint16_t(0xffff),
167 mask & 0b0000'0010 ? value_type{} : value_type::from_uint16_t(0xffff),
168 mask & 0b0000'0100 ? value_type{} : value_type::from_uint16_t(0xffff),
169 mask & 0b0000'1000 ? value_type{} : value_type::from_uint16_t(0xffff),
170 mask & 0b0001'0000 ? value_type{} : value_type::from_uint16_t(0xffff),
171 mask & 0b0010'0000 ? value_type{} : value_type::from_uint16_t(0xffff),
172 mask & 0b0100'0000 ? value_type{} : value_type::from_uint16_t(0xffff),
173 mask & 0b1000'0000 ? value_type{} : value_type::from_uint16_t(0xffff)};
174 }
175
178 [[nodiscard]] size_t mask() const noexcept
179 {
180 auto tmp = _mm_movemask_epi8(v);
181 tmp &= 0b0101'0101;
182 tmp |= tmp >> 1;
183 tmp &= 0b0011'0011;
184 tmp |= tmp >> 2;
185 tmp &= 0b0000'1111;
186 return narrow_cast<size_t>(tmp);
187 }
188
189
190 [[nodiscard]] friend native_simd operator==(native_simd a, native_simd b) noexcept
191 {
192 return native_simd{_mm_cmpeq_epi16(a.v, b.v)};
193 }
194
195 [[nodiscard]] friend native_simd operator!=(native_simd a, native_simd b) noexcept
196 {
197 return ~(a == b);
198 }
199
200 [[nodiscard]] friend native_simd operator&(native_simd a, native_simd b) noexcept
201 {
202 return native_simd{_mm_and_si128(a.v, b.v)};
203 }
204
205 [[nodiscard]] friend native_simd operator|(native_simd a, native_simd b) noexcept
206 {
207 return native_simd{_mm_or_si128(a.v, b.v)};
208 }
209
210 [[nodiscard]] friend native_simd operator^(native_simd a, native_simd b) noexcept
211 {
212 return native_simd{_mm_xor_si128(a.v, b.v)};
213 }
214
215 [[nodiscard]] friend native_simd operator~(native_simd a) noexcept
216 {
217 auto ones = _mm_undefined_si128();
218 ones = _mm_cmpeq_epi32(ones, ones);
219 return native_simd{_mm_andnot_si128(a.v, ones)};
220 }
221
228 template<size_t Mask>
229 [[nodiscard]] friend native_simd set_zero(native_simd a) noexcept
230 {
231 static_assert(Mask <= 0b1111'1111);
232 hilet mask = from_mask(Mask);
233 return not_and(mask, a);
234 }
235
243 template<size_t Index>
244 [[nodiscard]] friend native_simd insert(native_simd a, value_type b) noexcept
245 {
246 static_assert(Index < 4);
247 return native_simd{_mm_insert_epi16(a, b.v, narrow_cast<int>(Index))};
248 }
249
256 template<size_t Index>
257 [[nodiscard]] friend value_type extract(native_simd a) noexcept
258 {
259 return std::bit_cast<value_type>(_mm_extract_epi16(a, Index));
260 }
261
270 template<size_t Mask>
271 [[nodiscard]] friend native_simd blend(native_simd a, native_simd b) noexcept
272 {
273#ifdef HI_HAS_SSE4_1
274 return native_simd{_mm_blend_epi16(a, b, Mask)};
275#else
276 hilet mask = from_mask(Mask);
277 return not_and(mask, a) | (mask & b);
278#endif
279 }
280
292 //template<fixed_string SourceElements>
293 //[[nodiscard]] static native_simd permute(native_simd a) noexcept
294 //{
295 // constexpr auto order = detail::native_swizzle_to_packed_indices<SourceElements, size>();
296 //
297 // if constexpr (order == 0b111'110'101'100'011'010'001'000) {
298 // return a.v;
299 // } else {
300 // auto tmp = _mm_shufflelo(a.v,
301 // return native_simd{_mm_shuffle_epi16(a.v, order)};
302 // }
303 //}
304
321 template<fixed_string SourceElements>
322 [[nodiscard]] friend native_simd swizzle(native_simd a) noexcept
323 {
324 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
325 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
326 constexpr auto number_mask = one_mask | zero_mask;
327
328 if constexpr (number_mask == 0b11111111) {
329 // Swizzle was /[01][01][01][01]/.
330 return swizzle_numbers<SourceElements>();
331
332 } else if constexpr (number_mask == 0b00000000) {
333 // Swizzle was /[^01][^01][^01][^01]/.
334 return permute<SourceElements>(a);
335
336#ifdef HI_HAS_SSE4_1
337 } else if constexpr (number_mask == zero_mask) {
338 // Swizzle was /[^1][^1][^1][^1]/.
339 hilet ordered = permute<SourceElements>(a);
340 return set_zero<zero_mask>(ordered);
341#endif
342
343 } else {
344 hilet ordered = permute<SourceElements>(a);
345 hilet numbers = swizzle_numbers<SourceElements>();
346 return blend<number_mask>(ordered, numbers);
347 }
348 }
349
355 [[nodiscard]] friend native_simd not_and(native_simd a, native_simd b) noexcept
356 {
357 return native_simd{_mm_andnot_si128(a.v, b.v)};
358 }
359
360 template<fixed_string SourceElements>
361 [[nodiscard]] static native_simd swizzle_numbers() noexcept
362 {
363 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
364 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
365 constexpr auto number_mask = one_mask | zero_mask;
366 constexpr auto alpha_mask = ~number_mask & 0b11111111;
367
368 if constexpr ((zero_mask | alpha_mask) == 0b11111111) {
369 return native_simd{_mm_setzero_si128()};
370
371 } else if constexpr ((one_mask | alpha_mask)== 0b11111111) {
372 return native_simd::broadcast(value_type::from_uint16_t(0x3c00)); // 1.0
373
374 } else {
375 return native_simd{
376 to_bool(one_mask & 0b00000001) ? value_type::from_uint16_t(0x3c00) : value_type{},
377 to_bool(one_mask & 0b00000010) ? value_type::from_uint16_t(0x3c00) : value_type{},
378 to_bool(one_mask & 0b00000100) ? value_type::from_uint16_t(0x3c00) : value_type{},
379 to_bool(one_mask & 0b00001000) ? value_type::from_uint16_t(0x3c00) : value_type{},
380 to_bool(one_mask & 0b00010000) ? value_type::from_uint16_t(0x3c00) : value_type{},
381 to_bool(one_mask & 0b00100000) ? value_type::from_uint16_t(0x3c00) : value_type{},
382 to_bool(one_mask & 0b01000000) ? value_type::from_uint16_t(0x3c00) : value_type{},
383 to_bool(one_mask & 0b10000000) ? value_type::from_uint16_t(0x3c00) : value_type{}
384 };
385 }
386
387 }
388
389};
390
391#endif
392
393
394}}
395
#define hi_axiom(expression,...)
Specify an axiom; an expression that is true.
Definition assert.hpp:238
#define hi_axiom_not_null(expression,...)
Assert if an expression is not nullptr.
Definition assert.hpp:257
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
@ other
The gui_event does not have associated data.
DOXYGEN BUG.
Definition algorithm.hpp:13
geometry/margins.hpp
Definition cache.hpp:11
T operator!=(T... args)