HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
native_f16x8_sse2.hpp
1
2
3#pragma once
4
5#include "native_simd_utility.hpp"
6#include "../utility/utility.hpp"
7#include "../macros.hpp"
8
9
10
11namespace hi {
12inline namespace v1 {
13
14#ifdef HI_HAS_SSE2
15
16
33template<>
34struct native_simd<float16,8> {
35 using value_type = float16;
36 constexpr static size_t size = 8;
37 using register_type = __m128i;
38
40
41 native_simd(native_simd const&) noexcept = default;
42 native_simd(native_simd &&) noexcept = default;
43 native_simd &operator=(native_simd const &) noexcept = default;
44 native_simd &operator=(native_simd &&) noexcept = default;
45
48 native_simd() noexcept : v(_mm_setzero_si128()) {}
49
50 explicit native_simd(native_simd<float,8> const &a) noexcept;
51 native_simd(native_simd<float,4> const &a, native_simd<float,4> const &b) noexcept;
52
53 [[nodiscard]] explicit native_simd(register_type other) noexcept : v(other) {}
54
55 [[nodiscard]] explicit operator register_type () const noexcept {
56 return v;
57 }
58
66 [[nodiscard]] native_simd(value_type a, value_type b = value_type{}, value_type c = value_type{}, value_type d = value_type{},
67 value_type e = value_type{}, value_type f = value_type{}, value_type g = value_type{},
68 value_type h = value_type{}) noexcept :
69 v(_mm_set_epi16(h.v, g.v, f.v, e.v, d.v, c.v, b.v, a.v)) {}
70
71 [[nodiscard]] explicit native_simd(value_type const *other) noexcept : v(_mm_loadu_si128(reinterpret_cast<register_type const *>(other))) {}
72
73 void store(value_type *out) const noexcept
74 {
75 hi_axiom_not_null(out);
76 _mm_storeu_si128(reinterpret_cast<register_type *>(out), v);
77 }
78
79 [[nodiscard]] explicit native_simd(void const *other) noexcept : v(_mm_loadu_si128(static_cast<register_type const *>(other))) {}
80
81 void store(void *out) const noexcept
82 {
83 hi_axiom_not_null(out);
84 _mm_storeu_si128(static_cast<register_type *>(out), v);
85 }
86
87 [[nodiscard]] explicit native_simd(std::span<value_type const> other) noexcept
88 {
89 hi_axiom(other.size() >= size);
90 v = _mm_loadu_si128(reinterpret_cast<register_type const *>(other.data()));
91 }
92
93 void store(std::span<value_type> out) const noexcept
94 {
95 hi_axiom(out.size() >= size);
96 _mm_storeu_si128(reinterpret_cast<register_type *>(out.data()), v);
97 }
98
99 template<size_t N>
100 [[nodiscard]] explicit native_simd(std::array<value_type, N> other) noexcept requires (N >= size) : v(_mm_loadu_si128(reinterpret_cast<register_type const *>(other.data()))) {}
101
102 template<size_t N>
103 [[nodiscard]] explicit operator std::array<value_type, N> () const noexcept requires (N >= size)
104 {
106 _mm_storeu_si128(reinterpret_cast<register_type *>(r.data()), v);
107 return r;
108 }
109
110
124 [[nodiscard]] static native_simd broadcast(int16_t a) noexcept
125 {
126 return native_simd{_mm_set1_epi16(a)};
127 }
128
142 [[nodiscard]] static native_simd broadcast(native_simd a) noexcept
143 {
144#ifdef HI_HAS_AVX2
145 return native_simd{_mm_broadcastw_epi16(a.v)};
146#else
147 // Create a mask for 1 word each dword, AND it with a.v.
148 auto tmp = _mm_undefined_si128();
150 tmp = _mm_slli_epi32(tmp, 16);
151 tmp = _mm_and_si128(tmp, a.v);
152
153 // Broadcast the first word to all the bytes in the first dword.
155
156 // Broadcast the first dword to all 4 dwords.
157 tmp = _mm_shuffle_epi32(tmp, 0b00'00'00'00);
158 return native_simd{tmp};
159#endif
160 }
161
164 [[nodiscard]] static native_simd from_mask(size_t mask) noexcept
165 {
166 hi_axiom(mask <= 0b1111'1111);
167
168 return native_simd{
169 mask & 0b0000'0001 ? value_type{} : value_type::from_uint16_t(0xffff),
170 mask & 0b0000'0010 ? value_type{} : value_type::from_uint16_t(0xffff),
171 mask & 0b0000'0100 ? value_type{} : value_type::from_uint16_t(0xffff),
172 mask & 0b0000'1000 ? value_type{} : value_type::from_uint16_t(0xffff),
173 mask & 0b0001'0000 ? value_type{} : value_type::from_uint16_t(0xffff),
174 mask & 0b0010'0000 ? value_type{} : value_type::from_uint16_t(0xffff),
175 mask & 0b0100'0000 ? value_type{} : value_type::from_uint16_t(0xffff),
176 mask & 0b1000'0000 ? value_type{} : value_type::from_uint16_t(0xffff)};
177 }
178
181 [[nodiscard]] size_t mask() const noexcept
182 {
183 auto tmp = _mm_movemask_epi8(v);
184 tmp &= 0b0101'0101;
185 tmp |= tmp >> 1;
186 tmp &= 0b0011'0011;
187 tmp |= tmp >> 2;
188 tmp &= 0b0000'1111;
189 return narrow_cast<size_t>(tmp);
190 }
191
192
193 [[nodiscard]] friend native_simd operator==(native_simd a, native_simd b) noexcept
194 {
195 return native_simd{_mm_cmpeq_epi16(a.v, b.v)};
196 }
197
198 [[nodiscard]] friend native_simd operator!=(native_simd a, native_simd b) noexcept
199 {
200 return ~(a == b);
201 }
202
203 [[nodiscard]] friend native_simd operator&(native_simd a, native_simd b) noexcept
204 {
205 return native_simd{_mm_and_si128(a.v, b.v)};
206 }
207
208 [[nodiscard]] friend native_simd operator|(native_simd a, native_simd b) noexcept
209 {
210 return native_simd{_mm_or_si128(a.v, b.v)};
211 }
212
213 [[nodiscard]] friend native_simd operator^(native_simd a, native_simd b) noexcept
214 {
215 return native_simd{_mm_xor_si128(a.v, b.v)};
216 }
217
218 [[nodiscard]] friend native_simd operator~(native_simd a) noexcept
219 {
220 auto ones = _mm_undefined_si128();
222 return native_simd{_mm_andnot_si128(a.v, ones)};
223 }
224
231 template<size_t Mask>
232 [[nodiscard]] friend native_simd set_zero(native_simd a) noexcept
233 {
234 static_assert(Mask <= 0b1111'1111);
235 hilet mask = from_mask(Mask);
236 return not_and(mask, a);
237 }
238
246 template<size_t Index>
247 [[nodiscard]] friend native_simd insert(native_simd a, value_type b) noexcept
248 {
249 static_assert(Index < 4);
250 return native_simd{_mm_insert_epi16(a, b.v, narrow_cast<int>(Index))};
251 }
252
259 template<size_t Index>
260 [[nodiscard]] friend value_type extract(native_simd a) noexcept
261 {
262 return std::bit_cast<value_type>(_mm_extract_epi16(a, Index));
263 }
264
273 template<size_t Mask>
274 [[nodiscard]] friend native_simd blend(native_simd a, native_simd b) noexcept
275 {
276#ifdef HI_HAS_SSE4_1
277 return native_simd{_mm_blend_epi16(a, b, Mask)};
278#else
279 hilet mask = from_mask(Mask);
280 return not_and(mask, a) | (mask & b);
281#endif
282 }
283
295 //template<fixed_string SourceElements>
296 //[[nodiscard]] static native_simd permute(native_simd a) noexcept
297 //{
298 // constexpr auto order = detail::native_swizzle_to_packed_indices<SourceElements, size>();
299 //
300 // if constexpr (order == 0b111'110'101'100'011'010'001'000) {
301 // return a.v;
302 // } else {
303 // auto tmp = _mm_shufflelo(a.v,
304 // return native_simd{_mm_shuffle_epi16(a.v, order)};
305 // }
306 //}
307
324 template<fixed_string SourceElements>
325 [[nodiscard]] friend native_simd swizzle(native_simd a) noexcept
326 {
327 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
328 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
329 constexpr auto number_mask = one_mask | zero_mask;
330
331 if constexpr (number_mask == 0b11111111) {
332 // Swizzle was /[01][01][01][01]/.
334
335 } else if constexpr (number_mask == 0b00000000) {
336 // Swizzle was /[^01][^01][^01][^01]/.
337 return permute<SourceElements>(a);
338
339#ifdef HI_HAS_SSE4_1
340 } else if constexpr (number_mask == zero_mask) {
341 // Swizzle was /[^1][^1][^1][^1]/.
344#endif
345
346 } else {
350 }
351 }
352
358 [[nodiscard]] friend native_simd not_and(native_simd a, native_simd b) noexcept
359 {
360 return native_simd{_mm_andnot_si128(a.v, b.v)};
361 }
362
363 template<fixed_string SourceElements>
364 [[nodiscard]] static native_simd swizzle_numbers() noexcept
365 {
366 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
367 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
368 constexpr auto number_mask = one_mask | zero_mask;
369 constexpr auto alpha_mask = ~number_mask & 0b11111111;
370
371 if constexpr ((zero_mask | alpha_mask) == 0b11111111) {
372 return native_simd{_mm_setzero_si128()};
373
374 } else if constexpr ((one_mask | alpha_mask)== 0b11111111) {
375 return native_simd::broadcast(value_type::from_uint16_t(0x3c00)); // 1.0
376
377 } else {
378 return native_simd{
379 to_bool(one_mask & 0b00000001) ? value_type::from_uint16_t(0x3c00) : value_type{},
380 to_bool(one_mask & 0b00000010) ? value_type::from_uint16_t(0x3c00) : value_type{},
381 to_bool(one_mask & 0b00000100) ? value_type::from_uint16_t(0x3c00) : value_type{},
382 to_bool(one_mask & 0b00001000) ? value_type::from_uint16_t(0x3c00) : value_type{},
383 to_bool(one_mask & 0b00010000) ? value_type::from_uint16_t(0x3c00) : value_type{},
384 to_bool(one_mask & 0b00100000) ? value_type::from_uint16_t(0x3c00) : value_type{},
385 to_bool(one_mask & 0b01000000) ? value_type::from_uint16_t(0x3c00) : value_type{},
386 to_bool(one_mask & 0b10000000) ? value_type::from_uint16_t(0x3c00) : value_type{}
387 };
388 }
389
390 }
391
392};
393
394#endif
395
396
397}}
398
@ other
The gui_event does not have associated data.
DOXYGEN BUG.
Definition algorithm.hpp:16
geometry/margins.hpp
Definition lookahead_iterator.hpp:5
constexpr Out narrow_cast(In const &rhs) noexcept
Cast numeric values without loss of precision.
Definition cast.hpp:377
T operator!=(T... args)