HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
native_u32x4_sse2.hpp
1// Copyright Take Vos 2022, 2023.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "native_simd_utility.hpp"
8#include "../utility/utility.hpp"
9#include "../macros.hpp"
10#include <array>
11#include <ostream>
12
13
14
15hi_warning_push();
16// Ignore "C26490: Don't use reinterpret_cast", needed for intrinsic loads and stores.
17hi_warning_ignore_msvc(26490);
18
19namespace hi { inline namespace v1 {
20
21#ifdef HI_HAS_SSE2
22
39template<>
40struct native_simd<uint32_t,4> {
41 using value_type = uint32_t;
42 constexpr static size_t size = 4;
43 using register_type = __m128i;
44 using array_type = std::array<value_type, size>;
45
47
48 native_simd(native_simd const&) noexcept = default;
49 native_simd(native_simd&&) noexcept = default;
50 native_simd& operator=(native_simd const&) noexcept = default;
51 native_simd& operator=(native_simd&&) noexcept = default;
52
55 native_simd() noexcept : v(_mm_setzero_si128()) {}
56
57 [[nodiscard]] explicit native_simd(register_type other) noexcept : v(other) {}
58
59 [[nodiscard]] explicit operator register_type() const noexcept
60 {
61 return v;
62 }
63
71 [[nodiscard]] native_simd(
72 value_type a,
73 value_type b = value_type{0},
74 value_type c = value_type{0},
75 value_type d = value_type{0}) noexcept :
77 std::bit_cast<int32_t>(d),
78 std::bit_cast<int32_t>(c),
79 std::bit_cast<int32_t>(b),
80 std::bit_cast<int32_t>(a)))
81 {
82 }
83
84 [[nodiscard]] explicit native_simd(value_type const *other) noexcept :
85 v(_mm_loadu_si128(reinterpret_cast<register_type const *>(other)))
86 {
87 }
88
89 void store(value_type *out) const noexcept
90 {
91 hi_axiom_not_null(out);
92 _mm_storeu_si128(reinterpret_cast<register_type *>(out), v);
93 }
94
95 [[nodiscard]] explicit native_simd(void const *other) noexcept : v(_mm_loadu_si128(static_cast<register_type const *>(other)))
96 {
97 }
98
99 void store(void *out) const noexcept
100 {
101 hi_axiom_not_null(out);
102 _mm_storeu_si128(static_cast<register_type *>(out), v);
103 }
104
105 [[nodiscard]] explicit native_simd(std::span<value_type const> other) noexcept
106 {
107 hi_axiom(other.size() >= size);
108 v = _mm_loadu_si128(reinterpret_cast<register_type const *>(other.data()));
109 }
110
111 void store(std::span<value_type> out) const noexcept
112 {
113 hi_axiom(out.size() >= size);
114 _mm_storeu_si128(reinterpret_cast<register_type *>(out.data()), v);
115 }
116
117 [[nodiscard]] explicit native_simd(array_type other) noexcept :
118 v(_mm_loadu_si128(reinterpret_cast<register_type const *>(other.data())))
119 {
120 }
121
122 [[nodiscard]] explicit operator array_type() const noexcept
123 {
124 auto r = array_type{};
125 _mm_storeu_si128(reinterpret_cast<register_type *>(r.data()), v);
126 return r;
127 }
128
129 [[nodiscard]] explicit native_simd(native_simd<int32_t,4> const &a) noexcept;
130
140 [[nodiscard]] static native_simd broadcast(value_type a) noexcept
141 {
142 return native_simd{_mm_set1_epi32(std::bit_cast<int32_t>(a))};
143 }
144
154 [[nodiscard]] static native_simd broadcast(native_simd a) noexcept
155 {
156#ifdef HI_HAS_AVX2
157 return native_simd{_mm_broadcastd_epi32(a.v)};
158#else
159 return native_simd{_mm_shuffle_epi32(a.v, 0b00'00'00'00)};
160#endif
161 }
162
163 [[nodiscard]] static native_simd ones() noexcept
164 {
165 hilet tmp = _mm_undefined_si128();
166 return native_simd{_mm_cmpeq_epi32(tmp, tmp)};
167 }
168
171 [[nodiscard]] static native_simd from_mask(size_t mask) noexcept
172 {
173 hi_axiom(mask <= 0b1111);
174
175 constexpr auto ones_ = std::bit_cast<value_type>(0xffff'ffffU);
176 return native_simd{
177 mask & 0b0001 ? ones_ : 0, mask & 0b0010 ? ones_ : 0, mask & 0b0100 ? ones_ : 0, mask & 0b1000 ? ones_ : 0};
178 }
179
182 [[nodiscard]] size_t mask() const noexcept
183 {
185 }
186
187 [[nodiscard]] friend bool equal(native_simd a, native_simd b) noexcept
188 {
189 return (a == b).mask() == 0b1111;
190 }
191
192 [[nodiscard]] friend native_simd operator==(native_simd a, native_simd b) noexcept
193 {
194 return native_simd{_mm_cmpeq_epi32(a.v, b.v)};
195 }
196
197 [[nodiscard]] friend native_simd operator!=(native_simd a, native_simd b) noexcept
198 {
199 return ~(a == b);
200 }
201
202 [[nodiscard]] friend native_simd operator+(native_simd a) noexcept
203 {
204 return a;
205 }
206
207 [[nodiscard]] friend native_simd operator+(native_simd a, native_simd b) noexcept
208 {
209 return native_simd{_mm_add_epi32(a.v, b.v)};
210 }
211
212 [[nodiscard]] friend native_simd operator-(native_simd a, native_simd b) noexcept
213 {
214 return native_simd{_mm_sub_epi32(a.v, b.v)};
215 }
216
217 [[nodiscard]] friend native_simd operator*(native_simd a, native_simd b) noexcept
218 {
219 return native_simd{_mm_mullo_epi32(a.v, b.v)};
220 }
221
222 [[nodiscard]] friend native_simd operator&(native_simd a, native_simd b) noexcept
223 {
224 return native_simd{_mm_and_si128(a.v, b.v)};
225 }
226
227 [[nodiscard]] friend native_simd operator|(native_simd a, native_simd b) noexcept
228 {
229 return native_simd{_mm_or_si128(a.v, b.v)};
230 }
231
232 [[nodiscard]] friend native_simd operator^(native_simd a, native_simd b) noexcept
233 {
234 return native_simd{_mm_xor_si128(a.v, b.v)};
235 }
236
237 [[nodiscard]] friend native_simd operator~(native_simd a) noexcept
238 {
239 auto ones = _mm_undefined_si128();
241 return native_simd{_mm_andnot_si128(a.v, ones)};
242 }
243
244 [[nodiscard]] friend native_simd operator<<(native_simd a, unsigned int b) noexcept
245 {
246 hi_axiom_bounds(b, sizeof(value_type) * CHAR_BIT);
247 return native_simd{_mm_slli_epi32(a.v, b)};
248 }
249
250 [[nodiscard]] friend native_simd operator>>(native_simd a, unsigned int b) noexcept
251 {
252 hi_axiom_bounds(b, sizeof(value_type) * CHAR_BIT);
253 return native_simd{_mm_srli_epi32(a.v, b)};
254 }
255
256 [[nodiscard]] friend native_simd min(native_simd a, native_simd b) noexcept
257 {
258 return native_simd{_mm_min_epu32(a.v, b.v)};
259 }
260
261 [[nodiscard]] friend native_simd max(native_simd a, native_simd b) noexcept
262 {
263 return native_simd{_mm_max_epu32(a.v, b.v)};
264 }
265
272 template<size_t Mask>
273 [[nodiscard]] friend native_simd set_zero(native_simd a) noexcept
274 {
275 static_assert(Mask <= 0b1111);
276#ifdef HI_HAS_SSE4_1
277 return native_simd{_mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(a.v), _mm_castsi128_ps(a.v), Mask))};
278#else
279 hilet mask = from_mask(Mask);
280 return not_and(mask, a);
281#endif
282 }
283
291 template<size_t Index>
292 [[nodiscard]] friend native_simd insert(native_simd a, value_type b) noexcept
293 {
294 static_assert(Index < 4);
295
296#ifdef HI_HAS_SSE4_1
297 return native_simd{_mm_insert_epi32(a.v, std::bit_cast<int32_t>(b), Index)};
298#else
299 hilet mask = from_mask(1_uz << Index);
300 return not_and(mask, a) | (mask & broadcast(b));
301#endif
302 }
303
310 template<size_t Index>
311 [[nodiscard]] friend value_type get(native_simd a) noexcept
312 {
313#ifdef HI_HAS_SSE4_1
314 return std::bit_cast<value_type>(_mm_extract_epi32(a.v, Index));
315#else
316 auto r = static_cast<array_type>(a);
317 return std::get<Index>(r);
318#endif
319 }
320
329 template<size_t Mask>
330 [[nodiscard]] friend native_simd blend(native_simd a, native_simd b) noexcept
331 {
332#ifdef HI_HAS_SSE4_1
333 return native_simd{_mm_blend_epi32(a.v, b.v, Mask)};
334#else
335 hilet mask = from_mask(Mask);
336 return not_and(mask, a) | (mask & b);
337#endif
338 }
339
352 template<fixed_string SourceElements>
353 [[nodiscard]] friend native_simd permute(native_simd a) noexcept
354 {
355 constexpr auto order = detail::native_swizzle_to_packed_indices<SourceElements, size>();
356
357 if constexpr (order == 0b11'10'01'00) {
358 return a;
359 } else if constexpr (order == 0b00'00'00'00) {
360 return broadcast(a);
361 } else {
362 return native_simd{_mm_shuffle_epi32(a.v, order)};
363 }
364 }
365
382 template<fixed_string SourceElements>
383 [[nodiscard]] friend native_simd swizzle(native_simd a) noexcept
384 {
385 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
386 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
387 constexpr auto number_mask = one_mask | zero_mask;
388
389 if constexpr (number_mask == 0b1111) {
390 // Swizzle was /[01][01][01][01]/.
392
393 } else if constexpr (number_mask == 0b0000) {
394 // Swizzle was /[^01][^01][^01][^01]/.
395 return permute<SourceElements>(a);
396
397#ifdef HI_HAS_SSE4_1
398 } else if constexpr (number_mask == zero_mask) {
399 // Swizzle was /[^1][^1][^1][^1]/.
402#endif
403
404 } else {
408 }
409 }
410
411#ifdef HI_HAS_SSE3
422 [[nodiscard]] friend native_simd horizontal_add(native_simd a, native_simd b) noexcept
423 {
424 return native_simd{_mm_hadd_epi32(a.v, b.v)};
425 }
426#endif
427
428#ifdef HI_HAS_SSE3
439 [[nodiscard]] friend native_simd horizontal_sub(native_simd a, native_simd b) noexcept
440 {
441 return native_simd{_mm_hsub_epi32(a.v, b.v)};
442 }
443#endif
444
451 [[nodiscard]] friend native_simd horizontal_sum(native_simd a) noexcept
452 {
453 hilet tmp = a + permute<"cdab">(a);
454 return tmp + permute<"badc">(tmp);
455 }
456
462 [[nodiscard]] friend native_simd not_and(native_simd a, native_simd b) noexcept
463 {
464 return native_simd{_mm_andnot_si128(a.v, b.v)};
465 }
466
467 friend std::ostream& operator<<(std::ostream& a, native_simd b) noexcept
468 {
469 return a << "(" << get<0>(b) << ", " << get<1>(b) << ", " << get<2>(b) << ", " << get<3>(b) << ")";
470 }
471
472 template<fixed_string SourceElements>
473 [[nodiscard]] static native_simd swizzle_numbers() noexcept
474 {
475 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
476 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
477 constexpr auto number_mask = one_mask | zero_mask;
478 constexpr auto alpha_mask = ~number_mask & 0b1111;
479
480 if constexpr ((zero_mask | alpha_mask) == 0b1111) {
481 return native_simd{_mm_setzero_si128()};
482
483 } else if constexpr ((one_mask | alpha_mask) == 0b1111) {
484 return native_simd{_mm_set1_epi32(1)};
485
486 } else {
487 return native_simd{
488 to_bool(one_mask & 0b0001) ? 1 : 0,
489 to_bool(one_mask & 0b0010) ? 1 : 0,
490 to_bool(one_mask & 0b0100) ? 1 : 0,
491 to_bool(one_mask & 0b1000) ? 1 : 0};
492 }
493 }
494};
495
496#endif
497
498}} // namespace hi::v1
499
500hi_warning_pop();
@ other
The gui_event does not have associated data.
STL namespace.
DOXYGEN BUG.
Definition algorithm.hpp:16
geometry/margins.hpp
Definition lookahead_iterator.hpp:5
constexpr Out narrow_cast(In const &rhs) noexcept
Cast numeric values without loss of precision.
Definition cast.hpp:377
T equal(T... args)
T max(T... args)
T min(T... args)
T operator!=(T... args)