HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
native_u32x4_sse2.hpp
1// Copyright Take Vos 2022, 2023.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "native_simd_utility.hpp"
8#include "../utility/module.hpp"
9#include <array>
10#include <ostream>
11
12hi_warning_push();
13// Ignore "C26490: Don't use reinterpret_cast", needed for intrinsic loads and stores.
14hi_warning_ignore_msvc(26490);
15
16namespace hi { inline namespace v1 {
17
18#ifdef HI_HAS_SSE2
19
36template<>
37struct native_simd<uint32_t,4> {
38 using value_type = uint32_t;
39 constexpr static size_t size = 4;
40 using register_type = __m128i;
41 using array_type = std::array<value_type, size>;
42
43 register_type v;
44
45 native_simd(native_simd const&) noexcept = default;
46 native_simd(native_simd&&) noexcept = default;
47 native_simd& operator=(native_simd const&) noexcept = default;
48 native_simd& operator=(native_simd&&) noexcept = default;
49
52 native_simd() noexcept : v(_mm_setzero_si128()) {}
53
54 [[nodiscard]] explicit native_simd(register_type other) noexcept : v(other) {}
55
56 [[nodiscard]] explicit operator register_type() const noexcept
57 {
58 return v;
59 }
60
68 [[nodiscard]] native_simd(
69 value_type a,
70 value_type b = value_type{0},
71 value_type c = value_type{0},
72 value_type d = value_type{0}) noexcept :
73 v(_mm_set_epi32(
74 std::bit_cast<int32_t>(d),
75 std::bit_cast<int32_t>(c),
76 std::bit_cast<int32_t>(b),
77 std::bit_cast<int32_t>(a)))
78 {
79 }
80
81 [[nodiscard]] explicit native_simd(value_type const *other) noexcept :
82 v(_mm_loadu_si128(reinterpret_cast<register_type const *>(other)))
83 {
84 }
85
86 void store(value_type *out) const noexcept
87 {
89 _mm_storeu_si128(reinterpret_cast<register_type *>(out), v);
90 }
91
92 [[nodiscard]] explicit native_simd(void const *other) noexcept : v(_mm_loadu_si128(static_cast<register_type const *>(other)))
93 {
94 }
95
96 void store(void *out) const noexcept
97 {
99 _mm_storeu_si128(static_cast<register_type *>(out), v);
100 }
101
102 [[nodiscard]] explicit native_simd(std::span<value_type const> other) noexcept
103 {
104 hi_axiom(other.size() >= size);
105 v = _mm_loadu_si128(reinterpret_cast<register_type const *>(other.data()));
106 }
107
108 void store(std::span<value_type> out) const noexcept
109 {
110 hi_axiom(out.size() >= size);
111 _mm_storeu_si128(reinterpret_cast<register_type *>(out.data()), v);
112 }
113
114 [[nodiscard]] explicit native_simd(array_type other) noexcept :
115 v(_mm_loadu_si128(reinterpret_cast<register_type const *>(other.data())))
116 {
117 }
118
119 [[nodiscard]] explicit operator array_type() const noexcept
120 {
121 auto r = array_type{};
122 _mm_storeu_si128(reinterpret_cast<register_type *>(r.data()), v);
123 return r;
124 }
125
126 [[nodiscard]] explicit native_simd(native_simd<int32_t,4> const &a) noexcept;
127
137 [[nodiscard]] static native_simd broadcast(value_type a) noexcept
138 {
139 return native_simd{_mm_set1_epi32(std::bit_cast<int32_t>(a))};
140 }
141
151 [[nodiscard]] static native_simd broadcast(native_simd a) noexcept
152 {
153#ifdef HI_HAS_AVX2
154 return native_simd{_mm_broadcastd_epi32(a.v)};
155#else
156 return native_simd{_mm_shuffle_epi32(a.v, 0b00'00'00'00)};
157#endif
158 }
159
160 [[nodiscard]] static native_simd ones() noexcept
161 {
162 hilet tmp = _mm_undefined_si128();
163 return native_simd{_mm_cmpeq_epi32(tmp, tmp)};
164 }
165
168 [[nodiscard]] static native_simd from_mask(size_t mask) noexcept
169 {
170 hi_axiom(mask <= 0b1111);
171
172 constexpr auto ones_ = std::bit_cast<value_type>(0xffff'ffffU);
173 return native_simd{
174 mask & 0b0001 ? ones_ : 0, mask & 0b0010 ? ones_ : 0, mask & 0b0100 ? ones_ : 0, mask & 0b1000 ? ones_ : 0};
175 }
176
179 [[nodiscard]] size_t mask() const noexcept
180 {
181 return narrow_cast<size_t>(_mm_movemask_ps(_mm_castsi128_ps(v)));
182 }
183
184 [[nodiscard]] friend bool equal(native_simd a, native_simd b) noexcept
185 {
186 return (a == b).mask() == 0b1111;
187 }
188
189 [[nodiscard]] friend native_simd operator==(native_simd a, native_simd b) noexcept
190 {
191 return native_simd{_mm_cmpeq_epi32(a.v, b.v)};
192 }
193
194 [[nodiscard]] friend native_simd operator!=(native_simd a, native_simd b) noexcept
195 {
196 return ~(a == b);
197 }
198
199 [[nodiscard]] friend native_simd operator+(native_simd a) noexcept
200 {
201 return a;
202 }
203
204 [[nodiscard]] friend native_simd operator+(native_simd a, native_simd b) noexcept
205 {
206 return native_simd{_mm_add_epi32(a.v, b.v)};
207 }
208
209 [[nodiscard]] friend native_simd operator-(native_simd a, native_simd b) noexcept
210 {
211 return native_simd{_mm_sub_epi32(a.v, b.v)};
212 }
213
214 [[nodiscard]] friend native_simd operator*(native_simd a, native_simd b) noexcept
215 {
216 return native_simd{_mm_mullo_epi32(a.v, b.v)};
217 }
218
219 [[nodiscard]] friend native_simd operator&(native_simd a, native_simd b) noexcept
220 {
221 return native_simd{_mm_and_si128(a.v, b.v)};
222 }
223
224 [[nodiscard]] friend native_simd operator|(native_simd a, native_simd b) noexcept
225 {
226 return native_simd{_mm_or_si128(a.v, b.v)};
227 }
228
229 [[nodiscard]] friend native_simd operator^(native_simd a, native_simd b) noexcept
230 {
231 return native_simd{_mm_xor_si128(a.v, b.v)};
232 }
233
234 [[nodiscard]] friend native_simd operator~(native_simd a) noexcept
235 {
236 auto ones = _mm_undefined_si128();
237 ones = _mm_cmpeq_epi32(ones, ones);
238 return native_simd{_mm_andnot_si128(a.v, ones)};
239 }
240
241 [[nodiscard]] friend native_simd operator<<(native_simd a, unsigned int b) noexcept
242 {
243 hi_axiom_bounds(b, sizeof(value_type) * CHAR_BIT);
244 return native_simd{_mm_slli_epi32(a.v, b)};
245 }
246
247 [[nodiscard]] friend native_simd operator>>(native_simd a, unsigned int b) noexcept
248 {
249 hi_axiom_bounds(b, sizeof(value_type) * CHAR_BIT);
250 return native_simd{_mm_srli_epi32(a.v, b)};
251 }
252
253 [[nodiscard]] friend native_simd min(native_simd a, native_simd b) noexcept
254 {
255 return native_simd{_mm_min_epu32(a.v, b.v)};
256 }
257
258 [[nodiscard]] friend native_simd max(native_simd a, native_simd b) noexcept
259 {
260 return native_simd{_mm_max_epu32(a.v, b.v)};
261 }
262
269 template<size_t Mask>
270 [[nodiscard]] friend native_simd set_zero(native_simd a) noexcept
271 {
272 static_assert(Mask <= 0b1111);
273#ifdef HI_HAS_SSE4_1
274 return native_simd{_mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(a.v), _mm_castsi128_ps(a.v), Mask))};
275#else
276 hilet mask = from_mask(Mask);
277 return not_and(mask, a);
278#endif
279 }
280
288 template<size_t Index>
289 [[nodiscard]] friend native_simd insert(native_simd a, value_type b) noexcept
290 {
291 static_assert(Index < 4);
292
293#ifdef HI_HAS_SSE4_1
294 return native_simd{_mm_insert_epi32(a.v, std::bit_cast<int32_t>(b), Index)};
295#else
296 hilet mask = from_mask(1_uz << Index);
297 return not_and(mask, a) | (mask & broadcast(b));
298#endif
299 }
300
307 template<size_t Index>
308 [[nodiscard]] friend value_type get(native_simd a) noexcept
309 {
310#ifdef HI_HAS_SSE4_1
311 return std::bit_cast<value_type>(_mm_extract_epi32(a.v, Index));
312#else
313 auto r = static_cast<array_type>(a);
314 return std::get<Index>(r);
315#endif
316 }
317
326 template<size_t Mask>
327 [[nodiscard]] friend native_simd blend(native_simd a, native_simd b) noexcept
328 {
329#ifdef HI_HAS_SSE4_1
330 return native_simd{_mm_blend_epi32(a.v, b.v, Mask)};
331#else
332 hilet mask = from_mask(Mask);
333 return not_and(mask, a) | (mask & b);
334#endif
335 }
336
349 template<fixed_string SourceElements>
350 [[nodiscard]] friend native_simd permute(native_simd a) noexcept
351 {
352 constexpr auto order = detail::native_swizzle_to_packed_indices<SourceElements, size>();
353
354 if constexpr (order == 0b11'10'01'00) {
355 return a;
356 } else if constexpr (order == 0b00'00'00'00) {
357 return broadcast(a);
358 } else {
359 return native_simd{_mm_shuffle_epi32(a.v, order)};
360 }
361 }
362
379 template<fixed_string SourceElements>
380 [[nodiscard]] friend native_simd swizzle(native_simd a) noexcept
381 {
382 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
383 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
384 constexpr auto number_mask = one_mask | zero_mask;
385
386 if constexpr (number_mask == 0b1111) {
387 // Swizzle was /[01][01][01][01]/.
388 return swizzle_numbers<SourceElements>();
389
390 } else if constexpr (number_mask == 0b0000) {
391 // Swizzle was /[^01][^01][^01][^01]/.
392 return permute<SourceElements>(a);
393
394#ifdef HI_HAS_SSE4_1
395 } else if constexpr (number_mask == zero_mask) {
396 // Swizzle was /[^1][^1][^1][^1]/.
397 hilet ordered = permute<SourceElements>(a);
398 return set_zero<zero_mask>(ordered);
399#endif
400
401 } else {
402 hilet ordered = permute<SourceElements>(a);
403 hilet numbers = swizzle_numbers<SourceElements>();
404 return blend<number_mask>(ordered, numbers);
405 }
406 }
407
408#ifdef HI_HAS_SSE3
419 [[nodiscard]] friend native_simd horizontal_add(native_simd a, native_simd b) noexcept
420 {
421 return native_simd{_mm_hadd_epi32(a.v, b.v)};
422 }
423#endif
424
425#ifdef HI_HAS_SSE3
436 [[nodiscard]] friend native_simd horizontal_sub(native_simd a, native_simd b) noexcept
437 {
438 return native_simd{_mm_hsub_epi32(a.v, b.v)};
439 }
440#endif
441
448 [[nodiscard]] friend native_simd horizontal_sum(native_simd a) noexcept
449 {
450 hilet tmp = a + permute<"cdab">(a);
451 return tmp + permute<"badc">(tmp);
452 }
453
459 [[nodiscard]] friend native_simd not_and(native_simd a, native_simd b) noexcept
460 {
461 return native_simd{_mm_andnot_si128(a.v, b.v)};
462 }
463
464 friend std::ostream& operator<<(std::ostream& a, native_simd b) noexcept
465 {
466 return a << "(" << get<0>(b) << ", " << get<1>(b) << ", " << get<2>(b) << ", " << get<3>(b) << ")";
467 }
468
469 template<fixed_string SourceElements>
470 [[nodiscard]] static native_simd swizzle_numbers() noexcept
471 {
472 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
473 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
474 constexpr auto number_mask = one_mask | zero_mask;
475 constexpr auto alpha_mask = ~number_mask & 0b1111;
476
477 if constexpr ((zero_mask | alpha_mask) == 0b1111) {
478 return native_simd{_mm_setzero_si128()};
479
480 } else if constexpr ((one_mask | alpha_mask) == 0b1111) {
481 return native_simd{_mm_set1_epi32(1)};
482
483 } else {
484 return native_simd{
485 to_bool(one_mask & 0b0001) ? 1 : 0,
486 to_bool(one_mask & 0b0010) ? 1 : 0,
487 to_bool(one_mask & 0b0100) ? 1 : 0,
488 to_bool(one_mask & 0b1000) ? 1 : 0};
489 }
490 }
491};
492
493#endif
494
495}} // namespace hi::v1
496
497hi_warning_pop();
#define hi_axiom_bounds(x,...)
Specify an axiom that the value is within bounds.
Definition assert.hpp:249
#define hi_axiom(expression,...)
Specify an axiom; an expression that is true.
Definition assert.hpp:238
#define hi_axiom_not_null(expression,...)
Assert if an expression is not nullptr.
Definition assert.hpp:257
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
@ other
The gui_event does not have associated data.
STL namespace.
DOXYGEN BUG.
Definition algorithm.hpp:13
geometry/margins.hpp
Definition cache.hpp:11
T equal(T... args)
T max(T... args)
T min(T... args)
T operator!=(T... args)