HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
native_i32x4_sse2.hpp
1// Copyright Take Vos 2022, 2023.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "native_simd_utility.hpp"
8#include "../utility/module.hpp"
9#include <array>
10#include <ostream>
11
12hi_warning_push();
13// Ignore "C26490: Don't use reinterpret_cast", needed for intrinsic loads and stores.
14hi_warning_ignore_msvc(26490);
15
16namespace hi { inline namespace v1 {
17
18#ifdef HI_HAS_SSE2
19
36template<>
37struct native_simd<int32_t, 4> {
38 using value_type = int32_t;
39 constexpr static size_t size = 4;
40 using register_type = __m128i;
41 using array_type = std::array<value_type, size>;
42
43 register_type v;
44
45 native_simd(native_simd const&) noexcept = default;
46 native_simd(native_simd&&) noexcept = default;
47 native_simd& operator=(native_simd const&) noexcept = default;
48 native_simd& operator=(native_simd&&) noexcept = default;
49
52 native_simd() noexcept : v(_mm_setzero_si128()) {}
53
54 [[nodiscard]] explicit native_simd(register_type other) noexcept : v(other) {}
55
56 [[nodiscard]] explicit operator register_type() const noexcept
57 {
58 return v;
59 }
60
68 [[nodiscard]] native_simd(
69 value_type a,
70 value_type b = value_type{0},
71 value_type c = value_type{0},
72 value_type d = value_type{0}) noexcept :
73 v(_mm_set_epi32(d, c, b, a))
74 {
75 }
76
77 [[nodiscard]] explicit native_simd(value_type const *other) noexcept :
78 v(_mm_loadu_si128(reinterpret_cast<register_type const *>(other)))
79 {
80 }
81
82 void store(value_type *out) const noexcept
83 {
85 _mm_storeu_si128(reinterpret_cast<register_type *>(out), v);
86 }
87
88 [[nodiscard]] explicit native_simd(void const *other) noexcept : v(_mm_loadu_si128(static_cast<register_type const *>(other)))
89 {
90 }
91
92 void store(void *out) const noexcept
93 {
95 _mm_storeu_si128(static_cast<register_type *>(out), v);
96 }
97
98 [[nodiscard]] explicit native_simd(std::span<value_type const> other) noexcept
99 {
100 hi_axiom(other.size() >= size);
101 v = _mm_loadu_si128(reinterpret_cast<register_type const *>(other.data()));
102 }
103
104 void store(std::span<value_type> out) const noexcept
105 {
106 hi_axiom(out.size() >= size);
107 _mm_storeu_si128(reinterpret_cast<register_type *>(out.data()), v);
108 }
109
110 [[nodiscard]] explicit native_simd(array_type other) noexcept :
111 v(_mm_loadu_si128(reinterpret_cast<register_type const *>(other.data())))
112 {
113 }
114
115 [[nodiscard]] explicit operator array_type() const noexcept
116 {
117 auto r = array_type{};
118 _mm_storeu_si128(reinterpret_cast<register_type *>(r.data()), v);
119 return r;
120 }
121
122 [[nodiscard]] explicit native_simd(native_simd<float, 4> const& a) noexcept;
123 [[nodiscard]] explicit native_simd(native_simd<uint32_t, 4> const& a) noexcept;
124#ifdef HI_HAS_AVX
125 [[nodiscard]] explicit native_simd(native_simd<double, 4> const& a) noexcept;
126#endif
127
137 [[nodiscard]] static native_simd broadcast(value_type a) noexcept
138 {
139 return native_simd{_mm_set1_epi32(a)};
140 }
141
151 [[nodiscard]] static native_simd broadcast(native_simd a) noexcept
152 {
153#ifdef HI_HAS_AVX2
154 return native_simd{_mm_broadcastd_epi32(a.v)};
155#else
156 return native_simd{_mm_shuffle_epi32(a.v, 0b00'00'00'00)};
157#endif
158 }
159
160 [[nodiscard]] static native_simd ones() noexcept
161 {
162 hilet tmp = _mm_undefined_si128();
163 return native_simd{_mm_cmpeq_epi32(tmp, tmp)};
164 }
165
166 template<size_t Mask>
167 [[nodiscard]] static native_simd from_mask() noexcept
168 {
169 return native_simd{
170 to_bool(Mask & 0b0001) ? static_cast<value_type>(0xffff'ffff) : 0,
171 to_bool(Mask & 0b0010) ? static_cast<value_type>(0xffff'ffff) : 0,
172 to_bool(Mask & 0b0100) ? static_cast<value_type>(0xffff'ffff) : 0,
173 to_bool(Mask & 0b1000) ? static_cast<value_type>(0xffff'ffff) : 0};
174 }
175
178 [[nodiscard]] static native_simd from_mask(size_t a) noexcept
179 {
180 hi_axiom(a <= 0b1111);
181
182 uint64_t a_ = a;
183
184 a_ <<= 31;
185 auto tmp = _mm_cvtsi32_si128(truncate<uint32_t>(a_));
186 a_ >>= 1;
187 tmp = _mm_insert_epi32(tmp, truncate<uint32_t>(a_), 1);
188 a_ >>= 1;
189 tmp = _mm_insert_epi32(tmp, truncate<uint32_t>(a_), 2);
190 a_ >>= 1;
191 tmp = _mm_insert_epi32(tmp, truncate<uint32_t>(a_), 3);
192
193 tmp = _mm_srai_epi32(tmp, 31);
194 return native_simd{tmp};
195 }
196
199 [[nodiscard]] size_t mask() const noexcept
200 {
201 return narrow_cast<size_t>(_mm_movemask_ps(_mm_castsi128_ps(v)));
202 }
203
204 [[nodiscard]] friend bool equal(native_simd a, native_simd b) noexcept
205 {
206 return (a == b).mask() == 0b1111;
207 }
208
209 [[nodiscard]] friend native_simd operator==(native_simd a, native_simd b) noexcept
210 {
211 return native_simd{_mm_cmpeq_epi32(a.v, b.v)};
212 }
213
214 [[nodiscard]] friend native_simd operator!=(native_simd a, native_simd b) noexcept
215 {
216 return ~(a == b);
217 }
218
219 [[nodiscard]] friend native_simd operator<(native_simd a, native_simd b) noexcept
220 {
221 return native_simd{_mm_cmplt_epi32(a.v, b.v)};
222 }
223
224 [[nodiscard]] friend native_simd operator>(native_simd a, native_simd b) noexcept
225 {
226 return native_simd{_mm_cmpgt_epi32(a.v, b.v)};
227 }
228
229 [[nodiscard]] friend native_simd operator<=(native_simd a, native_simd b) noexcept
230 {
231 return ~(a > b);
232 }
233
234 [[nodiscard]] friend native_simd operator>=(native_simd a, native_simd b) noexcept
235 {
236 return ~(a < b);
237 }
238
239 [[nodiscard]] friend native_simd operator+(native_simd a) noexcept
240 {
241 return a;
242 }
243
244 [[nodiscard]] friend native_simd operator-(native_simd a) noexcept
245 {
246 return native_simd{} - a;
247 }
248
249 [[nodiscard]] friend native_simd operator+(native_simd a, native_simd b) noexcept
250 {
251 return native_simd{_mm_add_epi32(a.v, b.v)};
252 }
253
254 [[nodiscard]] friend native_simd operator-(native_simd a, native_simd b) noexcept
255 {
256 return native_simd{_mm_sub_epi32(a.v, b.v)};
257 }
258
259 [[nodiscard]] friend native_simd operator*(native_simd a, native_simd b) noexcept
260 {
261 return native_simd{_mm_mullo_epi32(a.v, b.v)};
262 }
263
264 [[nodiscard]] friend native_simd operator&(native_simd a, native_simd b) noexcept
265 {
266 return native_simd{_mm_and_si128(a.v, b.v)};
267 }
268
269 [[nodiscard]] friend native_simd operator|(native_simd a, native_simd b) noexcept
270 {
271 return native_simd{_mm_or_si128(a.v, b.v)};
272 }
273
274 [[nodiscard]] friend native_simd operator^(native_simd a, native_simd b) noexcept
275 {
276 return native_simd{_mm_xor_si128(a.v, b.v)};
277 }
278
279 [[nodiscard]] friend native_simd operator~(native_simd a) noexcept
280 {
281 auto ones = _mm_undefined_si128();
282 ones = _mm_cmpeq_epi32(ones, ones);
283 return native_simd{_mm_andnot_si128(a.v, ones)};
284 }
285
286 [[nodiscard]] friend native_simd operator<<(native_simd a, unsigned int b) noexcept
287 {
288 hi_axiom_bounds(b, sizeof(value_type) * CHAR_BIT);
289 return native_simd{_mm_slli_epi32(a.v, b)};
290 }
291
292 [[nodiscard]] friend native_simd operator>>(native_simd a, unsigned int b) noexcept
293 {
294 hi_axiom_bounds(b, sizeof(value_type) * CHAR_BIT);
295 return native_simd{_mm_srai_epi32(a.v, b)};
296 }
297
298 [[nodiscard]] friend native_simd min(native_simd a, native_simd b) noexcept
299 {
300#if HI_HAS_SSE4_1
301 return native_simd{_mm_min_epi32(a.v, b.v)};
302#else
303 hilet mask = a < b;
304 return (mask & a) | not_and(mask, b);
305#endif
306 }
307
308 [[nodiscard]] friend native_simd max(native_simd a, native_simd b) noexcept
309 {
310#if HI_HAS_SSE4_1
311 return native_simd{_mm_max_epi32(a.v, b.v)};
312#else
313 hilet mask = a > b;
314 return (mask & a) | not_and(mask, b);
315#endif
316 }
317
318 [[nodiscard]] friend native_simd abs(native_simd a) noexcept
319 {
320#if HI_HAS_SSSE3
321 return native_simd{_mm_abs_epi32(a.v)};
322#else
323 hilet mask = a >= native_simd{};
324 return (mask & a) | not_and(mask, -a);
325#endif
326 }
327
334 template<size_t Mask>
335 [[nodiscard]] friend native_simd set_zero(native_simd a) noexcept
336 {
337 static_assert(Mask <= 0b1111);
338#ifdef HI_HAS_SSE4_1
339 return native_simd{_mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(a.v), _mm_castsi128_ps(a.v), Mask))};
340#else
341 hilet mask = from_mask<Mask>();
342 return not_and(mask, a);
343#endif
344 }
345
353 template<size_t Index>
354 [[nodiscard]] friend native_simd insert(native_simd a, value_type b) noexcept
355 {
356 static_assert(Index < 4);
357
358#ifdef HI_HAS_SSE4_1
359 return native_simd{_mm_insert_epi32(a.v, b, Index)};
360#else
361 hilet mask = from_mask<1_uz << Index>();
362 return not_and(mask, a) | (mask & broadcast(b));
363#endif
364 }
365
372 template<size_t Index>
373 [[nodiscard]] friend value_type get(native_simd a) noexcept
374 {
375#ifdef HI_HAS_SSE4_1
376 return _mm_extract_epi32(a.v, Index);
377#else
378 auto r = static_cast<array_type>(a);
379 return std::get<Index>(r);
380#endif
381 }
382
391 template<size_t Mask>
392 [[nodiscard]] friend native_simd blend(native_simd a, native_simd b) noexcept
393 {
394#ifdef HI_HAS_SSE4_1
395 return native_simd{_mm_blend_epi32(a.v, b.v, Mask)};
396#else
397 hilet mask = from_mask<Mask>();
398 return not_and(mask, a) | (mask & b);
399#endif
400 }
401
414 template<fixed_string SourceElements>
415 [[nodiscard]] friend native_simd permute(native_simd a) noexcept
416 {
417 constexpr auto order = detail::native_swizzle_to_packed_indices<SourceElements, size>();
418
419 if constexpr (order == 0b11'10'01'00) {
420 return a;
421 } else if constexpr (order == 0b00'00'00'00) {
422 return broadcast(a);
423 } else {
424 return native_simd{_mm_shuffle_epi32(a.v, order)};
425 }
426 }
427
444 template<fixed_string SourceElements>
445 [[nodiscard]] friend native_simd swizzle(native_simd a) noexcept
446 {
447 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
448 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
449 constexpr auto number_mask = one_mask | zero_mask;
450
451 if constexpr (number_mask == 0b1111) {
452 // Swizzle was /[01][01][01][01]/.
453 return swizzle_numbers<SourceElements>();
454
455 } else if constexpr (number_mask == 0b0000) {
456 // Swizzle was /[^01][^01][^01][^01]/.
457 return permute<SourceElements>(a);
458
459#ifdef HI_HAS_SSE4_1
460 } else if constexpr (number_mask == zero_mask) {
461 // Swizzle was /[^1][^1][^1][^1]/.
462 hilet ordered = permute<SourceElements>(a);
463 return set_zero<zero_mask>(ordered);
464#endif
465
466 } else {
467 hilet ordered = permute<SourceElements>(a);
468 hilet numbers = swizzle_numbers<SourceElements>();
469 return blend<number_mask>(ordered, numbers);
470 }
471 }
472
473#ifdef HI_HAS_SSE3
484 [[nodiscard]] friend native_simd horizontal_add(native_simd a, native_simd b) noexcept
485 {
486 return native_simd{_mm_hadd_epi32(a.v, b.v)};
487 }
488#endif
489
490#ifdef HI_HAS_SSE3
501 [[nodiscard]] friend native_simd horizontal_sub(native_simd a, native_simd b) noexcept
502 {
503 return native_simd{_mm_hsub_epi32(a.v, b.v)};
504 }
505#endif
506
513 [[nodiscard]] friend native_simd horizontal_sum(native_simd a) noexcept
514 {
515 auto tmp = a + permute<"cdab">(a);
516 return tmp + permute<"badc">(tmp);
517 }
518
529 template<size_t SourceMask>
530 [[nodiscard]] friend native_simd dot_product(native_simd a, native_simd b) noexcept
531 {
532 static_assert(SourceMask <= 0b1111);
533 return horizontal_sum(set_zero<~SourceMask & 0b1111>(a * b));
534 }
535
541 [[nodiscard]] friend native_simd not_and(native_simd a, native_simd b) noexcept
542 {
543 return native_simd{_mm_andnot_si128(a.v, b.v)};
544 }
545
546 friend std::ostream& operator<<(std::ostream& a, native_simd b) noexcept
547 {
548 return a << "(" << get<0>(b) << ", " << get<1>(b) << ", " << get<2>(b) << ", " << get<3>(b) << ")";
549 }
550
551 template<fixed_string SourceElements>
552 [[nodiscard]] static native_simd swizzle_numbers() noexcept
553 {
554 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
555 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
556 constexpr auto number_mask = one_mask | zero_mask;
557 constexpr auto alpha_mask = ~number_mask & 0b1111;
558
559 if constexpr ((zero_mask | alpha_mask) == 0b1111) {
560 return native_simd{_mm_setzero_si128()};
561
562 } else if constexpr ((one_mask | alpha_mask) == 0b1111) {
563 return native_simd{_mm_set1_epi32(1)};
564
565 } else {
566 return native_simd{
567 to_bool(one_mask & 0b0001) ? 1 : 0,
568 to_bool(one_mask & 0b0010) ? 1 : 0,
569 to_bool(one_mask & 0b0100) ? 1 : 0,
570 to_bool(one_mask & 0b1000) ? 1 : 0};
571 }
572 }
573};
574
575#endif
576
577}} // namespace hi::v1
578
579hi_warning_pop();
#define hi_axiom_bounds(x,...)
Specify an axiom that the value is within bounds.
Definition assert.hpp:249
#define hi_axiom(expression,...)
Specify an axiom; an expression that is true.
Definition assert.hpp:238
#define hi_axiom_not_null(expression,...)
Specify an axiom that the expression is not nullptr.
Definition assert.hpp:257
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
@ other
The gui_event does not have associated data.
DOXYGEN BUG.
Definition algorithm.hpp:13
geometry/margins.hpp
Definition cache.hpp:11
T equal(T... args)
T max(T... args)
T min(T... args)
T operator!=(T... args)