HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
native_i32x4_sse2.hpp
1// Copyright Take Vos 2022, 2023.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "native_simd_utility.hpp"
8#include "../utility/utility.hpp"
9#include "../macros.hpp"
10#include <array>
11#include <ostream>
12
13
14
15hi_warning_push();
16// Ignore "C26490: Don't use reinterpret_cast", needed for intrinsic loads and stores.
17hi_warning_ignore_msvc(26490);
18
19namespace hi { inline namespace v1 {
20
21#ifdef HI_HAS_SSE2
22
39template<>
40struct native_simd<int32_t, 4> {
41 using value_type = int32_t;
42 constexpr static size_t size = 4;
43 using register_type = __m128i;
44 using array_type = std::array<value_type, size>;
45
47
48 native_simd(native_simd const&) noexcept = default;
49 native_simd(native_simd&&) noexcept = default;
50 native_simd& operator=(native_simd const&) noexcept = default;
51 native_simd& operator=(native_simd&&) noexcept = default;
52
55 native_simd() noexcept : v(_mm_setzero_si128()) {}
56
57 [[nodiscard]] explicit native_simd(register_type other) noexcept : v(other) {}
58
59 [[nodiscard]] explicit operator register_type() const noexcept
60 {
61 return v;
62 }
63
71 [[nodiscard]] native_simd(
72 value_type a,
73 value_type b = value_type{0},
74 value_type c = value_type{0},
75 value_type d = value_type{0}) noexcept :
76 v(_mm_set_epi32(d, c, b, a))
77 {
78 }
79
80 [[nodiscard]] explicit native_simd(value_type const *other) noexcept :
81 v(_mm_loadu_si128(reinterpret_cast<register_type const *>(other)))
82 {
83 }
84
85 void store(value_type *out) const noexcept
86 {
87 hi_axiom_not_null(out);
88 _mm_storeu_si128(reinterpret_cast<register_type *>(out), v);
89 }
90
91 [[nodiscard]] explicit native_simd(void const *other) noexcept : v(_mm_loadu_si128(static_cast<register_type const *>(other)))
92 {
93 }
94
95 void store(void *out) const noexcept
96 {
97 hi_axiom_not_null(out);
98 _mm_storeu_si128(static_cast<register_type *>(out), v);
99 }
100
101 [[nodiscard]] explicit native_simd(std::span<value_type const> other) noexcept
102 {
103 hi_axiom(other.size() >= size);
104 v = _mm_loadu_si128(reinterpret_cast<register_type const *>(other.data()));
105 }
106
107 void store(std::span<value_type> out) const noexcept
108 {
109 hi_axiom(out.size() >= size);
110 _mm_storeu_si128(reinterpret_cast<register_type *>(out.data()), v);
111 }
112
113 [[nodiscard]] explicit native_simd(array_type other) noexcept :
114 v(_mm_loadu_si128(reinterpret_cast<register_type const *>(other.data())))
115 {
116 }
117
118 [[nodiscard]] explicit operator array_type() const noexcept
119 {
120 auto r = array_type{};
121 _mm_storeu_si128(reinterpret_cast<register_type *>(r.data()), v);
122 return r;
123 }
124
125 [[nodiscard]] explicit native_simd(native_simd<float, 4> const& a) noexcept;
126 [[nodiscard]] explicit native_simd(native_simd<uint32_t, 4> const& a) noexcept;
127#ifdef HI_HAS_AVX
128 [[nodiscard]] explicit native_simd(native_simd<double, 4> const& a) noexcept;
129#endif
130
140 [[nodiscard]] static native_simd broadcast(value_type a) noexcept
141 {
142 return native_simd{_mm_set1_epi32(a)};
143 }
144
154 [[nodiscard]] static native_simd broadcast(native_simd a) noexcept
155 {
156#ifdef HI_HAS_AVX2
157 return native_simd{_mm_broadcastd_epi32(a.v)};
158#else
159 return native_simd{_mm_shuffle_epi32(a.v, 0b00'00'00'00)};
160#endif
161 }
162
163 [[nodiscard]] static native_simd ones() noexcept
164 {
165 hilet tmp = _mm_undefined_si128();
166 return native_simd{_mm_cmpeq_epi32(tmp, tmp)};
167 }
168
169 template<size_t Mask>
170 [[nodiscard]] static native_simd from_mask() noexcept
171 {
172 return native_simd{
173 to_bool(Mask & 0b0001) ? static_cast<value_type>(0xffff'ffff) : 0,
174 to_bool(Mask & 0b0010) ? static_cast<value_type>(0xffff'ffff) : 0,
175 to_bool(Mask & 0b0100) ? static_cast<value_type>(0xffff'ffff) : 0,
176 to_bool(Mask & 0b1000) ? static_cast<value_type>(0xffff'ffff) : 0};
177 }
178
181 [[nodiscard]] static native_simd from_mask(size_t a) noexcept
182 {
183 hi_axiom(a <= 0b1111);
184
185 uint64_t a_ = a;
186
187 a_ <<= 31;
189 a_ >>= 1;
191 a_ >>= 1;
193 a_ >>= 1;
195
196 tmp = _mm_srai_epi32(tmp, 31);
197 return native_simd{tmp};
198 }
199
202 [[nodiscard]] size_t mask() const noexcept
203 {
205 }
206
207 [[nodiscard]] friend bool equal(native_simd a, native_simd b) noexcept
208 {
209 return (a == b).mask() == 0b1111;
210 }
211
212 [[nodiscard]] friend native_simd operator==(native_simd a, native_simd b) noexcept
213 {
214 return native_simd{_mm_cmpeq_epi32(a.v, b.v)};
215 }
216
217 [[nodiscard]] friend native_simd operator!=(native_simd a, native_simd b) noexcept
218 {
219 return ~(a == b);
220 }
221
222 [[nodiscard]] friend native_simd operator<(native_simd a, native_simd b) noexcept
223 {
224 return native_simd{_mm_cmplt_epi32(a.v, b.v)};
225 }
226
227 [[nodiscard]] friend native_simd operator>(native_simd a, native_simd b) noexcept
228 {
229 return native_simd{_mm_cmpgt_epi32(a.v, b.v)};
230 }
231
232 [[nodiscard]] friend native_simd operator<=(native_simd a, native_simd b) noexcept
233 {
234 return ~(a > b);
235 }
236
237 [[nodiscard]] friend native_simd operator>=(native_simd a, native_simd b) noexcept
238 {
239 return ~(a < b);
240 }
241
242 [[nodiscard]] friend native_simd operator+(native_simd a) noexcept
243 {
244 return a;
245 }
246
247 [[nodiscard]] friend native_simd operator-(native_simd a) noexcept
248 {
249 return native_simd{} - a;
250 }
251
252 [[nodiscard]] friend native_simd operator+(native_simd a, native_simd b) noexcept
253 {
254 return native_simd{_mm_add_epi32(a.v, b.v)};
255 }
256
257 [[nodiscard]] friend native_simd operator-(native_simd a, native_simd b) noexcept
258 {
259 return native_simd{_mm_sub_epi32(a.v, b.v)};
260 }
261
262 [[nodiscard]] friend native_simd operator*(native_simd a, native_simd b) noexcept
263 {
264 return native_simd{_mm_mullo_epi32(a.v, b.v)};
265 }
266
267 [[nodiscard]] friend native_simd operator&(native_simd a, native_simd b) noexcept
268 {
269 return native_simd{_mm_and_si128(a.v, b.v)};
270 }
271
272 [[nodiscard]] friend native_simd operator|(native_simd a, native_simd b) noexcept
273 {
274 return native_simd{_mm_or_si128(a.v, b.v)};
275 }
276
277 [[nodiscard]] friend native_simd operator^(native_simd a, native_simd b) noexcept
278 {
279 return native_simd{_mm_xor_si128(a.v, b.v)};
280 }
281
282 [[nodiscard]] friend native_simd operator~(native_simd a) noexcept
283 {
284 auto ones = _mm_undefined_si128();
286 return native_simd{_mm_andnot_si128(a.v, ones)};
287 }
288
289 [[nodiscard]] friend native_simd operator<<(native_simd a, unsigned int b) noexcept
290 {
291 hi_axiom_bounds(b, sizeof(value_type) * CHAR_BIT);
292 return native_simd{_mm_slli_epi32(a.v, b)};
293 }
294
295 [[nodiscard]] friend native_simd operator>>(native_simd a, unsigned int b) noexcept
296 {
297 hi_axiom_bounds(b, sizeof(value_type) * CHAR_BIT);
298 return native_simd{_mm_srai_epi32(a.v, b)};
299 }
300
301 [[nodiscard]] friend native_simd min(native_simd a, native_simd b) noexcept
302 {
303#if HI_HAS_SSE4_1
304 return native_simd{_mm_min_epi32(a.v, b.v)};
305#else
306 hilet mask = a < b;
307 return (mask & a) | not_and(mask, b);
308#endif
309 }
310
311 [[nodiscard]] friend native_simd max(native_simd a, native_simd b) noexcept
312 {
313#if HI_HAS_SSE4_1
314 return native_simd{_mm_max_epi32(a.v, b.v)};
315#else
316 hilet mask = a > b;
317 return (mask & a) | not_and(mask, b);
318#endif
319 }
320
321 [[nodiscard]] friend native_simd abs(native_simd a) noexcept
322 {
323#if HI_HAS_SSSE3
324 return native_simd{_mm_abs_epi32(a.v)};
325#else
326 hilet mask = a >= native_simd{};
327 return (mask & a) | not_and(mask, -a);
328#endif
329 }
330
337 template<size_t Mask>
338 [[nodiscard]] friend native_simd set_zero(native_simd a) noexcept
339 {
340 static_assert(Mask <= 0b1111);
341#ifdef HI_HAS_SSE4_1
342 return native_simd{_mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(a.v), _mm_castsi128_ps(a.v), Mask))};
343#else
344 hilet mask = from_mask<Mask>();
345 return not_and(mask, a);
346#endif
347 }
348
356 template<size_t Index>
357 [[nodiscard]] friend native_simd insert(native_simd a, value_type b) noexcept
358 {
359 static_assert(Index < 4);
360
361#ifdef HI_HAS_SSE4_1
362 return native_simd{_mm_insert_epi32(a.v, b, Index)};
363#else
364 hilet mask = from_mask<1_uz << Index>();
365 return not_and(mask, a) | (mask & broadcast(b));
366#endif
367 }
368
375 template<size_t Index>
376 [[nodiscard]] friend value_type get(native_simd a) noexcept
377 {
378#ifdef HI_HAS_SSE4_1
379 return _mm_extract_epi32(a.v, Index);
380#else
381 auto r = static_cast<array_type>(a);
382 return std::get<Index>(r);
383#endif
384 }
385
394 template<size_t Mask>
395 [[nodiscard]] friend native_simd blend(native_simd a, native_simd b) noexcept
396 {
397#ifdef HI_HAS_SSE4_1
398 return native_simd{_mm_blend_epi32(a.v, b.v, Mask)};
399#else
400 hilet mask = from_mask<Mask>();
401 return not_and(mask, a) | (mask & b);
402#endif
403 }
404
417 template<fixed_string SourceElements>
418 [[nodiscard]] friend native_simd permute(native_simd a) noexcept
419 {
420 constexpr auto order = detail::native_swizzle_to_packed_indices<SourceElements, size>();
421
422 if constexpr (order == 0b11'10'01'00) {
423 return a;
424 } else if constexpr (order == 0b00'00'00'00) {
425 return broadcast(a);
426 } else {
427 return native_simd{_mm_shuffle_epi32(a.v, order)};
428 }
429 }
430
447 template<fixed_string SourceElements>
448 [[nodiscard]] friend native_simd swizzle(native_simd a) noexcept
449 {
450 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
451 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
452 constexpr auto number_mask = one_mask | zero_mask;
453
454 if constexpr (number_mask == 0b1111) {
455 // Swizzle was /[01][01][01][01]/.
457
458 } else if constexpr (number_mask == 0b0000) {
459 // Swizzle was /[^01][^01][^01][^01]/.
460 return permute<SourceElements>(a);
461
462#ifdef HI_HAS_SSE4_1
463 } else if constexpr (number_mask == zero_mask) {
464 // Swizzle was /[^1][^1][^1][^1]/.
467#endif
468
469 } else {
473 }
474 }
475
// _mm_hadd_epi32() (PHADDD) is an SSSE3 instruction, not SSE3.
#ifdef HI_HAS_SSSE3
/** Add adjacent pairs of elements of two vectors.
 *
 * @param a The vector supplying the two low elements of the result.
 * @param b The vector supplying the two high elements of the result.
 * @return (a[0] + a[1], a[2] + a[3], b[0] + b[1], b[2] + b[3])
 */
[[nodiscard]] friend native_simd horizontal_add(native_simd a, native_simd b) noexcept
{
    return native_simd{_mm_hadd_epi32(a.v, b.v)};
}
#endif
492
// _mm_hsub_epi32() (PHSUBD) is an SSSE3 instruction, not SSE3.
#ifdef HI_HAS_SSSE3
/** Subtract adjacent pairs of elements of two vectors.
 *
 * @param a The vector supplying the two low elements of the result.
 * @param b The vector supplying the two high elements of the result.
 * @return (a[0] - a[1], a[2] - a[3], b[0] - b[1], b[2] - b[3])
 */
[[nodiscard]] friend native_simd horizontal_sub(native_simd a, native_simd b) noexcept
{
    return native_simd{_mm_hsub_epi32(a.v, b.v)};
}
#endif
509
516 [[nodiscard]] friend native_simd horizontal_sum(native_simd a) noexcept
517 {
518 auto tmp = a + permute<"cdab">(a);
519 return tmp + permute<"badc">(tmp);
520 }
521
532 template<size_t SourceMask>
533 [[nodiscard]] friend native_simd dot_product(native_simd a, native_simd b) noexcept
534 {
535 static_assert(SourceMask <= 0b1111);
537 }
538
544 [[nodiscard]] friend native_simd not_and(native_simd a, native_simd b) noexcept
545 {
546 return native_simd{_mm_andnot_si128(a.v, b.v)};
547 }
548
549 friend std::ostream& operator<<(std::ostream& a, native_simd b) noexcept
550 {
551 return a << "(" << get<0>(b) << ", " << get<1>(b) << ", " << get<2>(b) << ", " << get<3>(b) << ")";
552 }
553
554 template<fixed_string SourceElements>
555 [[nodiscard]] static native_simd swizzle_numbers() noexcept
556 {
557 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
558 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
559 constexpr auto number_mask = one_mask | zero_mask;
560 constexpr auto alpha_mask = ~number_mask & 0b1111;
561
562 if constexpr ((zero_mask | alpha_mask) == 0b1111) {
563 return native_simd{_mm_setzero_si128()};
564
565 } else if constexpr ((one_mask | alpha_mask) == 0b1111) {
566 return native_simd{_mm_set1_epi32(1)};
567
568 } else {
569 return native_simd{
570 to_bool(one_mask & 0b0001) ? 1 : 0,
571 to_bool(one_mask & 0b0010) ? 1 : 0,
572 to_bool(one_mask & 0b0100) ? 1 : 0,
573 to_bool(one_mask & 0b1000) ? 1 : 0};
574 }
575 }
576};
577
578#endif
579
580}} // namespace hi::v1
581
582hi_warning_pop();
@ other
The gui_event does not have associated data.
DOXYGEN BUG.
Definition algorithm.hpp:16
geometry/margins.hpp
Definition lookahead_iterator.hpp:5
constexpr Out narrow_cast(In const &rhs) noexcept
Cast numeric values without loss of precision.
Definition cast.hpp:377
T equal(T... args)
T max(T... args)
T min(T... args)
T operator!=(T... args)