HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
native_i64x4_avx2.hpp
1// Copyright Take Vos 2022, 2023.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "native_simd_utility.hpp"
8#include "../utility/utility.hpp"
9#include "../macros.hpp"
10#include <span>
11#include <array>
12#include <ostream>
13
14
15
16namespace hi { inline namespace v1 {
17
18#ifdef HI_HAS_AVX2
19
36template<>
37struct native_simd<int64_t,4> {
38 using value_type = int64_t;
39 constexpr static size_t size = 4;
40 using array_type = std::array<value_type, size>;
41 using register_type = __m256i;
42
44
// Copy/move construction and assignment are the compiler-generated defaults.
// NOTE(review): this listing jumps from original line 42 to 44; the member `v`
// that every constructor below initialises is not declared in the visible text
// and was presumably declared on the missing line 43 -- confirm against upstream.
45 native_simd(native_simd const&) noexcept = default;
46 native_simd(native_simd&&) noexcept = default;
47 native_simd& operator=(native_simd const&) noexcept = default;
48 native_simd& operator=(native_simd&&) noexcept = default;
49
52 native_simd() noexcept : v(_mm256_setzero_si256()) {}
53
54 [[nodiscard]] explicit native_simd(register_type other) noexcept : v(other) {}
55
56 [[nodiscard]] explicit operator register_type() const noexcept
57 {
58 return v;
59 }
60
68 [[nodiscard]] native_simd(
69 value_type a,
70 value_type b = value_type{0},
71 value_type c = value_type{0},
72 value_type d = value_type{0}) noexcept :
73 v(_mm256_set_epi64x(d, c, b, a))
74 {
75 }
76
77 [[nodiscard]] explicit native_simd(value_type const *other) noexcept :
78 v(_mm256_loadu_si256(reinterpret_cast<register_type const *>(other)))
79 {
80 }
81
82 void store(value_type *out) const noexcept
83 {
84 hi_axiom_not_null(out);
85 _mm256_storeu_si256(reinterpret_cast<register_type *>(out), v);
86 }
87
88 [[nodiscard]] explicit native_simd(void const *other) noexcept :
89 v(_mm256_loadu_si256(static_cast<register_type const *>(other)))
90 {
91 }
92
93 void store(void *out) const noexcept
94 {
95 hi_axiom_not_null(out);
96 _mm256_storeu_si256(static_cast<register_type *>(out), v);
97 }
98
99 [[nodiscard]] explicit native_simd(std::span<value_type const> other) noexcept
100 {
101 hi_axiom(other.size() >= 4);
102 v = _mm256_loadu_si256(reinterpret_cast<register_type const *>(other.data()));
103 }
104
105 void store(std::span<value_type> out) const noexcept
106 {
107 hi_axiom(out.size() >= 4);
108 _mm256_storeu_si256(reinterpret_cast<register_type *>(out.data()), v);
109 }
110
111 [[nodiscard]] explicit native_simd(array_type other) noexcept :
112 v(_mm256_loadu_si256(reinterpret_cast<register_type const *>(other.data())))
113 {
114 }
115
116 [[nodiscard]] explicit operator array_type() const noexcept
117 {
118 auto r = array_type{};
119 _mm256_storeu_si256(reinterpret_cast<register_type *>(r.data()), v);
120 return r;
121 }
122
// Converting constructors from the 4-element 32-bit signed/unsigned vectors.
// Only declared here; their definitions are not part of this listing.
// NOTE(review): presumably each 32-bit element is widened to 64 bits -- confirm
// against the definitions.
123 [[nodiscard]] explicit native_simd(native_simd<int32_t,4> const& a) noexcept;
124 [[nodiscard]] explicit native_simd(native_simd<uint32_t,4> const& a) noexcept;
125
135 [[nodiscard]] static native_simd broadcast(value_type a) noexcept
136 {
137 return native_simd{_mm256_set1_epi64x(a)};
138 }
139
149 [[nodiscard]] static native_simd broadcast(native_simd a) noexcept
150 {
151 return native_simd{_mm256_permute4x64_epi64(a.v, 0b00'00'00'00)};
152 }
153
// Returns a vector built from a local named `ones`.
// NOTE(review): original lines 158-159 are missing from this listing, so the
// local `ones` is never declared in the visible text; presumably those lines
// produce an all-bits-set register (operator~ relies on that) -- restore them
// from the upstream header before compiling.
156 [[nodiscard]] static native_simd ones() noexcept
157 {
160 return native_simd{ones};
161 }
162
// Expand a 4-bit mask into a vector.
// The visible code asserts `a <= 0b1111`, shifts the mask so that bit 0 lands on
// bit 31, then shifts right by one three times; finally each 32-bit lane of
// `tmp` is arithmetic-shifted right by 31 and sign-extended to a 64-bit element.
// NOTE(review): original lines 170, 172, 174 and 176 are missing from this
// listing, so `tmp` is never declared or filled in the visible text; presumably
// each missing line copies the current low 32 bits of `a_` into the next 32-bit
// lane of `tmp` -- restore them from the upstream header.
163 [[nodiscard]] static native_simd from_mask(size_t a) noexcept
164 {
165 hi_axiom(a <= 0b1111);
166
167 uint64_t a_ = a;
168
169 a_ <<= 31;
171 a_ >>= 1;
173 a_ >>= 1;
175 a_ >>= 1;
177
178 tmp = _mm_srai_epi32(tmp, 31);
179 return native_simd{_mm256_cvtepi32_epi64(tmp)};
180 }
181
// Reduce the vector to an integer mask.
// NOTE(review): original line 186 (the entire body) is missing from this
// listing. `equal()` compares the result against 0b1111, so presumably one bit
// per element is returned -- restore the body from the upstream header.
184 [[nodiscard]] size_t mask() const noexcept
185 {
187 }
188
195 [[nodiscard]] friend bool equal(native_simd a, native_simd b) noexcept
196 {
197 return (a == b).mask() == 0b1111;
198 }
199
200 [[nodiscard]] friend native_simd operator==(native_simd a, native_simd b) noexcept
201 {
202 return native_simd{_mm256_cmpeq_epi64(a.v, b.v)};
203 }
204
205 [[nodiscard]] friend native_simd operator!=(native_simd a, native_simd b) noexcept
206 {
207 return ~(a == b);
208 }
209
210 [[nodiscard]] friend native_simd operator<(native_simd a, native_simd b) noexcept
211 {
212 return native_simd{_mm256_cmpgt_epi64(b.v, a.v)};
213 }
214
215 [[nodiscard]] friend native_simd operator>(native_simd a, native_simd b) noexcept
216 {
217 return native_simd{_mm256_cmpgt_epi64(a.v, b.v)};
218 }
219
220 [[nodiscard]] friend native_simd operator<=(native_simd a, native_simd b) noexcept
221 {
222 return ~(a > b);
223 }
224
225 [[nodiscard]] friend native_simd operator>=(native_simd a, native_simd b) noexcept
226 {
227 return ~(a < b);
228 }
229
230 [[nodiscard]] friend native_simd operator+(native_simd a) noexcept
231 {
232 return a;
233 }
234
235 [[nodiscard]] friend native_simd operator-(native_simd a) noexcept
236 {
237 return native_simd{} - a;
238 }
239
240 [[nodiscard]] friend native_simd operator+(native_simd a, native_simd b) noexcept
241 {
242 return native_simd{_mm256_add_epi64(a.v, b.v)};
243 }
244
245 [[nodiscard]] friend native_simd operator-(native_simd a, native_simd b) noexcept
246 {
247 return native_simd{_mm256_sub_epi64(a.v, b.v)};
248 }
249
250 [[nodiscard]] friend native_simd operator&(native_simd a, native_simd b) noexcept
251 {
252 return native_simd{_mm256_and_si256(a.v, b.v)};
253 }
254
255 [[nodiscard]] friend native_simd operator|(native_simd a, native_simd b) noexcept
256 {
257 return native_simd{_mm256_or_si256(a.v, b.v)};
258 }
259
260 [[nodiscard]] friend native_simd operator^(native_simd a, native_simd b) noexcept
261 {
262 return native_simd{_mm256_xor_si256(a.v, b.v)};
263 }
264
265 [[nodiscard]] friend native_simd operator~(native_simd a) noexcept
266 {
267 return not_and(a, ones());
268 }
269
270 [[nodiscard]] friend native_simd operator<<(native_simd a, unsigned int b) noexcept
271 {
272 hi_axiom_bounds(b, sizeof(value_type) * CHAR_BIT);
273 return native_simd{_mm256_slli_epi64(a.v, b)};
274 }
275
// Shift every 64-bit element right by `b` bits.
// With HI_HAS_AVX512F the arithmetic-shift intrinsic is used directly; otherwise
// the visible fallback starts from a logical shift, a zero register, a
// left-shifted `ones` value and a per-element "is negative" compare.
// NOTE(review): original lines 286 and 289-290 are missing from this listing:
// the local `ones` is never declared and the fallback branch has no return.
// Presumably the missing lines OR the sign bits back into the logically shifted
// value -- restore them from the upstream header.
// NOTE(review): Intel documents _mm256_srai_epi64 as requiring AVX512F *and*
// AVX512VL; confirm that HI_HAS_AVX512F implies VL support.
276 [[nodiscard]] friend native_simd operator>>(native_simd a, unsigned int b) noexcept
277 {
278 hi_axiom_bounds(b, sizeof(value_type) * CHAR_BIT);
279
280#ifdef HI_HAS_AVX512F
281 return native_simd{_mm256_srai_epi64(a.v, b)};
282
283#else
284 hilet shifted_value = _mm256_srli_epi64(a.v, b);
285 hilet zero = _mm256_setzero_si256();
287 hilet shifted_ones = _mm256_slli_epi64(ones, 63 - b);
288 hilet is_negative = _mm256_cmpgt_epi64(zero, a.v);
291#endif
292 }
293
294 [[nodiscard]] friend native_simd min(native_simd a, native_simd b) noexcept
295 {
296 hilet mask = a < b;
297 return (mask & a) | not_and(mask, b);
298 }
299
300 [[nodiscard]] friend native_simd max(native_simd a, native_simd b) noexcept
301 {
302 hilet mask = a > b;
303 return (mask & a) | not_and(mask, b);
304 }
305
306 [[nodiscard]] friend native_simd abs(native_simd a) noexcept
307 {
308 hilet mask = a >= native_simd{};
309 return (mask & a) | not_and(mask, -a);
310 }
311
318 template<size_t Mask>
319 [[nodiscard]] friend native_simd set_zero(native_simd a) noexcept
320 {
321 static_assert(Mask <= 0b1111);
322
323 return blend<Mask>(a, native_simd{});
324 }
325
333 template<size_t Index>
334 [[nodiscard]] friend native_simd insert(native_simd a, value_type b) noexcept
335 {
336 static_assert(Index < 4);
337 return blend<1_uz << Index>(a, broadcast(b));
338 }
339
346 template<size_t Index>
347 [[nodiscard]] friend value_type get(native_simd a) noexcept
348 {
349 static_assert(Index < size);
350
351 return _mm256_extract_epi64(a.v, Index);
352 }
353
362 template<size_t Mask>
363 [[nodiscard]] friend native_simd blend(native_simd a, native_simd b) noexcept
364 {
365 static_assert(Mask <= 0b1111);
366
367 if constexpr (Mask == 0b0000) {
368 return a;
369 } else if constexpr (Mask == 0b1111) {
370 return b;
371 } else {
372 // clang-format off
373 constexpr auto dmask =
374 (Mask & 0b0001) | ((Mask & 0b0001) << 1) |
375 ((Mask & 0b0010) << 1) | ((Mask & 0b0010) << 2) |
376 ((Mask & 0b0100) << 2) | ((Mask & 0b0100) << 3) |
377 ((Mask & 0b1000) << 3) | ((Mask & 0b1000) << 4);
378 // clang-format on
379 return native_simd{_mm256_blend_epi32(a.v, b.v, dmask)};
380 }
381 }
382
395 template<fixed_string SourceElements>
396 [[nodiscard]] friend native_simd permute(native_simd a) noexcept
397 {
398 static_assert(SourceElements.size() == size);
399 constexpr auto order = detail::native_swizzle_to_packed_indices<SourceElements, size>();
400
401 if constexpr (order == 0b11'10'01'00) {
402 return a;
403 } else {
404 return native_simd{_mm256_permute4x64_epi64(a.v, order)};
405 }
406 }
407
// Swizzle the elements of a vector according to a four character pattern in
// which '0' and '1' characters are treated separately from all others: masks of
// the '1' and '0' positions are computed and the combined mask selects one of
// four compile-time branches.
// NOTE(review): original lines 434, 443-444 and 448-450 are missing from this
// listing, so three of the four branches have no visible body; only the
// "no literal digits" branch (a plain permute) is intact. Presumably the others
// combine permute(), set_zero(), blend() and swizzle_numbers() -- restore them
// from the upstream header.
424 template<fixed_string SourceElements>
425 [[nodiscard]] friend native_simd swizzle(native_simd a) noexcept
426 {
427 static_assert(SourceElements.size() == size);
428 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
429 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
430 constexpr auto number_mask = one_mask | zero_mask;
431
432 if constexpr (number_mask == 0b1111) {
433 // Swizzle was /[01][01][01][01]/.
435
436 } else if constexpr (number_mask == 0b0000) {
437 // Swizzle was /[^01][^01][^01][^01]/.
438 return permute<SourceElements>(a);
439
440#ifdef HI_HAS_SSE4_1
441 } else if constexpr (number_mask == zero_mask) {
442 // Swizzle was /[^1][^1][^1][^1]/.
445#endif
446
447 } else {
451 }
452 }
453
459 [[nodiscard]] friend native_simd not_and(native_simd a, native_simd b) noexcept
460 {
461 return native_simd{_mm256_andnot_si256(a.v, b.v)};
462 }
463
464 friend std::ostream& operator<<(std::ostream& a, native_simd b) noexcept
465 {
466 return a << "(" << get<0>(b) << ", " << get<1>(b) << ", " << get<2>(b) << ", " << get<3>(b) << ")";
467 }
468
469 template<fixed_string SourceElements>
470 [[nodiscard]] static native_simd swizzle_numbers() noexcept
471 {
472 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
473 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
474 constexpr auto number_mask = one_mask | zero_mask;
475 constexpr auto alpha_mask = ~number_mask & 0b1111;
476
477 if constexpr ((zero_mask | alpha_mask) == 0b1111) {
478 return {};
479
480 } else if constexpr ((one_mask | alpha_mask) == 0b1111) {
481 return broadcast(1);
482
483 } else {
484 return native_simd{
485 to_bool(one_mask & 0b0001) ? 1 : 0,
486 to_bool(one_mask & 0b0010) ? 1 : 0,
487 to_bool(one_mask & 0b0100) ? 1 : 0,
488 to_bool(one_mask & 0b1000) ? 1 : 0};
489 }
490 }
491};
492
493#endif
494
495}} // namespace hi::v1
@ other
The gui_event does not have associated data.
DOXYGEN BUG.
Definition algorithm.hpp:16
geometry/margins.hpp
Definition lookahead_iterator.hpp:5
@ zero
The number was zero, and this means something in the current language.
constexpr Out narrow_cast(In const &rhs) noexcept
Cast numeric values without loss of precision.
Definition cast.hpp:377
T equal(T... args)
T max(T... args)
T min(T... args)
T operator!=(T... args)