HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
native_i64x4_avx2.hpp
1// Copyright Take Vos 2022, 2023.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "native_simd_utility.hpp"
8#include "../utility/module.hpp"
9#include <span>
10#include <array>
11#include <ostream>
12
13namespace hi { inline namespace v1 {
14
15#ifdef HI_HAS_AVX2
16
33template<>
34struct native_simd<int64_t,4> {
35 using value_type = int64_t;
36 constexpr static size_t size = 4;
37 using array_type = std::array<value_type, size>;
38 using register_type = __m256i;
39
40 register_type v;
41
42 native_simd(native_simd const&) noexcept = default;
43 native_simd(native_simd&&) noexcept = default;
44 native_simd& operator=(native_simd const&) noexcept = default;
45 native_simd& operator=(native_simd&&) noexcept = default;
46
49 native_simd() noexcept : v(_mm256_setzero_si256()) {}
50
51 [[nodiscard]] explicit native_simd(register_type other) noexcept : v(other) {}
52
53 [[nodiscard]] explicit operator register_type() const noexcept
54 {
55 return v;
56 }
57
65 [[nodiscard]] native_simd(
66 value_type a,
67 value_type b = value_type{0},
68 value_type c = value_type{0},
69 value_type d = value_type{0}) noexcept :
70 v(_mm256_set_epi64x(d, c, b, a))
71 {
72 }
73
74 [[nodiscard]] explicit native_simd(value_type const *other) noexcept :
75 v(_mm256_loadu_si256(reinterpret_cast<register_type const *>(other)))
76 {
77 }
78
79 void store(value_type *out) const noexcept
80 {
82 _mm256_storeu_si256(reinterpret_cast<register_type *>(out), v);
83 }
84
85 [[nodiscard]] explicit native_simd(void const *other) noexcept :
86 v(_mm256_loadu_si256(static_cast<register_type const *>(other)))
87 {
88 }
89
90 void store(void *out) const noexcept
91 {
93 _mm256_storeu_si256(static_cast<register_type *>(out), v);
94 }
95
96 [[nodiscard]] explicit native_simd(std::span<value_type const> other) noexcept
97 {
98 hi_axiom(other.size() >= 4);
99 v = _mm256_loadu_si256(reinterpret_cast<register_type const *>(other.data()));
100 }
101
102 void store(std::span<value_type> out) const noexcept
103 {
104 hi_axiom(out.size() >= 4);
105 _mm256_storeu_si256(reinterpret_cast<register_type *>(out.data()), v);
106 }
107
108 [[nodiscard]] explicit native_simd(array_type other) noexcept :
109 v(_mm256_loadu_si256(reinterpret_cast<register_type const *>(other.data())))
110 {
111 }
112
113 [[nodiscard]] explicit operator array_type() const noexcept
114 {
115 auto r = array_type{};
116 _mm256_storeu_si256(reinterpret_cast<register_type *>(r.data()), v);
117 return r;
118 }
119
120 [[nodiscard]] explicit native_simd(native_simd<int32_t,4> const& a) noexcept;
121 [[nodiscard]] explicit native_simd(native_simd<uint32_t,4> const& a) noexcept;
122
132 [[nodiscard]] static native_simd broadcast(value_type a) noexcept
133 {
134 return native_simd{_mm256_set1_epi64x(a)};
135 }
136
146 [[nodiscard]] static native_simd broadcast(native_simd a) noexcept
147 {
148 return native_simd{_mm256_permute4x64_epi64(a.v, 0b00'00'00'00)};
149 }
150
153 [[nodiscard]] static native_simd ones() noexcept
154 {
155 auto ones = _mm256_undefined_si256();
156 ones = _mm256_cmpeq_epi32(ones, ones);
157 return native_simd{ones};
158 }
159
160 [[nodiscard]] static native_simd from_mask(size_t a) noexcept
161 {
162 hi_axiom(a <= 0b1111);
163
164 uint64_t a_ = a;
165
166 a_ <<= 31;
167 auto tmp = _mm_cvtsi32_si128(truncate<uint32_t>(a_));
168 a_ >>= 1;
169 tmp = _mm_insert_epi32(tmp, truncate<uint32_t>(a_), 1);
170 a_ >>= 1;
171 tmp = _mm_insert_epi32(tmp, truncate<uint32_t>(a_), 2);
172 a_ >>= 1;
173 tmp = _mm_insert_epi32(tmp, truncate<uint32_t>(a_), 3);
174
175 tmp = _mm_srai_epi32(tmp, 31);
176 return native_simd{_mm256_cvtepi32_epi64(tmp)};
177 }
178
181 [[nodiscard]] size_t mask() const noexcept
182 {
183 return narrow_cast<size_t>(_mm256_movemask_pd(_mm256_castsi256_pd(v)));
184 }
185
192 [[nodiscard]] friend bool equal(native_simd a, native_simd b) noexcept
193 {
194 return (a == b).mask() == 0b1111;
195 }
196
197 [[nodiscard]] friend native_simd operator==(native_simd a, native_simd b) noexcept
198 {
199 return native_simd{_mm256_cmpeq_epi64(a.v, b.v)};
200 }
201
202 [[nodiscard]] friend native_simd operator!=(native_simd a, native_simd b) noexcept
203 {
204 return ~(a == b);
205 }
206
207 [[nodiscard]] friend native_simd operator<(native_simd a, native_simd b) noexcept
208 {
209 return native_simd{_mm256_cmpgt_epi64(b.v, a.v)};
210 }
211
212 [[nodiscard]] friend native_simd operator>(native_simd a, native_simd b) noexcept
213 {
214 return native_simd{_mm256_cmpgt_epi64(a.v, b.v)};
215 }
216
217 [[nodiscard]] friend native_simd operator<=(native_simd a, native_simd b) noexcept
218 {
219 return ~(a > b);
220 }
221
222 [[nodiscard]] friend native_simd operator>=(native_simd a, native_simd b) noexcept
223 {
224 return ~(a < b);
225 }
226
227 [[nodiscard]] friend native_simd operator+(native_simd a) noexcept
228 {
229 return a;
230 }
231
232 [[nodiscard]] friend native_simd operator-(native_simd a) noexcept
233 {
234 return native_simd{} - a;
235 }
236
237 [[nodiscard]] friend native_simd operator+(native_simd a, native_simd b) noexcept
238 {
239 return native_simd{_mm256_add_epi64(a.v, b.v)};
240 }
241
242 [[nodiscard]] friend native_simd operator-(native_simd a, native_simd b) noexcept
243 {
244 return native_simd{_mm256_sub_epi64(a.v, b.v)};
245 }
246
247 [[nodiscard]] friend native_simd operator&(native_simd a, native_simd b) noexcept
248 {
249 return native_simd{_mm256_and_si256(a.v, b.v)};
250 }
251
252 [[nodiscard]] friend native_simd operator|(native_simd a, native_simd b) noexcept
253 {
254 return native_simd{_mm256_or_si256(a.v, b.v)};
255 }
256
257 [[nodiscard]] friend native_simd operator^(native_simd a, native_simd b) noexcept
258 {
259 return native_simd{_mm256_xor_si256(a.v, b.v)};
260 }
261
262 [[nodiscard]] friend native_simd operator~(native_simd a) noexcept
263 {
264 return not_and(a, ones());
265 }
266
267 [[nodiscard]] friend native_simd operator<<(native_simd a, unsigned int b) noexcept
268 {
269 hi_axiom_bounds(b, sizeof(value_type) * CHAR_BIT);
270 return native_simd{_mm256_slli_epi64(a.v, b)};
271 }
272
273 [[nodiscard]] friend native_simd operator>>(native_simd a, unsigned int b) noexcept
274 {
275 hi_axiom_bounds(b, sizeof(value_type) * CHAR_BIT);
276
277#ifdef HI_HAS_AVX512F
278 return native_simd{_mm256_srai_epi64(a.v, b)};
279
280#else
281 hilet shifted_value = _mm256_srli_epi64(a.v, b);
282 hilet zero = _mm256_setzero_si256();
283 hilet ones = _mm256_cmpeq_epi64(zero, zero);
284 hilet shifted_ones = _mm256_slli_epi64(ones, 63 - b);
285 hilet is_negative = _mm256_cmpgt_epi64(zero, a.v);
286 hilet masked_shifted_ones = _mm256_and_si256(is_negative, shifted_ones);
287 return native_simd{_mm256_or_si256(shifted_value, masked_shifted_ones)};
288#endif
289 }
290
291 [[nodiscard]] friend native_simd min(native_simd a, native_simd b) noexcept
292 {
293 hilet mask = a < b;
294 return (mask & a) | not_and(mask, b);
295 }
296
297 [[nodiscard]] friend native_simd max(native_simd a, native_simd b) noexcept
298 {
299 hilet mask = a > b;
300 return (mask & a) | not_and(mask, b);
301 }
302
303 [[nodiscard]] friend native_simd abs(native_simd a) noexcept
304 {
305 hilet mask = a >= native_simd{};
306 return (mask & a) | not_and(mask, -a);
307 }
308
315 template<size_t Mask>
316 [[nodiscard]] friend native_simd set_zero(native_simd a) noexcept
317 {
318 static_assert(Mask <= 0b1111);
319
320 return blend<Mask>(a, native_simd{});
321 }
322
330 template<size_t Index>
331 [[nodiscard]] friend native_simd insert(native_simd a, value_type b) noexcept
332 {
333 static_assert(Index < 4);
334 return blend<1_uz << Index>(a, broadcast(b));
335 }
336
343 template<size_t Index>
344 [[nodiscard]] friend value_type get(native_simd a) noexcept
345 {
346 static_assert(Index < size);
347
348 return _mm256_extract_epi64(a.v, Index);
349 }
350
359 template<size_t Mask>
360 [[nodiscard]] friend native_simd blend(native_simd a, native_simd b) noexcept
361 {
362 static_assert(Mask <= 0b1111);
363
364 if constexpr (Mask == 0b0000) {
365 return a;
366 } else if constexpr (Mask == 0b1111) {
367 return b;
368 } else {
369 // clang-format off
370 constexpr auto dmask =
371 (Mask & 0b0001) | ((Mask & 0b0001) << 1) |
372 ((Mask & 0b0010) << 1) | ((Mask & 0b0010) << 2) |
373 ((Mask & 0b0100) << 2) | ((Mask & 0b0100) << 3) |
374 ((Mask & 0b1000) << 3) | ((Mask & 0b1000) << 4);
375 // clang-format on
376 return native_simd{_mm256_blend_epi32(a.v, b.v, dmask)};
377 }
378 }
379
392 template<fixed_string SourceElements>
393 [[nodiscard]] friend native_simd permute(native_simd a) noexcept
394 {
395 static_assert(SourceElements.size() == size);
396 constexpr auto order = detail::native_swizzle_to_packed_indices<SourceElements, size>();
397
398 if constexpr (order == 0b11'10'01'00) {
399 return a;
400 } else {
401 return native_simd{_mm256_permute4x64_epi64(a.v, order)};
402 }
403 }
404
421 template<fixed_string SourceElements>
422 [[nodiscard]] friend native_simd swizzle(native_simd a) noexcept
423 {
424 static_assert(SourceElements.size() == size);
425 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
426 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
427 constexpr auto number_mask = one_mask | zero_mask;
428
429 if constexpr (number_mask == 0b1111) {
430 // Swizzle was /[01][01][01][01]/.
431 return swizzle_numbers<SourceElements>();
432
433 } else if constexpr (number_mask == 0b0000) {
434 // Swizzle was /[^01][^01][^01][^01]/.
435 return permute<SourceElements>(a);
436
437#ifdef HI_HAS_SSE4_1
438 } else if constexpr (number_mask == zero_mask) {
439 // Swizzle was /[^1][^1][^1][^1]/.
440 hilet ordered = permute<SourceElements>(a);
441 return set_zero<zero_mask>(ordered);
442#endif
443
444 } else {
445 hilet ordered = permute<SourceElements>(a);
446 hilet numbers = swizzle_numbers<SourceElements>();
447 return blend<number_mask>(ordered, numbers);
448 }
449 }
450
456 [[nodiscard]] friend native_simd not_and(native_simd a, native_simd b) noexcept
457 {
458 return native_simd{_mm256_andnot_si256(a.v, b.v)};
459 }
460
461 friend std::ostream& operator<<(std::ostream& a, native_simd b) noexcept
462 {
463 return a << "(" << get<0>(b) << ", " << get<1>(b) << ", " << get<2>(b) << ", " << get<3>(b) << ")";
464 }
465
466 template<fixed_string SourceElements>
467 [[nodiscard]] static native_simd swizzle_numbers() noexcept
468 {
469 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
470 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
471 constexpr auto number_mask = one_mask | zero_mask;
472 constexpr auto alpha_mask = ~number_mask & 0b1111;
473
474 if constexpr ((zero_mask | alpha_mask) == 0b1111) {
475 return {};
476
477 } else if constexpr ((one_mask | alpha_mask) == 0b1111) {
478 return broadcast(1);
479
480 } else {
481 return native_simd{
482 to_bool(one_mask & 0b0001) ? 1 : 0,
483 to_bool(one_mask & 0b0010) ? 1 : 0,
484 to_bool(one_mask & 0b0100) ? 1 : 0,
485 to_bool(one_mask & 0b1000) ? 1 : 0};
486 }
487 }
488};
489
490#endif
491
492}} // namespace hi::v1
#define hi_axiom_bounds(x,...)
Specify an axiom that the value is within bounds.
Definition assert.hpp:264
#define hi_axiom(expression,...)
Specify an axiom; an expression that is true.
Definition assert.hpp:253
#define hi_axiom_not_null(expression,...)
Assert if an expression is not nullptr.
Definition assert.hpp:272
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
@ other
The gui_event does not have associated data.
DOXYGEN BUG.
Definition algorithm.hpp:13
geometry/margins.hpp
Definition cache.hpp:11
@ zero
The number was zero, and this means something in the current language.
T equal(T... args)
T max(T... args)
T min(T... args)
T operator!=(T... args)