#include "native_simd_utility.hpp"
#include "../utility/module.hpp"
#include <array>
#include <cstddef>
#include <cstdint>
#include <ostream>
#include <span>

namespace hi {
inline namespace v1 {
template<>
struct native_simd<int64_t,4> {
    using value_type = int64_t;
    constexpr static size_t size = 4;
    using register_type = __m256i;
    using array_type = std::array<value_type, size>;

    register_type v;
    native_simd(native_simd const&) noexcept = default;
    native_simd(native_simd&&) noexcept = default;
    native_simd& operator=(native_simd const&) noexcept = default;
    native_simd& operator=(native_simd&&) noexcept = default;

    native_simd() noexcept : v(_mm256_setzero_si256()) {}
    [[nodiscard]] explicit native_simd(register_type other) noexcept : v(other) {}

    [[nodiscard]] explicit operator register_type() const noexcept
    {
        return v;
    }
    [[nodiscard]] native_simd(
        value_type a,
        value_type b = value_type{0},
        value_type c = value_type{0},
        value_type d = value_type{0}) noexcept :
        v(_mm256_set_epi64x(d, c, b, a))
    {
    }
    [[nodiscard]] explicit native_simd(value_type const *other) noexcept :
        v(_mm256_loadu_si256(reinterpret_cast<register_type const *>(other)))
    {
    }

    void store(value_type *out) const noexcept
    {
        hi_axiom_not_null(out);
        _mm256_storeu_si256(reinterpret_cast<register_type *>(out), v);
    }
    [[nodiscard]] explicit native_simd(void const *other) noexcept :
        v(_mm256_loadu_si256(static_cast<register_type const *>(other)))
    {
    }

    void store(void *out) const noexcept
    {
        hi_axiom_not_null(out);
        _mm256_storeu_si256(static_cast<register_type *>(out), v);
    }
    [[nodiscard]] explicit native_simd(std::span<value_type const> other) noexcept
    {
        hi_axiom(other.size() >= size);
        v = _mm256_loadu_si256(reinterpret_cast<register_type const *>(other.data()));
    }

    void store(std::span<value_type> out) const noexcept
    {
        hi_axiom(out.size() >= size);
        _mm256_storeu_si256(reinterpret_cast<register_type *>(out.data()), v);
    }
    [[nodiscard]] explicit native_simd(array_type other) noexcept :
        v(_mm256_loadu_si256(reinterpret_cast<register_type const *>(other.data())))
    {
    }

    [[nodiscard]] explicit operator array_type() const noexcept
    {
        auto r = array_type{};
        _mm256_storeu_si256(reinterpret_cast<register_type *>(r.data()), v);
        return r;
    }
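    // Usage sketch (illustrative, not part of the original header): round-tripping
    // through array_type avoids raw-pointer loads and stores.
    //
    //   auto a = std::array<int64_t, 4>{1, 2, 3, 4};
    //   auto s = native_simd<int64_t, 4>{a};                  // unaligned load
    //   auto r = static_cast<std::array<int64_t, 4>>(s);      // unaligned store, r == a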
    [[nodiscard]] explicit native_simd(native_simd<int32_t,4> const& a) noexcept;
    [[nodiscard]] explicit native_simd(native_simd<uint32_t,4> const& a) noexcept;
    /** Broadcast a single value to all four elements.
     */
    [[nodiscard]] static native_simd broadcast(value_type a) noexcept
    {
        return native_simd{_mm256_set1_epi64x(a)};
    }

    /** Broadcast the first element of @a a to all four elements.
     */
    [[nodiscard]] static native_simd broadcast(native_simd a) noexcept
    {
        return native_simd{_mm256_permute4x64_epi64(a.v, 0b00'00'00'00)};
    }
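    // Usage sketch (illustrative): both overloads replicate a value into every element.
    //
    //   auto x = native_simd<int64_t, 4>::broadcast(42);                              // {42, 42, 42, 42}
    //   auto y = native_simd<int64_t, 4>::broadcast(native_simd<int64_t, 4>{1, 2, 3, 4}); // {1, 1, 1, 1}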
    /** Create a vector with all bits set.
     */
    [[nodiscard]] static native_simd ones() noexcept
    {
        // Comparing a register with itself yields all-ones, regardless of its contents.
        auto ones = _mm256_undefined_si256();
        ones = _mm256_cmpeq_epi32(ones, ones);
        return native_simd{ones};
    }
    /** Create a vector from a 4-bit mask; each set bit yields an all-ones element.
     */
    [[nodiscard]] static native_simd from_mask(size_t a) noexcept
    {
        hi_axiom(a <= 0b1111);

        // Place each mask bit in the sign position of a 32-bit lane, then
        // arithmetically shift so each lane becomes all-ones or all-zeros.
        auto a_ = a << 31;
        auto tmp = _mm_cvtsi32_si128(truncate<uint32_t>(a_));
        a_ >>= 1;
        tmp = _mm_insert_epi32(tmp, truncate<uint32_t>(a_), 1);
        a_ >>= 1;
        tmp = _mm_insert_epi32(tmp, truncate<uint32_t>(a_), 2);
        a_ >>= 1;
        tmp = _mm_insert_epi32(tmp, truncate<uint32_t>(a_), 3);
        tmp = _mm_srai_epi32(tmp, 31);
        return native_simd{_mm256_cvtepi32_epi64(tmp)};
    }
    /** Get a 4-bit mask, one bit per element, taken from each element's sign bit.
     */
    [[nodiscard]] size_t mask() const noexcept
    {
        return narrow_cast<size_t>(_mm256_movemask_pd(_mm256_castsi256_pd(v)));
    }
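    // Usage sketch (illustrative): from_mask() and mask() round-trip a 4-bit mask.
    //
    //   auto m = native_simd<int64_t, 4>::from_mask(0b0101);  // {~0, 0, ~0, 0}
    //   assert(m.mask() == 0b0101);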
    [[nodiscard]] friend bool equal(native_simd a, native_simd b) noexcept
    {
        return (a == b).mask() == 0b1111;
    }

    [[nodiscard]] friend native_simd operator==(native_simd a, native_simd b) noexcept
    {
        return native_simd{_mm256_cmpeq_epi64(a.v, b.v)};
    }
    [[nodiscard]] friend native_simd operator!=(native_simd a, native_simd b) noexcept
    {
        return ~(a == b);
    }
    [[nodiscard]] friend native_simd operator<(native_simd a, native_simd b) noexcept
    {
        return native_simd{_mm256_cmpgt_epi64(b.v, a.v)};
    }

    [[nodiscard]] friend native_simd operator>(native_simd a, native_simd b) noexcept
    {
        return native_simd{_mm256_cmpgt_epi64(a.v, b.v)};
    }
    [[nodiscard]] friend native_simd operator<=(native_simd a, native_simd b) noexcept
    {
        return ~(a > b);
    }

    [[nodiscard]] friend native_simd operator>=(native_simd a, native_simd b) noexcept
    {
        return ~(a < b);
    }
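    // Usage sketch (illustrative): comparisons return per-element masks, not bools;
    // use mask() to inspect elements, or equal() for a whole-vector comparison.
    //
    //   auto a = native_simd<int64_t, 4>{1, 2, 3, 4};
    //   auto b = native_simd<int64_t, 4>{1, 0, 3, 0};
    //   assert((a == b).mask() == 0b0101);
    //   assert(not equal(a, b));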
    [[nodiscard]] friend native_simd operator+(native_simd a) noexcept
    {
        return a;
    }
    [[nodiscard]] friend native_simd operator-(native_simd a) noexcept
    {
        return native_simd{} - a;
    }
    [[nodiscard]] friend native_simd operator+(native_simd a, native_simd b) noexcept
    {
        return native_simd{_mm256_add_epi64(a.v, b.v)};
    }

    [[nodiscard]] friend native_simd operator-(native_simd a, native_simd b) noexcept
    {
        return native_simd{_mm256_sub_epi64(a.v, b.v)};
    }

    [[nodiscard]] friend native_simd operator&(native_simd a, native_simd b) noexcept
    {
        return native_simd{_mm256_and_si256(a.v, b.v)};
    }

    [[nodiscard]] friend native_simd operator|(native_simd a, native_simd b) noexcept
    {
        return native_simd{_mm256_or_si256(a.v, b.v)};
    }

    [[nodiscard]] friend native_simd operator^(native_simd a, native_simd b) noexcept
    {
        return native_simd{_mm256_xor_si256(a.v, b.v)};
    }

    [[nodiscard]] friend native_simd operator~(native_simd a) noexcept
    {
        return not_and(a, ones());
    }
    [[nodiscard]] friend native_simd operator<<(native_simd a, unsigned int b) noexcept
    {
        return native_simd{_mm256_slli_epi64(a.v, b)};
    }
    [[nodiscard]] friend native_simd operator>>(native_simd a, unsigned int b) noexcept
    {
        // A native 64-bit arithmetic right-shift needs AVX-512VL; the feature
        // macro below is assumed, otherwise the shift is emulated.
#if defined(HI_HAS_AVX512F)
        return native_simd{_mm256_srai_epi64(a.v, b)};
#else
        // Emulate: shift logically, then OR sign-extension bits into the
        // elements that are negative.
        hilet shifted_value = _mm256_srli_epi64(a.v, b);
        hilet zero = _mm256_setzero_si256();
        hilet ones = _mm256_cmpeq_epi64(zero, zero);
        hilet shifted_ones = _mm256_slli_epi64(ones, 63 - b);
        hilet is_negative = _mm256_cmpgt_epi64(zero, a.v);
        hilet masked_shifted_ones = _mm256_and_si256(is_negative, shifted_ones);
        return native_simd{_mm256_or_si256(shifted_value, masked_shifted_ones)};
#endif
    }
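    // Usage sketch (illustrative): the right-shift is arithmetic, so the sign of
    // negative elements is preserved.
    //
    //   auto s = native_simd<int64_t, 4>{-8, 8, -2, 2} >> 1;  // {-4, 4, -1, 1}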
    [[nodiscard]] friend native_simd min(native_simd a, native_simd b) noexcept
    {
        hilet mask = a < b;
        return (mask & a) | not_and(mask, b);
    }

    [[nodiscard]] friend native_simd max(native_simd a, native_simd b) noexcept
    {
        hilet mask = a > b;
        return (mask & a) | not_and(mask, b);
    }
    [[nodiscard]] friend native_simd abs(native_simd a) noexcept
    {
        hilet mask = a >= native_simd{};
        return (mask & a) | not_and(mask, -a);
    }
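    // Usage sketch (illustrative): min(), max() and abs() are built from the
    // comparison masks above, since AVX2 has no 64-bit min/max/abs instructions.
    //
    //   auto lo = min(native_simd<int64_t, 4>{1, -2, 3, -4},
    //                 native_simd<int64_t, 4>{0, 5, 2, -4});  // {0, -2, 2, -4}
    //   auto m = abs(native_simd<int64_t, 4>{-1, 2, -3, 4});  // {1, 2, 3, 4}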
    /** Set elements to zero.
     *
     * @tparam Mask A 4-bit mask; for each set bit the corresponding element is set to zero.
     */
    template<size_t Mask>
    [[nodiscard]] friend native_simd set_zero(native_simd a) noexcept
    {
        static_assert(Mask <= 0b1111);
        return blend<Mask>(a, native_simd{});
    }
    template<size_t Index>
    [[nodiscard]] friend native_simd insert(native_simd a, value_type b) noexcept
    {
        static_assert(Index < 4);
        return blend<1_uz << Index>(a, broadcast(b));
    }
    template<size_t Index>
    [[nodiscard]] friend value_type get(native_simd a) noexcept
    {
        static_assert(Index < size);
        return _mm256_extract_epi64(a.v, Index);
    }
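    // Usage sketch (illustrative): element access uses compile-time indices.
    //
    //   auto s = native_simd<int64_t, 4>{1, 2, 3, 4};
    //   assert(get<2>(s) == 3);
    //   auto t = insert<1>(s, 9);                              // {1, 9, 3, 4}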
    /** Blend two vectors.
     *
     * @tparam Mask A 4-bit mask; for each set bit the element is taken from @a b,
     *         otherwise from @a a.
     */
    template<size_t Mask>
    [[nodiscard]] friend native_simd blend(native_simd a, native_simd b) noexcept
    {
        static_assert(Mask <= 0b1111);

        if constexpr (Mask == 0b0000) {
            return a;
        } else if constexpr (Mask == 0b1111) {
            return b;
        } else {
            // _mm256_blend_epi32() works on 32-bit lanes; expand each bit of the
            // 64-bit-element mask into two adjacent bits.
            constexpr auto dmask =
                (Mask & 0b0001) | ((Mask & 0b0001) << 1) |
                ((Mask & 0b0010) << 1) | ((Mask & 0b0010) << 2) |
                ((Mask & 0b0100) << 2) | ((Mask & 0b0100) << 3) |
                ((Mask & 0b1000) << 3) | ((Mask & 0b1000) << 4);
            return native_simd{_mm256_blend_epi32(a.v, b.v, dmask)};
        }
    }
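    // Usage sketch (illustrative): bit i of Mask selects element i from b.
    //
    //   auto a = native_simd<int64_t, 4>{1, 2, 3, 4};
    //   auto b = native_simd<int64_t, 4>{5, 6, 7, 8};
    //   auto c = blend<0b0110>(a, b);                          // {1, 6, 7, 4}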
    /** Permute the elements.
     *
     * @tparam SourceElements A string of 4 element names; each position names the
     *         source element copied to that position.
     */
    template<fixed_string SourceElements>
    [[nodiscard]] friend native_simd permute(native_simd a) noexcept
    {
        static_assert(SourceElements.size() == size);
        constexpr auto order = detail::native_swizzle_to_packed_indices<SourceElements, size>();

        if constexpr (order == 0b11'10'01'00) {
            // Identity order, nothing to do.
            return a;
        } else {
            return native_simd{_mm256_permute4x64_epi64(a.v, order)};
        }
    }
    /** Swizzle the elements.
     *
     * @tparam SourceElements A string of 4 characters; an element name copies that
     *         source element, while the literals '0' and '1' insert the numbers 0 and 1.
     */
    template<fixed_string SourceElements>
    [[nodiscard]] friend native_simd swizzle(native_simd a) noexcept
    {
        static_assert(SourceElements.size() == size);
        constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
        constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
        constexpr auto number_mask = one_mask | zero_mask;

        if constexpr (number_mask == 0b1111) {
            // All elements are literals; @a a is not needed.
            return swizzle_numbers<SourceElements>();

        } else if constexpr (number_mask == 0b0000) {
            // No literals; this is a pure permute.
            return permute<SourceElements>(a);

        } else if constexpr (number_mask == zero_mask) {
            // Only '0' literals; permute, then zero the masked elements.
            hilet ordered = permute<SourceElements>(a);
            return set_zero<zero_mask>(ordered);

        } else {
            // Mixed literals; permute, then blend in the generated numbers.
            hilet ordered = permute<SourceElements>(a);
            hilet numbers = swizzle_numbers<SourceElements>();
            return blend<number_mask>(ordered, numbers);
        }
    }
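    // Usage sketch (illustrative, assuming the usual 'x', 'y', 'z', 'w' element
    // names): swizzle() combines a permute with literal '0' and '1' elements.
    //
    //   auto s = native_simd<int64_t, 4>{5, 6, 7, 8};
    //   auto r = swizzle<"xy01">(s);                           // {5, 6, 0, 1}
    //   auto p = permute<"wzyx">(s);                           // {8, 7, 6, 5}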
    /** And-not: computes (~a) & b.
     */
    [[nodiscard]] friend native_simd not_and(native_simd a, native_simd b) noexcept
    {
        return native_simd{_mm256_andnot_si256(a.v, b.v)};
    }
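    // Usage sketch (illustrative): not_and() maps directly onto vpandn, which is
    // why operator~ above is written as not_and(a, ones()).
    //
    //   auto x = native_simd<int64_t, 4>{1, 2, 3, 4};
    //   auto m = native_simd<int64_t, 4>::from_mask(0b0011);
    //   auto r = not_and(m, x);                                // {0, 0, 3, 4}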
    friend std::ostream& operator<<(std::ostream& a, native_simd b)
    {
        return a << "(" << get<0>(b) << ", " << get<1>(b) << ", " << get<2>(b) << ", " << get<3>(b) << ")";
    }
    template<fixed_string SourceElements>
    [[nodiscard]] static native_simd swizzle_numbers() noexcept
    {
        constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
        constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
        constexpr auto number_mask = one_mask | zero_mask;
        constexpr auto alpha_mask = ~number_mask & 0b1111;

        if constexpr ((zero_mask | alpha_mask) == 0b1111) {
            // Only '0' literals are needed.
            return native_simd{};

        } else if constexpr ((one_mask | alpha_mask) == 0b1111) {
            // Only '1' literals are needed.
            return broadcast(value_type{1});

        } else {
            return native_simd{
                to_bool(one_mask & 0b0001) ? 1 : 0,
                to_bool(one_mask & 0b0010) ? 1 : 0,
                to_bool(one_mask & 0b0100) ? 1 : 0,
                to_bool(one_mask & 0b1000) ? 1 : 0};
        }
    }
};

}} // namespace hi::v1