7#include "native_simd_utility.hpp"
8#include "../utility/module.hpp"
14hi_warning_ignore_msvc(26490);
16namespace hi {
inline namespace v1 {
37struct native_simd<uint32_t,4> {
/** The type of a single element of the vector. */
38 using value_type = uint32_t;
/** The number of elements held in the vector. */
39 constexpr static size_t size = 4;
/** The intrinsic register type that stores the elements. */
40 using register_type = __m128i;
45 native_simd(native_simd
const&)
noexcept =
default;
46 native_simd(native_simd&&) noexcept = default;
47 native_simd& operator=(native_simd const&) noexcept = default;
48 native_simd& operator=(native_simd&&) noexcept = default;
52 native_simd() noexcept : v(_mm_setzero_si128()) {}
54 [[nodiscard]]
explicit native_simd(register_type
other) noexcept : v(
other) {}
/** Explicit conversion to the underlying register type.
 *
 * NOTE(review): the function body is not visible in this listing;
 * confirm it against the original file.
 */
56 [[nodiscard]]
explicit operator register_type() const noexcept
/** Construct from individual element values.
 *
 * The parameters `b`, `c` and `d` default to zero. Each value is bit-cast
 * to int32_t and the casts are listed in the order d, c, b, a.
 *
 * NOTE(review): the declaration of the first parameter (`a` is used below)
 * and the call that receives the four bit-cast values are on lines missing
 * from this listing; confirm against the original file.
 */
68 [[nodiscard]] native_simd(
70 value_type b = value_type{0},
71 value_type
c = value_type{0},
72 value_type d = value_type{0})
noexcept :
74 std::bit_cast<int32_t>(d),
75 std::bit_cast<int32_t>(
c),
76 std::bit_cast<int32_t>(b),
77 std::bit_cast<int32_t>(a)))
81 [[nodiscard]]
explicit native_simd(value_type
const *
other) noexcept :
82 v(_mm_loadu_si128(
reinterpret_cast<register_type
const *
>(
other)))
/** Write the register to memory at `out` using the unaligned store `_mm_storeu_si128`.
 *
 * @param out Destination pointer for the stored elements.
 *
 * NOTE(review): this listing skips a line between the signature and the
 * store; confirm whether a precondition check on `out` belongs there.
 */
86 void store(value_type *out)
const noexcept
89 _mm_storeu_si128(
reinterpret_cast<register_type *
>(out), v);
92 [[nodiscard]]
explicit native_simd(
void const *
other) noexcept : v(_mm_loadu_si128(
static_cast<register_type
const *
>(
other)))
/** Write the register to untyped memory at `out` using the unaligned store `_mm_storeu_si128`.
 *
 * @param out Destination pointer.
 *
 * NOTE(review): this listing skips a line between the signature and the
 * store; confirm whether a precondition check on `out` belongs there.
 */
96 void store(
void *out)
const noexcept
99 _mm_storeu_si128(
static_cast<register_type *
>(out), v);
/** Load the register from the start of a span of elements.
 *
 * The data is read from `other.data()` with the unaligned load `_mm_loadu_si128`.
 *
 * @param other The span to read from.
 *
 * NOTE(review): no check of the span's length is visible here, and one line
 * before the load is missing from this listing; confirm against the original file.
 */
102 [[nodiscard]]
explicit native_simd(std::span<value_type const>
other)
noexcept
105 v = _mm_loadu_si128(
reinterpret_cast<register_type
const *
>(
other.data()));
/** Write the register to the start of a span using the unaligned store `_mm_storeu_si128`.
 *
 * @param out The span whose `data()` receives the stored elements.
 *
 * NOTE(review): one line before the store is missing from this listing;
 * confirm whether a length check on `out` belongs there.
 */
108 void store(std::span<value_type> out)
const noexcept
111 _mm_storeu_si128(
reinterpret_cast<register_type *
>(out.data()), v);
114 [[nodiscard]]
explicit native_simd(array_type
other) noexcept :
115 v(_mm_loadu_si128(
reinterpret_cast<register_type
const *
>(
other.data())))
/** Explicit conversion to an array of elements.
 *
 * A value-initialized array `r` is created and the register is written into
 * it with the unaligned store `_mm_storeu_si128`.
 *
 * NOTE(review): the statement returning `r` is not visible in this listing.
 */
119 [[nodiscard]]
explicit operator array_type() const noexcept
121 auto r = array_type{};
122 _mm_storeu_si128(
reinterpret_cast<register_type *
>(r.data()), v);
126 [[nodiscard]]
explicit native_simd(native_simd<int32_t,4>
const &a)
noexcept;
137 [[nodiscard]]
static native_simd broadcast(value_type a)
noexcept
139 return native_simd{_mm_set1_epi32(std::bit_cast<int32_t>(a))};
/** Create a vector by broadcasting an element of another vector.
 *
 * Two alternative implementations are listed: `_mm_broadcastd_epi32` and
 * `_mm_shuffle_epi32` with an all-zero selector, which picks element 0 for
 * every position.
 *
 * NOTE(review): the preprocessor lines that choose between the two returns
 * are missing from this listing; confirm the condition in the original file.
 */
151 [[nodiscard]]
static native_simd broadcast(native_simd a)
noexcept
154 return native_simd{_mm_broadcastd_epi32(a.v)};
156 return native_simd{_mm_shuffle_epi32(a.v, 0b00'00'00'00)};
160 [[nodiscard]]
static native_simd ones() noexcept
162 hilet tmp = _mm_undefined_si128();
163 return native_simd{_mm_cmpeq_epi32(tmp, tmp)};
/** Build a vector from a bit mask.
 *
 * For each of the low four bits of `mask`, the corresponding value is
 * all-ones (0xffff'ffff) when the bit is set and 0 when it is clear.
 * Bit 0 is listed first and bit 3 last.
 *
 * @param mask The bit mask to expand.
 *
 * NOTE(review): the line that opens the expression receiving these four
 * values, and one line near the top of the body, are missing from this listing.
 */
168 [[nodiscard]]
static native_simd from_mask(
size_t mask)
noexcept
172 constexpr auto ones_ = std::bit_cast<value_type>(0xffff'ffffU);
174 mask & 0b0001 ? ones_ : 0, mask & 0b0010 ? ones_ : 0, mask & 0b0100 ? ones_ : 0, mask & 0b1000 ? ones_ : 0};
179 [[nodiscard]]
size_t mask() const noexcept
181 return narrow_cast<size_t>(_mm_movemask_ps(_mm_castsi128_ps(v)));
184 [[nodiscard]]
friend bool equal(native_simd a, native_simd b)
noexcept
186 return (a == b).mask() == 0b1111;
189 [[nodiscard]]
friend native_simd operator==(native_simd a, native_simd b)
noexcept
191 return native_simd{_mm_cmpeq_epi32(a.v, b.v)};
/** Element-wise inequality comparison.
 *
 * NOTE(review): the function body is not visible in this listing;
 * confirm it against the original file.
 */
194 [[nodiscard]]
friend native_simd
operator!=(native_simd a, native_simd b)
noexcept
/** Unary plus.
 *
 * NOTE(review): the function body is not visible in this listing;
 * confirm it against the original file.
 */
199 [[nodiscard]]
friend native_simd operator+(native_simd a)
noexcept
204 [[nodiscard]]
friend native_simd operator+(native_simd a, native_simd b)
noexcept
206 return native_simd{_mm_add_epi32(a.v, b.v)};
209 [[nodiscard]]
friend native_simd operator-(native_simd a, native_simd b)
noexcept
211 return native_simd{_mm_sub_epi32(a.v, b.v)};
214 [[nodiscard]]
friend native_simd operator*(native_simd a, native_simd b)
noexcept
216 return native_simd{_mm_mullo_epi32(a.v, b.v)};
219 [[nodiscard]]
friend native_simd operator&(native_simd a, native_simd b)
noexcept
221 return native_simd{_mm_and_si128(a.v, b.v)};
224 [[nodiscard]]
friend native_simd operator|(native_simd a, native_simd b)
noexcept
226 return native_simd{_mm_or_si128(a.v, b.v)};
229 [[nodiscard]]
friend native_simd operator^(native_simd a, native_simd b)
noexcept
231 return native_simd{_mm_xor_si128(a.v, b.v)};
234 [[nodiscard]]
friend native_simd operator~(native_simd a)
noexcept
236 auto ones = _mm_undefined_si128();
237 ones = _mm_cmpeq_epi32(ones, ones);
238 return native_simd{_mm_andnot_si128(a.v, ones)};
/** Shift every element left by `b` bits using `_mm_slli_epi32`.
 *
 * @param a The vector to shift.
 * @param b The number of bits to shift by.
 *
 * NOTE(review): one line before the return is missing from this listing;
 * confirm whether a bounds check on `b` belongs there.
 */
241 [[nodiscard]]
friend native_simd operator<<(native_simd a,
unsigned int b)
noexcept
244 return native_simd{_mm_slli_epi32(a.v, b)};
/** Shift every element right by `b` bits using the logical shift `_mm_srli_epi32`.
 *
 * @param a The vector to shift.
 * @param b The number of bits to shift by.
 *
 * NOTE(review): one line before the return is missing from this listing;
 * confirm whether a bounds check on `b` belongs there.
 */
247 [[nodiscard]]
friend native_simd operator>>(native_simd a,
unsigned int b)
noexcept
250 return native_simd{_mm_srli_epi32(a.v, b)};
253 [[nodiscard]]
friend native_simd
min(native_simd a, native_simd b)
noexcept
255 return native_simd{_mm_min_epu32(a.v, b.v)};
258 [[nodiscard]]
friend native_simd
max(native_simd a, native_simd b)
noexcept
260 return native_simd{_mm_max_epu32(a.v, b.v)};
/** Set selected elements to zero.
 *
 * @tparam Mask Bit mask of the elements to clear; must not exceed 0b1111.
 * @param a The vector to modify.
 *
 * Two alternative implementations are listed. The first passes `Mask` as the
 * immediate of `_mm_insert_ps` with `a` as both operands. The second expands
 * `Mask` with `from_mask()` and keeps only the elements of `a` whose mask
 * bit is clear, via `not_and()`.
 *
 * NOTE(review): the preprocessor lines that choose between the two
 * implementations are missing from this listing.
 */
269 template<
size_t Mask>
270 [[nodiscard]]
friend native_simd set_zero(native_simd a)
noexcept
272 static_assert(Mask <= 0b1111);
274 return native_simd{_mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(a.v), _mm_castsi128_ps(a.v), Mask))};
276 hilet mask = from_mask(Mask);
277 return not_and(mask, a);
/** Replace one element of a vector.
 *
 * @tparam Index The element to replace; must be less than 4.
 * @param a The vector to modify.
 * @param b The new value for element `Index`.
 *
 * Two alternative implementations are listed. The first uses
 * `_mm_insert_epi32` with the bits of `b`. The second builds a one-element
 * mask, clears that element of `a` with `not_and()` and ORs in the matching
 * element of `broadcast(b)`.
 *
 * NOTE(review): the preprocessor lines that choose between the two
 * implementations are missing from this listing.
 */
288 template<
size_t Index>
289 [[nodiscard]]
friend native_simd insert(native_simd a, value_type b)
noexcept
291 static_assert(Index < 4);
294 return native_simd{_mm_insert_epi32(a.v, std::bit_cast<int32_t>(b), Index)};
296 hilet mask = from_mask(1_uz << Index);
297 return not_and(mask, a) | (mask & broadcast(b));
/** Read one element of a vector.
 *
 * @tparam Index The element to read.
 * @param a The vector to read from.
 *
 * Two alternative implementations are listed. The first uses
 * `_mm_extract_epi32` and bit-casts the result to `value_type`. The second
 * converts the vector to `array_type` and reads it with `std::get<Index>`.
 *
 * NOTE(review): the preprocessor lines that choose between the two
 * implementations are missing from this listing. Unlike `insert()`, no
 * `static_assert` on `Index` is visible here; confirm whether one is wanted.
 */
307 template<
size_t Index>
308 [[nodiscard]]
friend value_type get(native_simd a)
noexcept
311 return std::bit_cast<value_type>(_mm_extract_epi32(a.v, Index));
313 auto r =
static_cast<array_type
>(a);
314 return std::get<Index>(r);
/** Combine elements from two vectors.
 *
 * @tparam Mask Bit mask selecting the source of each element.
 * @param a The vector supplying elements whose mask bit is clear.
 * @param b The vector supplying elements whose mask bit is set.
 *
 * Two alternative implementations are listed. The first uses
 * `_mm_blend_epi32`. The second expands `Mask` with `from_mask()`, keeps the
 * unselected elements of `a` via `not_and()` and ORs in the selected
 * elements of `b`.
 *
 * NOTE(review): the preprocessor lines that choose between the two
 * implementations are missing from this listing. Unlike `set_zero()`, no
 * `static_assert` on `Mask` is visible here; confirm whether one is wanted.
 */
326 template<
size_t Mask>
327 [[nodiscard]]
friend native_simd blend(native_simd a, native_simd b)
noexcept
330 return native_simd{_mm_blend_epi32(a.v, b.v, Mask)};
332 hilet mask = from_mask(Mask);
333 return not_and(mask, a) | (mask & b);
/** Reorder the elements of a vector.
 *
 * @tparam SourceElements String describing the element order; it is turned
 *         into packed indices by `detail::native_swizzle_to_packed_indices`.
 * @param a The vector to reorder.
 *
 * The packed order 0b11'10'01'00 and the packed order 0b00'00'00'00 are
 * handled as special cases; otherwise `_mm_shuffle_epi32` is used with the
 * packed order as its immediate.
 *
 * NOTE(review): the statements inside the two special-case branches, and the
 * line opening the final branch, are missing from this listing.
 */
349 template<fixed_
string SourceElements>
350 [[nodiscard]]
friend native_simd permute(native_simd a)
noexcept
352 constexpr auto order = detail::native_swizzle_to_packed_indices<SourceElements, size>();
354 if constexpr (order == 0b11'10'01'00) {
356 }
else if constexpr (order == 0b00'00'00'00) {
359 return native_simd{_mm_shuffle_epi32(a.v, order)};
/** Reorder elements and/or replace them with the constants zero and one.
 *
 * @tparam SourceElements String describing each result element. The
 *         characters '1' and '0' are located with
 *         `detail::native_swizzle_to_mask`; the union of the two masks is
 *         `number_mask`.
 * @param a The vector to swizzle.
 *
 * NOTE(review): several lines between the branches are missing from this
 * listing (including the line opening the last branch); confirm against the
 * original file whether any of them are preprocessor conditionals.
 */
379 template<fixed_
string SourceElements>
380 [[nodiscard]]
friend native_simd swizzle(native_simd a)
noexcept
382 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
383 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
384 constexpr auto number_mask = one_mask | zero_mask;
// Every element is a constant: no element of `a` is needed.
386 if constexpr (number_mask == 0b1111) {
388 return swizzle_numbers<SourceElements>();
390 }
// No element is a constant: a plain permute is sufficient.
else if constexpr (number_mask == 0b0000) {
392 return permute<SourceElements>(a);
395 }
// The only constants are zeros: permute, then clear those elements.
else if constexpr (number_mask == zero_mask) {
397 hilet ordered = permute<SourceElements>(a);
398 return set_zero<zero_mask>(ordered);
// Otherwise: permute, build the constants, and blend them in where number_mask is set.
402 hilet ordered = permute<SourceElements>(a);
403 hilet numbers = swizzle_numbers<SourceElements>();
404 return blend<number_mask>(ordered, numbers);
419 [[nodiscard]]
friend native_simd horizontal_add(native_simd a, native_simd b)
noexcept
421 return native_simd{_mm_hadd_epi32(a.v, b.v)};
436 [[nodiscard]]
friend native_simd horizontal_sub(native_simd a, native_simd b)
noexcept
438 return native_simd{_mm_hsub_epi32(a.v, b.v)};
448 [[nodiscard]]
friend native_simd horizontal_sum(native_simd a)
noexcept
450 hilet tmp = a + permute<
"cdab">(a);
451 return tmp + permute<
"badc">(tmp);
459 [[nodiscard]]
friend native_simd not_and(native_simd a, native_simd b)
noexcept
461 return native_simd{_mm_andnot_si128(a.v, b.v)};
// Writes the four elements of `b` to `a` as "(e0, e1, e2, e3)" and returns the result of the chained `<<`.
// NOTE(review): the signature of this function is missing from this listing;
// presumably `a` is an output stream and `b` a native_simd — confirm against the original file.
466 return a <<
"(" << get<0>(b) <<
", " << get<1>(b) <<
", " << get<2>(b) <<
", " << get<3>(b) <<
")";
/** Build the vector of constants requested by a swizzle string.
 *
 * @tparam SourceElements The swizzle string; positions holding '1' and '0'
 *         are located with `detail::native_swizzle_to_mask`. `alpha_mask`
 *         marks the positions that hold neither.
 *
 * NOTE(review): the lines opening the final branch and its return expression
 * are missing from this listing; only the four per-element values are visible.
 */
469 template<fixed_
string SourceElements>
470 [[nodiscard]]
static native_simd swizzle_numbers() noexcept
472 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
473 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
474 constexpr auto number_mask = one_mask | zero_mask;
475 constexpr auto alpha_mask = ~number_mask & 0b1111;
// No '1' is requested anywhere: an all-zero register is sufficient.
477 if constexpr ((zero_mask | alpha_mask) == 0b1111) {
478 return native_simd{_mm_setzero_si128()};
480 }
// No '0' is requested anywhere: set every element to 1.
else if constexpr ((one_mask | alpha_mask) == 0b1111) {
481 return native_simd{_mm_set1_epi32(1)};
// Mixed case: each element is 1 where its bit in one_mask is set, otherwise 0.
485 to_bool(one_mask & 0b0001) ? 1 : 0,
486 to_bool(one_mask & 0b0010) ? 1 : 0,
487 to_bool(one_mask & 0b0100) ? 1 : 0,
488 to_bool(one_mask & 0b1000) ? 1 : 0};
#define hi_axiom_bounds(x,...)
Specify an axiom that the value is within bounds.
Definition assert.hpp:249
#define hi_axiom(expression,...)
Specify an axiom; an expression that is true.
Definition assert.hpp:238
#define hi_axiom_not_null(expression,...)
Assert that an expression is not nullptr.
Definition assert.hpp:257
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
@ other
The gui_event does not have associated data.
DOXYGEN BUG.
Definition algorithm.hpp:13
geometry/margins.hpp
Definition cache.hpp:11