7#include "native_simd_utility.hpp"
8#include "../utility/module.hpp"
14hi_warning_ignore_msvc(26490);
16namespace hi {
inline namespace v1 {
37struct native_simd<int32_t, 4> {
/// Member aliases of this specialization: element type, number of lanes and the register type.
// NOTE(review): the number at the start of each line looks like a listing line number fused in by extraction, not code.
// NOTE(review): later members use `array_type` and a data member `v`; their declarations are not visible here - confirm against the original header.
38 using value_type = int32_t;
39 constexpr static size_t size = 4;
40 using register_type = __m128i;
45 native_simd(native_simd
const&)
noexcept =
default;
46 native_simd(native_simd&&) noexcept = default;
47 native_simd& operator=(native_simd const&) noexcept = default;
48 native_simd& operator=(native_simd&&) noexcept = default;
52 native_simd() noexcept : v(_mm_setzero_si128()) {}
54 [[nodiscard]]
explicit native_simd(register_type
other) noexcept : v(
other) {}
/// Explicit conversion to the underlying register type.
// NOTE(review): the function body is missing from this listing - presumably it returns `v`; confirm against the original header.
56 [[nodiscard]]
explicit operator register_type() const noexcept
/// Construct from individual scalars; b, c and d default to zero.
/// _mm_set_epi32 takes its arguments from the highest element down, so `a` lands in element 0.
// NOTE(review): the initializer reads `a`, but no parameter `a` is visible (listing line 69 is absent) - confirm the first parameter against the original header.
// NOTE(review): the constructor body braces are also absent from this listing.
68 [[nodiscard]] native_simd(
70 value_type b = value_type{0},
71 value_type c = value_type{0},
72 value_type d = value_type{0})
noexcept :
73 v(_mm_set_epi32(d, c, b, a))
77 [[nodiscard]]
explicit native_simd(value_type
const *
other) noexcept :
78 v(_mm_loadu_si128(
reinterpret_cast<register_type
const *
>(
other)))
/// Write all four lanes to `out` using an unaligned 128-bit store.
// NOTE(review): one listing line between the signature and the store is absent (as are the braces) - possibly a null check on `out`; confirm.
82 void store(value_type *out)
const noexcept
85 _mm_storeu_si128(
reinterpret_cast<register_type *
>(out), v);
88 [[nodiscard]]
explicit native_simd(
void const *
other) noexcept : v(_mm_loadu_si128(
static_cast<register_type
const *
>(
other)))
/// Write the full 128-bit register to untyped memory using an unaligned store.
// NOTE(review): one listing line between the signature and the store is absent (as are the braces) - possibly a null check on `out`; confirm.
92 void store(
void *out)
const noexcept
95 _mm_storeu_si128(
static_cast<register_type *
>(out), v);
/// Load 128 bits starting at the first element of a span, using an unaligned load.
// NOTE(review): one listing line before the load is absent (as are the braces) - presumably a check that the span holds enough elements; confirm.
98 [[nodiscard]]
explicit native_simd(std::span<value_type const>
other)
noexcept
101 v = _mm_loadu_si128(
reinterpret_cast<register_type
const *
>(
other.data()));
/// Write the full 128-bit register to the start of a span, using an unaligned store.
// NOTE(review): one listing line before the store is absent (as are the braces) - presumably a check that the span is large enough; confirm.
104 void store(std::span<value_type> out)
const noexcept
107 _mm_storeu_si128(
reinterpret_cast<register_type *
>(out.data()), v);
110 [[nodiscard]]
explicit native_simd(array_type
other) noexcept :
111 v(_mm_loadu_si128(
reinterpret_cast<register_type
const *
>(
other.data())))
/// Explicit conversion to array_type: the register is stored into a value-initialized array.
// NOTE(review): no return statement is visible in this listing - presumably `return r;` follows the store; confirm.
115 [[nodiscard]]
explicit operator array_type() const noexcept
117 auto r = array_type{};
118 _mm_storeu_si128(
reinterpret_cast<register_type *
>(r.data()), v);
/// Converting constructors from other four-lane vector types (float, uint32_t, double).
/// Only declarations appear here; the definitions are not part of this listing.
// NOTE(review): a listing line is skipped just before the double overload - possibly a preprocessor guard that was lost; confirm.
122 [[nodiscard]]
explicit native_simd(native_simd<float, 4>
const& a)
noexcept;
123 [[nodiscard]]
explicit native_simd(native_simd<uint32_t, 4>
const& a)
noexcept;
125 [[nodiscard]]
explicit native_simd(native_simd<double, 4>
const& a)
noexcept;
137 [[nodiscard]]
static native_simd broadcast(value_type a)
noexcept
139 return native_simd{_mm_set1_epi32(a)};
/// Replicate element 0 of `a` into all four lanes.
// NOTE(review): two return statements appear back to back; presumably they were alternative preprocessor branches
// (a dedicated broadcast instruction versus a shuffle fallback) whose directives were lost - confirm.
151 [[nodiscard]]
static native_simd broadcast(native_simd a)
noexcept
154 return native_simd{_mm_broadcastd_epi32(a.v)};
156 return native_simd{_mm_shuffle_epi32(a.v, 0b00'00'00'00)};
160 [[nodiscard]]
static native_simd ones() noexcept
162 hilet tmp = _mm_undefined_si128();
163 return native_simd{_mm_cmpeq_epi32(tmp, tmp)};
/// Build a lane mask from a compile-time bit mask: one value per mask bit, bit 0 first,
/// all-ones (0xffffffff) when the bit is set and zero otherwise.
// NOTE(review): the statement that opens the braced list (listing line 169, presumably a return of a native_simd) is absent; confirm.
166 template<
size_t Mask>
167 [[nodiscard]]
static native_simd from_mask() noexcept
170 to_bool(Mask & 0b0001) ?
static_cast<value_type
>(0xffff'ffff) : 0,
171 to_bool(Mask & 0b0010) ? static_cast<value_type>(0xffff'ffff) : 0,
172 to_bool(Mask & 0b0100) ? static_cast<value_type>(0xffff'ffff) : 0,
173 to_bool(Mask & 0b1000) ? static_cast<value_type>(0xffff'ffff) : 0};
/// Build a lane mask from a run-time bit mask.
/// Each lane receives a 32-bit value taken from `a_`; the final arithmetic right shift by 31
/// copies the sign bit of each lane across the whole lane.
// NOTE(review): `a_` is used but never declared in the visible lines, and the listing lines between the
// inserts (179-184, 186, 188, 190, 192) are absent - presumably they derive `a_` from `a` and shift it
// between inserts; confirm against the original header.
178 [[nodiscard]]
static native_simd from_mask(
size_t a)
noexcept
185 auto tmp = _mm_cvtsi32_si128(truncate<uint32_t>(a_));
187 tmp = _mm_insert_epi32(tmp, truncate<uint32_t>(a_), 1);
189 tmp = _mm_insert_epi32(tmp, truncate<uint32_t>(a_), 2);
191 tmp = _mm_insert_epi32(tmp, truncate<uint32_t>(a_), 3);
193 tmp = _mm_srai_epi32(tmp, 31);
194 return native_simd{tmp};
199 [[nodiscard]]
size_t mask() const noexcept
201 return narrow_cast<size_t>(_mm_movemask_ps(_mm_castsi128_ps(v)));
204 [[nodiscard]]
friend bool equal(native_simd a, native_simd b)
noexcept
206 return (a == b).mask() == 0b1111;
209 [[nodiscard]]
friend native_simd operator==(native_simd a, native_simd b)
noexcept
211 return native_simd{_mm_cmpeq_epi32(a.v, b.v)};
/// Lane-wise inequality comparison returning a vector.
// NOTE(review): the function body is missing from this listing - presumably the complement of operator==; confirm.
214 [[nodiscard]]
friend native_simd
operator!=(native_simd a, native_simd b)
noexcept
219 [[nodiscard]]
friend native_simd operator<(native_simd a, native_simd b)
noexcept
221 return native_simd{_mm_cmplt_epi32(a.v, b.v)};
224 [[nodiscard]]
friend native_simd
operator>(native_simd a, native_simd b)
noexcept
226 return native_simd{_mm_cmpgt_epi32(a.v, b.v)};
/// Lane-wise less-than-or-equal comparison returning a vector.
// NOTE(review): the function body is missing from this listing - presumably the complement of operator greater-than; confirm.
229 [[nodiscard]]
friend native_simd
operator<=(native_simd a, native_simd b)
noexcept
/// Lane-wise greater-than-or-equal comparison returning a vector.
// NOTE(review): the function body is missing from this listing - presumably the complement of operator less-than; confirm.
234 [[nodiscard]]
friend native_simd
operator>=(native_simd a, native_simd b)
noexcept
/// Unary plus.
// NOTE(review): the function body is missing from this listing - presumably it returns `a` unchanged; confirm.
239 [[nodiscard]]
friend native_simd operator+(native_simd a)
noexcept
244 [[nodiscard]]
friend native_simd operator-(native_simd a)
noexcept
246 return native_simd{} - a;
249 [[nodiscard]]
friend native_simd operator+(native_simd a, native_simd b)
noexcept
251 return native_simd{_mm_add_epi32(a.v, b.v)};
254 [[nodiscard]]
friend native_simd operator-(native_simd a, native_simd b)
noexcept
256 return native_simd{_mm_sub_epi32(a.v, b.v)};
259 [[nodiscard]]
friend native_simd operator*(native_simd a, native_simd b)
noexcept
261 return native_simd{_mm_mullo_epi32(a.v, b.v)};
264 [[nodiscard]]
friend native_simd operator&(native_simd a, native_simd b)
noexcept
266 return native_simd{_mm_and_si128(a.v, b.v)};
269 [[nodiscard]]
friend native_simd operator|(native_simd a, native_simd b)
noexcept
271 return native_simd{_mm_or_si128(a.v, b.v)};
274 [[nodiscard]]
friend native_simd operator^(native_simd a, native_simd b)
noexcept
276 return native_simd{_mm_xor_si128(a.v, b.v)};
279 [[nodiscard]]
friend native_simd operator~(native_simd a)
noexcept
281 auto ones = _mm_undefined_si128();
282 ones = _mm_cmpeq_epi32(ones, ones);
283 return native_simd{_mm_andnot_si128(a.v, ones)};
/// Shift every lane left by `b` bits, shifting in zeros.
// NOTE(review): one listing line before the return is absent (as are the braces) - possibly a bounds check on `b`; confirm.
286 [[nodiscard]]
friend native_simd operator<<(native_simd a,
unsigned int b)
noexcept
289 return native_simd{_mm_slli_epi32(a.v, b)};
/// Shift every lane right by `b` bits arithmetically, so the sign bit is replicated.
// NOTE(review): one listing line before the return is absent (as are the braces) - possibly a bounds check on `b`; confirm.
292 [[nodiscard]]
friend native_simd operator>>(native_simd a,
unsigned int b)
noexcept
295 return native_simd{_mm_srai_epi32(a.v, b)};
/// Lane-wise minimum of two vectors.
// NOTE(review): two return statements are visible and the second uses `mask`, which is not declared in the visible lines.
// Presumably these were preprocessor branches (intrinsic path versus compare-and-select fallback) and the
// fallback computes `mask` from a comparison of a and b - confirm against the original header.
298 [[nodiscard]]
friend native_simd
min(native_simd a, native_simd b)
noexcept
301 return native_simd{_mm_min_epi32(a.v, b.v)};
304 return (mask & a) | not_and(mask, b);
/// Lane-wise maximum of two vectors.
// NOTE(review): two return statements are visible and the second uses `mask`, which is not declared in the visible lines.
// Presumably these were preprocessor branches (intrinsic path versus compare-and-select fallback) and the
// fallback computes `mask` from a comparison of a and b - confirm against the original header.
308 [[nodiscard]]
friend native_simd
max(native_simd a, native_simd b)
noexcept
311 return native_simd{_mm_max_epi32(a.v, b.v)};
314 return (mask & a) | not_and(mask, b);
/// Lane-wise absolute value.
/// The last two statements select `a` in lanes where a is at least zero and `-a` in the other lanes.
// NOTE(review): the intrinsic return and the mask-based statements appear back to back; presumably they were
// alternative preprocessor branches whose directives were lost - confirm.
318 [[nodiscard]]
friend native_simd abs(native_simd a)
noexcept
321 return native_simd{_mm_abs_epi32(a.v)};
323 hilet mask = a >= native_simd{};
324 return (mask & a) | not_and(mask, -a);
/// Set to zero the lanes selected by the compile-time Mask (at most four bits).
/// The last two statements clear the selected lanes by and-not with a mask built from Mask.
// NOTE(review): the insert_ps return and the mask-based statements appear back to back; presumably they were
// alternative preprocessor branches whose directives were lost - confirm.
334 template<
size_t Mask>
335 [[nodiscard]]
friend native_simd set_zero(native_simd a)
noexcept
337 static_assert(Mask <= 0b1111);
339 return native_simd{_mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(a.v), _mm_castsi128_ps(a.v), Mask))};
341 hilet mask = from_mask<Mask>();
342 return not_and(mask, a);
/// Return a copy of `a` with lane Index (0 to 3) replaced by the scalar `b`.
/// The last two statements keep the other lanes of `a` and merge in a broadcast of `b` for the selected lane.
// NOTE(review): the intrinsic return and the mask-based statements appear back to back; presumably they were
// alternative preprocessor branches whose directives were lost - confirm.
353 template<
size_t Index>
354 [[nodiscard]]
friend native_simd insert(native_simd a, value_type b)
noexcept
356 static_assert(Index < 4);
359 return native_simd{_mm_insert_epi32(a.v, b, Index)};
361 hilet mask = from_mask<1_uz << Index>();
362 return not_and(mask, a) | (mask & broadcast(b));
/// Read the scalar stored in lane Index.
/// The last statements convert the vector to array_type and read the element with std::get.
// NOTE(review): the extract intrinsic return and the array-based statements appear back to back; presumably they
// were alternative preprocessor branches whose directives were lost - confirm. No bounds assertion on Index is visible.
372 template<
size_t Index>
373 [[nodiscard]]
friend value_type get(native_simd a)
noexcept
376 return _mm_extract_epi32(a.v, Index);
378 auto r =
static_cast<array_type
>(a);
379 return std::get<Index>(r);
/// Combine two vectors lane by lane: lanes whose Mask bit is set come from `b`, the others from `a`.
// NOTE(review): the blend intrinsic return and the mask-based statements appear back to back; presumably they
// were alternative preprocessor branches whose directives were lost - confirm.
391 template<
size_t Mask>
392 [[nodiscard]]
friend native_simd blend(native_simd a, native_simd b)
noexcept
395 return native_simd{_mm_blend_epi32(a.v, b.v, Mask)};
397 hilet mask = from_mask<Mask>();
398 return not_and(mask, a) | (mask & b);
/// Reorder the lanes of `a` according to the compile-time string SourceElements.
/// The string is turned into packed 2-bit lane indices, which are used as the shuffle immediate.
// NOTE(review): the template parameter type is split across two lines here (fixed_ / string) - an extraction artifact.
// NOTE(review): the bodies of the first two branches are absent; presumably the identity order returns `a`
// and the all-zero order broadcasts element 0, with the shuffle in a final else branch - confirm.
414 template<fixed_
string SourceElements>
415 [[nodiscard]]
friend native_simd permute(native_simd a)
noexcept
417 constexpr auto order = detail::native_swizzle_to_packed_indices<SourceElements, size>();
419 if constexpr (order == 0b11'10'01'00) {
421 }
else if constexpr (order == 0b00'00'00'00) {
424 return native_simd{_mm_shuffle_epi32(a.v, order)};
/// Reorder lanes and/or fill lanes with the literals 0 and 1, as described by SourceElements.
/// Positions holding a digit are collected in number_mask, and four cases are distinguished:
/// - every position is a digit: only swizzle_numbers is used;
/// - no position is a digit: only permute is used;
/// - the only digits are zeros: permute, then clear those lanes with set_zero;
/// - otherwise: permute, then blend in the literal lanes from swizzle_numbers.
// NOTE(review): the template parameter type is split across two lines here (fixed_ / string) - an extraction artifact.
// NOTE(review): the opener of the final else branch (before listing line 467) and the closing braces are absent.
444 template<fixed_
string SourceElements>
445 [[nodiscard]]
friend native_simd swizzle(native_simd a)
noexcept
447 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
448 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
449 constexpr auto number_mask = one_mask | zero_mask;
451 if constexpr (number_mask == 0b1111) {
453 return swizzle_numbers<SourceElements>();
455 }
else if constexpr (number_mask == 0b0000) {
457 return permute<SourceElements>(a);
460 }
else if constexpr (number_mask == zero_mask) {
462 hilet ordered = permute<SourceElements>(a);
463 return set_zero<zero_mask>(ordered);
467 hilet ordered = permute<SourceElements>(a);
468 hilet numbers = swizzle_numbers<SourceElements>();
469 return blend<number_mask>(ordered, numbers);
484 [[nodiscard]]
friend native_simd horizontal_add(native_simd a, native_simd b)
noexcept
486 return native_simd{_mm_hadd_epi32(a.v, b.v)};
501 [[nodiscard]]
friend native_simd horizontal_sub(native_simd a, native_simd b)
noexcept
503 return native_simd{_mm_hsub_epi32(a.v, b.v)};
513 [[nodiscard]]
friend native_simd horizontal_sum(native_simd a)
noexcept
515 auto tmp = a + permute<
"cdab">(a);
516 return tmp + permute<
"badc">(tmp);
529 template<
size_t SourceMask>
530 [[nodiscard]]
friend native_simd dot_product(native_simd a, native_simd b)
noexcept
532 static_assert(SourceMask <= 0b1111);
533 return horizontal_sum(set_zero<~SourceMask & 0b1111>(a * b));
541 [[nodiscard]]
friend native_simd not_and(native_simd a, native_simd b)
noexcept
543 return native_simd{_mm_andnot_si128(a.v, b.v)};
/// Write the four lanes of `b` to `a` as a parenthesized, comma-separated list.
// NOTE(review): the signature is missing from this listing - presumably a stream insertion operator taking
// an output stream `a` and a native_simd `b`; confirm against the original header.
548 return a <<
"(" << get<0>(b) <<
", " << get<1>(b) <<
", " << get<2>(b) <<
", " << get<3>(b) <<
")";
/// Build the vector of literal values (0 or 1) requested by the digits in SourceElements.
/// Positions that are not digits (alpha_mask) are treated as do-not-care:
/// - if every position is a zero digit or do-not-care, return an all-zero vector;
/// - if every position is a one digit or do-not-care, return a vector of ones;
/// - otherwise list one value per lane, 1 where one_mask has the bit set and 0 elsewhere.
// NOTE(review): the template parameter type is split across two lines here (fixed_ / string) - an extraction artifact.
// NOTE(review): the opener of the final branch (listing lines 564-566, presumably an else returning a braced
// native_simd) and the closing braces are absent; confirm.
551 template<fixed_
string SourceElements>
552 [[nodiscard]]
static native_simd swizzle_numbers() noexcept
554 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
555 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
556 constexpr auto number_mask = one_mask | zero_mask;
557 constexpr auto alpha_mask = ~number_mask & 0b1111;
559 if constexpr ((zero_mask | alpha_mask) == 0b1111) {
560 return native_simd{_mm_setzero_si128()};
562 }
else if constexpr ((one_mask | alpha_mask) == 0b1111) {
563 return native_simd{_mm_set1_epi32(1)};
567 to_bool(one_mask & 0b0001) ? 1 : 0,
568 to_bool(one_mask & 0b0010) ? 1 : 0,
569 to_bool(one_mask & 0b0100) ? 1 : 0,
570 to_bool(one_mask & 0b1000) ? 1 : 0};
#define hi_axiom_bounds(x,...)
Specify an axiom that the value is within bounds.
Definition assert.hpp:264
#define hi_axiom(expression,...)
Specify an axiom; an expression that is true.
Definition assert.hpp:253
#define hi_axiom_not_null(expression,...)
Assert if an expression is not nullptr.
Definition assert.hpp:272
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
@ other
The gui_event does not have associated data.
DOXYGEN BUG.
Definition algorithm.hpp:13
geometry/margins.hpp
Definition cache.hpp:11