7#include "native_simd_utility.hpp"
8#include "../utility/module.hpp"
13namespace hi {
inline namespace v1 {
34struct native_simd<float, 4> {
35 using value_type = float;
36 constexpr static size_t size = 4;
39 using register_type = __m128;
43 native_simd(native_simd
const&)
noexcept =
default;
44 native_simd(native_simd&&) noexcept = default;
45 native_simd& operator=(native_simd const&) noexcept = default;
46 native_simd& operator=(native_simd&&) noexcept = default;
50 native_simd() noexcept : v(_mm_setzero_ps()) {}
52 [[nodiscard]]
explicit native_simd(register_type
other) noexcept : v(
other) {}
54 [[nodiscard]]
explicit operator register_type() const noexcept
63 [[nodiscard]]
explicit native_simd(value_type a) noexcept : v(_mm_set_ss(a)) {}
72 [[nodiscard]] native_simd(value_type a, value_type b, value_type c = value_type{0}, value_type d = value_type{0})
noexcept :
73 v(_mm_set_ps(d,
c, b, a))
77 [[nodiscard]]
explicit native_simd(value_type
const *
other) noexcept : v(_mm_loadu_ps(
other)) {}
79 void store(value_type *out)
const noexcept
82 _mm_storeu_ps(out, v);
85 [[nodiscard]]
explicit native_simd(
void const *
other) noexcept : v(_mm_loadu_ps(
static_cast<value_type
const *
>(
other))) {}
87 void store(
void *out)
const noexcept
90 _mm_storeu_ps(
static_cast<value_type *
>(out), v);
93 [[nodiscard]]
explicit native_simd(std::span<value_type const>
other)
noexcept
96 v = _mm_loadu_ps(
other.data());
99 void store(std::span<value_type> out)
const noexcept
102 _mm_storeu_ps(out.data(), v);
105 [[nodiscard]]
explicit native_simd(array_type
other) noexcept : v(_mm_loadu_ps(
other.data())) {}
107 [[nodiscard]]
explicit operator array_type() const noexcept
109 auto r = array_type{};
110 _mm_storeu_ps(r.data(), v);
115 [[nodiscard]]
explicit native_simd(native_simd<int32_t, 4>
const& a)
noexcept;
118 [[nodiscard]]
explicit native_simd(native_simd<double, 4>
const& a)
noexcept;
130 [[nodiscard]]
static native_simd broadcast(value_type a)
noexcept
132 return native_simd{_mm_set1_ps(a)};
144 [[nodiscard]]
static native_simd broadcast(native_simd a)
noexcept
147 return native_simd{_mm_broadcastss_ps(a.v)};
149 return native_simd{_mm_shuffle_ps(a.v, a.v, 0b00'00'00'00)};
155 [[nodiscard]]
static native_simd from_mask(
size_t a)
noexcept
162 auto tmp = _mm_cvtsi32_si128(truncate<uint32_t>(a_));
164 tmp = _mm_insert_epi32(tmp, truncate<uint32_t>(a_), 1);
166 tmp = _mm_insert_epi32(tmp, truncate<uint32_t>(a_), 2);
168 tmp = _mm_insert_epi32(tmp, truncate<uint32_t>(a_), 3);
170 tmp = _mm_srai_epi32(tmp, 31);
171 return native_simd{_mm_castsi128_ps(tmp)};
176 [[nodiscard]]
static native_simd ones() noexcept
179 auto ones = _mm_undefined_si128();
180 ones = _mm_cmpeq_epi32(ones, ones);
181 return native_simd{_mm_castsi128_ps(ones)};
183 auto ones = _mm_setzero_ps();
184 ones = _mm_cmpeq_ps(ones, ones);
185 return native_simd{ones};
191 [[nodiscard]]
size_t mask() const noexcept
193 return narrow_cast<size_t>(_mm_movemask_ps(v));
202 [[nodiscard]]
friend bool equal(native_simd a, native_simd b)
noexcept
205 return _mm_movemask_epi8(_mm_cmpeq_epi32(_mm_castps_si128(a.v), _mm_castps_si128(b.v))) == 0b1111'1111'1111'1111;
207 return static_cast<array_type
>(a) ==
static_cast<array_type
>(b);
    // NOTE(review): the parameter lists of the two almost_eq overloads were
    // lost in extraction — only `friend native_simd` / `friend bool` remain.
    // The bodies show the intent: the first returns a lane mask of
    // |a - b| < epsilon, the second is true only when all four lanes pass
    // (mask() == 0b1111). Restore the signatures from the upstream file
    // before compiling.
211 [[nodiscard]]
friend native_simd
214 hilet abs_diff = abs(a - b);
215 return abs_diff < broadcast(epsilon);
218 [[nodiscard]]
friend bool
221 return almost_eq(a, b, epsilon).mask() == 0b1111;
224 [[nodiscard]]
friend native_simd operator==(native_simd a, native_simd b)
noexcept
226 return native_simd{_mm_cmpeq_ps(a.v, b.v)};
229 [[nodiscard]]
friend native_simd
operator!=(native_simd a, native_simd b)
noexcept
231 return native_simd{_mm_cmpneq_ps(a.v, b.v)};
234 [[nodiscard]]
friend native_simd operator<(native_simd a, native_simd b)
noexcept
236 return native_simd{_mm_cmplt_ps(a.v, b.v)};
239 [[nodiscard]]
friend native_simd
operator>(native_simd a, native_simd b)
noexcept
241 return native_simd{_mm_cmpgt_ps(a.v, b.v)};
244 [[nodiscard]]
friend native_simd
operator<=(native_simd a, native_simd b)
noexcept
246 return native_simd{_mm_cmple_ps(a.v, b.v)};
249 [[nodiscard]]
friend native_simd
operator>=(native_simd a, native_simd b)
noexcept
251 return native_simd{_mm_cmpge_ps(a.v, b.v)};
254 [[nodiscard]]
friend native_simd operator+(native_simd a)
noexcept
259 [[nodiscard]]
friend native_simd operator+(native_simd a, native_simd b)
noexcept
261 return native_simd{_mm_add_ps(a.v, b.v)};
264 [[nodiscard]]
friend native_simd operator-(native_simd a, native_simd b)
noexcept
266 return native_simd{_mm_sub_ps(a.v, b.v)};
269 [[nodiscard]]
friend native_simd operator-(native_simd a)
noexcept
271 return native_simd{} - a;
274 [[nodiscard]]
friend native_simd operator*(native_simd a, native_simd b)
noexcept
276 return native_simd{_mm_mul_ps(a.v, b.v)};
279 [[nodiscard]]
friend native_simd operator/(native_simd a, native_simd b)
noexcept
281 return native_simd{_mm_div_ps(a.v, b.v)};
284 [[nodiscard]]
friend native_simd operator&(native_simd a, native_simd b)
noexcept
286 return native_simd{_mm_and_ps(a.v, b.v)};
289 [[nodiscard]]
friend native_simd operator|(native_simd a, native_simd b)
noexcept
291 return native_simd{_mm_or_ps(a.v, b.v)};
294 [[nodiscard]]
friend native_simd operator^(native_simd a, native_simd b)
noexcept
296 return native_simd{_mm_xor_ps(a.v, b.v)};
299 [[nodiscard]]
friend native_simd operator~(native_simd a)
noexcept
301 return not_and(a, ones());
304 [[nodiscard]]
friend native_simd
min(native_simd a, native_simd b)
noexcept
306 return native_simd{_mm_min_ps(a.v, b.v)};
309 [[nodiscard]]
friend native_simd
max(native_simd a, native_simd b)
noexcept
311 return native_simd{_mm_max_ps(a.v, b.v)};
314 [[nodiscard]]
friend native_simd abs(native_simd a)
noexcept
316 return not_and(broadcast(-0.0f), a);
320 [[nodiscard]]
friend native_simd
floor(native_simd a)
noexcept
322 return native_simd{_mm_floor_ps(a.v)};
327 [[nodiscard]]
friend native_simd
ceil(native_simd a)
noexcept
329 return native_simd{_mm_ceil_ps(a.v)};
334 template<native_rounding_mode Rounding = native_rounding_mode::current>
335 [[nodiscard]]
friend native_simd
round(native_simd a)
noexcept
337 return native_simd{_mm_round_ps(a.v, to_underlying(Rounding))};
343 [[nodiscard]]
friend native_simd rcp(native_simd a)
noexcept
345 return native_simd{_mm_rcp_ps(a.v)};
350 [[nodiscard]]
friend native_simd
sqrt(native_simd a)
noexcept
352 return native_simd{_mm_sqrt_ps(a.v)};
361 [[nodiscard]]
friend native_simd rsqrt(native_simd a)
noexcept
363 return native_simd{_mm_rsqrt_ps(a.v)};
372 template<
size_t Mask>
373 [[nodiscard]]
friend native_simd set_zero(native_simd a)
noexcept
375 static_assert(Mask <= 0b1111);
376 if constexpr (Mask == 0b0000) {
378 }
else if constexpr (Mask == 0b1111) {
382 return native_simd{_mm_insert_ps(a.v, a.v, Mask)};
384 hilet mask = from_mask(Mask);
385 return not_and(mask, a);
397 template<
size_t Index>
398 [[nodiscard]]
friend native_simd insert(native_simd a, value_type b)
noexcept
400 static_assert(Index < 4);
403 return native_simd{_mm_insert_ps(a.v, _mm_set1_ps(b), narrow_cast<int>(Index << 4))};
405 hilet mask = from_mask(1_uz << Index);
406 return not_and(mask, a) | (mask & broadcast(b));
410 template<
size_t SrcIndex,
size_t DstIndex>
411 [[nodiscard]]
friend native_simd insert(native_simd a, native_simd b)
noexcept
413 static_assert(SrcIndex < size);
414 static_assert(DstIndex < size);
416 return native_simd{_mm_insert_ps(a.v, b.v, (SrcIndex << 6) | (DstIndex << 4))};
418 return insert<DstIndex>(a, get<SrcIndex>(b));
428 template<
size_t Index>
429 [[nodiscard]]
friend value_type get(native_simd a)
noexcept
431 static_assert(Index < size);
433 hilet tmp = _mm_shuffle_ps(a.v, a.v, Index);
434 return _mm_cvtss_f32(tmp);
445 template<
size_t Mask>
446 [[nodiscard]]
friend native_simd blend(native_simd a, native_simd b)
noexcept
448 static_assert(Mask <= 0b1111);
450 if constexpr (Mask == 0b0000) {
452 }
else if constexpr (Mask == 0b1111) {
456 return native_simd{_mm_blend_ps(a.v, b.v, Mask)};
458 hilet mask = from_mask(Mask);
459 return not_and(mask, a) | (mask & b);
476 template<fixed_
string SourceElements>
477 [[nodiscard]]
friend native_simd permute(native_simd a)
noexcept
479 static_assert(SourceElements.size() == size);
480 constexpr auto order = detail::native_swizzle_to_packed_indices<SourceElements, size>();
482 if constexpr (order == 0b11'10'01'00) {
484 }
else if constexpr (order == 0b00'00'00'00) {
488 return native_simd{_mm_permute_ps(a.v, order)};
490 return native_simd{_mm_shuffle_ps(a.v, a.v, order)};
495 [[nodiscard]]
friend native_simd permute(native_simd a, native_simd<int32_t, 4>
const& source_elements)
noexcept;
513 template<fixed_
string SourceElements>
514 [[nodiscard]]
friend native_simd swizzle(native_simd a)
noexcept
516 static_assert(SourceElements.size() == size);
517 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
518 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
519 constexpr auto number_mask = one_mask | zero_mask;
521 if constexpr (number_mask == 0b1111) {
523 return swizzle_numbers<SourceElements>();
525 }
else if constexpr (number_mask == 0b0000) {
527 return permute<SourceElements>(a);
530 }
else if constexpr (number_mask == zero_mask) {
532 hilet ordered = permute<SourceElements>(a);
533 return set_zero<zero_mask>(ordered);
537 hilet ordered = permute<SourceElements>(a);
538 hilet numbers = swizzle_numbers<SourceElements>();
539 return blend<number_mask>(ordered, numbers);
554 [[nodiscard]]
friend native_simd horizontal_add(native_simd a, native_simd b)
noexcept
556 return native_simd{_mm_hadd_ps(a.v, b.v)};
571 [[nodiscard]]
friend native_simd horizontal_sub(native_simd a, native_simd b)
noexcept
573 return native_simd{_mm_hsub_ps(a.v, b.v)};
583 [[nodiscard]]
friend native_simd horizontal_sum(native_simd a)
noexcept
585 hilet tmp = a + permute<
"cdab">(a);
586 return tmp + permute<
"badc">(tmp);
599 template<
size_t SourceMask>
600 [[nodiscard]]
friend native_simd dot_product(native_simd a, native_simd b)
noexcept
602 static_assert(SourceMask <= 0b1111);
604 return native_simd{_mm_dp_ps(a.v, b.v, (SourceMask << 4) | 0b1111)};
606 return horizontal_sum(set_zero<~SourceMask & 0b1111>(a * b));
622 [[nodiscard]]
friend native_simd interleaved_sub_add(native_simd a, native_simd b)
noexcept
624 return native_simd{_mm_addsub_ps(a.v, b.v)};
633 [[nodiscard]]
friend native_simd not_and(native_simd a, native_simd b)
noexcept
635 return native_simd{_mm_andnot_ps(a.v, b.v)};
638 [[nodiscard]]
friend std::array<native_simd, 4> transpose(native_simd a, native_simd b, native_simd c, native_simd d)
noexcept
640 _MM_TRANSPOSE4_PS(a.v, b.v,
c.v, d.v);
646 return a <<
"(" << get<0>(b) <<
", " << get<1>(b) <<
", " << get<2>(b) <<
", " << get<3>(b) <<
")";
649 template<fixed_
string SourceElements>
650 [[nodiscard]]
static native_simd swizzle_numbers() noexcept
652 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
653 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
654 constexpr auto number_mask = one_mask | zero_mask;
655 constexpr auto alpha_mask = ~number_mask & 0b1111;
657 if constexpr ((zero_mask | alpha_mask) == 0b1111) {
660 }
else if constexpr ((one_mask | alpha_mask) == 0b1111) {
661 return broadcast(1.0f);
665 to_bool(one_mask & 0b0001) ? 1.0f : 0.0f,
666 to_bool(one_mask & 0b0010) ? 1.0f : 0.0f,
667 to_bool(one_mask & 0b0100) ? 1.0f : 0.0f,
668 to_bool(one_mask & 0b1000) ? 1.0f : 0.0f};
// --- Documentation-extraction residue (Doxygen tooltip fragments), kept as
// --- comments for reference; not part of the original source file. ---
// hi_axiom(expression, ...): specify an axiom — an expression that is true
//   (defined in assert.hpp:238).
// hi_axiom_not_null(expression, ...): assert that an expression is not
//   nullptr (defined in assert.hpp:257).
// hilet: immutable-by-default ("invariant") variable declaration macro
//   (defined in utility.hpp:23).
// round: enum value — "the end cap of the line is round".
// other: enum value — "the gui_event does not have associated data"
//   (Doxygen bug noted in the original; algorithm.hpp:13).
// Related files mentioned: geometry/margins.hpp, cache.hpp:11.