5#include "native_simd_utility.hpp"
30 using value_type = int16_t;
31 constexpr static size_t size = 8;
32 using register_type = __m128i;
/** Copy constructor; compiler-generated and declared noexcept. */
36 native_i16x8(native_i16x8
const&)
noexcept =
default;
/** Move constructor; compiler-generated and declared noexcept. */
37 native_i16x8(native_i16x8 &&) noexcept = default;
/** Copy assignment; compiler-generated and declared noexcept. */
38 native_i16x8 &operator=(native_i16x8 const &) noexcept = default;
/** Move assignment; compiler-generated and declared noexcept. */
39 native_i16x8 &operator=(native_i16x8 &&) noexcept = default;
43 native_i16x8() noexcept : v(_mm_setzero_si128()) {}
45 [[nodiscard]]
explicit native_i16x8(register_type other) noexcept : v(other) {}
47 [[nodiscard]]
explicit operator register_type () const noexcept {
58 [[nodiscard]] native_i16x8(value_type a, value_type b = value_type{0}, value_type c = value_type{0}, value_type d = value_type{0},
59 value_type e = value_type{0}, value_type f = value_type{0}, value_type g = value_type{0},
60 value_type h = value_type{0})
noexcept :
61 v(_mm_set_epi16(h, g, f, e, d, c, b, a)) {}
63 [[nodiscard]]
explicit native_i16x8(value_type
const *other) noexcept : v(_mm_loadu_si128(
reinterpret_cast<register_type
const *
>(other))) {}
/** Store the vector to memory.
 *
 * Writes the 128-bit register to @a out with an unaligned store.
 *
 * @param out Destination pointer; 16 bytes are written starting here.
 */
65 void store(value_type *out)
const noexcept
68 _mm_storeu_si128(
reinterpret_cast<register_type *
>(out), v);
71 [[nodiscard]]
explicit native_i16x8(
void const *other) noexcept : v(_mm_loadu_si128(
static_cast<register_type
const *
>(other))) {}
/** Store the vector to untyped memory.
 *
 * Writes the 128-bit register to @a out with an unaligned store.
 *
 * @param out Destination pointer; 16 bytes are written starting here.
 */
73 void store(
void *out)
const noexcept
76 _mm_storeu_si128(
static_cast<register_type *
>(out), v);
/** Load the vector from the start of a span.
 *
 * Reads 128 bits from `other.data()` with an unaligned load.
 *
 * NOTE(review): no length check is visible in this listing; the span
 * presumably has to hold at least `size` elements - confirm.
 *
 * @param other Span whose first elements are loaded.
 */
79 [[nodiscard]]
explicit native_i16x8(std::span<value_type const> other)
noexcept
82 v = _mm_loadu_si128(
reinterpret_cast<register_type
const *
>(
other.data()));
/** Store the vector to the start of a span.
 *
 * Writes 128 bits to `out.data()` with an unaligned store.
 *
 * NOTE(review): no length check is visible in this listing; the span
 * presumably has to hold at least `size` elements - confirm.
 *
 * @param out Span that receives the elements.
 */
85 void store(std::span<value_type> out)
const noexcept
88 _mm_storeu_si128(
reinterpret_cast<register_type *
>(out.data()), v);
/** Load the vector from the start of a std::array.
 *
 * Only participates when the array holds at least `size` elements
 * (`requires (N >= size)`); the first 128 bits of the array are read with an
 * unaligned load.
 *
 * NOTE(review): the declaration of `N` is not visible in this listing;
 * presumably a `template<size_t N>` header precedes this constructor.
 *
 * @param other Array, taken by value, whose leading elements are loaded.
 */
92 [[nodiscard]]
explicit native_i16x8(
std::array<value_type, N> other)
noexcept requires (N >= size) : v(_mm_loadu_si128(reinterpret_cast<register_type const *>(
other.data()))) {}
98 _mm_storeu_si128(
reinterpret_cast<register_type *
>(r.data()), v);
/** Create a vector with every element set to the same value.
 *
 * @param a The value copied into all eight elements.
 * @return A vector built with `_mm_set1_epi16(a)`.
 */
116 [[nodiscard]]
static native_i16x8 broadcast(int16_t a)
noexcept
118 return native_i16x8{_mm_set1_epi16(a)};
145 [[nodiscard]]
static native_i16x8 from_mask(
size_t mask)
noexcept
150 mask & 0b0000'0001 ? 0 : truncate<value_type>(0xffff),
151 mask & 0b0000'0010 ? 0 : truncate<value_type>(0xffff),
152 mask & 0b0000'0100 ? 0 : truncate<value_type>(0xffff),
153 mask & 0b0000'1000 ? 0 : truncate<value_type>(0xffff),
154 mask & 0b0001'0000 ? 0 : truncate<value_type>(0xffff),
155 mask & 0b0010'0000 ? 0 : truncate<value_type>(0xffff),
156 mask & 0b0100'0000 ? 0 : truncate<value_type>(0xffff),
157 mask & 0b1000'0000 ? 0 : truncate<value_type>(0xffff)};
162 [[nodiscard]]
size_t mask() const noexcept
164 auto tmp = _mm_movemask_epi8(v);
170 return narrow_cast<size_t>(tmp);
/** Element-wise equality comparison.
 *
 * @return A mask vector from `_mm_cmpeq_epi16`: an element is all-ones where
 *         the corresponding elements of @a a and @a b are equal, zero otherwise.
 */
174 [[nodiscard]]
friend native_i16x8 operator==(native_i16x8 a, native_i16x8 b)
noexcept
176 return native_i16x8{_mm_cmpeq_epi16(a.v, b.v)};
179 [[nodiscard]]
friend native_i16x8
operator!=(native_i16x8 a, native_i16x8 b)
noexcept
/** Element-wise signed less-than comparison.
 *
 * @return A mask vector from `_mm_cmplt_epi16`: an element is all-ones where
 *         the element of @a a is less than that of @a b, zero otherwise.
 */
184 [[nodiscard]]
friend native_i16x8 operator<(native_i16x8 a, native_i16x8 b)
noexcept
186 return native_i16x8{_mm_cmplt_epi16(a.v, b.v)};
/** Element-wise signed greater-than comparison.
 *
 * @return A mask vector from `_mm_cmpgt_epi16`: an element is all-ones where
 *         the element of @a a is greater than that of @a b, zero otherwise.
 */
189 [[nodiscard]]
friend native_i16x8
operator>(native_i16x8 a, native_i16x8 b)
noexcept
191 return native_i16x8{_mm_cmpgt_epi16(a.v, b.v)};
194 [[nodiscard]]
friend native_i16x8
operator<=(native_i16x8 a, native_i16x8 b)
noexcept
199 [[nodiscard]]
friend native_i16x8
operator>=(native_i16x8 a, native_i16x8 b)
noexcept
/** Element-wise addition.
 *
 * Uses `_mm_add_epi16`, so each 16-bit sum wraps around on overflow.
 */
204 [[nodiscard]]
friend native_i16x8 operator+(native_i16x8 a, native_i16x8 b)
noexcept
206 return native_i16x8{_mm_add_epi16(a.v, b.v)};
/** Element-wise subtraction (`a - b`).
 *
 * Uses `_mm_sub_epi16`, so each 16-bit difference wraps around on overflow.
 */
209 [[nodiscard]]
friend native_i16x8 operator-(native_i16x8 a, native_i16x8 b)
noexcept
211 return native_i16x8{_mm_sub_epi16(a.v, b.v)};
/** Element-wise negation.
 *
 * Implemented as a zero vector minus @a a, using the binary `operator-`.
 */
214 [[nodiscard]]
friend native_i16x8 operator-(native_i16x8 a)
noexcept
216 return native_i16x8{} - a;
/** Element-wise multiplication.
 *
 * Uses `_mm_mullo_epi16`, which keeps the low 16 bits of each product.
 */
219 [[nodiscard]]
friend native_i16x8 operator*(native_i16x8 a, native_i16x8 b)
noexcept
221 return native_i16x8{_mm_mullo_epi16(a.v, b.v)};
/** Bitwise AND of the two 128-bit registers.
 */
224 [[nodiscard]]
friend native_i16x8 operator&(native_i16x8 a, native_i16x8 b)
noexcept
226 return native_i16x8{_mm_and_si128(a.v, b.v)};
/** Bitwise OR of the two 128-bit registers.
 */
229 [[nodiscard]]
friend native_i16x8 operator|(native_i16x8 a, native_i16x8 b)
noexcept
231 return native_i16x8{_mm_or_si128(a.v, b.v)};
/** Bitwise XOR of the two 128-bit registers.
 */
234 [[nodiscard]]
friend native_i16x8 operator^(native_i16x8 a, native_i16x8 b)
noexcept
236 return native_i16x8{_mm_xor_si128(a.v, b.v)};
/** Bitwise NOT of the 128-bit register.
 *
 * An all-ones register is produced by comparing a register with itself (its
 * initial contents do not matter, hence `_mm_undefined_si128`); the result is
 * then `~a & ones` via `_mm_andnot_si128`.
 */
239 [[nodiscard]]
friend native_i16x8 operator~(native_i16x8 a)
noexcept
241 auto ones = _mm_undefined_si128();
242 ones = _mm_cmpeq_epi32(ones, ones);
243 return native_i16x8{_mm_andnot_si128(a.v, ones)};
/** Shift every element left by the same number of bits.
 *
 * Uses `_mm_slli_epi16`, which shifts in zero bits.
 *
 * @param a The vector to shift.
 * @param b The shift count applied to each element.
 */
246 [[nodiscard]]
friend native_i16x8 operator<<(native_i16x8 a,
int b)
noexcept
248 return native_i16x8{_mm_slli_epi16(a.v, b)};
/** Shift every element right by the same number of bits.
 *
 * Uses `_mm_srai_epi16`, an arithmetic shift that shifts in copies of the
 * sign bit.
 *
 * @param a The vector to shift.
 * @param b The shift count applied to each element.
 */
251 [[nodiscard]]
friend native_i16x8 operator>>(native_i16x8 a,
int b)
noexcept
253 return native_i16x8{_mm_srai_epi16(a.v, b)};
/** Element-wise signed minimum of @a a and @a b (`_mm_min_epi16`).
 */
256 [[nodiscard]]
friend native_i16x8
min(native_i16x8 a, native_i16x8 b)
noexcept
258 return native_i16x8{_mm_min_epi16(a.v, b.v)};
/** Element-wise signed maximum of @a a and @a b (`_mm_max_epi16`).
 */
261 [[nodiscard]]
friend native_i16x8
max(native_i16x8 a, native_i16x8 b)
noexcept
263 return native_i16x8{_mm_max_epi16(a.v, b.v)};
/** Element-wise absolute value (`_mm_abs_epi16`).
 *
 * NOTE(review): `_mm_abs_epi16` is an SSSE3 intrinsic; presumably a feature
 * guard sits on lines not visible in this listing - confirm.
 */
266 [[nodiscard]]
friend native_i16x8 abs(native_i16x8 a)
noexcept
268 return native_i16x8{_mm_abs_epi16(a.v)};
/** Clear selected elements of a vector.
 *
 * Computes `~from_mask(Mask) & a` through `not_and()`, so every bit that is
 * set in the vector returned by `from_mask(Mask)` is cleared in the result.
 *
 * @tparam Mask Element selection mask; must fit in eight bits.
 * @param a The vector to clear elements of.
 */
277 template<
size_t Mask>
278 [[nodiscard]]
friend native_i16x8 set_zero(native_i16x8 a)
noexcept
280 static_assert(Mask <= 0b1111'1111);
281 hilet mask = from_mask(Mask);
282 return not_and(mask, a);
292 template<
size_t Index>
293 [[nodiscard]]
friend native_i16x8 insert(native_i16x8 a, value_type b)
noexcept
295 static_assert(Index < 4);
296 return native_i16x8{_mm_insert_epi16(a, b, narrow_cast<int>(Index))};
305 template<
size_t Index>
306 [[nodiscard]]
friend float extract(native_i16x8 a)
noexcept
308 return std::bit_cast<float>(_mm_extract_epi16(a, Index));
/** Select elements from two vectors according to a compile-time mask.
 *
 * Two implementations are visible: one using `_mm_blend_epi16` and one that
 * computes `(~mask & a) | (mask & b)` from `from_mask(Mask)`.
 *
 * NOTE(review): the two `return` statements are presumably separated by
 * preprocessor conditionals on lines not visible in this listing - confirm.
 * NOTE(review): the `_mm_blend_epi16` call passes `a` and `b` where every
 * other function passes `a.v` and `b.v`; the conversion to register_type is
 * declared explicit, so confirm this compiles on all supported compilers.
 *
 * @tparam Mask Element selection mask.
 * @param a First source vector.
 * @param b Second source vector.
 */
319 template<
size_t Mask>
320 [[nodiscard]]
friend native_i16x8 blend(native_i16x8 a, native_i16x8 b)
noexcept
323 return native_i16x8{_mm_blend_epi16(a, b, Mask)};
325 hilet mask = from_mask(Mask);
326 return not_and(mask, a) | (mask & b);
370 template<fixed_
string SourceElements>
371 [[nodiscard]]
friend native_i16x8 swizzle(native_i16x8 a)
noexcept
373 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
374 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
375 constexpr auto number_mask = one_mask | zero_mask;
377 if constexpr (number_mask == 0b1111) {
379 return swizzle_numbers<SourceElements>();
381 }
else if constexpr (number_mask == 0b0000) {
383 return permute<SourceElements>(a);
386 }
else if constexpr (number_mask == zero_mask) {
388 hilet ordered = permute<SourceElements>(a);
389 return set_zero<zero_mask>(ordered);
393 hilet ordered = permute<SourceElements>(a);
394 hilet numbers = swizzle_numbers<SourceElements>();
395 return blend<number_mask>(ordered, numbers);
/** Add adjacent pairs of elements.
 *
 * Uses `_mm_hadd_epi16`: the low four result elements are the pair sums of
 * @a a, the high four are the pair sums of @a b.
 *
 * NOTE(review): `_mm_hadd_epi16` is an SSSE3 intrinsic; presumably a feature
 * guard sits on lines not visible in this listing - confirm.
 */
410 [[nodiscard]]
friend native_i16x8 horizontal_add(native_i16x8 a, native_i16x8 b)
noexcept
412 return native_i16x8{_mm_hadd_epi16(a.v, b.v)};
/** Subtract adjacent pairs of elements.
 *
 * Uses `_mm_hsub_epi16`: the low four result elements are the pair
 * differences of @a a, the high four are the pair differences of @a b.
 *
 * NOTE(review): `_mm_hsub_epi16` is an SSSE3 intrinsic; presumably a feature
 * guard sits on lines not visible in this listing - confirm.
 */
427 [[nodiscard]]
friend native_i16x8 horizontal_sub(native_i16x8 a, native_i16x8 b)
noexcept
429 return native_i16x8{_mm_hsub_epi16(a.v, b.v)};
455 template<
size_t SourceMask>
456 [[nodiscard]]
friend native_i16x8 dot_product(native_i16x8 a, native_i16x8 b)
noexcept
458 static_assert(SourceMask <= 0b1111);
459 return horizontal_sum(set_zero<~SourceMask & 0b1111>(a * b));
/** Bitwise `~a & b` of the two 128-bit registers (`_mm_andnot_si128`).
 *
 * @param a The operand that is inverted.
 * @param b The operand that is AND-ed with the inverted @a a.
 */
468 [[nodiscard]]
friend native_i16x8 not_and(native_i16x8 a, native_i16x8 b)
noexcept
470 return native_i16x8{_mm_andnot_si128(a.v, b.v)};
473 template<fixed_
string SourceElements>
474 [[nodiscard]]
static native_i16x8 swizzle_numbers() noexcept
476 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
477 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
478 constexpr auto number_mask = one_mask | zero_mask;
479 constexpr auto alpha_mask = ~number_mask & 0b1111;
481 if constexpr ((zero_mask | alpha_mask) == 0b1111) {
482 return native_i16x8{_mm_setzero_si128()};
484 }
else if constexpr ((one_mask | alpha_mask)== 0b1111) {
485 return native_i16x8{_mm_set1_epi16(1)};
488 return native_i16x8{_mm_set_epi16(
489 to_bool(one_mask & 0b0001) ? 1 : 0,
490 to_bool(one_mask & 0b0010) ? 1 : 0,
491 to_bool(one_mask & 0b0100) ? 1 : 0,
492 to_bool(one_mask & 0b1000) ? 1 : 0
#define hi_axiom(expression,...)
Specify an axiom: an expression that is assumed to be true.
Definition assert.hpp:253
#define hi_axiom_not_null(expression,...)
Assert that an expression is not nullptr.
Definition assert.hpp:272
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
@ other
The gui_event does not have associated data.
DOXYGEN BUG.
Definition algorithm.hpp:13
geometry/margins.hpp
Definition cache.hpp:11