7#include "native_simd_utility.hpp"
8#include "../utility/module.hpp"
15hi_warning_ignore_msvc(26472);
17namespace hi {
inline namespace v1 {
/// Type of a single element (lane) of the vector.
using value_type = int8_t;
/// Number of elements in the vector. A 128-bit register holds sixteen 8-bit
/// values; the sixteen-argument constructor, the 16-bit mask comparison in
/// equal() and the full-register loads/stores all rely on sixteen lanes.
/// (Was 4, which under-sizes anything derived from `size`.)
constexpr static size_t size = 16;
/// The SSE2 integer register type wrapped by this class.
using register_type = __m128i;
// Copy constructor, move constructor, copy assignment and move assignment.
// All four are defaulted and declared noexcept.
44 native_i8x16(native_i8x16
const&)
noexcept =
default;
45 native_i8x16(native_i8x16&&) noexcept = default;
46 native_i8x16& operator=(native_i8x16 const&) noexcept = default;
47 native_i8x16& operator=(native_i8x16&&) noexcept = default;
// Default constructor: initializes the register v with _mm_setzero_si128(),
// i.e. every bit of the vector is zero.
51 native_i8x16() noexcept : v(_mm_setzero_si128()) {}
// Wrap a raw register value: other is copied into v unchanged.
// Explicit, so a register is never converted implicitly.
53 [[nodiscard]]
explicit native_i8x16(register_type other) noexcept : v(other) {}
// Explicit conversion to the raw register type.
// NOTE(review): the body is not present in this listing; presumably it
// returns v - confirm against the full source.
55 [[nodiscard]]
explicit operator register_type() const noexcept
// Construct a vector from individual element values.
// Parameters b through p default to value_type{0}.
// NOTE(review): the declaration of the first parameter (a) and the end of
// the parameter list are not present in this listing, although a is used
// in the initializer below - confirm against the full source.
79 [[nodiscard]] native_i8x16(
81 value_type b = value_type{0},
82 value_type
c = value_type{0},
83 value_type d = value_type{0},
84 value_type e = value_type{0},
85 value_type f = value_type{0},
86 value_type g = value_type{0},
87 value_type h = value_type{0},
88 value_type i = value_type{0},
89 value_type j = value_type{0},
90 value_type k = value_type{0},
91 value_type l = value_type{0},
92 value_type
m = value_type{0},
93 value_type
n = value_type{0},
94 value_type
o = value_type{0},
95 value_type p = value_type{0}
// The arguments are handed to _mm_set_epi8 in reverse order (p first, a last).
// _mm_set_epi8 takes its arguments from the highest byte down to the lowest,
// so a lands in the lowest byte of the register and p in the highest.
97 v(_mm_set_epi8(p,
o,
n,
m, l, k, j, i, h, g, f, e, d,
c, b, a))
// Construct by loading one register from the memory at other.
// Uses the unaligned load _mm_loadu_si128, so other needs no particular
// alignment; the pointer is reinterpreted as a pointer to register_type.
// NOTE(review): the load reads a full 128-bit register (16 bytes) and no null
// or bounds check is visible in this listing - confirm callers guarantee it.
101 [[nodiscard]]
explicit native_i8x16(value_type
const *other) noexcept :
102 v(_mm_loadu_si128(
reinterpret_cast<register_type
const *
>(other)))
// Write the whole register v to the memory at out.
// Uses the unaligned store _mm_storeu_si128, so out needs no particular
// alignment.
// NOTE(review): the store writes a full 128-bit register (16 bytes) and no
// null or bounds check is visible in this listing - confirm callers
// guarantee it.
106 void store(value_type *out)
const noexcept
109 _mm_storeu_si128(
reinterpret_cast<register_type *
>(out), v);
// Construct by loading one register from untyped memory at other.
// Same unaligned load as the value_type pointer overload, but the void
// pointer is converted with static_cast.
// NOTE(review): reads a full 128-bit register (16 bytes); no null or bounds
// check is visible in this listing - confirm callers guarantee it.
112 [[nodiscard]]
explicit native_i8x16(
void const *other) noexcept : v(_mm_loadu_si128(
static_cast<register_type
const *
>(other)))
// Write the whole register v to untyped memory at out using the unaligned
// store _mm_storeu_si128.
// NOTE(review): writes a full 128-bit register (16 bytes); no null or bounds
// check is visible in this listing - confirm callers guarantee it.
116 void store(
void *out)
const noexcept
119 _mm_storeu_si128(
static_cast<register_type *
>(out), v);
// Construct by loading one register from the start of a span of elements.
// v is assigned in the constructor body (not in the initializer list) from
// an unaligned load of other.data().
// NOTE(review): the load reads 16 bytes regardless of other.size(); no size
// check is visible in this listing - confirm a precondition check exists in
// the full source.
122 [[nodiscard]]
explicit native_i8x16(std::span<value_type const> other)
noexcept
125 v = _mm_loadu_si128(
reinterpret_cast<register_type
const *
>(
other.data()));
// Write the whole register v to the start of the span out using an
// unaligned store to out.data().
// NOTE(review): the store writes 16 bytes regardless of out.size(); no size
// check is visible in this listing - confirm a precondition check exists in
// the full source.
128 void store(std::span<value_type> out)
const noexcept
131 _mm_storeu_si128(
reinterpret_cast<register_type *
>(out.data()), v);
// Construct from an array_type value (taken by value) by loading one
// register from other.data() with an unaligned load.
// NOTE(review): array_type is declared outside this listing; the load reads
// 16 bytes, so confirm array_type provides at least 16 bytes of storage.
134 [[nodiscard]]
explicit native_i8x16(array_type other) noexcept :
135 v(_mm_loadu_si128(
reinterpret_cast<register_type
const *
>(
other.data())))
// Explicit conversion to array_type: value-initializes an array r and stores
// the register v into r.data() with an unaligned store.
// NOTE(review): the return statement is not present in this listing;
// presumably r is returned - confirm. The store writes 16 bytes, so
// array_type must provide at least 16 bytes of storage - confirm.
139 [[nodiscard]]
explicit operator array_type() const noexcept
141 auto r = array_type{};
142 _mm_storeu_si128(
reinterpret_cast<register_type *
>(r.data()), v);
// Explicit converting constructors from native_f32x16 and native_u32x16.
// These are declarations only; their definitions are not in this listing.
147 [[nodiscard]]
explicit native_i8x16(native_f32x16
const& a)
noexcept;
148 [[nodiscard]]
explicit native_i8x16(native_u32x16
const& a)
noexcept;
// Create a vector in which every element equals the scalar a.
// _mm_set1_epi8 replicates the 8-bit value into all bytes of the register.
160 [[nodiscard]]
static native_i8x16 broadcast(value_type a)
noexcept
162 return native_i8x16{_mm_set1_epi8(a)};
186 [[nodiscard]]
static native_i8x16 broadcast(native_i8x16 a)
noexcept
189 return native_i8x16{_mm_broadcastb_epi8(a.v)};
191 return native_i8x16{_mm_shuffle_epi8(a.v, _mm_setzero_si128())};
194 auto tmp = _mm_undefined_si128();
195 tmp = _mm_cmpeq_epi32(tmp, tmp);
196 tmp = _mm_slli_epi32(tmp, 24);
197 tmp = _mm_and_si128(tmp, a.v);
200 tmp = _mm_or_si128(tmp, _mm_slli_epi32(tmp, 8));
201 tmp = _mm_or_si128(tmp, _mm_slli_epi32(tmp,16));
204 tmp = _mm_shuffle_epi32(tmp, 0b00'00'00'00);
205 return native_i8x16{tmp};
// Create a vector with every bit set.
// The register content is left undefined on purpose: comparing any value
// with itself for equality yields all-ones in every 32-bit lane.
209 [[nodiscard]]
static native_i8x16 ones() noexcept
211 hilet tmp = _mm_undefined_si128();
212 return native_i8x16{_mm_cmpeq_epi32(tmp, tmp)};
// Build an integer bit-mask from the vector.
// _mm_movemask_epi8 collects the most significant bit of each byte of v into
// one bit of an int; narrow_cast converts that int to size_t.
217 [[nodiscard]]
size_t mask() const noexcept
219 return narrow_cast<size_t>(_mm_movemask_epi8(v));
// Whole-vector equality: true only when every element of a equals the
// corresponding element of b.
// The element-wise comparison is reduced with mask() and compared against
// sixteen set bits, one per byte lane.
222 [[nodiscard]]
friend bool equal(native_i8x16 a, native_i8x16 b)
noexcept
224 return (a == b).mask() == 0b1111'1111'1111'1111;
// Element-wise equality. Returns a vector mask, not a bool:
// _mm_cmpeq_epi8 sets each byte to all-ones where a and b are equal and to
// zero elsewhere.
227 [[nodiscard]]
friend native_i8x16 operator==(native_i8x16 a, native_i8x16 b)
noexcept
229 return native_i8x16{_mm_cmpeq_epi8(a.v, b.v)};
// Element-wise inequality; returns a vector mask like operator==.
// NOTE(review): the body is not present in this listing; presumably it is
// the complement of (a == b) - confirm against the full source.
232 [[nodiscard]]
friend native_i8x16
operator!=(native_i8x16 a, native_i8x16 b)
noexcept
// Element-wise signed less-than. Returns a vector mask: _mm_cmplt_epi8 sets
// each byte to all-ones where a is less than b and to zero elsewhere.
237 [[nodiscard]]
friend native_i8x16 operator<(native_i8x16 a, native_i8x16 b)
noexcept
239 return native_i8x16{_mm_cmplt_epi8(a.v, b.v)};
// Element-wise signed greater-than. Returns a vector mask: _mm_cmpgt_epi8
// sets each byte to all-ones where a is greater than b and to zero elsewhere.
242 [[nodiscard]]
friend native_i8x16
operator>(native_i8x16 a, native_i8x16 b)
noexcept
244 return native_i8x16{_mm_cmpgt_epi8(a.v, b.v)};
// Element-wise less-than-or-equal; returns a vector mask.
// NOTE(review): the body is not present in this listing; presumably it is
// the complement of (a > b) - confirm against the full source.
247 [[nodiscard]]
friend native_i8x16
operator<=(native_i8x16 a, native_i8x16 b)
noexcept
// Element-wise greater-than-or-equal; returns a vector mask.
// NOTE(review): the body is not present in this listing; presumably it is
// the complement of (a < b) - confirm against the full source.
252 [[nodiscard]]
friend native_i8x16
operator>=(native_i8x16 a, native_i8x16 b)
noexcept
// Unary plus.
// NOTE(review): the body is not present in this listing; presumably it
// returns a unchanged - confirm against the full source.
257 [[nodiscard]]
friend native_i8x16 operator+(native_i8x16 a)
noexcept
// Unary minus: negates every element by subtracting a from a
// default-constructed (all-zero) vector using the binary operator-.
262 [[nodiscard]]
friend native_i8x16 operator-(native_i8x16 a)
noexcept
264 return native_i8x16{} - a;
// Element-wise addition of 8-bit lanes via _mm_add_epi8.
// This intrinsic wraps around on overflow; it does not saturate.
267 [[nodiscard]]
friend native_i8x16 operator+(native_i8x16 a, native_i8x16 b)
noexcept
269 return native_i8x16{_mm_add_epi8(a.v, b.v)};
// Element-wise subtraction (a - b) of 8-bit lanes via _mm_sub_epi8.
// This intrinsic wraps around on overflow; it does not saturate.
272 [[nodiscard]]
friend native_i8x16 operator-(native_i8x16 a, native_i8x16 b)
noexcept
274 return native_i8x16{_mm_sub_epi8(a.v, b.v)};
// Bitwise AND of the two full 128-bit registers.
277 [[nodiscard]]
friend native_i8x16 operator&(native_i8x16 a, native_i8x16 b)
noexcept
279 return native_i8x16{_mm_and_si128(a.v, b.v)};
// Bitwise OR of the two full 128-bit registers.
282 [[nodiscard]]
friend native_i8x16 operator|(native_i8x16 a, native_i8x16 b)
noexcept
284 return native_i8x16{_mm_or_si128(a.v, b.v)};
// Bitwise exclusive-OR of the two full 128-bit registers.
287 [[nodiscard]]
friend native_i8x16 operator^(native_i8x16 a, native_i8x16 b)
noexcept
289 return native_i8x16{_mm_xor_si128(a.v, b.v)};
// Bitwise NOT of the full register.
// An all-ones value is produced by comparing an undefined register with
// itself; _mm_andnot_si128(a.v, ones) then computes (NOT a.v) AND ones,
// which is the complement of a.v.
292 [[nodiscard]]
friend native_i8x16 operator~(native_i8x16 a)
noexcept
294 auto ones = _mm_undefined_si128();
295 ones = _mm_cmpeq_epi32(ones, ones);
296 return native_i8x16{_mm_andnot_si128(a.v, ones)};
// Element-wise signed minimum of a and b.
// Two alternative return statements are visible: one using the intrinsic
// _mm_min_epi8 and one selecting between a and b through a vector mask
// (a where the mask is set, b elsewhere).
// NOTE(review): the preprocessor lines choosing between the two and the
// definition of mask are not present in this listing; presumably mask is
// (a < b) - confirm against the full source.
299 [[nodiscard]]
friend native_i8x16
min(native_i8x16 a, native_i8x16 b)
noexcept
302 return native_i8x16{_mm_min_epi8(a.v, b.v)};
305 return (mask & a) | not_and(mask, b);
// Element-wise signed maximum of a and b.
// Two alternative return statements are visible: one using the intrinsic
// _mm_max_epi8 and one selecting between a and b through a vector mask
// (a where the mask is set, b elsewhere).
// NOTE(review): the preprocessor lines choosing between the two and the
// definition of mask are not present in this listing; presumably mask is
// (a > b) - confirm against the full source.
309 [[nodiscard]]
friend native_i8x16
max(native_i8x16 a, native_i8x16 b)
noexcept
312 return native_i8x16{_mm_max_epi8(a.v, b.v)};
315 return (mask & a) | not_and(mask, b);
// Element-wise absolute value.
// Two alternatives are visible: the intrinsic _mm_abs_epi8, and a fallback
// that builds a mask of the strictly positive elements and keeps a there
// while taking -a everywhere else (zero elements therefore yield -0 == 0).
// NOTE(review): the preprocessor lines choosing between the two paths are
// not present in this listing - confirm against the full source.
319 [[nodiscard]]
friend native_i8x16 abs(native_i8x16 a)
noexcept
322 return native_i8x16{_mm_abs_epi8(a.v)};
324 hilet mask = a > native_i8x16{};
325 return (mask & a) | not_and(mask, -a);
// Zero the elements of a selected by the compile-time bit-mask Mask.
// Two alternatives are visible: one based on _mm_insert_ps (the register is
// cast to float lanes and back), and a fallback that expands Mask with
// from_mask and clears the selected bits with not_and.
// NOTE(review): the preprocessor lines choosing between the two paths are
// not present in this listing - confirm against the full source.
// NOTE(review): _mm_insert_ps zeroes 32-bit elements chosen by the low four
// bits of its immediate, so with Mask limited to 0b1111 that path clears
// groups of four bytes rather than single int8 lanes - confirm this is
// intended for a sixteen-lane type.
335 template<
size_t Mask>
336 [[nodiscard]]
friend native_i8x16 set_zero(native_i8x16 a)
noexcept
338 static_assert(Mask <= 0b1111);
340 return native_i8x16{_mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(a.v), _mm_castsi128_ps(a.v), Mask))};
342 hilet mask = from_mask<Mask>();
343 return not_and(mask, a);
// Return a copy of a with the element at compile-time position Index
// replaced by b.
// Two alternatives are visible: the intrinsic _mm_insert_epi8, and a
// fallback that builds a single-bit mask with from_mask, clears that
// element in a and ORs in the matching element of broadcast(b).
// NOTE(review): the preprocessor lines choosing between the two paths are
// not present in this listing - confirm against the full source.
// NOTE(review): Index is limited to 0..3 although _mm_insert_epi8 accepts
// byte positions 0..15 and the vector has sixteen lanes - confirm whether
// the bound should be 16.
354 template<
size_t Index>
355 [[nodiscard]]
friend native_i8x16 insert(native_i8x16 a, value_type b)
noexcept
357 static_assert(Index < 4);
360 return native_i8x16{_mm_insert_epi8(a.v, b, Index)};
362 hilet mask = from_mask<1_uz << Index>();
363 return not_and(mask, a) | (mask & broadcast(b));
// Read the element at compile-time position Index.
// Two alternatives are visible: _mm_extract_epi8, whose int result is cast
// back to value_type, and a fallback that converts the vector to array_type
// and reads the element with std::get.
// NOTE(review): the preprocessor lines choosing between the two paths are
// not present in this listing, and no bound check on Index is visible -
// confirm against the full source.
373 template<
size_t Index>
374 [[nodiscard]]
friend value_type get(native_i8x16 a)
noexcept
377 return static_cast<value_type
>(_mm_extract_epi8(a.v, Index));
379 auto r =
static_cast<array_type
>(a);
380 return std::get<Index>(r);
// Bitwise (NOT a) AND b over the full register.
// Note the operand order: the FIRST argument is the one that is inverted,
// matching _mm_andnot_si128.
389 [[nodiscard]]
friend native_i8x16 not_and(native_i8x16 a, native_i8x16 b)
noexcept
391 return native_i8x16{_mm_andnot_si128(a.v, b.v)};
// Body of a formatting function whose signature is not present in this
// listing: writes "(e0, e1, e2, e3)" to a, using get<0> .. get<3> on b.
// NOTE(review): only the first four elements are written although the
// vector has sixteen lanes - confirm whether all lanes should be printed.
// NOTE(review): if a is a std::ostream, int8_t values are formatted as
// characters rather than numbers - confirm the intended output.
396 return a <<
"(" << get<0>(b) <<
", " << get<1>(b) <<
", " << get<2>(b) <<
", " << get<3>(b) <<
")";
#define hi_axiom(expression,...)
Specify an axiom; an expression that is true.
Definition assert.hpp:238
#define hi_axiom_not_null(expression,...)
Assert that an expression is not nullptr.
Definition assert.hpp:257
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
@ other
The gui_event does not have associated data.
DOXYGEN BUG.
Definition algorithm.hpp:13
@ m
Mirror but not bracket.
geometry/margins.hpp
Definition cache.hpp:11