31 using value_type =
float;
32 using register_type =
__m128;
61 [[
nodiscard]] hi_force_inline
static array_type set(
float a,
float b,
float c,
float d)
noexcept
66 [[
nodiscard]] hi_force_inline
static array_type set(
float a)
noexcept
78#if defined(HI_HAS_SSE2)
87#if defined(HI_HAS_SSE2)
96 [[
nodiscard]] hi_force_inline
static float get(array_type a)
noexcept
98 if constexpr (I == 0) {
105 [[
nodiscard]] hi_force_inline
static array_type broadcast(
float a)
noexcept
110 [[
nodiscard]] hi_force_inline
static array_type broadcast(array_type a)
noexcept
115#if defined(HI_HAS_SSE2)
120 static_cast<int32_t
>(mask) << 28,
121 static_cast<int32_t
>(mask) << 29,
122 static_cast<int32_t
>(mask) << 30,
123 static_cast<int32_t
>(mask) << 31);
137 [[
nodiscard]] hi_force_inline
static array_type neg(array_type a)
noexcept
142 template<std::
size_t Mask>
143 [[
nodiscard]] hi_force_inline
constexpr static array_type neg_mask(array_type a)
noexcept
145 if constexpr (
Mask == 0) {
147 }
else if constexpr (
Mask == 0b1111) {
149#if defined(HI_HAS_SSE3)
150 }
else if constexpr (
Mask == 0b0101) {
159 [[
nodiscard]] hi_force_inline
static array_type inv(array_type a)
noexcept
161 return _xor(set_all_ones(), a);
164 [[
nodiscard]] hi_force_inline
static array_type rcp(array_type a)
noexcept
169 [[
nodiscard]] hi_force_inline
static array_type
sqrt(array_type a)
noexcept
174 [[
nodiscard]] hi_force_inline
static array_type rsqrt(array_type a)
noexcept
179#if defined(HI_HAS_SSE2)
180 [[
nodiscard]] hi_force_inline
static array_type
round(array_type a)
noexcept
182#if defined(HI_HAS_SSE4_1)
185 auto const a_ = L(a);
198#if defined(HI_HAS_SSE4_1)
199 [[
nodiscard]] hi_force_inline
static array_type
floor(array_type a)
noexcept
204 [[
nodiscard]] hi_force_inline
static array_type
ceil(array_type a)
noexcept
210 [[
nodiscard]] hi_force_inline
static array_type add(array_type a, array_type b)
noexcept
215 [[
nodiscard]] hi_force_inline
static array_type sub(array_type a, array_type b)
noexcept
220 template<std::
size_t Mask>
221 [[
nodiscard]] hi_force_inline
constexpr static array_type addsub_mask(array_type a, array_type b)
noexcept
223 if constexpr (
Mask == 0) {
225 }
else if constexpr (
Mask == 0b1111) {
227#if defined(HI_HAS_SSE3)
228 }
else if constexpr (
Mask == 0b1010) {
236 [[
nodiscard]] hi_force_inline
static array_type mul(array_type a, array_type b)
noexcept
241 [[
nodiscard]] hi_force_inline
static array_type
div(array_type a, array_type b)
noexcept
246 [[
nodiscard]] hi_force_inline
static array_type eq(array_type a, array_type b)
noexcept
251 [[
nodiscard]] hi_force_inline
static array_type ne(array_type a, array_type b)
noexcept
256 [[
nodiscard]] hi_force_inline
static array_type lt(array_type a, array_type b)
noexcept
261 [[
nodiscard]] hi_force_inline
static array_type gt(array_type a, array_type b)
noexcept
266 [[
nodiscard]] hi_force_inline
static array_type le(array_type a, array_type b)
noexcept
271 [[
nodiscard]] hi_force_inline
static array_type ge(array_type a, array_type b)
noexcept
276 [[
nodiscard]] hi_force_inline
static bool test(array_type a, array_type b)
noexcept
278#if defined(HI_HAS_SSE4_1)
280#elif defined(HI_HAS_SSE2)
286 return (std::bit_cast<uint32_t>(std::get<0>(
tmp)) | std::bit_cast<uint32_t>(std::get<1>(
tmp)) |
287 std::bit_cast<uint32_t>(std::get<2>(
tmp)) | std::bit_cast<uint32_t>(std::get<3>(
tmp))) == 0;
291 [[
nodiscard]] hi_force_inline
static array_type
max(array_type a, array_type b)
noexcept
296 [[
nodiscard]] hi_force_inline
static array_type
min(array_type a, array_type b)
noexcept
301 [[
nodiscard]] hi_force_inline
static array_type clamp(array_type v, array_type
lo, array_type
hi)
noexcept
306 [[
nodiscard]] hi_force_inline
static array_type _or(array_type a, array_type b)
noexcept
311 [[
nodiscard]] hi_force_inline
static array_type _and(array_type a, array_type b)
noexcept
316 [[
nodiscard]] hi_force_inline
static array_type _xor(array_type a, array_type b)
noexcept
321 [[
nodiscard]] hi_force_inline
static array_type andnot(array_type a, array_type b)
noexcept
326#if defined(HI_HAS_SSE2)
327 [[
nodiscard]] hi_force_inline
static array_type sll(array_type a,
unsigned int b)
noexcept
334#if defined(HI_HAS_SSE2)
335 [[
nodiscard]] hi_force_inline
static array_type srl(array_type a,
unsigned int b)
noexcept
342#if defined(HI_HAS_SSE2)
343 [[
nodiscard]] hi_force_inline
static array_type sra(array_type a,
unsigned int b)
noexcept
350 [[
nodiscard]] hi_force_inline
static array_type hadd(array_type a, array_type b)
noexcept
352#if defined(HI_HAS_SSE3)
355 auto const a_ = L(a);
356 auto const b_ = L(b);
363 [[
nodiscard]] hi_force_inline
static array_type hsub(array_type a, array_type b)
noexcept
365#if defined(HI_HAS_SSE3)
368 auto const a_ = L(a);
369 auto const b_ = L(b);
379 static_assert(
sizeof...(Indices) == 4);
383 for (
size_t i = 0; i != 4; ++i) {
384 auto const index = indices[i] < 0 ? i : indices[i];
385 r |= index << (i * 2);
391 [[
nodiscard]] hi_force_inline
static array_type
shuffle(array_type a)
noexcept
396 template<
size_t Mask>
397 [[
nodiscard]] hi_force_inline
static array_type blend(array_type a, array_type b)
noexcept
399#if defined(HI_HAS_SSE4_1)
405 constexpr auto indices =
406 (
Mask & 0b0001 ? 0b00'00'00'01U : 0b00'00'00'00U) |
407 (
Mask & 0b0010 ? 0b00'00'11'00U : 0b00'00'10'00U) |
408 (
Mask & 0b0100 ? 0b00'01'00'00U : 0b00'00'00'00U) |
409 (
Mask & 0b1000 ? 0b11'00'00'00U : 0b10'00'00'00U);
422 return {S(
a_), S(
b_), S(
c_), S(
d_)};
425 [[
nodiscard]] hi_force_inline
static array_type sum(array_type a)
noexcept
435 template<
size_t Mask>
436 [[
nodiscard]] hi_force_inline
static array_type dot(array_type a, array_type b)
noexcept
438#if defined(HI_HAS_SSE4_1)