7#include "array_intrinsic.hpp"
8#include "float_to_half.hpp"
23hi_export_module(hikocpu : array_intrinsic_f16x4);
25hi_export
namespace hi {
28#if defined(HI_HAS_SSE2)
30struct array_intrinsic<half, 4> {
31 using value_type = half;
32 using register_type = __m128i;
37 [[nodiscard]] hi_force_inline
static register_type L(array_type a)
noexcept
39 return _mm_set_epi64x(0, std::bit_cast<int64_t>(a));
44 [[nodiscard]] hi_force_inline
static array_type S(register_type a)
noexcept
46 return std::bit_cast<array_type>(_mm_cvtsi128_si64(a));
53 return std::bit_cast<array_type>(float_to_half_f16c(a));
55 return std::bit_cast<array_type>(float_to_half_sse2(a));
59 [[nodiscard]] hi_force_inline
static array_type undefined() noexcept
61 return S(_mm_undefined_si128());
64 [[nodiscard]] hi_force_inline
static array_type set_zero() noexcept
66 return S(_mm_setzero_si128());
69 [[nodiscard]] hi_force_inline
static array_type set_all_ones() noexcept
71 return S(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()));
74 [[nodiscard]] hi_force_inline
static array_type set_one() noexcept
76 auto const ones = _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128());
77 return S(_mm_srli_epi16(_mm_slli_epi16(ones, 12), 2));
80 [[nodiscard]] hi_force_inline
static array_type inv(array_type a)
noexcept
82 return _xor(set_all_ones(), a);
85 [[nodiscard]] hi_force_inline
static bool test(array_type a, array_type b)
noexcept
87#if defined(HI_HAS_SSE4_1)
88 return static_cast<bool>(_mm_testz_si128(L(a), L(b)));
90 return _mm_movemask_epi8(_mm_cmpeq_epi32(_mm_and_si128(L(a), L(b)), _mm_setzero_si128())) == 0xffff;
94 [[nodiscard]] hi_force_inline
static array_type _or(array_type a, array_type b)
noexcept
96 return S(_mm_or_si128(L(a), L(b)));
99 [[nodiscard]] hi_force_inline
static array_type _and(array_type a, array_type b)
noexcept
101 return S(_mm_and_si128(L(a), L(b)));
104 [[nodiscard]] hi_force_inline
static array_type _xor(array_type a, array_type b)
noexcept
106 return S(_mm_xor_si128(L(a), L(b)));
109 [[nodiscard]] hi_force_inline
static array_type andnot(array_type a, array_type b)
noexcept
111 return S(_mm_andnot_si128(L(a), L(b)));
114 [[nodiscard]] hi_force_inline
static array_type sll(array_type a,
unsigned int b)
noexcept
116 auto const b_ = _mm_set_epi32(0, 0, 0, b);
117 return S(_mm_sll_epi16(L(a), b_));
120 [[nodiscard]] hi_force_inline
static array_type srl(array_type a,
unsigned int b)
noexcept
122 auto const b_ = _mm_set_epi32(0, 0, 0, b);
123 return S(_mm_srl_epi16(L(a), b_));
126 [[nodiscard]] hi_force_inline
static array_type sra(array_type a,
unsigned int b)
noexcept
128 auto const b_ = _mm_set_epi32(0, 0, 0, b);
129 return S(_mm_sra_epi16(L(a), b_));
132#if defined(HI_HAS_SSE4_1)
133 template<
size_t Mask>
134 [[nodiscard]] hi_force_inline
static array_type blend(array_type a, array_type b)
noexcept
136 return S(_mm_blend_epi16(L(a), L(b), Mask));
The HikoGUI namespace.
Definition array_generic.hpp:20
bool has_f16c() noexcept
This CPU has float-16 conversion instructions.
Definition cpu_id_x86.hpp:752
DOXYGEN BUG.
Definition algorithm_misc.hpp:20