7#if defined(HI_HAS_SSE4_1)
11#if defined(HI_HAS_SSSE3)
14#if defined(HI_HAS_SSE3)
17#if defined(HI_HAS_SSE2)
20#if defined(HI_HAS_SSE)
23#include "../utility/module.hpp"
25namespace hi::inline
v1 {
27inline __m128 _mm_cvtph_ps_sse2(__m128i value)
noexcept
29 hilet f32_to_f16_constants = _mm_set_epi32(0, f32_to_f16_adjustment, f32_to_f16_infinite, f32_to_f16_lowest_normal - 1);
32 auto u = _mm_unpacklo_epi16(value, _mm_setzero_si128());
35 hilet sign = _mm_slli_epi32(_mm_srli_epi32(u, 15), 31);
38 u = _mm_srli_epi32(_mm_slli_epi32(u, 17), 4);
41 hilet f32_to_f16_adjustment_ = _mm_shuffle_epi32(f32_to_f16_constants, 0b10'10'10'10);
42 u = _mm_add_epi32(u, f32_to_f16_adjustment_);
45 hilet f32_to_f16_lowest_normal_ = _mm_shuffle_epi32(f32_to_f16_constants, 0b00'00'00'00);
46 hilet is_normal = _mm_cmpgt_epi32(u, f32_to_f16_lowest_normal_);
49 u = _mm_or_si128(u, sign);
52 u = _mm_and_si128(u, is_normal);
54 return _mm_castsi128_ps(u);
57inline __m128i _mm_cvtps_ph_sse4_1(__m128 value)
noexcept
59 hilet f32_to_f16_constants = _mm_set_epi32(0, f32_to_f16_adjustment, f32_to_f16_infinite, f32_to_f16_lowest_normal - 1);
62 auto u = _mm_castps_si128(value);
65 hilet sign = _mm_slli_epi32(_mm_srai_epi32(u, 31), 15);
68 u = _mm_srli_epi32(_mm_slli_epi32(u, 1), 1);
71 hilet f32_to_f16_lowest_normal_ = _mm_shuffle_epi32(f32_to_f16_constants, 0b00'00'00'00);
72 hilet is_normal = _mm_cmpgt_epi32(u, f32_to_f16_lowest_normal_);
75 hilet f32_to_f16_infinite_ = _mm_shuffle_epi32(f32_to_f16_constants, 0b01'01'01'01);
76 u = _mm_min_epi32(u, f32_to_f16_infinite_);
79 hilet f32_to_f16_adjustment_ = _mm_shuffle_epi32(f32_to_f16_constants, 0b10'10'10'10);
80 u = _mm_sub_epi32(u, f32_to_f16_adjustment_);
83 u = _mm_srli_epi32(u, 13);
86 u = _mm_and_si128(u, is_normal);
90 u = _mm_or_si128(u, sign);
93 return _mm_packs_epi32(u, u);
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
DOXYGEN BUG.
Definition algorithm.hpp:13