9#include "cpu_id_x86.hpp"
11#include "cpu_id_generic.hpp"
24hi_export_module(hikocpu : half_to_float);
26hi_export
namespace hi {
inline namespace v1 {
34 auto u32 =
static_cast<uint32_t
>(u16);
36 auto sign =
static_cast<int32_t
>(u32);
42 auto exponent =
static_cast<int32_t
>(u32);
47 if (exponent == -15) {
54 auto shift = std::countl_zero(mantissa);
55 mantissa <<= shift + 1;
59 }
else if (exponent == 16) {
67 auto r = (sign << 31) | (exponent << 23) | mantissa;
68 return std::bit_cast<float>(r);
77 for (
size_t i = 0; i != 65536; ++i) {
84constexpr auto half_to_float_table = half_to_float_table_init();
89hi_target(
"sse,sse2,f16c") [[nodiscard]]
inline std::array<float, 4> half_to_float_f16c(std::array<uint16_t, 4> v)
noexcept
91 auto const r = _mm_cvtph_ps(_mm_set1_epi64x(std::bit_cast<int64_t>(v)));
93 auto r_ = std::array<float, 4>{};
94 _mm_storeu_ps(r_.data(), r);
98[[nodiscard]]
inline float half_to_float_f16c(uint16_t v)
noexcept
100 auto v_ = std::array<uint16_t, 4>{};
102 auto const r = half_to_float_f16c(v_);
103 return std::get<0>(r);
107[[nodiscard]]
constexpr std::array<float, 4> half_to_float(std::array<uint16_t, 4> v)
noexcept
109 if (not std::is_constant_evaluated()) {
112 return half_to_float_f16c(v);
117 auto r = std::array<float, 4>{};
118 for (
size_t i = 0; i != 4; ++i) {
119 r[i] = detail::half_to_float_table[v[i]];
124[[nodiscard]]
constexpr float half_to_float(uint16_t v)
noexcept
126 if (not std::is_constant_evaluated()) {
129 auto const v_ = std::array<uint16_t,4>{v, 0, 0, 0};
130 auto const r = half_to_float_f16c(v_);
131 return std::get<0>(r);
136 return detail::half_to_float_table[v];
The HikoGUI namespace.
Definition array_generic.hpp:21
The HikoGUI API version 1.
Definition array_generic.hpp:22
constexpr float half_to_float_generic(uint16_t u16) noexcept
Convert half to float.
Definition half_to_float.hpp:32
bool has_f16c() noexcept
This CPU has float-16 conversion instructions.
Definition cpu_id_x86.hpp:752