HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
half_to_float.hpp
1// Copyright Take Vos 2023.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "macros.hpp"
8#if defined(HI_HAS_X86)
9#include "cpu_id_x86.hpp"
10#else
11#include "cpu_id_generic.hpp"
12#endif
13#include <cstdint>
14#include <bit>
15#include <type_traits>
16#include <array>
17
18#ifdef HI_HAS_X86
19#include <immintrin.h>
20#include <emmintrin.h>
21#include <smmintrin.h>
22#endif
23
24hi_export_module(hikocpu : half_to_float);
25
26hi_export namespace hi { inline namespace v1 {
27
32[[nodiscard]] constexpr float half_to_float_generic(uint16_t u16) noexcept
33{
34 auto u32 = static_cast<uint32_t>(u16);
35
36 auto sign = static_cast<int32_t>(u32);
37 sign <<= 16;
38 sign >>= 31;
39 auto mantissa = u32;
40 mantissa &= 0x3ff;
41 mantissa <<= 22;
42 auto exponent = static_cast<int32_t>(u32);
43 exponent >>= 10;
44 exponent &= 0x1f;
45 exponent -= 15;
46
47 if (exponent == -15) {
48 if (mantissa == 0) {
49 // Zero.
50 exponent = -127;
51
52 } else {
53 // Denormal, translate to normal.
54 auto shift = std::countl_zero(mantissa);
55 mantissa <<= shift + 1;
56 exponent -= shift;
57 }
58
59 } else if (exponent == 16) {
60 // Infinite or NaN.
61 exponent = 128;
62 }
63
64 mantissa >>= 9;
65 exponent += 127;
66
67 auto r = (sign << 31) | (exponent << 23) | mantissa;
68 return std::bit_cast<float>(r);
69}
70
71namespace detail {
72
73[[nodiscard]] consteval std::array<float, 65536> half_to_float_table_init() noexcept
74{
75 auto r = std::array<float, 65536>{};
76
77 for (size_t i = 0; i != 65536; ++i) {
78 r[i] = half_to_float_generic(static_cast<uint16_t>(i));
79 }
80
81 return r;
82}
83
84constexpr auto half_to_float_table = half_to_float_table_init();
85
86}
87
88#if HI_HAS_X86
89hi_target("sse,sse2,f16c") [[nodiscard]] inline std::array<float, 4> half_to_float_f16c(std::array<uint16_t, 4> v) noexcept
90{
91 auto const r = _mm_cvtph_ps(_mm_set1_epi64x(std::bit_cast<int64_t>(v)));
92
93 auto r_ = std::array<float, 4>{};
94 _mm_storeu_ps(r_.data(), r);
95 return r_;
96}
97
98[[nodiscard]] inline float half_to_float_f16c(uint16_t v) noexcept
99{
100 auto v_ = std::array<uint16_t, 4>{};
101 std::get<0>(v_) = v;
102 auto const r = half_to_float_f16c(v_);
103 return std::get<0>(r);
104}
105#endif
106
107[[nodiscard]] constexpr std::array<float, 4> half_to_float(std::array<uint16_t, 4> v) noexcept
108{
109 if (not std::is_constant_evaluated()) {
110#if HI_HAS_X86
111 if (has_f16c()) {
112 return half_to_float_f16c(v);
113 }
114#endif
115 }
116
117 auto r = std::array<float, 4>{};
118 for (size_t i = 0; i != 4; ++i) {
119 r[i] = detail::half_to_float_table[v[i]];
120 }
121 return r;
122}
123
124[[nodiscard]] constexpr float half_to_float(uint16_t v) noexcept
125{
126 if (not std::is_constant_evaluated()) {
127#if HI_HAS_X86
128 if (has_f16c()) {
129 auto const v_ = std::array<uint16_t,4>{v, 0, 0, 0};
130 auto const r = half_to_float_f16c(v_);
131 return std::get<0>(r);
132 }
133#endif
134 }
135
136 return detail::half_to_float_table[v];
137}
138
139
140}}
141
The HikoGUI namespace.
Definition array_generic.hpp:20
constexpr float half_to_float_generic(uint16_t u16) noexcept
Convert half to float.
Definition half_to_float.hpp:32
bool has_f16c() noexcept
This CPU has float-16 conversion instructions.
Definition cpu_id_x86.hpp:752
DOXYGEN BUG.
Definition algorithm_misc.hpp:20