HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
float16.hpp
1// Copyright Take Vos 2020-2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "utility.hpp"
8#include "architecture.hpp"
9#include <cstdint>
10#include <type_traits>
11
12hi_warning_push();
13// C26472: Don't use static_cast for arithmetic conversions, Use brace initialization, gsl::narrow_cast or gsl::narrow (type.1).
14// static_cast here is used to extract bits and cause sign-extension.
15hi_warning_ignore_msvc(26472);
16
17namespace hi::inline v1 {
18
/** Exponent bias of a 16 bit half-float. */
inline constexpr uint32_t float16_bias = 15;

/** Exponent bias of a 32 bit float. */
inline constexpr uint32_t float32_bias = 127;

/** Difference between the float and half-float exponent bias. */
inline constexpr uint32_t f32_to_f16_adjustment_exponent = float32_bias - float16_bias;

/** Biased float exponent that corresponds with the lowest normal half-float exponent (0x01). */
inline constexpr uint32_t f32_to_f16_lowest_normal_exponent = 0x01 + f32_to_f16_adjustment_exponent;

/** Biased float exponent that corresponds with the half-float infinite exponent (0x1f). */
inline constexpr uint32_t f32_to_f16_infinite_exponent = 0x1f + f32_to_f16_adjustment_exponent;

// The same three values, shifted by 23 bits so that they line up with the
// exponent field of a 32 bit float bit pattern.
inline constexpr uint32_t f32_to_f16_adjustment = f32_to_f16_adjustment_exponent << 23;
inline constexpr uint32_t f32_to_f16_lowest_normal = f32_to_f16_lowest_normal_exponent << 23;
inline constexpr uint32_t f32_to_f16_infinite = f32_to_f16_infinite_exponent << 23;
27
28constexpr float cvtsh_ss(uint16_t value) noexcept
29{
30 // Convert the 16 bit values to 32 bit with leading zeros.
31 uint32_t u = value;
32
33 // Extract the sign bit.
34 hilet sign = (u >> 15) << 31;
35
36 // Strip the sign bit and align the exponent/mantissa boundary to a float 32.
37 u = (u << 17) >> 4;
38
39 // Adjust the bias. f32_to_f16_adjustment
40 u = u + f32_to_f16_adjustment;
41
42 // Get a mask of '1' bits when the half-float would be normal or infinite.
43 hilet is_normal = u > (f32_to_f16_lowest_normal - 1);
44
45 // Add the sign bit back in.
46 u = u | sign;
47
48 // Keep the value if normal, if denormal make it zero.
49 u = is_normal ? u : 0;
50
51 return std::bit_cast<float>(u);
52}
53
54constexpr uint16_t cvtss_sh(float value) noexcept
55{
56 // Interpret the floating point number as 32 bit-field.
57 auto u = std::bit_cast<uint32_t>(value);
58
59 // Get the sign of the floating point number as a bit mask of the upper 17 bits.
60 hilet sign = static_cast<uint32_t>(static_cast<int32_t>(u) >> 31) << 15;
61
62 // Strip sign bit.
63 u = (u << 1) >> 1;
64
65 // Get a mask of '1' bits when the half-float would be normal or infinite.
66 hilet is_normal = u > (f32_to_f16_lowest_normal - 1);
67
68 // Clamp the floating point number to where the half-float would be infinite.
69 u = std::min(u, f32_to_f16_infinite); // SSE4.1
70
71 // Convert the bias from float to half-float.
72 u = u - f32_to_f16_adjustment;
73
74 // Shift the float until it becomes a half-float. This truncates the mantissa.
75 u = u >> 13;
76
77 // Keep the value if normal, if denormal make it zero.
78 u = is_normal ? u : 0;
79
80 // Add the sign bit back in, also set the upper 16 bits so that saturated pack
81 // will work correctly when converting to int16.
82 u = u | sign;
83
84 // Saturate and pack the 32 bit integers to 16 bit integers.
85 return static_cast<uint16_t>(u);
86}
87
88class float16 {
89 uint16_t v;
90
91public:
92 constexpr float16() noexcept : v(0) {}
93 constexpr float16(float16 const &) noexcept = default;
94 constexpr float16(float16 &&) noexcept = default;
95 constexpr float16 &operator=(float16 const &) noexcept = default;
96 constexpr float16 &operator=(float16 &&) noexcept = default;
97
98 constexpr explicit float16(float other) noexcept : v(cvtss_sh(other)) {}
99 constexpr explicit float16(double other) noexcept : float16(static_cast<float>(other)) {}
100 constexpr explicit float16(long double other) noexcept : float16(static_cast<float>(other)) {}
101
102 constexpr float16 &operator=(float other) noexcept
103 {
104 v = cvtss_sh(other);
105 return *this;
106 }
107
108 constexpr operator float() const noexcept
109 {
110 return cvtsh_ss(v);
111 }
112
113 [[nodiscard]] static constexpr float16 from_uint16_t(uint16_t const rhs) noexcept
114 {
115 auto r = float16{};
116 r.v = rhs;
117 return r;
118 }
119
120 [[nodiscard]] constexpr uint16_t get() const noexcept
121 {
122 return v;
123 }
124
125 constexpr float16 &set(uint16_t rhs) noexcept
126 {
127 v = rhs;
128 return *this;
129 }
130
131 [[nodiscard]] std::size_t hash() const noexcept
132 {
133 return std::hash<uint16_t>{}(v);
134 }
135
136 [[nodiscard]] constexpr friend bool operator==(float16 const &lhs, float16 const &rhs) noexcept = default;
137
138 [[nodiscard]] constexpr friend float16 operator*(float16 const &lhs, float16 const &rhs) noexcept
139 {
140 return float16{static_cast<float>(lhs) * static_cast<float>(rhs)};
141 }
142};
143
144} // namespace hi::inline v1
145
146template<>
147struct std::hash<hi::float16> {
148 std::size_t operator()(hi::float16 const &rhs) noexcept
149 {
150 return rhs.hash();
151 }
152};
153
154
155template<>
156struct std::numeric_limits<hi::float16> {
157 using value_type = hi::float16;
158
159 static constexpr bool is_specialized = true;
160 static constexpr bool is_signed = true;
161 static constexpr bool is_integer = false;
162 static constexpr bool is_exact = false;
163 static constexpr bool has_infinity = true;
164 static constexpr bool has_quiet_NaN = true;
165 static constexpr bool has_signaling_NaN = false;
166 static constexpr float_denorm_style has_denorm = std::denorm_present;
167 static constexpr bool has_denorm_loss = false;
168 static constexpr float_round_style round_style = std::round_to_nearest;
169 static constexpr bool is_iec559 = true;
170 static constexpr bool is_bounded = true;
171 static constexpr bool is_modulo = false;
172 static constexpr int digits = 10;
173 static constexpr int digits10 = 4;
174 static constexpr int max_digits10 = 4;
175 static constexpr int min_exponent = -14;
176 static constexpr int min_exponent10 = -3;
177 static constexpr int max_exponent = 15;
178 static constexpr int max_exponent10 = 3;
179 static constexpr bool traps = false;
180 static constexpr bool tinyness_before = false;
181
182 static constexpr value_type min() noexcept
183 {
184 return hi::float16::from_uint16_t(0x0400);
185 }
186
187 static constexpr value_type lowest() noexcept
188 {
189 return hi::float16::from_uint16_t(0xfbff);
190 }
191
192 static constexpr value_type max() noexcept
193 {
194 return hi::float16::from_uint16_t(0x7bff);
195 }
196
197 static constexpr value_type epsilon() noexcept
198 {
199 return hi::float16::from_uint16_t(0xfbff);
200 }
201
202 static constexpr value_type round_error() noexcept
203 {
204 return hi::float16::from_uint16_t(0x3800); // 0.5
205 }
206
207 static constexpr value_type infinity() noexcept
208 {
209 return hi::float16::from_uint16_t(0x7c00);
210 }
211
212 static constexpr value_type quiet_NaN() noexcept
213 {
214 return hi::float16::from_uint16_t(0x7c01);
215 }
216
217 static constexpr value_type signaling_NaN() noexcept
218 {
219 return hi::float16::from_uint16_t(0x7e01);
220 }
221
222 static constexpr value_type denorm_min() noexcept
223 {
224 return hi::float16::from_uint16_t(0x0001);
225 }
226};
227
228hi_warning_pop();
Utilities used by the HikoGUI library itself.
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
Functions and macros for handling architectural differences between compilers, CPUs and operating systems.
DOXYGEN BUG.
Definition algorithm.hpp:15
The HikoGUI namespace.
Definition ascii.hpp:19
Definition float16.hpp:88
T denorm_min(T... args)
T epsilon(T... args)
T infinity(T... args)
T lowest(T... args)
T max(T... args)
T min(T... args)
T operator()(T... args)
T quiet_NaN(T... args)
T round_error(T... args)
T signaling_NaN(T... args)