HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
float16.hpp
1// Copyright Take Vos 2020-2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "../macros.hpp"
8#include <cstdint>
9#include <type_traits>
10#include <bit>
11#include <algorithm>
12#include <numeric>
13#include <format>
14
15hi_export_module(hikogui.utility.float16);
16
17hi_warning_push();
18// C26472: Don't use static_cast for arithmetic conversions, Use brace initialization, gsl::narrow_cast or gsl::narrow (type.1).
19// static_cast here is used to extract bits and cause sign-extension.
20hi_warning_ignore_msvc(26472);
21
22namespace hi::inline v1 {
23
// Constants shared by the float32 <-> float16 bit-level conversions below.
//
// They are `inline constexpr` so that every translation unit that includes this
// header refers to the same objects; a plain namespace-scope `constexpr` variable
// has internal linkage, which is an ODR hazard when it is odr-used (for example
// bound to a reference) from an inline function defined in this header.

// Exponent bias of the 16 bit (5 exponent bits) and 32 bit (8 exponent bits) formats.
inline constexpr uint32_t float16_bias = 15;
inline constexpr uint32_t float32_bias = 127;

// Difference between the two biases; added to a half-float exponent to get the
// float32 exponent, subtracted for the opposite direction.
inline constexpr uint32_t f32_to_f16_adjustment_exponent = float32_bias - float16_bias;

// float32 exponent field that corresponds to the smallest normal half-float (half exponent 0x01).
inline constexpr uint32_t f32_to_f16_lowest_normal_exponent = 0x01 + f32_to_f16_adjustment_exponent;

// float32 exponent field that corresponds to the half-float infinity exponent (half exponent 0x1f).
inline constexpr uint32_t f32_to_f16_infinite_exponent = 0x1f + f32_to_f16_adjustment_exponent;

// The same three values shifted into the float32 exponent position (bit 23 and up),
// so they can be compared with / added to a sign-stripped float32 bit pattern.
inline constexpr uint32_t f32_to_f16_adjustment = f32_to_f16_adjustment_exponent << 23;
inline constexpr uint32_t f32_to_f16_lowest_normal = f32_to_f16_lowest_normal_exponent << 23;
inline constexpr uint32_t f32_to_f16_infinite = f32_to_f16_infinite_exponent << 23;
32
33constexpr float cvtsh_ss(uint16_t value) noexcept
34{
35 // Convert the 16 bit values to 32 bit with leading zeros.
36 uint32_t u = value;
37
38 // Extract the sign bit.
39 hilet sign = (u >> 15) << 31;
40
41 // Strip the sign bit and align the exponent/mantissa boundary to a float 32.
42 u = (u << 17) >> 4;
43
44 // Adjust the bias. f32_to_f16_adjustment
45 u = u + f32_to_f16_adjustment;
46
47 // Get a mask of '1' bits when the half-float would be normal or infinite.
48 hilet is_normal = u > (f32_to_f16_lowest_normal - 1);
49
50 // Add the sign bit back in.
51 u = u | sign;
52
53 // Keep the value if normal, if denormal make it zero.
54 u = is_normal ? u : 0;
55
56 return std::bit_cast<float>(u);
57}
58
59constexpr uint16_t cvtss_sh(float value) noexcept
60{
61 // Interpret the floating point number as 32 bit-field.
62 auto u = std::bit_cast<uint32_t>(value);
63
64 // Get the sign of the floating point number as a bit mask of the upper 17 bits.
65 hilet sign = static_cast<uint32_t>(static_cast<int32_t>(u) >> 31) << 15;
66
67 // Strip sign bit.
68 u = (u << 1) >> 1;
69
70 // Get a mask of '1' bits when the half-float would be normal or infinite.
71 hilet is_normal = u > (f32_to_f16_lowest_normal - 1);
72
73 // Clamp the floating point number to where the half-float would be infinite.
74 u = std::min(u, f32_to_f16_infinite); // SSE4.1
75
76 // Convert the bias from float to half-float.
77 u = u - f32_to_f16_adjustment;
78
79 // Shift the float until it becomes a half-float. This truncates the mantissa.
80 u = u >> 13;
81
82 // Keep the value if normal, if denormal make it zero.
83 u = is_normal ? u : 0;
84
85 // Add the sign bit back in, also set the upper 16 bits so that saturated pack
86 // will work correctly when converting to int16.
87 u = u | sign;
88
89 // Saturate and pack the 32 bit integers to 16 bit integers.
90 return static_cast<uint16_t>(u);
91}
92
93hi_export struct float16 {
94 uint16_t v = 0;
95
96 constexpr float16() noexcept = default;
97 ~float16() = default;
98 constexpr float16(float16 const&) noexcept = default;
99 constexpr float16(float16&&) noexcept = default;
100 constexpr float16& operator=(float16 const&) noexcept = default;
101 constexpr float16& operator=(float16&&) noexcept = default;
102
103 constexpr explicit float16(float other) noexcept : v(cvtss_sh(other)) {}
104 constexpr explicit float16(double other) noexcept : float16(static_cast<float>(other)) {}
105 constexpr explicit float16(long double other) noexcept : float16(static_cast<float>(other)) {}
106
107 constexpr float16& operator=(float other) noexcept
108 {
109 v = cvtss_sh(other);
110 return *this;
111 }
112
113 constexpr operator float() const noexcept
114 {
115 return cvtsh_ss(v);
116 }
117
118 [[nodiscard]] constexpr static float16 from_uint16_t(uint16_t const rhs) noexcept
119 {
120 auto r = float16{};
121 r.v = rhs;
122 return r;
123 }
124
125 [[nodiscard]] constexpr uint16_t get() const noexcept
126 {
127 return v;
128 }
129
130 constexpr float16& set(uint16_t rhs) noexcept
131 {
132 v = rhs;
133 return *this;
134 }
135
136 [[nodiscard]] std::size_t hash() const noexcept
137 {
138 return std::hash<uint16_t>{}(v);
139 }
140
141 [[nodiscard]] constexpr friend bool operator==(float16 const& lhs, float16 const& rhs) noexcept
142 {
143 return static_cast<float>(lhs) == static_cast<float>(rhs);
144 }
145
146 [[nodiscard]] constexpr friend auto operator<=>(float16 const& lhs, float16 const& rhs) noexcept
147 {
148 return static_cast<float>(lhs) <=> static_cast<float>(rhs);
149 }
150
151#define HI_X_binary_math_op(op) \
152 [[nodiscard]] constexpr friend float16 operator op(float16 const& lhs, float16 const& rhs) noexcept \
153 { \
154 return float16{static_cast<float>(lhs) op static_cast<float>(rhs)}; \
155 }
156
157 // clang-format off
158 HI_X_binary_math_op(+)
159 HI_X_binary_math_op(-)
160 HI_X_binary_math_op(*)
161 HI_X_binary_math_op(/)
162 // clang-format on
163#undef HI_X_binary_math_op
164
165 //[[nodiscard]] constexpr friend float16 operator%(float16 const& lhs, float16 const& rhs) noexcept
166 //{
167 // hilet lhs_ = static_cast<float>(lhs);
168 // hilet rhs_ = static_cast<float>(rhs);
169 // hilet div_result = std::floor(lhs_ / rhs_);
170 // return float16{lhs_ - (div_result * rhs_)};
171 //}
172
173#define HI_X_binary_bit_op(op) \
174 [[nodiscard]] constexpr friend float16 operator op(float16 const& lhs, float16 const& rhs) noexcept \
175 { \
176 return float16::from_uint16_t(lhs.v op rhs.v); \
177 }
178
179 // clang-format off
180 HI_X_binary_bit_op(|)
181 HI_X_binary_bit_op(&)
182 HI_X_binary_bit_op(^)
183 // clang-format on
184#undef HI_X_binary_bit_op
185};
186
187// Check if float16 can be std::bit_cast<uint16_t>().
188static_assert(sizeof(float16) == sizeof(uint16_t));
189static_assert(std::is_trivially_copy_constructible_v<float16>);
190static_assert(std::is_trivially_move_constructible_v<float16>);
191static_assert(std::is_trivially_copy_assignable_v<float16>);
192static_assert(std::is_trivially_move_assignable_v<float16>);
193static_assert(std::is_trivially_destructible_v<float16>);
194
195static_assert(requires(float16 a) { std::bit_cast<uint16_t>(a); });
196static_assert(requires(uint16_t a) { std::bit_cast<float16>(a); });
197
198} // namespace hi::inline v1
199
200hi_export template<>
201struct std::hash<hi::float16> {
202 std::size_t operator()(hi::float16 const& rhs) noexcept
203 {
204 return rhs.hash();
205 }
206};
207
/** std::format support for hi::float16.
 *
 * Parsing of the format specification is inherited from the `float` formatter;
 * the value is converted to `float` and formatted by that same formatter.
 */
hi_export template<typename CharT>
struct std::formatter<hi::float16, CharT> : std::formatter<float, CharT> {
    constexpr auto format(hi::float16 const& t, auto& fc) const
    {
        using float_formatter = std::formatter<float, CharT>;

        float const as_float = static_cast<float>(t);
        return float_formatter::format(as_float, fc);
    }
};
215
216hi_export template<>
217struct std::numeric_limits<hi::float16> {
218 using value_type = hi::float16;
219
220 constexpr static bool is_specialized = true;
221 constexpr static bool is_signed = true;
222 constexpr static bool is_integer = false;
223 constexpr static bool is_exact = false;
224 constexpr static bool has_infinity = true;
225 constexpr static bool has_quiet_NaN = true;
226 constexpr static bool has_signaling_NaN = false;
227 constexpr static float_round_style round_style = std::round_to_nearest;
228 constexpr static bool is_iec559 = true;
229 constexpr static bool is_bounded = true;
230 constexpr static bool is_modulo = false;
231 constexpr static int digits = 10;
232 constexpr static int digits10 = 4;
233 constexpr static int max_digits10 = 4;
234 constexpr static int min_exponent = -14;
235 constexpr static int min_exponent10 = -3;
236 constexpr static int max_exponent = 15;
237 constexpr static int max_exponent10 = 3;
238 constexpr static bool traps = false;
239 constexpr static bool tinyness_before = false;
240
241 constexpr static value_type min() noexcept
242 {
243 return hi::float16::from_uint16_t(0x0400);
244 }
245
246 constexpr static value_type lowest() noexcept
247 {
248 return hi::float16::from_uint16_t(0xfbff);
249 }
250
251 constexpr static value_type max() noexcept
252 {
253 return hi::float16::from_uint16_t(0x7bff);
254 }
255
256 constexpr static value_type epsilon() noexcept
257 {
258 return hi::float16::from_uint16_t(0xfbff);
259 }
260
261 constexpr static value_type round_error() noexcept
262 {
263 return hi::float16::from_uint16_t(0x3800); // 0.5
264 }
265
266 constexpr static value_type infinity() noexcept
267 {
268 return hi::float16::from_uint16_t(0x7c00);
269 }
270
271 constexpr static value_type quiet_NaN() noexcept
272 {
273 return hi::float16::from_uint16_t(0x7c01);
274 }
275
276 constexpr static value_type signaling_NaN() noexcept
277 {
278 return hi::float16::from_uint16_t(0x7e01);
279 }
280
281 constexpr static value_type denorm_min() noexcept
282 {
283 return hi::float16::from_uint16_t(0x0001);
284 }
285};
286
287hi_warning_pop();
DOXYGEN BUG.
Definition algorithm.hpp:16
geometry/margins.hpp
Definition lookahead_iterator.hpp:5
constexpr Out narrow_cast(In const &rhs) noexcept
Cast numeric values without loss of precision.
Definition cast.hpp:377
Definition float16.hpp:93
T denorm_min(T... args)
T epsilon(T... args)
T infinity(T... args)
T lowest(T... args)
T max(T... args)
T min(T... args)
T operator()(T... args)
T quiet_NaN(T... args)
T round_error(T... args)
T signaling_NaN(T... args)