HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
float16.hpp
1// Copyright Take Vos 2020-2021.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "required.hpp"
8#include <cstdint>
9#include <type_traits>
10
11namespace hi::inline v1 {
12
// Constants shared by the float-32 <-> float-16 bit-level conversions below.
//
// They are `inline constexpr` so that every translation unit including this
// header refers to the same objects; the conversion functions are implicitly
// inline and bind some of these constants by reference (std::min).

/// Exponent bias of the half-precision (binary16) format.
inline constexpr uint32_t float16_bias = 15;

/// Exponent bias of the single-precision (binary32) format.
inline constexpr uint32_t float32_bias = 127;

/// Difference between the two biases; added to a half exponent to get a float exponent.
inline constexpr uint32_t f32_to_f16_adjustment_exponent = float32_bias - float16_bias;

/// Float-32 biased exponent of the smallest normal half-float (half exponent field 0x01).
inline constexpr uint32_t f32_to_f16_lowest_normal_exponent = 0x01 + f32_to_f16_adjustment_exponent;

/// Float-32 biased exponent at which the half exponent field becomes all ones (0x1f).
inline constexpr uint32_t f32_to_f16_infinite_exponent = 0x1f + f32_to_f16_adjustment_exponent;

// The same three values moved into the exponent position of a float-32 bit
// pattern (the float-32 mantissa occupies the low 23 bits).
inline constexpr uint32_t f32_to_f16_adjustment = f32_to_f16_adjustment_exponent << 23;
inline constexpr uint32_t f32_to_f16_lowest_normal = f32_to_f16_lowest_normal_exponent << 23;
inline constexpr uint32_t f32_to_f16_infinite = f32_to_f16_infinite_exponent << 23;
21
22constexpr float cvtsh_ss(uint16_t value) noexcept
23{
24 // Convert the 16 bit values to 32 bit with leading zeros.
25 uint32_t u = value;
26
27 // Extract the sign bit.
28 hilet sign = (u >> 15) << 31;
29
30 // Strip the sign bit and align the exponent/mantissa boundary to a float 32.
31 u = (u << 17) >> 4;
32
33 // Adjust the bias. f32_to_f16_adjustment
34 u = u + f32_to_f16_adjustment;
35
36 // Get a mask of '1' bits when the half-float would be normal or infinite.
37 hilet is_normal = u > (f32_to_f16_lowest_normal - 1);
38
39 // Add the sign bit back in.
40 u = u | sign;
41
42 // Keep the value if normal, if denormal make it zero.
43 u = is_normal ? u : 0;
44
45 return std::bit_cast<float>(u);
46}
47
48constexpr uint16_t cvtss_sh(float value) noexcept
49{
50 // Interpret the floating point number as 32 bit-field.
51 auto u = std::bit_cast<uint32_t>(value);
52
53 // Get the sign of the floating point number as a bit mask of the upper 17 bits.
54 hilet sign = static_cast<uint32_t>(static_cast<int32_t>(u) >> 31) << 15;
55
56 // Strip sign bit.
57 u = (u << 1) >> 1;
58
59 // Get a mask of '1' bits when the half-float would be normal or infinite.
60 hilet is_normal = u > (f32_to_f16_lowest_normal - 1);
61
62 // Clamp the floating point number to where the half-float would be infinite.
63 u = std::min(u, f32_to_f16_infinite); // SSE4.1
64
65 // Convert the bias from float to half-float.
66 u = u - f32_to_f16_adjustment;
67
68 // Shift the float until it becomes a half-float. This truncates the mantissa.
69 u = u >> 13;
70
71 // Keep the value if normal, if denormal make it zero.
72 u = is_normal ? u : 0;
73
74 // Add the sign bit back in, also set the upper 16 bits so that saturated pack
75 // will work correctly when converting to int16.
76 u = u | sign;
77
78 // Saturate and pack the 32 bit integers to 16 bit integers.
79 return static_cast<uint16_t>(u);
80}
81
82class float16 {
83 uint16_t v;
84
85public:
86 constexpr float16() noexcept : v(0) {}
87 constexpr float16(float16 const &) noexcept = default;
88 constexpr float16(float16 &&) noexcept = default;
89 constexpr float16 &operator=(float16 const &) noexcept = default;
90 constexpr float16 &operator=(float16 &&) noexcept = default;
91
92 constexpr explicit float16(float other) noexcept : v(cvtss_sh(other)) {}
93 constexpr explicit float16(double other) noexcept : float16(static_cast<float>(other)) {}
94 constexpr explicit float16(long double other) noexcept : float16(static_cast<float>(other)) {}
95
96 constexpr float16 &operator=(float other) noexcept
97 {
98 v = cvtss_sh(other);
99 return *this;
100 }
101
102 constexpr operator float() const noexcept
103 {
104 return cvtsh_ss(v);
105 }
106
107 [[nodiscard]] static constexpr float16 from_uint16_t(uint16_t const rhs) noexcept
108 {
109 auto r = float16{};
110 r.v = rhs;
111 return r;
112 }
113
114 [[nodiscard]] constexpr uint16_t get() const noexcept
115 {
116 return v;
117 }
118
119 constexpr float16 &set(uint16_t rhs) noexcept
120 {
121 v = rhs;
122 return *this;
123 }
124
125 [[nodiscard]] std::size_t hash() const noexcept
126 {
127 return std::hash<uint16_t>{}(v);
128 }
129
130 [[nodiscard]] constexpr friend bool operator==(float16 const &lhs, float16 const &rhs) noexcept = default;
131
132 [[nodiscard]] constexpr friend float16 operator*(float16 const &lhs, float16 const &rhs) noexcept
133 {
134 return float16{static_cast<float>(lhs) * static_cast<float>(rhs)};
135 }
136};
137
138} // namespace hi::inline v1
139
140template<>
141struct std::hash<hi::float16> {
142 std::size_t operator()(hi::float16 const &rhs) noexcept
143 {
144 return rhs.hash();
145 }
146};
147
148
149template<>
150struct std::numeric_limits<hi::float16> {
151 using value_type = hi::float16;
152
153 static constexpr bool is_specialized = true;
154 static constexpr bool is_signed = true;
155 static constexpr bool is_integer = false;
156 static constexpr bool is_exact = false;
157 static constexpr bool has_infinity = true;
158 static constexpr bool has_quiet_NaN = true;
159 static constexpr bool has_signaling_NaN = false;
160 static constexpr float_denorm_style has_denorm = std::denorm_present;
161 static constexpr bool has_denorm_loss = false;
162 static constexpr float_round_style round_style = std::round_to_nearest;
163 static constexpr bool is_iec559 = true;
164 static constexpr bool is_bounded = true;
165 static constexpr bool is_modulo = false;
166 static constexpr int digits = 10;
167 static constexpr int digits10 = 4;
168 static constexpr int max_digits10 = 4;
169 static constexpr int min_exponent = -14;
170 static constexpr int min_exponent10 = -3;
171 static constexpr int max_exponent = 15;
172 static constexpr int max_exponent10 = 3;
173 static constexpr bool traps = false;
174 static constexpr bool tinyness_before = false;
175
176 static constexpr value_type min() noexcept
177 {
178 return hi::float16::from_uint16_t(0x0400);
179 }
180
181 static constexpr value_type lowest() noexcept
182 {
183 return hi::float16::from_uint16_t(0xfbff);
184 }
185
186 static constexpr value_type max() noexcept
187 {
188 return hi::float16::from_uint16_t(0x7bff);
189 }
190
191 static constexpr value_type epsilon() noexcept
192 {
193 return hi::float16::from_uint16_t(0xfbff);
194 }
195
196 static constexpr value_type round_error() noexcept
197 {
198 return hi::float16::from_uint16_t(0x3800); // 0.5
199 }
200
201 static constexpr value_type infinity() noexcept
202 {
203 return hi::float16::from_uint16_t(0x7c00);
204 }
205
206 static constexpr value_type quiet_NaN() noexcept
207 {
208 return hi::float16::from_uint16_t(0x7c01);
209 }
210
211 static constexpr value_type signaling_NaN() noexcept
212 {
213 return hi::float16::from_uint16_t(0x7e01);
214 }
215
216 static constexpr value_type denorm_min() noexcept
217 {
218 return hi::float16::from_uint16_t(0x0001);
219 }
220};
This file includes required definitions.
#define hilet
Invariant should be the default for variables.
Definition required.hpp:23
Definition float16.hpp:82
T denorm_min(T... args)
T epsilon(T... args)
T infinity(T... args)
T lowest(T... args)
T max(T... args)
T min(T... args)
T operator()(T... args)
T quiet_NaN(T... args)
T round_error(T... args)
T signaling_NaN(T... args)