HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
grapheme.hpp
1// Copyright Take Vos 2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "../utility/module.hpp"
8#include "../i18n/module.hpp"
9#include "../container/module.hpp"
10#include "../telemetry/log.hpp"
11#include "../strings.hpp"
12#include "unicode_normalization.hpp"
13#include "ucd_general_categories.hpp"
14#include "phrasing.hpp"
15#include <cstdint>
16#include <string>
17#include <string_view>
18#include <cstddef>
19#include <memory>
20#include <vector>
21#include <algorithm>
22#include <bit>
23
24namespace hi::inline v1 {
25namespace detail {
26
27inline auto long_graphemes = ::hi::stable_set<std::u32string>{};
28
29} // namespace detail
30
/** Tag type selecting the `grapheme` constructor that takes code-points as-is.
 *
 * The tagged constructor stores the given code-points without normalizing
 * them; the untagged constructor normalizes to NFC first and then delegates
 * to the tagged one.
 */
struct composed_t {
};
32
44struct grapheme {
45 using value_type = uint64_t;
46
59 value_type _value;
60
61 constexpr grapheme() noexcept = default;
62 constexpr grapheme(grapheme const&) noexcept = default;
63 constexpr grapheme(grapheme&&) noexcept = default;
64 constexpr grapheme& operator=(grapheme const&) noexcept = default;
65 constexpr grapheme& operator=(grapheme&&) noexcept = default;
66
67 constexpr grapheme(intrinsic_t, value_type value) : _value(value) {}
68
69 constexpr value_type& intrinsic() noexcept
70 {
71 return _value;
72 }
73
74 constexpr value_type const& intrinsic() const noexcept
75 {
76 return _value;
77 }
78
81 constexpr grapheme(char32_t code_point) noexcept : _value(char_cast<value_type>(code_point))
82 {
83 hi_axiom(code_point <= 0x10'ffff);
84 }
85
86 constexpr grapheme(char ascii_char) noexcept : _value(char_cast<value_type>(ascii_char))
87 {
88 hi_axiom(ascii_char >= 0 and ascii_char <= 0x7f);
89 }
90
93 constexpr grapheme& operator=(char32_t code_point) noexcept
94 {
95 _value = char_cast<value_type>(code_point);
96 return *this;
97 }
98
101 constexpr grapheme& operator=(char ascii_char) noexcept
102 {
103 hi_axiom(ascii_char >= 0 and ascii_char <= 0x7f);
104 _value = char_cast<value_type>(ascii_char);
105 return *this;
106 }
107
112 constexpr grapheme(composed_t, std::u32string_view code_points) noexcept
113 {
114 switch (code_points.size()) {
115 case 0:
117
118 case 1:
119 _value = char_cast<value_type>(code_points[0]);
120 break;
121
122 default:
123 hilet index = detail::long_graphemes.insert(std::u32string{code_points});
124 if (index < 0x0f'0000) {
125 _value = narrow_cast<value_type>(index + 0x11'0000);
126
127 } else {
128 [[unlikely]] hi_log_error_once(
129 "grapheme::error::too-many", "Too many long graphemes encoded, replacing with U+fffd");
130 _value = char_cast<value_type>(U'\ufffd');
131 }
132 }
133 }
134
139 constexpr explicit grapheme(std::u32string_view code_points) noexcept :
140 grapheme(composed_t{}, unicode_normalize(code_points, unicode_normalize_config::NFC()))
141 {
142 }
143
146 [[nodiscard]] constexpr uint32_t index() const noexcept
147 {
148 return _value & 0x1f'ffff;
149 }
150
151 [[nodiscard]] constexpr iso_639 language() const noexcept
152 {
153 return iso_639{intrinsic_t{}, narrow_cast<uint16_t>((_value >> 21) & 0x7fff)};
154 }
155
156 constexpr void set_language(iso_639 rhs) noexcept
157 {
158 hi_axiom(rhs.intrinsic() <= 0x7fff);
159
160 constexpr auto mask = ~(value_type{0x7fff} << 21);
161 _value &= mask;
162 _value |= wide_cast<value_type>(rhs.intrinsic()) << 21;
163 }
164
165 [[nodiscard]] constexpr iso_15924 script() const noexcept
166 {
167 return iso_15924{intrinsic_t{}, narrow_cast<uint16_t>((_value >> 36) & 0x3ff)};
168 }
169
170 constexpr void set_script(iso_15924 rhs) noexcept
171 {
172 hi_axiom(rhs.intrinsic() < 1000);
173
174 constexpr auto mask = ~(value_type{0x3ff} << 36);
175 _value &= mask;
176 _value |= wide_cast<value_type>(rhs.intrinsic()) << 36;
177 }
178
179 [[nodiscard]] constexpr iso_3166 region() const noexcept
180 {
181 return iso_3166{intrinsic_t{}, narrow_cast<uint16_t>((_value >> 46) & 0x3ff)};
182 }
183
184 constexpr void set_region(iso_3166 rhs) noexcept
185 {
186 hi_axiom(rhs.intrinsic() < 1000);
187
188 constexpr auto mask = ~(value_type{0x3ff} << 46);
189 _value &= mask;
190 _value |= wide_cast<value_type>(rhs.intrinsic()) << 46;
191 }
192
193 [[nodiscard]] constexpr hi::language_tag language_tag() const noexcept
194 {
195 auto tmp = _value;
196 tmp >>= 21;
197 hilet language_ = iso_639{intrinsic_t{}, narrow_cast<uint16_t>(tmp & 0x7fff)};
198 tmp >>= 15;
199 hilet script_ = iso_15924{intrinsic_t{}, narrow_cast<uint16_t>(tmp & 0x3ff)};
200 tmp >>= 15;
201 hilet region_ = iso_3166{intrinsic_t{}, narrow_cast<uint16_t>(tmp & 0x3ff)};
202 return hi::language_tag{language_, script_, region_};
203 }
204
205 constexpr void set_language_tag(hi::language_tag rhs) noexcept
206 {
207 hi_axiom(rhs.region.intrinsic() <= 0x7fff);
208 hi_axiom(rhs.script.intrinsic() < 1000);
209 hi_axiom(rhs.language.intrinsic() < 1000);
210
211 auto tmp = wide_cast<value_type>(rhs.region.intrinsic());
212 tmp <<= 10;
213 tmp |= rhs.script.intrinsic();
214 tmp <<= 15;
215 tmp |= rhs.language.intrinsic();
216 tmp <<= 21;
217
218 constexpr auto mask = ~(uint64_t{0x7'ffff} << 21);
219 _value &= mask;
220 _value |= tmp;
221 }
222
223 [[nodiscard]] constexpr hi::phrasing phrasing() const noexcept
224 {
225 return static_cast<hi::phrasing>((_value >> 56) & 0x3f);
226 }
227
228 constexpr void set_phrasing(hi::phrasing rhs) noexcept
229 {
230 hi_axiom(std::to_underlying(rhs) <= 0x3f);
231
232 constexpr auto mask = ~(value_type{0x3f} << 56);
233 _value &= mask;
234 _value |= static_cast<value_type>(rhs) << 56;
235 }
236
237 [[nodiscard]] std::u32string const& long_grapheme() const noexcept
238 {
239 hilet i = index();
240 hi_axiom(i > 0x10'ffff and i <= 0x1f'ffff);
241 return detail::long_graphemes[i - 0x11'0000];
242 }
243
246 [[nodiscard]] constexpr std::size_t size() const noexcept
247 {
248 return index() <= 0x10'ffff ? 1_uz : long_grapheme().size();
249 }
250
257 [[nodiscard]] constexpr char32_t operator[](size_t i) const noexcept
258 {
259 if (hilet code_point = index(); code_point <= 0x10'ffff) {
260 hi_axiom(i == 0);
261 return char_cast<char32_t>(code_point);
262 } else {
263 hi_axiom_bounds(i, *this);
264 return long_grapheme()[i];
265 }
266 }
267
275 template<size_t I>
276 [[nodiscard]] friend constexpr char32_t get(grapheme const& rhs) noexcept
277 {
278 if (hilet code_point = rhs.index(); code_point <= 0x10'ffff) {
279 hi_axiom(I == 0);
280 return code_point;
281
282 } else {
283 hi_axiom_bounds(I, rhs);
284 return rhs.long_grapheme()[I];
285 }
286 }
287
290 [[nodiscard]] constexpr std::u32string composed() const noexcept
291 {
292 if (hilet code_point = index(); code_point <= 0x10'ffff) {
293 return std::u32string{char_cast<char32_t>(code_point)};
294
295 } else {
296 return long_grapheme();
297 }
298 }
299
302 [[nodiscard]] constexpr std::u32string
303 decomposed(unicode_normalize_config config = unicode_normalize_config::NFD()) const noexcept
304 {
305 return unicode_decompose(composed(), config);
306 }
307
312 [[nodiscard]] friend constexpr bool operator==(grapheme const& lhs, grapheme const& rhs) noexcept
313 {
314 return lhs.index() == rhs.index();
315 }
316
317 [[nodiscard]] friend constexpr bool operator==(grapheme const& lhs, char32_t const& rhs) noexcept
318 {
319 hi_axiom(char_cast<value_type>(rhs) <= 0x10'ffff);
320 return lhs.index() == char_cast<value_type>(rhs);
321 }
322
323 [[nodiscard]] friend constexpr bool operator==(grapheme const& lhs, char const& rhs) noexcept
324 {
325 hi_axiom(char_cast<value_type>(rhs) <= 0x7f);
326 return lhs.index() == char_cast<value_type>(rhs);
327 }
328
331 [[nodiscard]] friend constexpr std::strong_ordering operator<=>(grapheme const& lhs, grapheme const& rhs) noexcept
332 {
333 return lhs.decomposed() <=> rhs.decomposed();
334 }
335
336 [[nodiscard]] friend constexpr std::strong_ordering operator<=>(grapheme const& lhs, char32_t const& rhs) noexcept
337 {
338 return lhs <=> grapheme{rhs};
339 }
340
341 [[nodiscard]] friend constexpr std::strong_ordering operator<=>(grapheme const& lhs, char const& rhs) noexcept
342 {
343 return lhs <=> grapheme{rhs};
344 }
345
346 [[nodiscard]] friend constexpr std::string to_string(grapheme const& rhs) noexcept
347 {
348 return hi::to_string(rhs.composed());
349 }
350
351 [[nodiscard]] friend constexpr std::wstring to_wstring(grapheme const& rhs) noexcept
352 {
353 return hi::to_wstring(rhs.composed());
354 }
355
356 [[nodiscard]] friend constexpr std::u32string to_u32string(grapheme const& rhs) noexcept
357 {
358 return rhs.composed();
359 }
360};
361
362} // namespace hi::inline v1
363
364template<>
365struct std::hash<hi::grapheme> {
366 [[nodiscard]] std::size_t operator()(hi::grapheme const& rhs) const noexcept
367 {
368 return std::hash<hi::grapheme::value_type>{}(rhs._value);
369 }
370};
#define hi_axiom_bounds(x,...)
Specify an axiom that the value is within bounds.
Definition assert.hpp:264
#define hi_no_default(...)
This part of the code should not be reachable, unless a programming bug.
Definition assert.hpp:279
#define hi_axiom(expression,...)
Specify an axiom; an expression that is true.
Definition assert.hpp:253
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
constexpr std::string to_string(std::u32string_view rhs) noexcept
Conversion from UTF-32 to UTF-8.
Definition to_string.hpp:215
constexpr std::u32string to_u32string(std::u32string_view rhs) noexcept
Identity conversion from UTF-32 to UTF-32.
Definition to_string.hpp:23
constexpr std::wstring to_wstring(std::u32string_view rhs) noexcept
Conversion from UTF-32 to wide-string (UTF-16/32).
Definition to_string.hpp:155
@ grapheme
The gui_event has grapheme data.
phrasing
Phrasing.
Definition phrasing.hpp:27
DOXYGEN BUG.
Definition algorithm.hpp:13
constexpr std::u32string unicode_normalize(std::u32string_view text, unicode_normalize_config config=unicode_normalize_config::NFC()) noexcept
Convert text to a Unicode composed normal form.
Definition unicode_normalization.hpp:303
geometry/margins.hpp
Definition cache.hpp:11
ISO-639 language code.
Definition iso_639.hpp:23
Definition grapheme.hpp:31
A grapheme-cluster, what a user thinks a character is.
Definition grapheme.hpp:44
constexpr grapheme(std::u32string_view code_points) noexcept
Encode a grapheme from a list of code-points.
Definition grapheme.hpp:139
friend constexpr bool operator==(grapheme const &lhs, grapheme const &rhs) noexcept
Compare equivalence of two graphemes.
Definition grapheme.hpp:312
friend constexpr std::strong_ordering operator<=>(grapheme const &lhs, grapheme const &rhs) noexcept
Compare two graphemes lexicographically.
Definition grapheme.hpp:331
constexpr char32_t operator[](size_t i) const noexcept
Get the code-point at the given index.
Definition grapheme.hpp:257
constexpr grapheme(composed_t, std::u32string_view code_points) noexcept
Encode a grapheme from a list of code-points.
Definition grapheme.hpp:112
constexpr uint32_t index() const noexcept
Get the codepoint/index part of the grapheme.
Definition grapheme.hpp:146
constexpr grapheme(char32_t code_point) noexcept
Encode a single code-point.
Definition grapheme.hpp:81
constexpr grapheme & operator=(char ascii_char) noexcept
Encode a single code-point.
Definition grapheme.hpp:101
value_type _value
The grapheme's value.
Definition grapheme.hpp:59
constexpr std::u32string decomposed(unicode_normalize_config config=unicode_normalize_config::NFD()) const noexcept
Get a list of code-points normalized to NFD.
Definition grapheme.hpp:303
constexpr std::u32string composed() const noexcept
Get a list of code-points normalized to NFC.
Definition grapheme.hpp:290
constexpr grapheme & operator=(char32_t code_point) noexcept
Encode a single code-point.
Definition grapheme.hpp:93
constexpr std::size_t size() const noexcept
Return the number of code-points encoded in the grapheme.
Definition grapheme.hpp:246
friend constexpr char32_t get(grapheme const &rhs) noexcept
Get the code-point at the given index.
Definition grapheme.hpp:276
Definition unicode_normalization.hpp:19
T operator()(T... args)
T to_string(T... args)
T to_wstring(T... args)