HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
grapheme.hpp
1// Copyright Take Vos 2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "../utility/module.hpp"
8#include "../strings.hpp"
9#include "../stable_set.hpp"
10#include "../log.hpp"
11#include "unicode_normalization.hpp"
12#include "ucd_general_categories.hpp"
13#include <cstdint>
14#include <string>
15#include <string_view>
16#include <cstddef>
17#include <memory>
18#include <vector>
19#include <algorithm>
20#include <bit>
21
22namespace hi::inline v1 {
23namespace detail {
24
25inline auto long_graphemes = ::hi::stable_set<std::u32string>{};
26
27} // namespace detail
28
/** Tag type used to select the `grapheme` constructor that stores the given
 * code-points as-is, without normalizing them first.
 */
struct composed_t {
};
30
42struct grapheme {
43 using value_type = uint32_t;
44
53 value_type _value;
54
55 constexpr grapheme() noexcept = default;
56 constexpr grapheme(grapheme const&) noexcept = default;
57 constexpr grapheme(grapheme&&) noexcept = default;
58 constexpr grapheme& operator=(grapheme const&) noexcept = default;
59 constexpr grapheme& operator=(grapheme&&) noexcept = default;
60
61 constexpr grapheme(intrinsic_t, value_type value) : _value(value) {}
62
63 constexpr value_type& intrinsic() noexcept
64 {
65 return _value;
66 }
67
68 constexpr value_type const& intrinsic() const noexcept
69 {
70 return _value;
71 }
72
75 constexpr grapheme(char32_t code_point) noexcept : _value(truncate<value_type>(code_point))
76 {
77 hi_axiom(code_point <= 0x10'ffff);
78 }
79
80 constexpr grapheme(char ascii_char) noexcept : _value(truncate<value_type>(ascii_char))
81 {
82 hi_axiom(ascii_char >= 0 and ascii_char <= 0x7f);
83 }
84
87 constexpr grapheme& operator=(char32_t code_point) noexcept
88 {
89 _value = truncate<value_type>(code_point);
90 return *this;
91 }
92
95 constexpr grapheme& operator=(char ascii_char) noexcept
96 {
97 hi_axiom(ascii_char >= 0 and ascii_char <= 0x7f);
98 _value = truncate<value_type>(ascii_char);
99 return *this;
100 }
101
106 constexpr grapheme(composed_t, std::u32string_view code_points) noexcept
107 {
108 switch (code_points.size()) {
109 case 0:
111
112 case 1:
113 _value = truncate<value_type>(code_points[0]);
114 break;
115
116 default:
117 hilet index = detail::long_graphemes.insert(std::u32string{code_points});
118 if (index < 0x0f'0000) {
119 _value = narrow_cast<value_type>(index + 0x11'0000);
120 } else {
121 [[unlikely]] hi_log_error_once(
122 "grapheme::error::too-many", "Too many long graphemes encoded, replacing with U+fffd");
123 _value = 0x00'fffd;
124 }
125 }
126 }
127
132 constexpr explicit grapheme(std::u32string_view code_points) noexcept :
133 grapheme(composed_t{}, unicode_normalize(code_points, unicode_normalize_config::NFC()))
134 {
135 }
136
139 [[nodiscard]] constexpr std::u32string decomposed() const noexcept
140 {
141 return unicode_decompose(composed(), unicode_normalize_config::NFD());
142 }
143
151 [[nodiscard]] constexpr bool valid() const noexcept
152 {
153 if (is_noncharacter(get<0>(*this))) {
154 return false;
155 }
156
157 hilet general_category = ucd_get_general_category(get<0>(*this));
158 if (is_C(general_category)) {
159 return false;
160 }
161 if (is_M(general_category)) {
162 return false;
163 }
164 return true;
165 }
166
167 [[nodiscard]] std::u32string const& long_grapheme() const noexcept
168 {
169 hi_assert(_value > 0x10'ffff and _value <= 0x1f'ffff);
170 return detail::long_graphemes[_value - 0x11'0000];
171 }
172
175 [[nodiscard]] constexpr std::size_t size() const noexcept
176 {
177 if (_value <= 0x10'ffff) {
178 return 1;
179
180 } else {
181 return long_grapheme().size();
182 }
183 }
184
191 [[nodiscard]] constexpr char32_t operator[](size_t i) const noexcept
192 {
193 hi_assert_bounds(i, *this);
194
195 if (_value <= 0x10'ffff) {
196 return truncate<char32_t>(_value);
197 } else {
198 return long_grapheme()[i];
199 }
200 }
201
209 template<size_t I>
210 [[nodiscard]] friend constexpr char32_t get(grapheme const& rhs) noexcept
211 {
212 hi_assert_bounds(I, rhs);
213
214 if (rhs._value <= 0x10'ffff) {
215 return rhs._value;
216 } else {
217 return rhs.long_grapheme()[I];
218 }
219 }
220
223 [[nodiscard]] constexpr std::u32string composed() const noexcept
224 {
225 if (_value <= 0x10'ffff) {
226 return std::u32string{truncate<char32_t>(_value)};
227 } else {
228 return long_grapheme();
229 }
230 }
231
234 [[nodiscard]] friend constexpr bool operator==(grapheme const&, grapheme const&) noexcept = default;
235
236 [[nodiscard]] friend constexpr bool operator==(grapheme const& lhs, char32_t const& rhs) noexcept
237 {
238 return lhs._value == char_cast<value_type>(rhs);
239 }
240
241 [[nodiscard]] friend constexpr bool operator==(grapheme const& lhs, char const& rhs) noexcept
242 {
243 hi_axiom(rhs <= 0x7f);
244 return lhs._value == char_cast<value_type>(rhs);
245 }
246
249 [[nodiscard]] friend constexpr std::strong_ordering operator<=>(grapheme const& lhs, grapheme const& rhs) noexcept
250 {
251 return lhs.decomposed() <=> rhs.decomposed();
252 }
253
254 [[nodiscard]] friend constexpr std::strong_ordering operator<=>(grapheme const& lhs, char32_t const& rhs) noexcept
255 {
256 return lhs <=> grapheme{rhs};
257 }
258
259 [[nodiscard]] friend constexpr std::strong_ordering operator<=>(grapheme const& lhs, char const& rhs) noexcept
260 {
261 return lhs <=> grapheme{rhs};
262 }
263
264 [[nodiscard]] friend constexpr std::string to_string(grapheme const& rhs) noexcept
265 {
266 return hi::to_string(rhs.composed());
267 }
268
269 [[nodiscard]] friend constexpr std::wstring to_wstring(grapheme const& rhs) noexcept
270 {
271 return hi::to_wstring(rhs.composed());
272 }
273
274 [[nodiscard]] friend constexpr std::u32string to_u32string(grapheme const& rhs) noexcept
275 {
276 return rhs.composed();
277 }
278};
279
280} // namespace hi::inline v1
281
282template<>
283struct std::hash<hi::grapheme> {
284 [[nodiscard]] std::size_t operator()(hi::grapheme const& rhs) const noexcept
285 {
286 return std::hash<hi::grapheme::value_type>{}(rhs._value);
287 }
288};
#define hi_assert_bounds(x,...)
Assert if a value is within bounds.
Definition assert.hpp:225
#define hi_assert(expression,...)
Assert if expression is true.
Definition assert.hpp:199
#define hi_no_default(...)
This part of the code should not be reachable, unless a programming bug.
Definition assert.hpp:279
#define hi_axiom(expression,...)
Specify an axiom; an expression that is true.
Definition assert.hpp:253
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
constexpr std::string to_string(std::u32string_view rhs) noexcept
Conversion from UTF-32 to UTF-8.
Definition to_string.hpp:215
constexpr std::u32string to_u32string(std::u32string_view rhs) noexcept
Identity conversion from UTF-32 to UTF-32.
Definition to_string.hpp:23
constexpr std::wstring to_wstring(std::u32string_view rhs) noexcept
Conversion from UTF-32 to wide-string (UTF-16/32).
Definition to_string.hpp:155
@ grapheme
The gui_event has grapheme data.
DOXYGEN BUG.
Definition algorithm.hpp:13
constexpr std::u32string unicode_normalize(std::u32string_view text, unicode_normalize_config config=unicode_normalize_config::NFC()) noexcept
Convert text to a Unicode composed normal form.
Definition unicode_normalization.hpp:303
geometry/margins.hpp
Definition cache.hpp:11
Definition grapheme.hpp:29
A grapheme-cluster, what a user thinks a character is.
Definition grapheme.hpp:42
constexpr grapheme(std::u32string_view code_points) noexcept
Encode a grapheme from a list of code-points.
Definition grapheme.hpp:132
friend constexpr std::strong_ordering operator<=>(grapheme const &lhs, grapheme const &rhs) noexcept
Compare two graphemes lexicographically.
Definition grapheme.hpp:249
constexpr char32_t operator[](size_t i) const noexcept
Get the code-point at the given index.
Definition grapheme.hpp:191
constexpr std::u32string decomposed() const noexcept
Get a list of code-points normalized to NFD.
Definition grapheme.hpp:139
constexpr grapheme(composed_t, std::u32string_view code_points) noexcept
Encode a grapheme from a list of code-points.
Definition grapheme.hpp:106
constexpr grapheme(char32_t code_point) noexcept
Encode a single code-point.
Definition grapheme.hpp:75
constexpr grapheme & operator=(char ascii_char) noexcept
Encode a single code-point.
Definition grapheme.hpp:95
value_type _value
A pointer to a grapheme.
Definition grapheme.hpp:53
constexpr std::u32string composed() const noexcept
Get a list of code-points normalized to NFC.
Definition grapheme.hpp:223
constexpr bool valid() const noexcept
Check if the grapheme is valid.
Definition grapheme.hpp:151
friend constexpr bool operator==(grapheme const &, grapheme const &) noexcept=default
Compare equivalence of two graphemes.
constexpr grapheme & operator=(char32_t code_point) noexcept
Encode a single code-point.
Definition grapheme.hpp:87
constexpr std::size_t size() const noexcept
Return the number of code-points encoded in the grapheme.
Definition grapheme.hpp:175
friend constexpr char32_t get(grapheme const &rhs) noexcept
Get the code-point at the given index.
Definition grapheme.hpp:210
T operator()(T... args)
T to_string(T... args)
T to_wstring(T... args)