HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
grapheme.hpp
1// Copyright Take Vos 2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "../utility/module.hpp"
8#include "../strings.hpp"
9#include "../stable_set.hpp"
10#include "../log.hpp"
11#include "unicode_normalization.hpp"
12#include "ucd_general_categories.hpp"
13#include <cstdint>
14#include <string>
15#include <string_view>
16#include <cstddef>
17#include <memory>
18#include <vector>
19#include <algorithm>
20#include <bit>
21
22namespace hi::inline v1 {
23namespace detail {
24
25inline auto long_graphemes = ::hi::stable_set<std::u32string>{};
26
27} // namespace detail
28
/** Tag type selecting the `grapheme` constructor that stores the given
 * code-points as-is, without running Unicode normalization on them first.
 */
struct composed_t {
};
30
42struct grapheme {
43 using value_type = uint32_t;
44
53 value_type _value;
54
55 constexpr grapheme() noexcept = default;
56 constexpr grapheme(grapheme const&) noexcept = default;
57 constexpr grapheme(grapheme&&) noexcept = default;
58 constexpr grapheme& operator=(grapheme const&) noexcept = default;
59 constexpr grapheme& operator=(grapheme&&) noexcept = default;
60
61 constexpr grapheme(intrinsic_t, value_type value) : _value(value) {}
62
63 constexpr value_type& intrinsic() noexcept
64 {
65 return _value;
66 }
67
68 constexpr value_type const& intrinsic() const noexcept
69 {
70 return _value;
71 }
72
75 constexpr grapheme(char32_t code_point) noexcept : _value(truncate<value_type>(code_point))
76 {
77 hi_axiom(code_point <= 0x10'ffff);
78 }
79
80 constexpr grapheme(char ascii_char) noexcept : _value(truncate<value_type>(ascii_char))
81 {
82 hi_axiom(ascii_char >= 0 and ascii_char <= 0x7f);
83 }
84
87 constexpr grapheme& operator=(char32_t code_point) noexcept
88 {
89 _value = truncate<value_type>(code_point);
90 return *this;
91 }
92
95 constexpr grapheme& operator=(char ascii_char) noexcept
96 {
97 hi_axiom(ascii_char >= 0 and ascii_char <= 0x7f);
98 _value = truncate<value_type>(ascii_char);
99 return *this;
100 }
101
106 constexpr grapheme(composed_t, std::u32string_view code_points) noexcept
107 {
108 switch (code_points.size()) {
109 case 0:
111
112 case 1:
113 _value = truncate<value_type>(code_points[0]);
114 break;
115
116 default:
117 hilet index = detail::long_graphemes.insert(std::u32string{code_points});
118 if (index < 0x0f'0000) {
119 _value = narrow_cast<value_type>(index + 0x11'0000);
120 } else {
121 [[unlikely]] hi_log_error_once(
122 "grapheme::error::too-many", "Too many long graphemes encoded, replacing with U+fffd");
123 _value = 0x00'fffd;
124 }
125 }
126 }
127
132 constexpr explicit grapheme(std::u32string_view code_points) noexcept :
133 grapheme(composed_t{}, unicode_normalize(code_points, unicode_normalize_config::NFC()))
134 {
135 }
136
139 [[nodiscard]] constexpr std::u32string decomposed() const noexcept
140 {
141 return unicode_decompose(composed(), unicode_normalize_config::NFD());
142 }
143
144 [[nodiscard]] std::u32string const& long_grapheme() const noexcept
145 {
146 hi_assert(_value > 0x10'ffff and _value <= 0x1f'ffff);
147 return detail::long_graphemes[_value - 0x11'0000];
148 }
149
152 [[nodiscard]] constexpr std::size_t size() const noexcept
153 {
154 if (_value <= 0x10'ffff) {
155 return 1;
156
157 } else {
158 return long_grapheme().size();
159 }
160 }
161
168 [[nodiscard]] constexpr char32_t operator[](size_t i) const noexcept
169 {
170 hi_assert_bounds(i, *this);
171
172 if (_value <= 0x10'ffff) {
173 return truncate<char32_t>(_value);
174 } else {
175 return long_grapheme()[i];
176 }
177 }
178
186 template<size_t I>
187 [[nodiscard]] friend constexpr char32_t get(grapheme const& rhs) noexcept
188 {
189 hi_assert_bounds(I, rhs);
190
191 if (rhs._value <= 0x10'ffff) {
192 return rhs._value;
193 } else {
194 return rhs.long_grapheme()[I];
195 }
196 }
197
200 [[nodiscard]] constexpr std::u32string composed() const noexcept
201 {
202 if (_value <= 0x10'ffff) {
203 return std::u32string{truncate<char32_t>(_value)};
204 } else {
205 return long_grapheme();
206 }
207 }
208
211 [[nodiscard]] friend constexpr bool operator==(grapheme const&, grapheme const&) noexcept = default;
212
213 [[nodiscard]] friend constexpr bool operator==(grapheme const& lhs, char32_t const& rhs) noexcept
214 {
215 return lhs._value == char_cast<value_type>(rhs);
216 }
217
218 [[nodiscard]] friend constexpr bool operator==(grapheme const& lhs, char const& rhs) noexcept
219 {
220 hi_axiom(rhs <= 0x7f);
221 return lhs._value == char_cast<value_type>(rhs);
222 }
223
226 [[nodiscard]] friend constexpr std::strong_ordering operator<=>(grapheme const& lhs, grapheme const& rhs) noexcept
227 {
228 return lhs.decomposed() <=> rhs.decomposed();
229 }
230
231 [[nodiscard]] friend constexpr std::strong_ordering operator<=>(grapheme const& lhs, char32_t const& rhs) noexcept
232 {
233 return lhs <=> grapheme{rhs};
234 }
235
236 [[nodiscard]] friend constexpr std::strong_ordering operator<=>(grapheme const& lhs, char const& rhs) noexcept
237 {
238 return lhs <=> grapheme{rhs};
239 }
240
241 [[nodiscard]] friend constexpr std::string to_string(grapheme const& rhs) noexcept
242 {
243 return hi::to_string(rhs.composed());
244 }
245
246 [[nodiscard]] friend constexpr std::wstring to_wstring(grapheme const& rhs) noexcept
247 {
248 return hi::to_wstring(rhs.composed());
249 }
250
251 [[nodiscard]] friend constexpr std::u32string to_u32string(grapheme const& rhs) noexcept
252 {
253 return rhs.composed();
254 }
255};
256
257} // namespace hi::inline v1
258
259template<>
260struct std::hash<hi::grapheme> {
261 [[nodiscard]] std::size_t operator()(hi::grapheme const& rhs) const noexcept
262 {
263 return std::hash<hi::grapheme::value_type>{}(rhs._value);
264 }
265};
#define hi_assert_bounds(x,...)
Assert if a value is within bounds.
Definition assert.hpp:225
#define hi_assert(expression,...)
Assert if expression is true.
Definition assert.hpp:199
#define hi_no_default(...)
This part of the code should not be reachable, unless a programming bug.
Definition assert.hpp:279
#define hi_axiom(expression,...)
Specify an axiom; an expression that is true.
Definition assert.hpp:253
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
constexpr std::string to_string(std::u32string_view rhs) noexcept
Conversion from UTF-32 to UTF-8.
Definition to_string.hpp:215
constexpr std::u32string to_u32string(std::u32string_view rhs) noexcept
Identity conversion from UTF-32 to UTF-32.
Definition to_string.hpp:23
constexpr std::wstring to_wstring(std::u32string_view rhs) noexcept
Conversion from UTF-32 to wide-string (UTF-16/32).
Definition to_string.hpp:155
@ grapheme
The gui_event has grapheme data.
DOXYGEN BUG.
Definition algorithm.hpp:13
constexpr std::u32string unicode_normalize(std::u32string_view text, unicode_normalize_config config=unicode_normalize_config::NFC()) noexcept
Convert text to a Unicode composed normal form.
Definition unicode_normalization.hpp:303
geometry/margins.hpp
Definition cache.hpp:11
Definition grapheme.hpp:29
A grapheme-cluster, what a user thinks a character is.
Definition grapheme.hpp:42
constexpr grapheme(std::u32string_view code_points) noexcept
Encode a grapheme from a list of code-points.
Definition grapheme.hpp:132
friend constexpr std::strong_ordering operator<=>(grapheme const &lhs, grapheme const &rhs) noexcept
Compare two graphemes lexicographically.
Definition grapheme.hpp:226
constexpr char32_t operator[](size_t i) const noexcept
Get the code-point at the given index.
Definition grapheme.hpp:168
constexpr std::u32string decomposed() const noexcept
Get a list of code-point normalized to NFD.
Definition grapheme.hpp:139
constexpr grapheme(composed_t, std::u32string_view code_points) noexcept
Encode a grapheme from a list of code-points.
Definition grapheme.hpp:106
constexpr grapheme(char32_t code_point) noexcept
Encode a single code-point.
Definition grapheme.hpp:75
constexpr grapheme & operator=(char ascii_char) noexcept
Encode a single code-point.
Definition grapheme.hpp:95
value_type _value
The encoded grapheme: a single code-point (up to 0x10'ffff), or 0x11'0000 plus an index into the long-grapheme table.
Definition grapheme.hpp:53
constexpr std::u32string composed() const noexcept
Get a list of code-point normalized to NFC.
Definition grapheme.hpp:200
friend constexpr bool operator==(grapheme const &, grapheme const &) noexcept=default
Compare equivalence of two graphemes.
constexpr grapheme & operator=(char32_t code_point) noexcept
Encode a single code-point.
Definition grapheme.hpp:87
constexpr std::size_t size() const noexcept
Return the number of code-points encoded in the grapheme.
Definition grapheme.hpp:152
friend constexpr char32_t get(grapheme const &rhs) noexcept
Get the code-point at the given index.
Definition grapheme.hpp:187
T operator()(T... args)
T to_string(T... args)
T to_wstring(T... args)