HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
grapheme.hpp
1// Copyright Take Vos 2019-2021.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "../strings.hpp"
8#include "../cast.hpp"
9#include "../hash.hpp"
10#include <array>
11
12namespace tt {
13
// Fixed-capacity code point buffer. grapheme::create_pointer() heap-allocates one of these
// to hold the code points of a grapheme that uses pointer storage.
// The capacity of 18 corresponds to the bound quoted from TR44 below.
14// "Compatibility mappings are guaranteed to be no longer than 18 characters, although most consist of just a few characters."
15// https://unicode.org/reports/tr44/ (TR44 5.7.3)
16using long_grapheme = std::array<char32_t, 18>;
17
21class grapheme {
38 uint64_t value;
39
40public:
41 grapheme() noexcept : value(1) {}
42
43 ~grapheme()
44 {
45 delete_pointer();
46 }
47
48 grapheme(const grapheme &other) noexcept
49 {
50 tt_axiom(&other != this);
51 value = other.value;
52 if (other.has_pointer()) {
53 value = create_pointer(other.get_pointer()->data(), other.size());
54 }
55 }
56
57 grapheme &operator=(const grapheme &other) noexcept
58 {
59 tt_return_on_self_assignment(other);
60 delete_pointer();
61 value = other.value;
62 if (other.has_pointer()) {
63 value = create_pointer(other.get_pointer()->data(), other.size());
64 }
65 return *this;
66 }
67
68 grapheme(grapheme &&other) noexcept
69 {
70 tt_axiom(&other != this);
71 value = other.value;
72 other.value = 1;
73 }
74
75 grapheme &operator=(grapheme &&other) noexcept
76 {
77 // Self-assignment is allowed.
78 delete_pointer();
79 value = other.value;
80 other.value = 1;
81 return *this;
82 }
83
84 explicit grapheme(std::u32string_view codePoints) noexcept;
85
86 explicit grapheme(char32_t codePoint) noexcept : grapheme(std::u32string_view{&codePoint, 1}) {}
87
88 template<typename It>
89 explicit grapheme(It ptr, It last) noexcept : grapheme(*ptr)
90 {
91 ++ptr;
92 while (ptr != last) {
93 *this += *(ptr++);
94 }
95 }
96
97 grapheme &operator=(std::u32string_view codePoints) noexcept
98 {
99 *this = grapheme(codePoints);
100 return *this;
101 }
102
103 grapheme &operator=(char32_t codePoint) noexcept
104 {
105 *this = grapheme(codePoint);
106 return *this;
107 }
108
109 grapheme &operator+=(char32_t codePoint) noexcept;
110
111 explicit operator std::u32string() const noexcept
112 {
113 if (has_pointer()) {
114 return {get_pointer()->data(), size()};
115 } else {
116 auto r = std::u32string{};
117 auto tmp = value >> 1;
118 for (size_t i = 0; i < 3; i++, tmp >>= 21) {
119 if (auto codePoint = static_cast<char32_t>(tmp & 0x1f'ffff)) {
120 r += codePoint;
121 } else {
122 return r;
123 }
124 }
125 return r;
126 }
127 }
128
129 operator bool() const noexcept
130 {
131 return value != 1;
132 }
133
134 [[nodiscard]] size_t hash() const noexcept
135 {
136 size_t r = 0;
137 for (ssize_t i = 0; i != std::ssize(*this); ++i) {
138 r = hash_mix_two(r, std::hash<char32_t>{}((*this)[i]));
139 }
140 return r;
141 }
142
143 [[nodiscard]] size_t size() const noexcept
144 {
145 if (has_pointer()) {
146 return value >> 48;
147 } else {
148 auto tmp = value >> 1;
149 size_t i;
150 for (i = 0; i < 3; i++, tmp >>= 21) {
151 if ((tmp & 0x1f'ffff) == 0) {
152 return i;
153 }
154 }
155 return i;
156 }
157 }
158
159 [[nodiscard]] char32_t front() const noexcept
160 {
161 if (size() == 0) {
162 return 0;
163 } else {
164 return (*this)[0];
165 }
166 }
167
168 [[nodiscard]] char32_t operator[](size_t i) const noexcept
169 {
170 if (has_pointer()) {
171 tt_axiom(i < std::tuple_size_v<long_grapheme>);
172 return (*get_pointer())[i];
173
174 } else {
175 tt_axiom(i < 3);
176 return (value >> ((i * 21) + 1)) & 0x1f'ffff;
177 }
178 }
179
180 [[nodiscard]] std::u32string NFC() const noexcept
181 {
183 r.reserve(std::ssize(*this));
184 for (ssize_t i = 0; i != std::ssize(*this); ++i) {
185 r += (*this)[i];
186 }
187 return r;
188 }
189
190 [[nodiscard]] std::u32string NFD() const noexcept;
191
192 [[nodiscard]] std::u32string NFKC() const noexcept;
193
194 [[nodiscard]] std::u32string NFKD() const noexcept;
195
198 static grapheme PS() noexcept
199 {
200 return grapheme(U'\u2029');
201 }
202
205 static grapheme LS() noexcept
206 {
207 return grapheme(U'\u2028');
208 }
209
210 [[nodiscard]] friend std::string to_string(grapheme const &g) noexcept
211 {
212 return tt::to_string(g.NFC());
213 }
214
215 [[nodiscard]] friend std::u8string to_u8string(grapheme const &g) noexcept
216 {
217 return tt::to_u8string(g.NFC());
218 }
219
220 friend std::ostream &operator<<(std::ostream &lhs, grapheme const &rhs)
221 {
222 return lhs << to_string(rhs);
223 }
224
225private:
226 [[nodiscard]] bool has_pointer() const noexcept
227 {
228 return (value & 1) == 0;
229 }
230
231 [[nodiscard]] static uint64_t create_pointer(char32_t const *data, size_t size) noexcept
232 {
233 tt_assert(size <= std::tuple_size<long_grapheme>::value);
234
235 auto ptr = new long_grapheme();
236 memcpy(ptr->data(), data, size);
237
238 auto iptr = reinterpret_cast<ptrdiff_t>(ptr);
239 auto uptr = static_cast<uint64_t>(iptr << 16) >> 16;
240 return (size << 48) | uptr;
241 }
242
243 [[nodiscard]] long_grapheme *get_pointer() const noexcept
244 {
245 auto uptr = (value << 16);
246 auto iptr = static_cast<ptrdiff_t>(uptr) >> 16;
247 return std::launder(reinterpret_cast<long_grapheme *>(iptr));
248 }
249
250 void delete_pointer() noexcept
251 {
252 if (has_pointer()) {
253 delete get_pointer();
254 }
255 }
256
257 [[nodiscard]] friend bool operator<(grapheme const &a, grapheme const &b) noexcept
258 {
259 ttlet length = std::min(std::ssize(a), std::ssize(b));
260
261 for (ssize_t i = 0; i != length; ++i) {
262 if (a[i] < b[i]) {
263 return true;
264 }
265 }
266 return std::ssize(a) < std::ssize(b);
267 }
268
269 [[nodiscard]] friend bool operator==(grapheme const &a, grapheme const &b) noexcept
270 {
271 if (a.value == b.value) {
272 return true;
273 }
274
275 if (std::ssize(a) != std::ssize(b)) {
276 return false;
277 }
278
279 for (ssize_t i = 0; i != std::ssize(a); ++i) {
280 if (a[i] != b[i]) {
281 return false;
282 }
283 }
284 return true;
285 }
286
287 [[nodiscard]] friend bool operator!=(grapheme const &a, grapheme const &b) noexcept
288 {
289 return !(a == b);
290 }
291
292 [[nodiscard]] friend bool operator==(grapheme const &lhs, char32_t const &rhs) noexcept
293 {
294 return (std::ssize(lhs) == 1) && (lhs[0] == rhs);
295 }
296
297 [[nodiscard]] friend bool operator!=(grapheme const &lhs, char32_t const &rhs) noexcept
298 {
299 return !(lhs == rhs);
300 }
301
302 [[nodiscard]] friend bool operator==(grapheme const &lhs, char const &rhs) noexcept
303 {
304 return lhs == static_cast<char32_t>(rhs);
305 }
306
307 [[nodiscard]] friend bool operator!=(grapheme const &lhs, char const &rhs) noexcept
308 {
309 return !(lhs == rhs);
310 }
311};
312
313} // namespace tt
314
315namespace std {
316
317template<>
318struct hash<tt::grapheme> {
319 [[nodiscard]] size_t operator()(tt::grapheme const &rhs) const noexcept
320 {
321 return rhs.hash();
322 }
323};
324
325} // namespace std
STL namespace.
Definition grapheme.hpp:21
static grapheme PS() noexcept
Paragraph separator.
Definition grapheme.hpp:198
static grapheme LS() noexcept
Line separator.
Definition grapheme.hpp:205
T data(T... args)
T memcpy(T... args)
T min(T... args)
T operator()(T... args)
T reserve(T... args)