HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
grapheme.hpp
1// Copyright Take Vos 2019-2021.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "../strings.hpp"
8#include "../cast.hpp"
9#include "../hash.hpp"
10#include <array>
11
12namespace tt {
13
/** Fixed-capacity storage for the code points of a long grapheme.
 *
 * The capacity of 18 follows Unicode TR44 section 5.7.3
 * (https://unicode.org/reports/tr44/):
 * "Compatibility mappings are guaranteed to be no longer than 18 characters,
 * although most consist of just a few characters."
 */
using long_grapheme = std::array<char32_t, 18>;
17
21class grapheme {
38 uint64_t value;
39
40public:
41 grapheme() noexcept : value(1) {}
42
43 ~grapheme()
44 {
45 delete_pointer();
46 }
47
48 grapheme(const grapheme &other) noexcept
49 {
50 tt_axiom(&other != this);
51 value = other.value;
52 if (other.has_pointer()) {
53 value = create_pointer(other.get_pointer()->data(), other.size());
54 }
55 }
56
57 grapheme &operator=(const grapheme &other) noexcept
58 {
59 tt_return_on_self_assignment(other);
60 delete_pointer();
61 value = other.value;
62 if (other.has_pointer()) {
63 value = create_pointer(other.get_pointer()->data(), other.size());
64 }
65 return *this;
66 }
67
68 grapheme(grapheme &&other) noexcept
69 {
70 tt_axiom(&other != this);
71 value = other.value;
72 other.value = 1;
73 }
74
75 grapheme &operator=(grapheme &&other) noexcept
76 {
77 // Self-assignment is allowed.
78 delete_pointer();
79 value = other.value;
80 other.value = 1;
81 return *this;
82 }
83
84 explicit grapheme(std::u32string_view codePoints) noexcept;
85
86 explicit grapheme(char32_t codePoint) noexcept : grapheme(std::u32string_view{&codePoint, 1}) {}
87
88 template<typename It>
89 explicit grapheme(It ptr, It last) noexcept : grapheme(*ptr)
90 {
91 ++ptr;
92 while (ptr != last) {
93 *this += *(ptr++);
94 }
95 }
96
97 grapheme &operator=(std::u32string_view codePoints) noexcept
98 {
99 *this = grapheme(codePoints);
100 return *this;
101 }
102
103 grapheme &operator=(char32_t codePoint) noexcept
104 {
105 *this = grapheme(codePoint);
106 return *this;
107 }
108
109 grapheme &operator+=(char32_t codePoint) noexcept;
110
111 explicit operator std::u32string() const noexcept
112 {
113 if (has_pointer()) {
114 return {get_pointer()->data(), size()};
115 } else {
116 auto r = std::u32string{};
117 auto tmp = value >> 1;
118 for (size_t i = 0; i < 3; i++, tmp >>= 21) {
119 if (auto codePoint = static_cast<char32_t>(tmp & 0x1f'ffff)) {
120 r += codePoint;
121 } else {
122 return r;
123 }
124 }
125 return r;
126 }
127 }
128
129 operator bool() const noexcept
130 {
131 return value != 1;
132 }
133
134 [[nodiscard]] size_t hash() const noexcept
135 {
136 size_t r = 0;
137 for (ssize_t i = 0; i != std::ssize(*this); ++i) {
138 r = hash_mix_two(r, std::hash<char32_t>{}((*this)[i]));
139 }
140 return r;
141 }
142
143 [[nodiscard]] size_t size() const noexcept
144 {
145 if (has_pointer()) {
146 return value >> 48;
147 } else {
148 auto tmp = value >> 1;
149 size_t i;
150 for (i = 0; i < 3; i++, tmp >>= 21) {
151 if ((tmp & 0x1f'ffff) == 0) {
152 return i;
153 }
154 }
155 return i;
156 }
157 }
158
159 [[nodiscard]] char32_t front() const noexcept
160 {
161 if (size() == 0) {
162 return 0;
163 } else {
164 return (*this)[0];
165 }
166 }
167
168 [[nodiscard]] char32_t operator[](size_t i) const noexcept
169 {
170 if (has_pointer()) {
171 tt_axiom(i < std::tuple_size_v<long_grapheme>);
172 return (*get_pointer())[i];
173
174 } else {
175 tt_axiom(i < 3);
176 return (value >> ((i * 21) + 1)) & 0x1f'ffff;
177 }
178 }
179
180 [[nodiscard]] std::u32string NFC() const noexcept
181 {
183 r.reserve(std::ssize(*this));
184 for (ssize_t i = 0; i != std::ssize(*this); ++i) {
185 r += (*this)[i];
186 }
187 return r;
188 }
189
190 [[nodiscard]] std::u32string NFD() const noexcept;
191
192 [[nodiscard]] std::u32string NFKC() const noexcept;
193
194 [[nodiscard]] std::u32string NFKD() const noexcept;
195
198 static grapheme PS() noexcept
199 {
200 return grapheme(U'\u2029');
201 }
202
205 static grapheme LS() noexcept
206 {
207 return grapheme(U'\u2028');
208 }
209
210 [[nodiscard]] friend std::string to_string(grapheme const &g) noexcept
211 {
212 return tt::to_string(g.NFC());
213 }
214
215 friend std::ostream &operator<<(std::ostream &lhs, grapheme const &rhs)
216 {
217 return lhs << to_string(rhs);
218 }
219
220private:
221 [[nodiscard]] bool has_pointer() const noexcept
222 {
223 return (value & 1) == 0;
224 }
225
226 [[nodiscard]] static uint64_t create_pointer(char32_t const *data, size_t size) noexcept
227 {
228 tt_assert(size <= std::tuple_size<long_grapheme>::value);
229
230 auto ptr = new long_grapheme();
231 memcpy(ptr->data(), data, size);
232
233 auto iptr = reinterpret_cast<ptrdiff_t>(ptr);
234 auto uptr = static_cast<uint64_t>(iptr << 16) >> 16;
235 return (size << 48) | uptr;
236 }
237
238 [[nodiscard]] long_grapheme *get_pointer() const noexcept
239 {
240 auto uptr = (value << 16);
241 auto iptr = static_cast<ptrdiff_t>(uptr) >> 16;
242 return std::launder(reinterpret_cast<long_grapheme *>(iptr));
243 }
244
245 void delete_pointer() noexcept
246 {
247 if (has_pointer()) {
248 delete get_pointer();
249 }
250 }
251
252 [[nodiscard]] friend bool operator<(grapheme const &a, grapheme const &b) noexcept
253 {
254 ttlet length = std::min(std::ssize(a), std::ssize(b));
255
256 for (ssize_t i = 0; i != length; ++i) {
257 if (a[i] < b[i]) {
258 return true;
259 }
260 }
261 return std::ssize(a) < std::ssize(b);
262 }
263
264 [[nodiscard]] friend bool operator==(grapheme const &a, grapheme const &b) noexcept
265 {
266 if (a.value == b.value) {
267 return true;
268 }
269
270 if (std::ssize(a) != std::ssize(b)) {
271 return false;
272 }
273
274 for (ssize_t i = 0; i != std::ssize(a); ++i) {
275 if (a[i] != b[i]) {
276 return false;
277 }
278 }
279 return true;
280 }
281
282 [[nodiscard]] friend bool operator!=(grapheme const &a, grapheme const &b) noexcept
283 {
284 return !(a == b);
285 }
286
287 [[nodiscard]] friend bool operator==(grapheme const &lhs, char32_t const &rhs) noexcept
288 {
289 return (std::ssize(lhs) == 1) && (lhs[0] == rhs);
290 }
291
292 [[nodiscard]] friend bool operator!=(grapheme const &lhs, char32_t const &rhs) noexcept
293 {
294 return !(lhs == rhs);
295 }
296
297 [[nodiscard]] friend bool operator==(grapheme const &lhs, char const &rhs) noexcept
298 {
299 return lhs == static_cast<char32_t>(rhs);
300 }
301
302 [[nodiscard]] friend bool operator!=(grapheme const &lhs, char const &rhs) noexcept
303 {
304 return !(lhs == rhs);
305 }
306};
307
308} // namespace tt
309
310namespace std {
311
312template<>
313struct hash<tt::grapheme> {
314 [[nodiscard]] size_t operator()(tt::grapheme const &rhs) const noexcept
315 {
316 return rhs.hash();
317 }
318};
319
320} // namespace std
STL namespace.
Definition grapheme.hpp:21
static grapheme PS() noexcept
Paragraph separator.
Definition grapheme.hpp:198
static grapheme LS() noexcept
Line separator.
Definition grapheme.hpp:205
T data(T... args)
T memcpy(T... args)
T min(T... args)
T operator()(T... args)
T reserve(T... args)