HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
grapheme.hpp
1// Copyright Take Vos 2019-2021.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "../strings.hpp"
8#include "../cast.hpp"
9#include "../hash.hpp"
10#include <array>
11
12namespace tt {
13
14// "Compatibility mappings are guaranteed to be no longer than 18 characters, although most consist of just a few characters."
15// https://unicode.org/reports/tr44/ (TR44 5.7.3)
16using long_grapheme = std::array<char32_t, 18>;
17
21class grapheme {
38 uint64_t value;
39
40public:
41 grapheme() noexcept : value(1) {}
42
43 ~grapheme()
44 {
45 delete_pointer();
46 }
47
48 grapheme(const grapheme &other) noexcept
49 {
50 value = other.value;
51 if (other.has_pointer()) {
52 value = create_pointer(other.get_pointer()->data(), other.size());
53 }
54 }
55
56 grapheme &operator=(const grapheme &other) noexcept
57 {
58 if (this != &other) {
59 delete_pointer();
60 value = other.value;
61 if (other.has_pointer()) {
62 value = create_pointer(other.get_pointer()->data(), other.size());
63 }
64 }
65 return *this;
66 }
67
68 grapheme(grapheme &&other) noexcept
69 {
70 value = other.value;
71 other.value = 1;
72 }
73
74 grapheme &operator=(grapheme &&other) noexcept
75 {
76 delete_pointer();
77 value = other.value;
78 other.value = 1;
79 return *this;
80 }
81
82 explicit grapheme(std::u32string_view codePoints) noexcept;
83
84 explicit grapheme(char32_t codePoint) noexcept : grapheme(std::u32string_view{&codePoint, 1}) {}
85
86 template<typename It>
87 explicit grapheme(It ptr, It last) noexcept : grapheme(*ptr)
88 {
89 ++ptr;
90 while (ptr != last) {
91 *this += *(ptr++);
92 }
93 }
94
95 grapheme &operator=(std::u32string_view codePoints) noexcept
96 {
97 *this = grapheme(codePoints);
98 return *this;
99 }
100
101 grapheme &operator=(char32_t codePoint) noexcept
102 {
103 *this = grapheme(codePoint);
104 return *this;
105 }
106
107 grapheme &operator+=(char32_t codePoint) noexcept;
108
109 explicit operator std::u32string() const noexcept
110 {
111 if (has_pointer()) {
112 return {get_pointer()->data(), size()};
113 } else {
114 auto r = std::u32string{};
115 auto tmp = value >> 1;
116 for (size_t i = 0; i < 3; i++, tmp >>= 21) {
117 if (auto codePoint = static_cast<char32_t>(tmp & 0x1f'ffff)) {
118 r += codePoint;
119 } else {
120 return r;
121 }
122 }
123 return r;
124 }
125 }
126
127 operator bool() const noexcept
128 {
129 return value != 1;
130 }
131
132 [[nodiscard]] size_t hash() const noexcept
133 {
134 size_t r = 0;
135 for (ssize_t i = 0; i != std::ssize(*this); ++i) {
136 r = hash_mix_two(r, std::hash<char32_t>{}((*this)[i]));
137 }
138 return r;
139 }
140
141 [[nodiscard]] size_t size() const noexcept
142 {
143 if (has_pointer()) {
144 return value >> 48;
145 } else {
146 auto tmp = value >> 1;
147 size_t i;
148 for (i = 0; i < 3; i++, tmp >>= 21) {
149 if ((tmp & 0x1f'ffff) == 0) {
150 return i;
151 }
152 }
153 return i;
154 }
155 }
156
157 [[nodiscard]] char32_t front() const noexcept
158 {
159 if (size() == 0) {
160 return 0;
161 } else {
162 return (*this)[0];
163 }
164 }
165
166 [[nodiscard]] char32_t operator[](size_t i) const noexcept
167 {
168 if (has_pointer()) {
169 tt_axiom(i < std::tuple_size_v<long_grapheme>);
170 return (*get_pointer())[i];
171
172 } else {
173 tt_axiom(i < 3);
174 return (value >> ((i * 21) + 1)) & 0x1f'ffff;
175 }
176 }
177
178 [[nodiscard]] std::u32string NFC() const noexcept
179 {
181 r.reserve(std::ssize(*this));
182 for (ssize_t i = 0; i != std::ssize(*this); ++i) {
183 r += (*this)[i];
184 }
185 return r;
186 }
187
188 [[nodiscard]] std::u32string NFD() const noexcept;
189
190 [[nodiscard]] std::u32string NFKC() const noexcept;
191
192 [[nodiscard]] std::u32string NFKD() const noexcept;
193
196 static grapheme PS() noexcept
197 {
198 return grapheme(U'\u2029');
199 }
200
203 static grapheme LS() noexcept
204 {
205 return grapheme(U'\u2028');
206 }
207
208 [[nodiscard]] friend std::string to_string(grapheme const &g) noexcept
209 {
210 return tt::to_string(g.NFC());
211 }
212
213 [[nodiscard]] friend std::u8string to_u8string(grapheme const &g) noexcept
214 {
215 return tt::to_u8string(g.NFC());
216 }
217
218 friend std::ostream &operator<<(std::ostream &lhs, grapheme const &rhs)
219 {
220 return lhs << to_string(rhs);
221 }
222
223private:
224 [[nodiscard]] bool has_pointer() const noexcept
225 {
226 return (value & 1) == 0;
227 }
228
229 [[nodiscard]] static uint64_t create_pointer(char32_t const *data, size_t size) noexcept
230 {
231 tt_assert(size <= std::tuple_size<long_grapheme>::value);
232
233 auto ptr = new long_grapheme();
234 memcpy(ptr->data(), data, size);
235
236 auto iptr = reinterpret_cast<ptrdiff_t>(ptr);
237 auto uptr = static_cast<uint64_t>(iptr << 16) >> 16;
238 return (size << 48) | uptr;
239 }
240
241 [[nodiscard]] long_grapheme *get_pointer() const noexcept
242 {
243 auto uptr = (value << 16);
244 auto iptr = static_cast<ptrdiff_t>(uptr) >> 16;
245 return std::launder(reinterpret_cast<long_grapheme *>(iptr));
246 }
247
248 void delete_pointer() noexcept
249 {
250 if (has_pointer()) {
251 delete get_pointer();
252 }
253 }
254
255 [[nodiscard]] friend bool operator<(grapheme const &a, grapheme const &b) noexcept
256 {
257 ttlet length = std::min(std::ssize(a), std::ssize(b));
258
259 for (ssize_t i = 0; i != length; ++i) {
260 if (a[i] < b[i]) {
261 return true;
262 }
263 }
264 return std::ssize(a) < std::ssize(b);
265 }
266
267 [[nodiscard]] friend bool operator==(grapheme const &a, grapheme const &b) noexcept
268 {
269 if (a.value == b.value) {
270 return true;
271 }
272
273 if (std::ssize(a) != std::ssize(b)) {
274 return false;
275 }
276
277 for (ssize_t i = 0; i != std::ssize(a); ++i) {
278 if (a[i] != b[i]) {
279 return false;
280 }
281 }
282 return true;
283 }
284
285 [[nodiscard]] friend bool operator!=(grapheme const &a, grapheme const &b) noexcept
286 {
287 return !(a == b);
288 }
289
290 [[nodiscard]] friend bool operator==(grapheme const &lhs, char32_t const &rhs) noexcept
291 {
292 return (std::ssize(lhs) == 1) && (lhs[0] == rhs);
293 }
294
295 [[nodiscard]] friend bool operator!=(grapheme const &lhs, char32_t const &rhs) noexcept
296 {
297 return !(lhs == rhs);
298 }
299
300 [[nodiscard]] friend bool operator==(grapheme const &lhs, char const &rhs) noexcept
301 {
302 return lhs == static_cast<char32_t>(rhs);
303 }
304
305 [[nodiscard]] friend bool operator!=(grapheme const &lhs, char const &rhs) noexcept
306 {
307 return !(lhs == rhs);
308 }
309};
310
311} // namespace tt
312
313namespace std {
314
315template<>
316struct hash<tt::grapheme> {
317 [[nodiscard]] size_t operator()(tt::grapheme const &rhs) const noexcept
318 {
319 return rhs.hash();
320 }
321};
322
323} // namespace std
STL namespace.
Definition grapheme.hpp:21
static grapheme PS() noexcept
Paragraph separator.
Definition grapheme.hpp:196
static grapheme LS() noexcept
Line separator.
Definition grapheme.hpp:203
T data(T... args)
T memcpy(T... args)
T min(T... args)
T operator()(T... args)
T reserve(T... args)