HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
unicode_description.hpp
1// Copyright Take Vos 2020-2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "unicode_general_category.hpp"
8#include "unicode_bidi_class.hpp"
9#include "unicode_bidi_bracket_type.hpp"
10#include "unicode_grapheme_cluster_break.hpp"
11#include "unicode_line_break.hpp"
15#include "unicode_decomposition_type.hpp"
16#include "unicode_script.hpp"
17#include "../required.hpp"
18#include "../assert.hpp"
19#include "../cast.hpp"
20
21namespace hi::inline v1 {
22
/** U+FFFD REPLACEMENT CHARACTER.
 *
 * Declared `inline` so every translation unit that includes this header
 * shares one definition instead of getting its own internal-linkage copy.
 */
inline constexpr char32_t unicode_replacement_character = U'\ufffd';

/** U+2028 LINE SEPARATOR. */
inline constexpr char32_t unicode_LS = U'\u2028';

/** U+2029 PARAGRAPH SEPARATOR. */
inline constexpr char32_t unicode_PS = U'\u2029';
26
34public:
// Default construction is defaulted, constexpr and noexcept.
35 constexpr unicode_description() noexcept = default;
// Copy assignment is deleted; move construction and move assignment are defaulted.
// NOTE(review): no copy-constructor declaration is visible in this listing — confirm
// whether copying is meant to be disabled as well.
37 unicode_description& operator=(unicode_description const&) = delete;
38 constexpr unicode_description(unicode_description&&) noexcept = default;
39 constexpr unicode_description& operator=(unicode_description&&) noexcept = default;
40
41 [[nodiscard]] constexpr unicode_description(
42 unicode_general_category general_category,
43 unicode_grapheme_cluster_break grapheme_cluster_break,
44 unicode_line_break_class line_break_class,
45 unicode_word_break_property word_break_property,
46 unicode_sentence_break_property sentence_break_property,
47 unicode_east_asian_width east_asian_width,
48 unicode_script script,
49 unicode_bidi_class bidi_class,
50 unicode_bidi_bracket_type bidi_bracket_type,
51 char32_t bidi_mirroring_glyph,
52 uint8_t canonical_combining_class,
53 unicode_decomposition_type decomposition_type,
54 uint32_t decomposition_index,
55 uint16_t composition_index) noexcept :
56 _general_category(to_underlying(general_category)),
57 _grapheme_cluster_break(to_underlying(grapheme_cluster_break)),
58 _line_break_class(to_underlying(line_break_class)),
59 _word_break_property(to_underlying(word_break_property)),
60 _sentence_break_property(to_underlying(sentence_break_property)),
61 _east_asian_width(to_underlying(east_asian_width)),
62 _script(to_underlying(script)),
63 _bidi_class(to_underlying(bidi_class)),
64 _bidi_bracket_type(to_underlying(bidi_bracket_type)),
65 _bidi_mirroring_glyph(truncate<uint32_t>(bidi_mirroring_glyph)),
66 _canonical_combining_class(truncate<uint32_t>(canonical_combining_class)),
67 _decomposition_type(to_underlying(decomposition_type)),
68 _decomposition_index(truncate<uint32_t>(decomposition_index)),
69 _composition_index(truncate<uint16_t>(composition_index))
70 {
71 hi_axiom(to_underlying(general_category) <= 0x1f);
72 hi_axiom(to_underlying(grapheme_cluster_break) <= 0x0f);
73 hi_axiom(to_underlying(line_break_class) <= 0x3f);
74 hi_axiom(to_underlying(word_break_property) <= 0x1f);
75 hi_axiom(to_underlying(sentence_break_property) <= 0xf);
76 hi_axiom(to_underlying(east_asian_width) <= 0x7);
77 hi_axiom(to_underlying(script) <= 0xff);
78 hi_axiom(to_underlying(bidi_class) <= 0x1f);
79 hi_axiom(to_underlying(bidi_bracket_type) <= 0x03);
80 hi_axiom(static_cast<uint32_t>(bidi_mirroring_glyph) <= 0xffff);
81 hi_axiom(static_cast<uint32_t>(canonical_combining_class) <= 0xff);
82 hi_axiom(to_underlying(decomposition_type) <= 0x1f);
83 hi_axiom(static_cast<uint32_t>(decomposition_index) <= 0x1f'ffff);
84 hi_axiom(static_cast<uint32_t>(composition_index) <= 0x3fff);
85 }
86
93 [[nodiscard]] constexpr unicode_general_category general_category() const noexcept
94 {
95 return static_cast<unicode_general_category>(_general_category);
96 }
97
104 [[nodiscard]] constexpr unicode_grapheme_cluster_break grapheme_cluster_break() const noexcept
105 {
106 return static_cast<unicode_grapheme_cluster_break>(_grapheme_cluster_break);
107 }
108
109 [[nodiscard]] constexpr unicode_line_break_class line_break_class() const noexcept
110 {
111 return static_cast<unicode_line_break_class>(_line_break_class);
112 }
113
114 [[nodiscard]] constexpr unicode_word_break_property word_break_property() const noexcept
115 {
116 return static_cast<unicode_word_break_property>(_word_break_property);
117 }
118
119 [[nodiscard]] constexpr unicode_sentence_break_property sentence_break_property() const noexcept
120 {
121 return static_cast<unicode_sentence_break_property>(_sentence_break_property);
122 }
123
124 [[nodiscard]] constexpr unicode_east_asian_width east_asian_width() const noexcept
125 {
126 return static_cast<unicode_east_asian_width>(_east_asian_width);
127 }
128
135 [[nodiscard]] constexpr unicode_bidi_class bidi_class() const noexcept
136 {
137 return static_cast<unicode_bidi_class>(_bidi_class);
138 }
139
142 [[nodiscard]] constexpr unicode_script script() const noexcept
143 {
144 return static_cast<unicode_script>(_script);
145 }
146
153 [[nodiscard]] constexpr unicode_bidi_bracket_type bidi_bracket_type() const noexcept
154 {
155 return static_cast<unicode_bidi_bracket_type>(_bidi_bracket_type);
156 }
157
161 [[nodiscard]] constexpr char32_t bidi_mirroring_glyph() const noexcept
162 {
163 return truncate<char32_t>(_bidi_mirroring_glyph);
164 }
165
169 [[nodiscard]] constexpr unicode_decomposition_type decomposition_type() const noexcept
170 {
171 return static_cast<unicode_decomposition_type>(_decomposition_type);
172 }
173
183 [[nodiscard]] constexpr uint8_t canonical_combining_class() const noexcept
184 {
185 return truncate<uint8_t>(_canonical_combining_class);
186 }
187
/** Decompose this code-point.
 *
 * Only declared at this point; no inline definition is given here.
 *
 * @return The decomposition as a std::u32string.
 */
192 [[nodiscard]] std::u32string decompose() const noexcept;
193
/** Compose this code-point with another.
 *
 * Only declared at this point; no inline definition is given here.
 * NOTE(review): presumably returns the code-point formed by combining this
 * code-point with `other`; the value returned when no composition exists is
 * not visible here — confirm against the definition.
 *
 * @param other The second code-point.
 * @return A single code-point.
 */
199 [[nodiscard]] char32_t compose(char32_t other) const noexcept;
200
207 [[nodiscard]] constexpr char32_t canonical_equivalent() const noexcept
208 {
209 if (decomposition_type() == unicode_decomposition_type::canonical and _decomposition_index <= 0x1f'ffff) {
210 return truncate<char32_t>(_decomposition_index);
211 } else {
212 return U'\uffff';
213 }
214 }
215
/** Find a code-point in the global unicode_description table.
 *
 * Only declared at this point; no inline definition is given here.
 *
 * @param code_point The code-point to look up.
 * @return A const reference to a unicode_description.
 */
226 [[nodiscard]] static unicode_description const& find(char32_t code_point) noexcept;
227
228 [[nodiscard]] friend bool operator==(unicode_description const& lhs, unicode_general_category const& rhs) noexcept
229 {
230 return lhs.general_category() == rhs;
231 }
232
233 [[nodiscard]] friend bool operator==(unicode_description const& lhs, unicode_decomposition_type const& rhs) noexcept
234 {
235 return lhs.decomposition_type() == rhs;
236 }
237
238 [[nodiscard]] friend bool operator==(unicode_description const& lhs, unicode_bidi_bracket_type const& rhs) noexcept
239 {
240 return lhs.bidi_bracket_type() == rhs;
241 }
242
243 [[nodiscard]] friend bool operator==(unicode_description const& lhs, unicode_bidi_class const& rhs) noexcept
244 {
245 return lhs.bidi_class() == rhs;
246 }
247 [[nodiscard]] friend bool operator==(unicode_description const& lhs, unicode_east_asian_width const& rhs) noexcept
248 {
249 return lhs.east_asian_width() == rhs;
250 }
251
252 [[nodiscard]] friend bool operator==(unicode_description const& lhs, unicode_sentence_break_property const& rhs) noexcept
253 {
254 return lhs.sentence_break_property() == rhs;
255 }
256
257 [[nodiscard]] friend bool operator==(unicode_description const& lhs, unicode_line_break_class const& rhs) noexcept
258 {
259 return lhs.line_break_class() == rhs;
260 }
261
262 [[nodiscard]] friend bool operator==(unicode_description const& lhs, unicode_word_break_property const& rhs) noexcept
263 {
264 return lhs.word_break_property() == rhs;
265 }
266
267 [[nodiscard]] friend bool operator==(unicode_description const& lhs, unicode_grapheme_cluster_break const& rhs) noexcept
268 {
269 return lhs.grapheme_cluster_break() == rhs;
270 }
271
272 [[nodiscard]] friend bool is_C(unicode_description const& rhs) noexcept
273 {
274 return is_C(rhs.general_category());
275 }
276
277private:
278 // 1st qword: 58 bits of properties (5+4+6+5+4+3+5+2+16+8) plus 6 reserved bits = 64 bits.
// Each property is kept as the raw underlying value; the constructor checks
// that the value fits in the width given here.
279 uint64_t _general_category : 5;
280 uint64_t _grapheme_cluster_break : 4;
281 uint64_t _line_break_class : 6;
282 uint64_t _word_break_property : 5;
283 uint64_t _sentence_break_property : 4;
284 uint64_t _east_asian_width : 3;
285 uint64_t _bidi_class : 5;
286 uint64_t _bidi_bracket_type : 2;
287 uint64_t _bidi_mirroring_glyph : 16;
288 uint64_t _canonical_combining_class : 8;
// Unused padding bits, zero-initialized.
289 uint64_t _word0_reserved : 6 = 0;
290
291 // 2nd qword: 48 bits of properties (8+5+21+14) plus 16 reserved bits = 64 bits.
292 uint64_t _script : 8;
293 uint64_t _decomposition_type : 5;
294 uint64_t _decomposition_index : 21;
295 uint64_t _composition_index : 14;
// Unused padding bits, zero-initialized.
296 uint64_t _word1_reserved : 16 = 0;
297};
298
// The two 64-bit words of bit-fields are expected to occupy exactly 16 bytes.
299static_assert(sizeof(unicode_description) == 16);
300
301} // namespace hi::inline v1
This file includes required definitions.
Description of a unicode code point.
Definition unicode_description.hpp:33
constexpr unicode_grapheme_cluster_break grapheme_cluster_break() const noexcept
The grapheme cluster break of this code-point.
Definition unicode_description.hpp:104
constexpr unicode_bidi_class bidi_class() const noexcept
The bidi class of this code-point. This function is used by the bidirectional algorithm to figure out ...
Definition unicode_description.hpp:135
std::u32string decompose() const noexcept
Decompose this code-point.
constexpr unicode_bidi_bracket_type bidi_bracket_type() const noexcept
Get the bidi bracket type.
Definition unicode_description.hpp:153
constexpr char32_t bidi_mirroring_glyph() const noexcept
Get the mirrored glyph.
Definition unicode_description.hpp:161
constexpr uint8_t canonical_combining_class() const noexcept
Get the combining class.
Definition unicode_description.hpp:183
constexpr unicode_general_category general_category() const noexcept
The general category of this code-point.
Definition unicode_description.hpp:93
static unicode_description const & find(char32_t code_point) noexcept
Find a code-point in the global unicode_description table.
constexpr unicode_script script() const noexcept
Get the script of this character.
Definition unicode_description.hpp:142
constexpr unicode_decomposition_type decomposition_type() const noexcept
This character has a canonical decomposition.
Definition unicode_description.hpp:169