HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
unicode_description.hpp
1// Copyright Take Vos 2020-2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "unicode_general_category.hpp"
8#include "unicode_bidi_class.hpp"
9#include "unicode_bidi_bracket_type.hpp"
10#include "unicode_grapheme_cluster_break.hpp"
15#include "unicode_decomposition_type.hpp"
16#include "unicode_script.hpp"
17#include "../utility/module.hpp"
18
19namespace hi::inline v1 {
20
// Well-known code points.
// Declared `inline` so every translation unit that includes this header
// shares a single definition instead of getting its own internal-linkage copy.

/// U+FFFD REPLACEMENT CHARACTER.
inline constexpr char32_t unicode_replacement_character = U'\ufffd';
/// U+2028 LINE SEPARATOR.
inline constexpr char32_t unicode_LS = U'\u2028';
/// U+2029 PARAGRAPH SEPARATOR.
inline constexpr char32_t unicode_PS = U'\u2029';
24
32public:
33 constexpr unicode_description() noexcept = default;
35 unicode_description& operator=(unicode_description const&) = delete;
36 constexpr unicode_description(unicode_description&&) noexcept = default;
37 constexpr unicode_description& operator=(unicode_description&&) noexcept = default;
38
39 [[nodiscard]] constexpr unicode_description(
40 unicode_general_category general_category,
41 unicode_grapheme_cluster_break grapheme_cluster_break,
42 unicode_line_break_class line_break_class,
43 unicode_word_break_property word_break_property,
44 unicode_sentence_break_property sentence_break_property,
45 unicode_east_asian_width east_asian_width,
46 unicode_script script,
47 unicode_bidi_class bidi_class,
48 unicode_bidi_bracket_type bidi_bracket_type,
49 char32_t bidi_mirroring_glyph,
50 uint8_t canonical_combining_class,
51 unicode_decomposition_type decomposition_type,
52 uint32_t decomposition_index,
53 uint16_t composition_index) noexcept :
54 _general_category(to_underlying(general_category)),
55 _grapheme_cluster_break(to_underlying(grapheme_cluster_break)),
56 _line_break_class(to_underlying(line_break_class)),
57 _word_break_property(to_underlying(word_break_property)),
58 _sentence_break_property(to_underlying(sentence_break_property)),
59 _east_asian_width(to_underlying(east_asian_width)),
60 _script(to_underlying(script)),
61 _bidi_class(to_underlying(bidi_class)),
62 _bidi_bracket_type(to_underlying(bidi_bracket_type)),
63 _bidi_mirroring_glyph(truncate<uint32_t>(bidi_mirroring_glyph)),
64 _canonical_combining_class(truncate<uint32_t>(canonical_combining_class)),
65 _decomposition_type(to_underlying(decomposition_type)),
66 _decomposition_index(truncate<uint32_t>(decomposition_index)),
67 _composition_index(truncate<uint16_t>(composition_index))
68 {
69 hi_assert(to_underlying(general_category) <= 0x1f);
70 hi_assert(to_underlying(grapheme_cluster_break) <= 0x0f);
71 hi_assert(to_underlying(line_break_class) <= 0x3f);
72 hi_assert(to_underlying(word_break_property) <= 0x1f);
73 hi_assert(to_underlying(sentence_break_property) <= 0xf);
74 hi_assert(to_underlying(east_asian_width) <= 0x7);
75 hi_assert(to_underlying(script) <= 0xff);
76 hi_assert(to_underlying(bidi_class) <= 0x1f);
77 hi_assert(to_underlying(bidi_bracket_type) <= 0x03);
78 hi_assert(static_cast<uint32_t>(bidi_mirroring_glyph) <= 0xffff);
79 hi_assert(static_cast<uint32_t>(canonical_combining_class) <= 0xff);
80 hi_assert(to_underlying(decomposition_type) <= 0x1f);
81 hi_assert(static_cast<uint32_t>(decomposition_index) <= 0x1f'ffff);
82 hi_assert(static_cast<uint32_t>(composition_index) <= 0x3fff);
83 }
84
91 [[nodiscard]] constexpr unicode_general_category general_category() const noexcept
92 {
93 return static_cast<unicode_general_category>(_general_category);
94 }
95
102 [[nodiscard]] constexpr unicode_grapheme_cluster_break grapheme_cluster_break() const noexcept
103 {
104 return static_cast<unicode_grapheme_cluster_break>(_grapheme_cluster_break);
105 }
106
107 [[nodiscard]] constexpr unicode_line_break_class line_break_class() const noexcept
108 {
109 return static_cast<unicode_line_break_class>(_line_break_class);
110 }
111
112 [[nodiscard]] constexpr unicode_word_break_property word_break_property() const noexcept
113 {
114 return static_cast<unicode_word_break_property>(_word_break_property);
115 }
116
117 [[nodiscard]] constexpr unicode_sentence_break_property sentence_break_property() const noexcept
118 {
119 return static_cast<unicode_sentence_break_property>(_sentence_break_property);
120 }
121
122 [[nodiscard]] constexpr unicode_east_asian_width east_asian_width() const noexcept
123 {
124 return static_cast<unicode_east_asian_width>(_east_asian_width);
125 }
126
133 [[nodiscard]] constexpr unicode_bidi_class bidi_class() const noexcept
134 {
135 return static_cast<unicode_bidi_class>(_bidi_class);
136 }
137
140 [[nodiscard]] constexpr unicode_script script() const noexcept
141 {
142 return static_cast<unicode_script>(_script);
143 }
144
151 [[nodiscard]] constexpr unicode_bidi_bracket_type bidi_bracket_type() const noexcept
152 {
153 return static_cast<unicode_bidi_bracket_type>(_bidi_bracket_type);
154 }
155
159 [[nodiscard]] constexpr char32_t bidi_mirroring_glyph() const noexcept
160 {
161 return truncate<char32_t>(_bidi_mirroring_glyph);
162 }
163
167 [[nodiscard]] constexpr unicode_decomposition_type decomposition_type() const noexcept
168 {
169 return static_cast<unicode_decomposition_type>(_decomposition_type);
170 }
171
181 [[nodiscard]] constexpr uint8_t canonical_combining_class() const noexcept
182 {
183 return truncate<uint8_t>(_canonical_combining_class);
184 }
185
190 [[nodiscard]] std::u32string decompose() const noexcept;
191
197 [[nodiscard]] char32_t compose(char32_t other) const noexcept;
198
205 [[nodiscard]] constexpr char32_t canonical_equivalent() const noexcept
206 {
207 if (decomposition_type() == unicode_decomposition_type::canonical and _decomposition_index <= 0x1f'ffff) {
208 return truncate<char32_t>(_decomposition_index);
209 } else {
210 return U'\uffff';
211 }
212 }
213
224 [[nodiscard]] static unicode_description const& find(char32_t code_point) noexcept;
225
226 [[nodiscard]] friend bool operator==(unicode_description const& lhs, unicode_general_category const& rhs) noexcept
227 {
228 return lhs.general_category() == rhs;
229 }
230
231 [[nodiscard]] friend bool operator==(unicode_description const& lhs, unicode_decomposition_type const& rhs) noexcept
232 {
233 return lhs.decomposition_type() == rhs;
234 }
235
236 [[nodiscard]] friend bool operator==(unicode_description const& lhs, unicode_bidi_bracket_type const& rhs) noexcept
237 {
238 return lhs.bidi_bracket_type() == rhs;
239 }
240
241 [[nodiscard]] friend bool operator==(unicode_description const& lhs, unicode_bidi_class const& rhs) noexcept
242 {
243 return lhs.bidi_class() == rhs;
244 }
245 [[nodiscard]] friend bool operator==(unicode_description const& lhs, unicode_east_asian_width const& rhs) noexcept
246 {
247 return lhs.east_asian_width() == rhs;
248 }
249
250 [[nodiscard]] friend bool operator==(unicode_description const& lhs, unicode_sentence_break_property const& rhs) noexcept
251 {
252 return lhs.sentence_break_property() == rhs;
253 }
254
255 [[nodiscard]] friend bool operator==(unicode_description const& lhs, unicode_line_break_class const& rhs) noexcept
256 {
257 return lhs.line_break_class() == rhs;
258 }
259
260 [[nodiscard]] friend bool operator==(unicode_description const& lhs, unicode_word_break_property const& rhs) noexcept
261 {
262 return lhs.word_break_property() == rhs;
263 }
264
265 [[nodiscard]] friend bool operator==(unicode_description const& lhs, unicode_grapheme_cluster_break const& rhs) noexcept
266 {
267 return lhs.grapheme_cluster_break() == rhs;
268 }
269
270 [[nodiscard]] friend bool is_C(unicode_description const& rhs) noexcept
271 {
272 return is_C(rhs.general_category());
273 }
274
275private:
276 // 1st qword
277 uint64_t _general_category : 5;
278 uint64_t _grapheme_cluster_break : 4;
279 uint64_t _line_break_class : 6;
280 uint64_t _word_break_property : 5;
281 uint64_t _sentence_break_property : 4;
282 uint64_t _east_asian_width : 3;
283 uint64_t _bidi_class : 5;
284 uint64_t _bidi_bracket_type : 2;
285 uint64_t _bidi_mirroring_glyph : 16;
286 uint64_t _canonical_combining_class : 8;
287 uint64_t _word0_reserved : 6 = 0;
288
289 // 2nd qword
290 uint64_t _script : 8;
291 uint64_t _decomposition_type : 5;
292 uint64_t _decomposition_index : 21;
293 uint64_t _composition_index : 14;
294 uint64_t _word1_reserved : 16 = 0;
295};
296
297static_assert(sizeof(unicode_description) == 16);
298
299} // namespace hi::inline v1
#define hi_assert(expression,...)
Assert that expression is true.
Definition assert.hpp:184
DOXYGEN BUG.
Definition algorithm.hpp:13
unicode_decomposition_type
Definition unicode_decomposition_type.hpp:16
unicode_line_break_class
Unicode line break class.
Definition unicode_line_break.hpp:33
unicode_bidi_bracket_type
Definition unicode_bidi_bracket_type.hpp:11
unicode_bidi_class
Bidirectional class Unicode Standard Annex #9: https://unicode.org/reports/tr9/.
Definition unicode_bidi_class.hpp:16
Description of a unicode code point.
Definition unicode_description.hpp:31
constexpr unicode_grapheme_cluster_break grapheme_cluster_break() const noexcept
The grapheme cluster break of this code-point.
Definition unicode_description.hpp:102
constexpr unicode_bidi_class bidi_class() const noexcept
The bidi class of this code-point. This function is used by the bidirectional algorithm to figure out ...
Definition unicode_description.hpp:133
std::u32string decompose() const noexcept
Decompose this code-point.
constexpr unicode_bidi_bracket_type bidi_bracket_type() const noexcept
Get the bidi bracket type.
Definition unicode_description.hpp:151
constexpr char32_t bidi_mirroring_glyph() const noexcept
Get the mirrored glyph.
Definition unicode_description.hpp:159
constexpr uint8_t canonical_combining_class() const noexcept
Get the combining class.
Definition unicode_description.hpp:181
constexpr unicode_general_category general_category() const noexcept
The general category of this code-point.
Definition unicode_description.hpp:91
static unicode_description const & find(char32_t code_point) noexcept
Find a code-point in the global unicode_description table.
constexpr unicode_script script() const noexcept
Get the script of this character.
Definition unicode_description.hpp:140
constexpr unicode_decomposition_type decomposition_type() const noexcept
Get the decomposition type of this code-point.
Definition unicode_description.hpp:167