HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
unicode_description.hpp
1// Copyright Take Vos 2020-2021.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "unicode_general_category.hpp"
8#include "unicode_bidi_class.hpp"
9#include "unicode_bidi_bracket_type.hpp"
10#include "unicode_grapheme_cluster_break.hpp"
11#include "../required.hpp"
12#include "../assert.hpp"
13
14namespace tt {
namespace detail {

// Constants for the arithmetic Hangul syllable composition/decomposition
// described in the Unicode Standard (section 3.12, "Conjoining Jamo Behavior").
// They are `inline` so that every translation unit including this header
// shares a single definition of each constant.

/** Code point of the first precomposed Hangul syllable (U+AC00). */
inline constexpr char32_t unicode_hangul_S_base = U'\uac00';

/** Code point of the first leading-consonant (L) jamo (U+1100). */
inline constexpr char32_t unicode_hangul_L_base = U'\u1100';

/** Code point of the first vowel (V) jamo (U+1161). */
inline constexpr char32_t unicode_hangul_V_base = U'\u1161';

/** Base of the trailing-consonant (T) index (U+11A7).
 *
 * In the Unicode algorithm T index 0 means "no trailing consonant", so this
 * base is one below the first trailing-consonant jamo.
 */
inline constexpr char32_t unicode_hangul_T_base = U'\u11a7';

/** Number of L jamo used by the arithmetic composition. */
inline constexpr char32_t unicode_hangul_L_count = 19;

/** Number of V jamo used by the arithmetic composition. */
inline constexpr char32_t unicode_hangul_V_count = 21;

/** Number of T indices, including index 0. */
inline constexpr char32_t unicode_hangul_T_count = 28;

/** Number of syllables sharing one L jamo (V_count * T_count). */
inline constexpr char32_t unicode_hangul_N_count = unicode_hangul_V_count * unicode_hangul_T_count;

/** Total number of precomposed syllables (L_count * N_count). */
inline constexpr char32_t unicode_hangul_S_count = unicode_hangul_L_count * unicode_hangul_N_count;

}
26
27[[nodiscard]] constexpr bool is_hangul_L_part(char32_t code_point) noexcept
28{
29 return code_point >= detail::unicode_hangul_L_base && code_point < (detail::unicode_hangul_L_base + detail::unicode_hangul_L_count);
30}
31
32[[nodiscard]] constexpr bool is_hangul_V_part(char32_t code_point) noexcept
33{
34 return code_point >= detail::unicode_hangul_V_base && code_point < (detail::unicode_hangul_V_base + detail::unicode_hangul_V_count);
35}
36
37[[nodiscard]] constexpr bool is_hangul_T_part(char32_t code_point) noexcept
38{
39 return code_point >= detail::unicode_hangul_T_base && code_point < (detail::unicode_hangul_T_base + detail::unicode_hangul_T_count);
40}
41
42[[nodiscard]] constexpr bool is_hangul_syllable(char32_t code_point) noexcept
43{
44 return code_point >= detail::unicode_hangul_S_base && code_point < (detail::unicode_hangul_S_base + detail::unicode_hangul_S_count);
45}
46
47[[nodiscard]] constexpr bool is_hangul_LV_part(char32_t code_point) noexcept
48{
49 return is_hangul_syllable(code_point) && ((code_point - detail::unicode_hangul_S_base) % detail::unicode_hangul_T_count) == 0;
50}
51
52[[nodiscard]] constexpr bool is_hangul_LVT_part(char32_t code_point) noexcept
53{
54 return is_hangul_syllable(code_point) && ((code_point - detail::unicode_hangul_S_base) % detail::unicode_hangul_T_count) != 0;
55}
56
64public:
65 [[nodiscard]] constexpr unicode_description(
66 char32_t code_point,
67 unicode_general_category general_category,
68 unicode_grapheme_cluster_break grapheme_cluster_break,
69 unicode_bidi_class bidi_class,
70 unicode_bidi_bracket_type bidi_bracket_type,
71 char32_t bidi_mirrored_glyph,
74 uint8_t combining_class,
76 uint32_t decomposition_index
77 ) noexcept :
78 _general_info((static_cast<uint32_t>(code_point) << 10) | (static_cast<uint32_t>(general_category) << 5) | (static_cast<uint32_t>(grapheme_cluster_break) << 1)),
79 _bidi_class(static_cast<uint32_t>(bidi_class)),
80 _bidi_bracket_type(static_cast<uint32_t>(bidi_bracket_type)),
81 _bidi_mirrored_glyph(static_cast<uint32_t>(bidi_mirrored_glyph)),
82 _decomposition_canonical(static_cast<uint32_t>(decomposition_canonical)),
83 _composition_canonical(static_cast<uint32_t>(composition_canonical)),
84 _combining_class(static_cast<uint32_t>(combining_class)),
85 _decomposition_index(static_cast<uint32_t>(decomposition_index)),
86 _decomposition_length(static_cast<uint32_t>(decomposition_length))
87 {
88 tt_axiom(code_point <= 0x10ffff);
89 tt_axiom(static_cast<uint32_t>(general_category) <= 0x1f);
90 tt_axiom(static_cast<uint32_t>(grapheme_cluster_break) <= 0x0f);
91 tt_axiom(static_cast<uint32_t>(bidi_class) <= 0x1f);
92 tt_axiom(static_cast<uint32_t>(bidi_bracket_type) <= 0x03);
93 tt_axiom(static_cast<uint32_t>(bidi_mirrored_glyph) <= 0x10ffff);
94 tt_axiom(static_cast<uint32_t>(combining_class) <= 0xff);
95 tt_axiom(static_cast<uint32_t>(decomposition_length) <= 0x1f);
96 tt_axiom(static_cast<uint32_t>(decomposition_index) <= 0x1f'ffff);
97 }
98
102 [[nodiscard]] constexpr char32_t code_point() const noexcept
103 {
104 return static_cast<char32_t>(_general_info >> 10);
105 }
106
113 [[nodiscard]] constexpr unicode_grapheme_cluster_break grapheme_cluster_break() const noexcept
114 {
115 return static_cast<unicode_grapheme_cluster_break>((_general_info >> 1) & 0xf);
116 }
117
124 [[nodiscard]] constexpr unicode_general_category general_category() const noexcept
125 {
126 return static_cast<unicode_general_category>((_general_info >> 5) & 0x1f);
127 }
128
135 [[nodiscard]] constexpr unicode_bidi_class bidi_class() const noexcept
136 {
137 return static_cast<unicode_bidi_class>(_bidi_class);
138 }
139
146 [[nodiscard]] constexpr unicode_bidi_bracket_type bidi_bracket_type() const noexcept
147 {
148 return static_cast<unicode_bidi_bracket_type>(_bidi_bracket_type);
149 }
150
154 [[nodiscard]] constexpr char32_t bidi_mirrored_glyph() const noexcept
155 {
156 return static_cast<char32_t>(_bidi_mirrored_glyph);
157 }
158
162 [[nodiscard]] constexpr bool decomposition_canonical() const noexcept
163 {
164 return static_cast<bool>(_decomposition_canonical);
165 }
166
170 [[nodiscard]] constexpr bool composition_canonical() const noexcept
171 {
172 return static_cast<bool>(_composition_canonical);
173 }
174
184 [[nodiscard]] constexpr uint8_t combining_class() const noexcept
185 {
186 return static_cast<uint8_t>(_combining_class);
187 }
188
200 [[nodiscard]] constexpr size_t decomposition_length() const noexcept
201 {
202 return static_cast<size_t>(_decomposition_length);
203 }
204
216 [[nodiscard]] constexpr size_t decomposition_index() const noexcept
217 {
218 return static_cast<size_t>(_decomposition_index);
219 }
220
227 [[nodiscard]] constexpr char32_t canonical_equivalent() const noexcept
228 {
229 if (_decomposition_canonical && _decomposition_length == 1) {
230 return static_cast<char32_t>(_decomposition_index);
231 } else {
232 return U'\uffff';
233 }
234 }
235
236private:
237 // 1st dword
238 // code_point must be in msb for fast binary search, so no bit-fields here.
239 // [31:10] code-point
240 // [9:5] general category
241 // [4:1] grapheme cluster break
242 // [0:0] reserved
243 uint32_t _general_info;
244
245 // 2nd dword
246 uint32_t _bidi_class:5;
247 uint32_t _bidi_bracket_type:2;
248 uint32_t _bidi_mirrored_glyph : 21;
249 uint32_t _bidi_reserved : 4 = 0;
250
251 // 3rd dword
252 uint32_t _decomposition_canonical : 1;
253 uint32_t _composition_canonical : 1;
254 uint32_t _combining_class : 8;
255 uint32_t _decomposition_index : 21;
256 uint32_t _decomposition_reserved1 : 1 = 0;
257
258 // 4th dword
259 uint32_t _decomposition_length : 5;
260 uint32_t _decomposition_reserved2 : 27 = 0;
261
262
263 template<typename It>
264 friend constexpr It unicode_description_find(It first, It last, char32_t code_point) noexcept;
265};
266
267static_assert(sizeof(unicode_description) == 16);
268
275template<typename It>
276[[nodiscard]] constexpr It unicode_description_find(It first, It last, char32_t code_point) noexcept
277{
278 tt_axiom(code_point <= 0x10'ffff);
279 uint32_t general_info = static_cast<uint32_t>(code_point) << 10;
280
281 auto it = std::lower_bound(first, last, general_info, [](auto const &item, auto const &value) {
282 return item._general_info < value;
283 });
284
285 if (it == last || it->code_point() != code_point) {
286 return last;
287 } else {
288 return it;
289 }
290}
291
304[[nodiscard]] unicode_description const &unicode_description_find(char32_t code_point) noexcept;
305
306}
Description of a unicode code point.
Definition unicode_description.hpp:63
friend constexpr It unicode_description_find(It first, It last, char32_t code_point) noexcept
Find a code-point in a unicode_description table using a binary-search algorithm.
Definition unicode_description.hpp:276
constexpr unicode_bidi_class bidi_class() const noexcept
The bidi class of this code-point. This function is used by the bidirectional algorithm to figure out ...
Definition unicode_description.hpp:135
constexpr uint8_t combining_class() const noexcept
Get the combining class.
Definition unicode_description.hpp:184
constexpr size_t decomposition_length() const noexcept
The number of code-points the decomposed grapheme has.
Definition unicode_description.hpp:200
constexpr bool composition_canonical() const noexcept
This character has a canonical composition.
Definition unicode_description.hpp:170
constexpr bool decomposition_canonical() const noexcept
This character has a canonical decomposition.
Definition unicode_description.hpp:162
constexpr unicode_bidi_bracket_type bidi_bracket_type() const noexcept
Get the bidi bracket type.
Definition unicode_description.hpp:146
constexpr unicode_grapheme_cluster_break grapheme_cluster_break() const noexcept
The grapheme cluster break of this code-point.
Definition unicode_description.hpp:113
constexpr size_t decomposition_index() const noexcept
A multi-use value representing the decomposition of this code-point.
Definition unicode_description.hpp:216
constexpr char32_t code_point() const noexcept
The code point of the description.
Definition unicode_description.hpp:102
constexpr char32_t bidi_mirrored_glyph() const noexcept
Get the mirrored glyph.
Definition unicode_description.hpp:154
constexpr unicode_general_category general_category() const noexcept
The general category of this code-point.
Definition unicode_description.hpp:124
constexpr char32_t canonical_equivalent() const noexcept
Get the canonical equivalent of this code-point.
Definition unicode_description.hpp:227
T lower_bound(T... args)