HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
unicode_description.hpp
1// Copyright Take Vos 2020-2021.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "unicode_general_category.hpp"
8#include "unicode_bidi_class.hpp"
9#include "unicode_bidi_bracket_type.hpp"
10#include "unicode_grapheme_cluster_break.hpp"
11#include "../required.hpp"
12
13namespace tt {
namespace detail {
// Constants for the arithmetic Hangul syllable (de)composition.
//
// Declared `inline constexpr` so that every translation unit including this
// header refers to the same entity, instead of getting its own
// internal-linkage copy.

/** Code-point of the first pre-composed Hangul syllable. */
inline constexpr char32_t unicode_hangul_S_base = U'\uac00';

/** Code-point of the first L (leading) jamo. */
inline constexpr char32_t unicode_hangul_L_base = U'\u1100';

/** Code-point of the first V (vowel) jamo. */
inline constexpr char32_t unicode_hangul_V_base = U'\u1161';

/** Base used for T (trailing) jamo arithmetic.
 *
 * A T-index of zero stands for "no trailing jamo"; see the Unicode Standard
 * on conjoining jamo behavior.
 */
inline constexpr char32_t unicode_hangul_T_base = U'\u11a7';

/** Number of L indices. */
inline constexpr char32_t unicode_hangul_L_count = 19;

/** Number of V indices. */
inline constexpr char32_t unicode_hangul_V_count = 21;

/** Number of T indices, including index zero. */
inline constexpr char32_t unicode_hangul_T_count = 28;

/** Number of syllables sharing one L index (V_count * T_count). */
inline constexpr char32_t unicode_hangul_N_count = unicode_hangul_V_count * unicode_hangul_T_count;

/** Total number of pre-composed syllables (L_count * N_count). */
inline constexpr char32_t unicode_hangul_S_count = unicode_hangul_L_count * unicode_hangul_N_count;
}
25
26[[nodiscard]] constexpr bool is_hangul_L_part(char32_t code_point) noexcept
27{
28 return code_point >= detail::unicode_hangul_L_base && code_point < (detail::unicode_hangul_L_base + detail::unicode_hangul_L_count);
29}
30
31[[nodiscard]] constexpr bool is_hangul_V_part(char32_t code_point) noexcept
32{
33 return code_point >= detail::unicode_hangul_V_base && code_point < (detail::unicode_hangul_V_base + detail::unicode_hangul_V_count);
34}
35
36[[nodiscard]] constexpr bool is_hangul_T_part(char32_t code_point) noexcept
37{
38 return code_point >= detail::unicode_hangul_T_base && code_point < (detail::unicode_hangul_T_base + detail::unicode_hangul_T_count);
39}
40
41[[nodiscard]] constexpr bool is_hangul_syllable(char32_t code_point) noexcept
42{
43 return code_point >= detail::unicode_hangul_S_base && code_point < (detail::unicode_hangul_S_base + detail::unicode_hangul_S_count);
44}
45
46[[nodiscard]] constexpr bool is_hangul_LV_part(char32_t code_point) noexcept
47{
48 return is_hangul_syllable(code_point) && ((code_point - detail::unicode_hangul_S_base) % detail::unicode_hangul_T_count) == 0;
49}
50
51[[nodiscard]] constexpr bool is_hangul_LVT_part(char32_t code_point) noexcept
52{
53 return is_hangul_syllable(code_point) && ((code_point - detail::unicode_hangul_S_base) % detail::unicode_hangul_T_count) != 0;
54}
55
63public:
64 [[nodiscard]] constexpr unicode_description(
65 char32_t code_point,
66 unicode_general_category general_category,
67 unicode_grapheme_cluster_break grapheme_cluster_break,
68 unicode_bidi_class bidi_class,
69 unicode_bidi_bracket_type bidi_bracket_type,
70 char32_t bidi_mirrored_glyph,
73 uint8_t combining_class,
75 uint32_t decomposition_index
76 ) noexcept :
77 _general_info((static_cast<uint32_t>(code_point) << 10) | (static_cast<uint32_t>(general_category) << 5) | (static_cast<uint32_t>(grapheme_cluster_break) << 1)),
78 _bidi_class(static_cast<uint32_t>(bidi_class)),
79 _bidi_bracket_type(static_cast<uint32_t>(bidi_bracket_type)),
80 _bidi_mirrored_glyph(static_cast<uint32_t>(bidi_mirrored_glyph)),
81 _decomposition_canonical(static_cast<uint32_t>(decomposition_canonical)),
82 _composition_canonical(static_cast<uint32_t>(composition_canonical)),
83 _combining_class(static_cast<uint32_t>(combining_class)),
84 _decomposition_index(static_cast<uint32_t>(decomposition_index)),
85 _decomposition_length(static_cast<uint32_t>(decomposition_length))
86 {
87 tt_axiom(code_point <= 0x10ffff);
88 tt_axiom(static_cast<uint32_t>(general_category) <= 0x1f);
89 tt_axiom(static_cast<uint32_t>(grapheme_cluster_break) <= 0x0f);
90 tt_axiom(static_cast<uint32_t>(bidi_class) <= 0x1f);
91 tt_axiom(static_cast<uint32_t>(bidi_bracket_type) <= 0x03);
92 tt_axiom(static_cast<uint32_t>(bidi_mirrored_glyph) <= 0x10ffff);
93 tt_axiom(static_cast<uint32_t>(combining_class) <= 0xff);
94 tt_axiom(static_cast<uint32_t>(decomposition_length) <= 0x1f);
95 tt_axiom(static_cast<uint32_t>(decomposition_index) <= 0x1f'ffff);
96 }
97
101 [[nodiscard]] constexpr char32_t code_point() const noexcept
102 {
103 return static_cast<char32_t>(_general_info >> 10);
104 }
105
112 [[nodiscard]] constexpr unicode_grapheme_cluster_break grapheme_cluster_break() const noexcept
113 {
114 return static_cast<unicode_grapheme_cluster_break>((_general_info >> 1) & 0xf);
115 }
116
123 [[nodiscard]] constexpr unicode_general_category general_category() const noexcept
124 {
125 return static_cast<unicode_general_category>((_general_info >> 5) & 0x1f);
126 }
127
134 [[nodiscard]] constexpr unicode_bidi_class bidi_class() const noexcept
135 {
136 return static_cast<unicode_bidi_class>(_bidi_class);
137 }
138
145 [[nodiscard]] constexpr unicode_bidi_bracket_type bidi_bracket_type() const noexcept
146 {
147 return static_cast<unicode_bidi_bracket_type>(_bidi_bracket_type);
148 }
149
153 [[nodiscard]] constexpr char32_t bidi_mirrored_glyph() const noexcept
154 {
155 return static_cast<char32_t>(_bidi_mirrored_glyph);
156 }
157
161 [[nodiscard]] constexpr bool decomposition_canonical() const noexcept
162 {
163 return static_cast<bool>(_decomposition_canonical);
164 }
165
169 [[nodiscard]] constexpr bool composition_canonical() const noexcept
170 {
171 return static_cast<bool>(_composition_canonical);
172 }
173
183 [[nodiscard]] constexpr uint8_t combining_class() const noexcept
184 {
185 return static_cast<uint8_t>(_combining_class);
186 }
187
199 [[nodiscard]] constexpr size_t decomposition_length() const noexcept
200 {
201 return static_cast<size_t>(_decomposition_length);
202 }
203
215 [[nodiscard]] constexpr size_t decomposition_index() const noexcept
216 {
217 return static_cast<size_t>(_decomposition_index);
218 }
219
226 [[nodiscard]] constexpr char32_t canonical_equivalent() const noexcept
227 {
228 if (_decomposition_canonical && _decomposition_length == 1) {
229 return static_cast<char32_t>(_decomposition_index);
230 } else {
231 return U'\uffff';
232 }
233 }
234
235private:
236 // 1st dword
237 // code_point must be in msb for fast binary search, so no bit-fields here.
238 // [31:10] code-point
239 // [9:5] general category
240 // [4:1] grapheme cluster break
241 // [0:0] reserved
242 uint32_t _general_info;
243
244 // 2nd dword
245 uint32_t _bidi_class:5;
246 uint32_t _bidi_bracket_type:2;
247 uint32_t _bidi_mirrored_glyph : 21;
248 uint32_t _bidi_reserved : 4 = 0;
249
250 // 3rd dword
251 uint32_t _decomposition_canonical : 1;
252 uint32_t _composition_canonical : 1;
253 uint32_t _combining_class : 8;
254 uint32_t _decomposition_index : 21;
255 uint32_t _decomposition_reserved1 : 1 = 0;
256
257 // 4th dword
258 uint32_t _decomposition_length : 5;
259 uint32_t _decomposition_reserved2 : 27 = 0;
260
261
262 template<typename It>
263 friend constexpr It unicode_description_find(It first, It last, char32_t code_point) noexcept;
264};
265
266static_assert(sizeof(unicode_description) == 16);
267
274template<typename It>
275[[nodiscard]] constexpr It unicode_description_find(It first, It last, char32_t code_point) noexcept
276{
277 tt_axiom(code_point <= 0x10'ffff);
278 uint32_t general_info = static_cast<uint32_t>(code_point) << 10;
279
280 auto it = std::lower_bound(first, last, general_info, [](auto const &item, auto const &value) {
281 return item._general_info < value;
282 });
283
284 if (it == last || it->code_point() != code_point) {
285 return last;
286 } else {
287 return it;
288 }
289}
290
303[[nodiscard]] unicode_description const &unicode_description_find(char32_t code_point) noexcept;
304
305}
Description of a unicode code point.
Definition unicode_description.hpp:62
friend constexpr It unicode_description_find(It first, It last, char32_t code_point) noexcept
Find a code-point in a unicode_description table using a binary-search algorithm.
Definition unicode_description.hpp:275
constexpr unicode_bidi_class bidi_class() const noexcept
The bidi class of this code-point. This function is used by the bidirectional algorithm to figure out ...
Definition unicode_description.hpp:134
constexpr uint8_t combining_class() const noexcept
Get the combining class.
Definition unicode_description.hpp:183
constexpr size_t decomposition_length() const noexcept
The number of code-points the decomposed grapheme has.
Definition unicode_description.hpp:199
constexpr bool composition_canonical() const noexcept
This character has a canonical composition.
Definition unicode_description.hpp:169
constexpr bool decomposition_canonical() const noexcept
This character has a canonical decomposition.
Definition unicode_description.hpp:161
constexpr unicode_bidi_bracket_type bidi_bracket_type() const noexcept
Get the bidi bracket type.
Definition unicode_description.hpp:145
constexpr unicode_grapheme_cluster_break grapheme_cluster_break() const noexcept
The grapheme cluster break of this code-point.
Definition unicode_description.hpp:112
constexpr size_t decomposition_index() const noexcept
A multi-use value representing the decomposition of this code-point.
Definition unicode_description.hpp:215
constexpr char32_t code_point() const noexcept
The code point of the description.
Definition unicode_description.hpp:101
constexpr char32_t bidi_mirrored_glyph() const noexcept
Get the mirrored glyph.
Definition unicode_description.hpp:153
constexpr unicode_general_category general_category() const noexcept
The general category of this code-point.
Definition unicode_description.hpp:123
constexpr char32_t canonical_equivalent() const noexcept
Get the canonical equivalent of this code-point.
Definition unicode_description.hpp:226
T lower_bound(T... args)