89 unicode_general_category general_category,
90 unicode_grapheme_cluster_break grapheme_cluster_break,
91 unicode_line_break_class line_break_class,
92 unicode_word_break_property word_break_property,
93 unicode_sentence_break_property sentence_break_property,
94 unicode_east_asian_width east_asian_width,
95 unicode_script script,
96 unicode_bidi_class bidi_class,
97 unicode_bidi_bracket_type bidi_bracket_type,
98 char32_t bidi_mirrored_glyph,
99 unicode_decomposition_type decomposition_type,
100 bool is_canonical_composition,
101 uint8_t canonical_combining_class,
102 uint8_t decomposition_length,
103 uint32_t decomposition_index,
104 uint16_t non_starter_code) noexcept :
106 (
static_cast<uint32_t
>(code_point) << code_point_shift) |
107 (
static_cast<uint32_t
>(general_category) << general_category_shift) |
108 (
static_cast<uint32_t
>(grapheme_cluster_break) << grapheme_cluster_break_shift) |
109 (
static_cast<uint32_t
>(is_canonical_composition) << is_canonical_composition_shift) |
110 (
static_cast<uint32_t
>(canonical_combining_class != 0) << is_combining_mark_shift)),
111 _bidi_class(to_underlying(bidi_class)),
112 _east_asian_width(
static_cast<uint32_t
>(east_asian_width)),
113 _line_break_class(to_underlying(line_break_class)),
114 _word_break_property(to_underlying(word_break_property)),
115 _sentence_break_property(to_underlying(sentence_break_property)),
116 _decomposition_index(
static_cast<uint32_t
>(decomposition_index)),
117 _decomposition_type(
static_cast<uint32_t
>(decomposition_type)),
118 _decomposition_length(
static_cast<uint32_t
>(decomposition_length))
121 if (canonical_combining_class == 0) {
123 _non_mark.script =
static_cast<uint32_t
>(script);
125 if (bidi_bracket_type != unicode_bidi_bracket_type::n and bidi_mirrored_glyph !=
char32_t{0xffff}) {
126 auto mirrored_glyph_delta =
static_cast<int32_t
>(bidi_mirrored_glyph) -
static_cast<int32_t
>(code_point);
127 hi_axiom(mirrored_glyph_delta >= bidi_mirrored_glyph_min and mirrored_glyph_delta <= bidi_mirrored_glyph_max);
128 _non_mark.bidi_mirrored_glyph =
static_cast<uint32_t
>(mirrored_glyph_delta) & bidi_mirrored_glyph_mask;
131 _non_mark.bidi_mirrored_glyph = bidi_mirrored_glyph_null;
133 _non_mark._reserved = 0;
136 _mark.canonical_combining_class = canonical_combining_class;
137 _mark.non_starter_code =
static_cast<uint32_t
>(non_starter_code);
141 hi_axiom(code_point <= 0x10ffff);
142 hi_axiom(to_underlying(general_category) <= 0x1f);
143 hi_axiom(to_underlying(grapheme_cluster_break) <= 0x0f);
144 hi_axiom(to_underlying(line_break_class) <= 0x3f);
145 hi_axiom(to_underlying(word_break_property) <= 0x1f);
146 hi_axiom(to_underlying(sentence_break_property) <= 0xf);
147 hi_axiom(to_underlying(east_asian_width) <= 0x7);
148 hi_axiom(to_underlying(script) <= 0xff);
149 hi_axiom(to_underlying(bidi_class) <= 0x1f);
150 hi_axiom(to_underlying(bidi_bracket_type) <= 0x03);
151 hi_axiom(
static_cast<uint32_t
>(bidi_mirrored_glyph) <= 0x10ffff);
152 hi_axiom(
static_cast<uint32_t
>(canonical_combining_class) <= 0xff);
153 hi_axiom(to_underlying(decomposition_type) <= 0x7);
154 hi_axiom(
static_cast<uint32_t
>(decomposition_length) <= 0x1f);
155 hi_axiom(
static_cast<uint32_t
>(decomposition_index) <= 0x1f'ffff);
156 hi_axiom(
static_cast<uint32_t
>(non_starter_code) <= 0x3ff);
// Fragment: the enclosing function's signature and the declaration of `r` are not visible in this view.
// Copies the stored fields of `other` into `r`, then overwrites the general category with Cn.
162 r._general_info = other._general_info;
163 r._bidi_class = other._bidi_class;
// Only one of _mark / _non_mark is copied, selected by the combining-mark flag of `other`.
164 if (other.is_combining_mark()) {
165 r._mark = other._mark;
167 r._non_mark = other._non_mark;
169 r._east_asian_width = other._east_asian_width;
170 r._line_break_class = other._line_break_class;
171 r._word_break_property = other._word_break_property;
172 r._sentence_break_property = other._sentence_break_property;
173 r._decomposition_index = other._decomposition_index;
174 r._decomposition_type = other._decomposition_type;
175 r._decomposition_length = other._decomposition_length;
// Clear the general-category bits of the packed word, then store Cn in their place.
177 r._general_info &= ~(general_category_mask << general_category_shift);
178 r._general_info |=
static_cast<uint32_t
>(to_underlying(unicode_general_category::Cn)) << general_category_shift;
// Extracts the code point from the packed _general_info word: shift the field down to bit 0, then mask.
// NOTE(review): the accessor's signature is not visible in this view; presumably this is code_point() - confirm.
187 return static_cast<char32_t>((_general_info >> code_point_shift) & code_point_mask);
// Extracts the general category from the packed _general_info word and converts it to the enum type.
// NOTE(review): the accessor's signature is not visible in this view; presumably this is general_category() - confirm.
198 return static_cast<unicode_general_category
>((_general_info >> general_category_shift) & general_category_mask);
// Extracts the grapheme cluster break value from the packed _general_info word and converts it to the enum type.
// NOTE(review): the accessor's signature is not visible in this view; presumably this is grapheme_cluster_break() - confirm.
209 return static_cast<unicode_grapheme_cluster_break
>(
210 (_general_info >> grapheme_cluster_break_shift) & grapheme_cluster_break_mask);
/// Reports the canonical-composition flag stored in the packed _general_info word.
/// @return true when the bit at is_canonical_composition_shift is set.
213 [[nodiscard]]
constexpr bool is_canonical_composition() const noexcept
215 return static_cast<bool>((_general_info >> is_canonical_composition_shift) & is_canonical_composition_mask);
/// Reports the combining-mark flag stored in the packed _general_info word.
/// The constructor sets this bit when canonical_combining_class is non-zero.
/// @return true when the bit at is_combining_mark_shift is set.
218 [[nodiscard]]
constexpr bool is_combining_mark() const noexcept
220 return static_cast<bool>((_general_info >> is_combining_mark_shift) & is_combining_mark_mask);
/// Returns the stored line break class.
/// @return the _line_break_class bit-field converted to unicode_line_break_class.
223 [[nodiscard]]
constexpr unicode_line_break_class line_break_class() const noexcept
225 return static_cast<unicode_line_break_class
>(_line_break_class);
/// Returns the stored word break property.
/// @return the _word_break_property bit-field converted to unicode_word_break_property.
228 [[nodiscard]]
constexpr unicode_word_break_property word_break_property() const noexcept
230 return static_cast<unicode_word_break_property
>(_word_break_property);
/// Returns the stored sentence break property.
/// @return the _sentence_break_property bit-field converted to unicode_sentence_break_property.
233 [[nodiscard]]
constexpr unicode_sentence_break_property sentence_break_property() const noexcept
235 return static_cast<unicode_sentence_break_property
>(_sentence_break_property);
/// Returns the stored East Asian width.
/// @return the _east_asian_width bit-field converted to unicode_east_asian_width.
238 [[nodiscard]]
constexpr unicode_east_asian_width east_asian_width() const noexcept
240 return static_cast<unicode_east_asian_width
>(_east_asian_width);
/// Returns the stored bidirectional class.
/// @return the _bidi_class bit-field converted to unicode_bidi_class.
249 [[nodiscard]]
constexpr unicode_bidi_class
bidi_class() const noexcept
251 return static_cast<unicode_bidi_class
>(_bidi_class);
/// Returns the script of this code point.
/// @return unicode_script::Common for combining marks, otherwise the value stored in _non_mark.script.
256 [[nodiscard]]
constexpr unicode_script
script() const noexcept
// The constructor writes _non_mark.script only when canonical_combining_class is zero,
// so combining marks report Common without reading _non_mark.
258 if (is_combining_mark()) {
259 return unicode_script::Common;
261 return static_cast<unicode_script
>(_non_mark.script);
// Fragment: the accessor's signature is not visible in this view; presumably this is bidi_bracket_type() - confirm.
// Combining marks report bracket type `n`; otherwise the value stored in _non_mark.bidi_bracket_type is returned.
273 if (is_combining_mark()) {
274 return unicode_bidi_bracket_type::n;
276 return static_cast<unicode_bidi_bracket_type
>(_non_mark.bidi_bracket_type);
// Fragment of the mirrored-glyph lookup: its signature and return statements are not visible in this view.
// The first test selects the "no mirrored glyph" case: bracket type `n`, or the field holds the null sentinel.
285 if (bidi_bracket_type() == unicode_bidi_bracket_type::n or _non_mark.bidi_mirrored_glyph == bidi_mirrored_glyph_null) {
// Shift distance that moves the top bit of the stored field into bit 31 of a 32-bit value.
289 constexpr auto sign_extent_shift = 32 - bidi_mirrored_glyph_width;
// Sign-extend the stored delta: shift left, convert to a signed 32-bit value, shift back right.
// The constructor stored this field as (mirrored glyph - code point) masked to the field width.
291 hilet mirrored_glyph_delta =
292 static_cast<int32_t
>(_non_mark.bidi_mirrored_glyph << sign_extent_shift) >> sign_extent_shift;
// Rebuild the mirrored code point by adding the signed delta to this description's own code point.
293 hilet cp = code_point();
294 hilet mirror_cp =
static_cast<char32_t>(cp + mirrored_glyph_delta);
// Returns the _decomposition_type bit-field converted to unicode_decomposition_type.
// NOTE(review): the accessor's signature is not visible in this view; presumably this is decomposition_type() - confirm.
303 return static_cast<unicode_decomposition_type
>(_decomposition_type);
// Fragment: the accessor's signature and its non-mark branch are not visible in this view.
// For combining marks the class is read from _mark.canonical_combining_class and narrowed to uint8_t.
317 if (is_combining_mark()) {
318 return static_cast<uint8_t
>(_mark.canonical_combining_class);
// Returns the _decomposition_length bit-field widened to std::size_t.
// NOTE(review): the accessor's signature is not visible in this view - confirm which accessor this belongs to.
337 return static_cast<std::size_t>(_decomposition_length);
// Returns the _decomposition_index bit-field widened to std::size_t.
// NOTE(review): the accessor's signature is not visible in this view - confirm which accessor this belongs to.
353 return static_cast<std::size_t>(_decomposition_index);
// Fragment: the enclosing function's signature and its fall-through result are not visible in this view.
// For a canonical decomposition of length 1 the _decomposition_index field is returned as a char32_t,
// i.e. in that case the field is treated as a code point rather than as an index.
364 if (decomposition_type() == unicode_decomposition_type::canonical and _decomposition_length == 1) {
365 return static_cast<char32_t>(_decomposition_index);
// Fragment: the accessor's signature is not visible in this view; presumably this is non_starter_code() - confirm.
// Precondition: the description must be a combining mark, because the value is read from _mark.
380 hi_axiom(is_combining_mark());
381 return static_cast<size_t>(_mark.non_starter_code);
/// Compares the general category of `lhs` with `rhs`.
/// @return true when lhs.general_category() equals rhs.
396 [[nodiscard]]
friend bool operator==(
unicode_description const &lhs, unicode_general_category
const &rhs)
noexcept
398 return lhs.general_category() == rhs;
/// Compares the decomposition type of `lhs` with `rhs`.
/// @return true when lhs.decomposition_type() equals rhs.
401 [[nodiscard]]
friend bool operator==(
unicode_description const &lhs, unicode_decomposition_type
const &rhs)
noexcept
403 return lhs.decomposition_type() == rhs;
/// Compares the bidi bracket type of `lhs` with `rhs`.
/// @return true when lhs.bidi_bracket_type() equals rhs.
406 [[nodiscard]]
friend bool operator==(unicode_description
const &lhs, unicode_bidi_bracket_type
const &rhs)
noexcept
408 return lhs.bidi_bracket_type() == rhs;
/// Compares the bidi class of `lhs` with `rhs`.
/// @return true when lhs.bidi_class() equals rhs.
411 [[nodiscard]]
friend bool operator==(unicode_description
const &lhs, unicode_bidi_class
const &rhs)
noexcept
413 return lhs.bidi_class() == rhs;
/// Compares the East Asian width of `lhs` with `rhs`.
/// @return true when lhs.east_asian_width() equals rhs.
415 [[nodiscard]]
friend bool operator==(unicode_description
const &lhs, unicode_east_asian_width
const &rhs)
noexcept
417 return lhs.east_asian_width() == rhs;
/// Compares the sentence break property of `lhs` with `rhs`.
/// @return true when lhs.sentence_break_property() equals rhs.
420 [[nodiscard]]
friend bool operator==(unicode_description
const &lhs, unicode_sentence_break_property
const &rhs)
noexcept
422 return lhs.sentence_break_property() == rhs;
/// Compares the line break class of `lhs` with `rhs`.
/// @return true when lhs.line_break_class() equals rhs.
425 [[nodiscard]]
friend bool operator==(unicode_description
const &lhs, unicode_line_break_class
const &rhs)
noexcept
427 return lhs.line_break_class() == rhs;
/// Compares the word break property of `lhs` with `rhs`.
/// @return true when lhs.word_break_property() equals rhs.
430 [[nodiscard]]
friend bool operator==(unicode_description
const &lhs, unicode_word_break_property
const &rhs)
noexcept
432 return lhs.word_break_property() == rhs;
/// Compares the grapheme cluster break value of `lhs` with `rhs`.
/// @return true when lhs.grapheme_cluster_break() equals rhs.
435 [[nodiscard]]
friend bool operator==(unicode_description
const &lhs, unicode_grapheme_cluster_break
const &rhs)
noexcept
437 return lhs.grapheme_cluster_break() == rhs;
/// Compares the code point of `lhs` with `rhs`.
/// @return true when lhs.code_point() equals rhs.
440 [[nodiscard]]
friend bool operator==(unicode_description
const &lhs,
char32_t const &rhs)
noexcept
442 return lhs.code_point() == rhs;
/// Forwards to the is_C() overload that takes a general category, passing this description's category.
/// NOTE(review): the general-category overload of is_C() is defined outside this view.
445 [[nodiscard]]
friend bool is_C(unicode_description
const &rhs)
noexcept
447 return is_C(rhs.general_category());
// Bit layout of the packed _general_info word, as implied by the shift/mask pairs below:
//   bits 11..31  code point (21 bits)
//   bits  6..10  general category (5 bits)
//   bits  2..5   grapheme cluster break (4 bits)
//   bit   1      is_canonical_composition
//   bit   0      is_combining_mark
// Each mask is meant to be applied after the field has been shifted down to bit 0.
451 static constexpr uint32_t code_point_shift = 11;
452 static constexpr uint32_t code_point_mask = 0x1f'ffff;
453 static constexpr uint32_t general_category_shift = 6;
454 static constexpr uint32_t general_category_mask = 0x1f;
455 static constexpr uint32_t grapheme_cluster_break_shift = 2;
456 static constexpr uint32_t grapheme_cluster_break_mask = 0xf;
457 static constexpr uint32_t is_canonical_composition_shift = 1;
458 static constexpr uint32_t is_canonical_composition_mask = 0x1;
459 static constexpr uint32_t is_combining_mark_shift = 0;
460 static constexpr uint32_t is_combining_mark_mask = 0x1;
// The mirrored glyph is stored as a signed delta relative to the code point, in a 13-bit field.
462 static constexpr uint32_t bidi_mirrored_glyph_mask = 0x1fff;
// Number of bits in that field (13 for a mask of 0x1fff).
463 static constexpr uint32_t bidi_mirrored_glyph_width = std::bit_width(bidi_mirrored_glyph_mask);
// Largest delta that may be stored: mask >> 1 = 0xfff.
464 static constexpr int32_t bidi_mirrored_glyph_max =
static_cast<int32_t
>(bidi_mirrored_glyph_mask >> 1);
// Smallest delta is the negated maximum, which leaves one bit pattern of the field unused by real deltas.
465 static constexpr int32_t bidi_mirrored_glyph_min = -bidi_mirrored_glyph_max;
// That spare pattern, (min - 1) masked to the field width = 0x1000, marks "no mirrored glyph".
466 static constexpr uint32_t bidi_mirrored_glyph_null =
467 static_cast<uint32_t
>(bidi_mirrored_glyph_min - 1) & bidi_mirrored_glyph_mask;
// Packed word holding the code point, general category, grapheme cluster break and two flag bits;
// it is read and written through the *_shift / *_mask constants.
477 uint32_t _general_info;
// Bit-field group: 5 + 5 + 6 + 4 + 12 = 32 bits.
480 uint32_t _bidi_class : 5;
481 uint32_t _word_break_property : 5;
482 uint32_t _line_break_class : 6;
483 uint32_t _sentence_break_property : 4;
// Padding bits, default-initialised to zero.
484 uint32_t _word2_reserved : 12 = 0;
// Fields used for combining marks: 8 + 10 + 14 = 32 bits.
// NOTE(review): the header of the enclosing type is not visible in this view; presumably these are
// the members reached as _mark.* elsewhere - confirm.
487 uint32_t canonical_combining_class : 8;
488 uint32_t non_starter_code : 10;
489 uint32_t _reserved : 14;
// Fields used for code points that are not combining marks.
492 struct non_mark_type {
// Signed delta to the mirrored glyph, or the null sentinel when there is none.
493 uint32_t bidi_mirrored_glyph : 13;
494 uint32_t bidi_bracket_type : 2;
// NOTE(review): the visible widths sum to 24 bits; the `script` member that other code reaches as
// _non_mark.script is not visible in this view, nor is the end of this struct.
496 uint32_t _reserved : 9;
502 non_mark_type _non_mark;
// Bit-field group: 21 + 3 + 5 + 3 = 32 bits.
507 uint32_t _decomposition_index : 21;
508 uint32_t _decomposition_type : 3;
509 uint32_t _decomposition_length : 5;
510 uint32_t _east_asian_width : 3;