7#include "../utility/module.hpp"
8#include "../i18n/module.hpp"
9#include "../container/module.hpp"
10#include "../telemetry/log.hpp"
11#include "../strings.hpp"
12#include "unicode_normalization.hpp"
13#include "ucd_general_categories.hpp"
14#include "phrasing.hpp"
24namespace hi::inline
v1 {
27inline auto long_graphemes = ::hi::stable_set<std::u32string>{};
45 using value_type = uint64_t;
61 constexpr grapheme() noexcept = default;
67 constexpr
grapheme(intrinsic_t, value_type value) : _value(value) {}
69 constexpr value_type& intrinsic() noexcept
74 constexpr value_type
const& intrinsic() const noexcept
81 constexpr grapheme(
char32_t code_point) noexcept : _value(char_cast<value_type>(code_point))
86 constexpr grapheme(
char ascii_char) noexcept : _value(char_cast<value_type>(ascii_char))
88 hi_axiom(ascii_char >= 0 and ascii_char <= 0x7f);
95 _value = char_cast<value_type>(code_point);
103 hi_axiom(ascii_char >= 0 and ascii_char <= 0x7f);
104 _value = char_cast<value_type>(ascii_char);
114 switch (code_points.size()) {
119 _value = char_cast<value_type>(code_points[0]);
124 if (index < 0x0f'0000) {
125 _value = narrow_cast<value_type>(index + 0x11'0000);
128 [[unlikely]] hi_log_error_once(
129 "grapheme::error::too-many",
"Too many long graphemes encoded, replacing with U+fffd");
130 _value = char_cast<value_type>(U
'\ufffd');
139 constexpr explicit grapheme(std::u32string_view code_points) noexcept :
// Get the code-point/index part of the grapheme: the low 21 bits of `_value`.
// Other visible code treats a result <= 0x10'ffff as a single code-point and a
// larger result as a reference into the long-grapheme table.
146 [[nodiscard]]
constexpr uint32_t
index() const noexcept
148 return _value & 0x1f'ffff;
// Read the ISO-639 language code stored in the 15-bit field that starts at
// bit 21 of `_value`.
151 [[nodiscard]]
constexpr iso_639 language() const noexcept
153 return iso_639{intrinsic_t{}, narrow_cast<uint16_t>((_value >> 21) & 0x7fff)};
// Store an ISO-639 language code in the 15-bit field that starts at bit 21.
156 constexpr void set_language(iso_639 rhs)
noexcept
// The code must fit in the 15-bit field.
158 hi_axiom(rhs.intrinsic() <= 0x7fff);
// `mask` has every bit set except the language field.
// NOTE(review): the statement that applies `mask` is not part of this listing;
// presumably the field is cleared with it before the OR below -- confirm.
160 constexpr auto mask = ~(value_type{0x7fff} << 21);
162 _value |= wide_cast<value_type>(rhs.intrinsic()) << 21;
// Read the script (an `iso_15924` value) stored in the 10-bit field that
// starts at bit 36 of `_value`.
165 [[nodiscard]]
constexpr iso_15924 script() const noexcept
167 return iso_15924{intrinsic_t{}, narrow_cast<uint16_t>((_value >> 36) & 0x3ff)};
// Store a script (an `iso_15924` value) in the 10-bit field that starts at bit 36.
170 constexpr void set_script(iso_15924 rhs)
noexcept
// `mask` has every bit set except the script field.
// NOTE(review): the range check and the statement that applies `mask` are not
// part of this listing; presumably the field is cleared before the OR -- confirm.
174 constexpr auto mask = ~(value_type{0x3ff} << 36);
176 _value |= wide_cast<value_type>(rhs.intrinsic()) << 36;
// Read the region (an `iso_3166` value) stored in the 10-bit field that
// starts at bit 46 of `_value`.
179 [[nodiscard]]
constexpr iso_3166 region() const noexcept
181 return iso_3166{intrinsic_t{}, narrow_cast<uint16_t>((_value >> 46) & 0x3ff)};
// Store a region (an `iso_3166` value) in the 10-bit field that starts at bit 46.
184 constexpr void set_region(iso_3166 rhs)
noexcept
// `mask` has every bit set except the region field.
// NOTE(review): the range check and the statement that applies `mask` are not
// part of this listing; presumably the field is cleared before the OR -- confirm.
188 constexpr auto mask = ~(value_type{0x3ff} << 46);
190 _value |= wide_cast<value_type>(rhs.intrinsic()) << 46;
// Build a `hi::language_tag` from the language, script and region fields.
// NOTE(review): the definition of `tmp` and the shifts between the three
// extractions are not part of this listing; presumably `tmp` starts as
// `_value >> 21` and is shifted right by 15 and then by 10 bits -- confirm.
193 [[nodiscard]]
constexpr hi::language_tag language_tag() const noexcept
// 15-bit language code.
197 hilet language_ = iso_639{intrinsic_t{}, narrow_cast<uint16_t>(tmp & 0x7fff)};
// 10-bit script.
199 hilet script_ = iso_15924{intrinsic_t{}, narrow_cast<uint16_t>(tmp & 0x3ff)};
// 10-bit region.
201 hilet region_ = iso_3166{intrinsic_t{}, narrow_cast<uint16_t>(tmp & 0x3ff)};
202 return hi::language_tag{language_, script_, region_};
// Store the language, script and region of a language-tag in one go.
//
// Field layout, as used by language(), script() and region():
//   language: 15 bits starting at bit 21
//   script:   10 bits starting at bit 36
//   region:   10 bits starting at bit 46
// Together the three fields occupy the 35 bits 21..55 of `_value`.
205 constexpr void set_language_tag(hi::language_tag rhs)
noexcept
// Range checks follow the field widths: region and script are 10-bit fields,
// language is the 15-bit field. (The region and language limits were swapped,
// which rejected valid language codes and accepted out-of-range regions.)
207 hi_axiom(rhs.region.intrinsic() < 1000);
208 hi_axiom(rhs.script.intrinsic() < 1000);
209 hi_axiom(rhs.language.intrinsic() <= 0x7fff);
// Pack region | script | language, most-significant field first.
// NOTE(review): the shifts between these statements are not part of this
// listing; presumably `tmp` is shifted left by 10 and then by 15 bits -- confirm.
211 auto tmp = wide_cast<value_type>(rhs.region.intrinsic());
213 tmp |= rhs.script.intrinsic();
215 tmp |= rhs.language.intrinsic();
// Clear all 35 bits of the three fields. The previous mask, 0x7'ffff, covered
// only 19 bits, so stale script and region bits survived and were OR-ed with
// the new values.
218 constexpr auto mask = ~(uint64_t{0x7'ffff'ffff} << 21);
// Read the phrasing stored in the 6-bit field that starts at bit 56 of `_value`.
223 [[nodiscard]]
constexpr hi::phrasing
phrasing() const noexcept
225 return static_cast<hi::phrasing
>((_value >> 56) & 0x3f);
// Store a phrasing in the 6-bit field that starts at bit 56.
228 constexpr void set_phrasing(hi::phrasing rhs)
noexcept
// The underlying value must fit in the 6-bit field.
230 hi_axiom(std::to_underlying(rhs) <= 0x3f);
// `mask` has every bit set except the phrasing field.
// NOTE(review): the statement that applies `mask` is not part of this listing;
// presumably the field is cleared with it before the OR below -- confirm.
232 constexpr auto mask = ~(value_type{0x3f} << 56);
234 _value |=
static_cast<value_type
>(rhs) << 56;
// Look up the code-point string of a grapheme that is stored in the
// `long_graphemes` table rather than inline.
// NOTE(review): the definition of `i` is not part of this listing; presumably
// it is `index()` -- confirm.
237 [[nodiscard]]
std::u32string const& long_grapheme() const noexcept
// Only values above the single code-point range refer to the table.
240 hi_axiom(i > 0x10'ffff and i <= 0x1f'ffff);
// Table slots are offset by 0x11'0000, the first value past the code-point range.
241 return detail::long_graphemes[i - 0x11'0000];
248 return index() <= 0x10'ffff ? 1_uz : long_grapheme().size();
// Get the code-point at the given index.
257 [[nodiscard]]
constexpr char32_t operator[](
size_t i)
const noexcept
// A value up to 0x10'ffff is the code-point itself.
259 if (
hilet code_point = index(); code_point <= 0x10'ffff) {
261 return char_cast<char32_t>(code_point);
// Otherwise index into the code-point string returned by long_grapheme().
// NOTE(review): the lines between the two returns are not part of this listing.
264 return long_grapheme()[i];
// Get the code-point at the given index.
// NOTE(review): `I` is not declared in this listing; presumably it is a
// template parameter of this function -- confirm.
276 [[nodiscard]]
friend constexpr char32_t get(
grapheme const& rhs)
noexcept
// A value up to 0x10'ffff is a single code-point stored inline; the body of
// this branch is not part of this listing.
278 if (
hilet code_point = rhs.index(); code_point <= 0x10'ffff) {
// Otherwise index into the code-point string returned by long_grapheme().
284 return rhs.long_grapheme()[I];
292 if (
hilet code_point = index(); code_point <= 0x10'ffff) {
296 return long_grapheme();
305 return unicode_decompose(composed(), config);
314 return lhs.index() == rhs.index();
// Compare a grapheme with a single code-point.
// Only the index part (low 21 bits) takes part in the comparison; the
// language, script, region and phrasing fields are not looked at.
317 [[nodiscard]]
friend constexpr bool operator==(
grapheme const& lhs,
char32_t const& rhs)
noexcept
// `rhs` must not exceed 0x10'ffff.
319 hi_axiom(char_cast<value_type>(rhs) <= 0x10'ffff);
320 return lhs.index() == char_cast<value_type>(rhs);
// Compare a grapheme with a single ASCII character.
// Only the index part (low 21 bits) takes part in the comparison; the
// language, script, region and phrasing fields are not looked at.
323 [[nodiscard]]
friend constexpr bool operator==(grapheme
const& lhs,
char const& rhs)
noexcept
// `rhs` must be in the 7-bit ASCII range.
325 hi_axiom(char_cast<value_type>(rhs) <= 0x7f);
326 return lhs.index() == char_cast<value_type>(rhs);
333 return lhs.decomposed() <=> rhs.decomposed();
336 [[nodiscard]]
friend constexpr std::strong_ordering operator<=>(
grapheme const& lhs,
char32_t const& rhs)
noexcept
341 [[nodiscard]]
friend constexpr std::strong_ordering operator<=>(grapheme
const& lhs,
char const& rhs)
noexcept
358 return rhs.composed();
#define hi_axiom_bounds(x,...)
Specify an axiom that the value is within bounds.
Definition assert.hpp:264
#define hi_no_default(...)
This part of the code should not be reachable, unless a programming bug.
Definition assert.hpp:279
#define hi_axiom(expression,...)
Specify an axiom; an expression that is true.
Definition assert.hpp:253
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
constexpr std::string to_string(std::u32string_view rhs) noexcept
Conversion from UTF-32 to UTF-8.
Definition to_string.hpp:215
constexpr std::u32string to_u32string(std::u32string_view rhs) noexcept
Identity conversion from UTF-32 to UTF-32.
Definition to_string.hpp:23
constexpr std::wstring to_wstring(std::u32string_view rhs) noexcept
Conversion from UTF-32 to wide-string (UTF-16/32).
Definition to_string.hpp:155
@ grapheme
The gui_event has grapheme data.
phrasing
Phrasing.
Definition phrasing.hpp:27
DOXYGEN BUG.
Definition algorithm.hpp:13
constexpr std::u32string unicode_normalize(std::u32string_view text, unicode_normalize_config config=unicode_normalize_config::NFC()) noexcept
Convert text to a Unicode composed normal form.
Definition unicode_normalization.hpp:303
geometry/margins.hpp
Definition cache.hpp:11
ISO-639 language code.
Definition iso_639.hpp:23
Definition grapheme.hpp:31
A grapheme-cluster, what a user thinks a character is.
Definition grapheme.hpp:44
constexpr grapheme(std::u32string_view code_points) noexcept
Encode a grapheme from a list of code-points.
Definition grapheme.hpp:139
friend constexpr bool operator==(grapheme const &lhs, grapheme const &rhs) noexcept
Compare equivalence of two graphemes.
Definition grapheme.hpp:312
friend constexpr std::strong_ordering operator<=>(grapheme const &lhs, grapheme const &rhs) noexcept
Compare two graphemes lexicographically.
Definition grapheme.hpp:331
constexpr char32_t operator[](size_t i) const noexcept
Get the code-point at the given index.
Definition grapheme.hpp:257
constexpr grapheme(composed_t, std::u32string_view code_points) noexcept
Encode a grapheme from a list of code-points.
Definition grapheme.hpp:112
constexpr uint32_t index() const noexcept
Get the codepoint/index part of the grapheme.
Definition grapheme.hpp:146
constexpr grapheme(char32_t code_point) noexcept
Encode a single code-point.
Definition grapheme.hpp:81
constexpr grapheme & operator=(char ascii_char) noexcept
Encode a single ASCII character.
Definition grapheme.hpp:101
value_type _value
The grapheme's value.
Definition grapheme.hpp:59
constexpr std::u32string decomposed(unicode_normalize_config config=unicode_normalize_config::NFD()) const noexcept
Get a list of code-points normalized to NFD.
Definition grapheme.hpp:303
constexpr std::u32string composed() const noexcept
Get a list of code-points normalized to NFC.
Definition grapheme.hpp:290
constexpr grapheme & operator=(char32_t code_point) noexcept
Encode a single code-point.
Definition grapheme.hpp:93
constexpr std::size_t size() const noexcept
Return the number of code-points encoded in the grapheme.
Definition grapheme.hpp:246
friend constexpr char32_t get(grapheme const &rhs) noexcept
Get the code-point at the given index.
Definition grapheme.hpp:276
Definition unicode_normalization.hpp:19