6#include "TTauri/Text/Grapheme.hpp"
7#include "TTauri/Text/UnicodeBidi.hpp"
8#include "TTauri/Foundation/ResourceView.hpp"
9#include "TTauri/Foundation/math.hpp"
10#include "TTauri/Foundation/URL.hpp"
11#include "TTauri/Foundation/required.hpp"
15struct UnicodeData_Description;
17enum class GraphemeUnitType : uint8_t {
24 Regional_Indicator = 6,
32 Extended_Pictographic = 14
38 GraphemeUnitType previous = GraphemeUnitType::Other;
40 bool firstCharacter =
true;
41 bool inExtendedPictographic =
false;
43 void reset()
noexcept {
44 previous = GraphemeUnitType::Other;
46 firstCharacter =
true;
47 inExtendedPictographic =
false;
55enum class BidiClass : uint8_t {
85enum GeneralCharacterClass {
95[[nodiscard]]
constexpr GeneralCharacterClass to_GeneralCharacterClass(BidiClass bidiClass)
noexcept {
97 case BidiClass::Unknown:
return GeneralCharacterClass::Unknown;
98 case BidiClass::L:
return GeneralCharacterClass::Letter;
99 case BidiClass::R:
return GeneralCharacterClass::Letter;
100 case BidiClass::AL:
return GeneralCharacterClass::Letter;
101 case BidiClass::EN:
return GeneralCharacterClass::Digit;
102 case BidiClass::ES:
return GeneralCharacterClass::Unknown;
103 case BidiClass::ET:
return GeneralCharacterClass::Unknown;
104 case BidiClass::AN:
return GeneralCharacterClass::Digit;
105 case BidiClass::CS:
return GeneralCharacterClass::Unknown;
106 case BidiClass::NSM:
return GeneralCharacterClass::Unknown;
107 case BidiClass::BN:
return GeneralCharacterClass::Unknown;
108 case BidiClass::B:
return GeneralCharacterClass::ParagraphSeparator;
109 case BidiClass::S:
return GeneralCharacterClass::Unknown;
110 case BidiClass::WS:
return GeneralCharacterClass::WhiteSpace;
111 case BidiClass::ON:
return GeneralCharacterClass::Unknown;
112 case BidiClass::LRE:
return GeneralCharacterClass::Unknown;
113 case BidiClass::LRO:
return GeneralCharacterClass::Unknown;
114 case BidiClass::RLE:
return GeneralCharacterClass::Unknown;
115 case BidiClass::RLO:
return GeneralCharacterClass::Unknown;
116 case BidiClass::PDF:
return GeneralCharacterClass::Unknown;
117 case BidiClass::LRI:
return GeneralCharacterClass::Unknown;
118 case BidiClass::RLI:
return GeneralCharacterClass::Unknown;
119 case BidiClass::FSI:
return GeneralCharacterClass::Unknown;
120 case BidiClass::PDI:
return GeneralCharacterClass::Unknown;
121 default: tt_no_default;
129 nonstd::span<std::byte const> bytes;
135 size_t descriptions_offset;
136 size_t descriptions_count;
138 size_t compositions_offset;
139 size_t compositions_count;
180 std::u32string
toNFC(
std::u32string_view text,
bool decomposeLigatures=false,
bool composeCRLF=false) const noexcept;
197 std::u32string
toNFKC(
std::u32string_view text,
bool composeCRLF=false) const noexcept;
220 UnicodeData_Description const *getDescription(
char32_t codePoint) const noexcept;
221 GraphemeUnitType getGraphemeUnitType(
char32_t codePoint) const noexcept;
222 uint8_t getDecompositionOrder(
char32_t codePoint) const noexcept;
224 char32_t compose(
char32_t startCharacter,
char32_t composingCharacter,
bool composeCRLF) const noexcept;
225 void decomposeCodePoint(
std::u32string &result,
char32_t codePoint,
bool decomposeCompatible,
bool decomposeLigatures) const noexcept;
226 std::u32string decompose(
std::u32string_view text,
bool decomposeCompatible,
bool decomposeLigatures=false) const noexcept;
233 static
void reorder(
std::u32string &text) noexcept;
239 static
void clean(
std::u32string &text) noexcept;
252 void compose(
std::u32string &text,
bool composeCRLF=false) const noexcept;
Definition UnicodeData.hpp:37
Unicode data used for characterizing Unicode code-points.
Definition UnicodeData.hpp:127
std::u32string toNFC(std::u32string_view text, bool decomposeLigatures=false, bool composeCRLF=false) const noexcept
Convert text to Unicode-NFC normal form.
BidiClass getBidiClass(char32_t codePoint) const noexcept
Get the bidirectional class for a code-point.
UnicodeData(nonstd::span< std::byte const > bytes)
Load binary Unicode data.
std::u32string toNFKD(std::u32string_view text) const noexcept
Convert text to Unicode-NFKD normal form.
UnicodeData(std::unique_ptr< ResourceView > view)
Load binary Unicode data from a resource.
std::u32string toNFKC(std::u32string_view text, bool composeCRLF=false) const noexcept
Convert text to Unicode-NFKC normal form.
std::u32string toNFD(std::u32string_view text, bool decomposeLigatures=false) const noexcept
Convert text to Unicode-NFD normal form.
bool checkGraphemeBreak(char32_t codeUnit, GraphemeBreakState &state) const noexcept
Check whether there is a grapheme break before the character.