HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
UnicodeData.hpp
1// Copyright 2019 Pokitec
2// All rights reserved.
3
4#pragma once
5
6#include "TTauri/Text/Grapheme.hpp"
7#include "TTauri/Text/UnicodeBidi.hpp"
8#include "TTauri/Foundation/ResourceView.hpp"
9#include "TTauri/Foundation/math.hpp"
10#include "TTauri/Foundation/URL.hpp"
11#include "TTauri/Foundation/required.hpp"
12#include <nonstd/span>
13
14namespace tt {
15struct UnicodeData_Description;
16
/** Classification of a code-point used when looking for grapheme breaks.
 *
 * The enumerator names appear to mirror the Grapheme_Cluster_Break property
 * values of Unicode Standard Annex #29 (plus Extended_Pictographic).
 *
 * The enumerators are numbered sequentially starting at zero; the
 * static_asserts below pin the numeric values so that reordering the list
 * is caught at compile time.
 */
enum class GraphemeUnitType : uint8_t {
    Other = 0,
    CR,
    LF,
    Control,
    Extend,
    ZWJ,
    Regional_Indicator,
    Prepend,
    SpacingMark,
    L,
    V,
    T,
    LV,
    LVT,
    Extended_Pictographic
};

static_assert(static_cast<uint8_t>(GraphemeUnitType::CR) == 1);
static_assert(static_cast<uint8_t>(GraphemeUnitType::LF) == 2);
static_assert(static_cast<uint8_t>(GraphemeUnitType::Control) == 3);
static_assert(static_cast<uint8_t>(GraphemeUnitType::Extend) == 4);
static_assert(static_cast<uint8_t>(GraphemeUnitType::ZWJ) == 5);
static_assert(static_cast<uint8_t>(GraphemeUnitType::Regional_Indicator) == 6);
static_assert(static_cast<uint8_t>(GraphemeUnitType::Prepend) == 7);
static_assert(static_cast<uint8_t>(GraphemeUnitType::SpacingMark) == 8);
static_assert(static_cast<uint8_t>(GraphemeUnitType::L) == 9);
static_assert(static_cast<uint8_t>(GraphemeUnitType::V) == 10);
static_assert(static_cast<uint8_t>(GraphemeUnitType::T) == 11);
static_assert(static_cast<uint8_t>(GraphemeUnitType::LV) == 12);
static_assert(static_cast<uint8_t>(GraphemeUnitType::LVT) == 13);
static_assert(static_cast<uint8_t>(GraphemeUnitType::Extended_Pictographic) == 14);
34
35
36
38 GraphemeUnitType previous = GraphemeUnitType::Other;
39 int RICount = 0;
40 bool firstCharacter = true;
41 bool inExtendedPictographic = false;
42
43 void reset() noexcept {
44 previous = GraphemeUnitType::Other;
45 RICount = 0;
46 firstCharacter = true;
47 inExtendedPictographic = false;
48 }
49};
50
51
/** Bidirectional class of a unicode code-point.
 *
 * The enumerator names appear to follow the Bidi_Class property values of
 * Unicode Standard Annex #9. Every enumerator carries an explicit numeric
 * value so the numbering does not depend on declaration order.
 */
enum class BidiClass : uint8_t {
    Unknown = 0,
    L       = 1,
    R       = 2,
    AL      = 3,
    EN      = 4,
    ES      = 5,
    ET      = 6,
    AN      = 7,
    CS      = 8,
    NSM     = 9,
    BN      = 10,
    B       = 11,
    S       = 12,
    WS      = 13,
    ON      = 14,

    // Explicit values.
    LRE     = 15,
    LRO     = 16,
    RLE     = 17,
    RLO     = 18,
    PDF     = 19,
    LRI     = 20,
    RLI     = 21,
    FSI     = 22,
    PDI     = 23
};
82
/** Coarse character classification, produced by to_GeneralCharacterClass().
 *
 * This is an unscoped enumeration: its enumerators are also reachable
 * without the GeneralCharacterClass:: qualifier and convert implicitly to
 * an integer.
 */
enum GeneralCharacterClass {
    Unknown            = 0,
    Digit              = 1,
    Letter             = 2,
    WhiteSpace         = 3,
    ParagraphSeparator = 4
};
92
95[[nodiscard]] constexpr GeneralCharacterClass to_GeneralCharacterClass(BidiClass bidiClass) noexcept {
96 switch (bidiClass) {
97 case BidiClass::Unknown: return GeneralCharacterClass::Unknown;
98 case BidiClass::L: return GeneralCharacterClass::Letter;
99 case BidiClass::R: return GeneralCharacterClass::Letter;
100 case BidiClass::AL: return GeneralCharacterClass::Letter;
101 case BidiClass::EN: return GeneralCharacterClass::Digit;
102 case BidiClass::ES: return GeneralCharacterClass::Unknown;
103 case BidiClass::ET: return GeneralCharacterClass::Unknown;
104 case BidiClass::AN: return GeneralCharacterClass::Digit;
105 case BidiClass::CS: return GeneralCharacterClass::Unknown;
106 case BidiClass::NSM: return GeneralCharacterClass::Unknown;
107 case BidiClass::BN: return GeneralCharacterClass::Unknown;
108 case BidiClass::B: return GeneralCharacterClass::ParagraphSeparator;
109 case BidiClass::S: return GeneralCharacterClass::Unknown;
110 case BidiClass::WS: return GeneralCharacterClass::WhiteSpace;
111 case BidiClass::ON: return GeneralCharacterClass::Unknown;
112 case BidiClass::LRE: return GeneralCharacterClass::Unknown;
113 case BidiClass::LRO: return GeneralCharacterClass::Unknown;
114 case BidiClass::RLE: return GeneralCharacterClass::Unknown;
115 case BidiClass::RLO: return GeneralCharacterClass::Unknown;
116 case BidiClass::PDF: return GeneralCharacterClass::Unknown;
117 case BidiClass::LRI: return GeneralCharacterClass::Unknown;
118 case BidiClass::RLI: return GeneralCharacterClass::Unknown;
119 case BidiClass::FSI: return GeneralCharacterClass::Unknown;
120 case BidiClass::PDI: return GeneralCharacterClass::Unknown;
121 default: tt_no_default;
122 }
123}
124
128private:
129 nonstd::span<std::byte const> bytes;
130
134
135 size_t descriptions_offset;
136 size_t descriptions_count;
137
138 size_t compositions_offset;
139 size_t compositions_count;
140public:
144 UnicodeData(nonstd::span<std::byte const> bytes);
145
149
150 UnicodeData() = delete;
151 UnicodeData(UnicodeData const &other) = delete;
152 UnicodeData &operator=(UnicodeData const &other) = delete;
153 UnicodeData(UnicodeData &&other) = delete;
154 UnicodeData &operator=(UnicodeData &&other) = delete;
155 ~UnicodeData() = default;
156
167 std::u32string toNFD(std::u32string_view text, bool decomposeLigatures=false) const noexcept;
168
180 std::u32string toNFC(std::u32string_view text, bool decomposeLigatures=false, bool composeCRLF=false) const noexcept;
181
188 std::u32string toNFKD(std::u32string_view text) const noexcept;
189
197 std::u32string toNFKC(std::u32string_view text, bool composeCRLF=false) const noexcept;
198
209 bool checkGraphemeBreak(char32_t codeUnit, GraphemeBreakState &state) const noexcept;
210
215 BidiClass getBidiClass(char32_t codePoint) const noexcept;
216
217private:
218 void initialize();
219
220 UnicodeData_Description const *getDescription(char32_t codePoint) const noexcept;
221 GraphemeUnitType getGraphemeUnitType(char32_t codePoint) const noexcept;
222 uint8_t getDecompositionOrder(char32_t codePoint) const noexcept;
223
224 char32_t compose(char32_t startCharacter, char32_t composingCharacter, bool composeCRLF) const noexcept;
225 void decomposeCodePoint(std::u32string &result, char32_t codePoint, bool decomposeCompatible, bool decomposeLigatures) const noexcept;
226 std::u32string decompose(std::u32string_view text, bool decomposeCompatible, bool decomposeLigatures=false) const noexcept;
227
228
233 static void reorder(std::u32string &text) noexcept;
234
239 static void clean(std::u32string &text) noexcept;
240
241
252 void compose(std::u32string &text, bool composeCRLF=false) const noexcept;
253};
254
255}
256
257namespace tt {
258
259template<>
260std::unique_ptr<tt::UnicodeData> parseResource(URL const &location);
261
262}
263
STL namespace.
Definition URL.hpp:45
Definition UnicodeData.hpp:37
Unicode Data used for characterizing unicode code-points.
Definition UnicodeData.hpp:127
std::u32string toNFC(std::u32string_view text, bool decomposeLigatures=false, bool composeCRLF=false) const noexcept
Convert text to Unicode-NFC normal form.
BidiClass getBidiClass(char32_t codePoint) const noexcept
Get the bidirectional class for a code-point.
UnicodeData(nonstd::span< std::byte const > bytes)
Load binary unicode data.
std::u32string toNFKD(std::u32string_view text) const noexcept
Convert text to Unicode-NFKD normal form.
UnicodeData(std::unique_ptr< ResourceView > view)
Load binary unicode data from a resource.
std::u32string toNFKC(std::u32string_view text, bool composeCRLF=false) const noexcept
Convert text to Unicode-NFKC normal form.
std::u32string toNFD(std::u32string_view text, bool decomposeLigatures=false) const noexcept
Convert text to Unicode-NFD normal form.
bool checkGraphemeBreak(char32_t codeUnit, GraphemeBreakState &state) const noexcept
Check for a grapheme break before the character.