HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
lexer.hpp
1
2#pragma once
3
4namespace hi {
5inline namespace v1 {
6
13 bool zero_starts_octal = false;
14
23 char number_group_separator = no_capture;
24
28};
29
30namespace detail {
31
32template<lexer_config Config>
33class lexer {
34public:
/** The kind of token reported by the lexer.
 *
 * `none` is the value stored in a transition that does not report a token.
 * The remaining enumerators are the tokens named by the transitions that
 * the `add_*()` member functions install.
 */
enum class token_type : uint8_t {
    none,
    error_missing_exponent_number,
    integer_literal,
    float_literal,
    sqstring_literal,
    dqstring_literal,
    btstring_literal,
    identifier,
    op,
    comment
};
38
39 constexpr static auto zero_starts_octal = Config.zero_starts_octal;
40 constexpr static auto number_group_separator = Config.number_group_separator;
41
42private:
/** The states of the lexer's state machine.
 *
 * `_size` must stay last: it is the number of real states and sizes the
 * transition table.
 */
enum class state_type : uint8_t {
    idle,
    zero,
    bin_integer,
    oct_integer,
    dec_integer,
    hex_integer,

    dec_float,
    dec_sign_exponent,
    dec_exponent,
    hex_float,
    hex_sign_exponent,
    hex_exponent,

    sqstring_literal,
    sqstring_literal_quote,
    sqstring_literal_escape,
    sqstring_literal_escape_finish,
    dqstring_literal,
    dqstring_literal_quote,
    dqstring_literal_escape,
    dqstring_literal_escape_finish,
    btstring_literal,
    btstring_literal_quote,
    btstring_literal_escape,
    btstring_literal_escape_finish,

    slash,
    slash_finish,
    line_comment,
    slash_star_block_comment,
    slash_star_star,

    identifier,

    _size
};

// Tag types used to select a modifier when adding a transition.
struct clear_tag {};
struct no_read_tag {};
struct no_capture_tag {};

/** Sentinel stored in `char_to_capture` when a transition captures nothing.
 *
 * It is a plain `char`, so passing it as a modifier to `add()` selects the
 * `char` modifier and stores the sentinel.
 * NOTE(review): `lexer_config::number_group_separator` is compared against
 * this value; confirm its default uses the same sentinel.
 */
constexpr static char no_capture = '\0';
constexpr static auto no_read = no_read_tag{};
constexpr static auto clear = clear_tag{};

// Short aliases, so the transition definitions can name states unqualified.
constexpr static auto idle = state_type::idle;
constexpr static auto zero = state_type::zero;
constexpr static auto bin_integer = state_type::bin_integer;
constexpr static auto oct_integer = state_type::oct_integer;
constexpr static auto dec_integer = state_type::dec_integer;
constexpr static auto hex_integer = state_type::hex_integer;
constexpr static auto dec_float = state_type::dec_float;
constexpr static auto dec_sign_exponent = state_type::dec_sign_exponent;
constexpr static auto dec_exponent = state_type::dec_exponent;
constexpr static auto hex_float = state_type::hex_float;
constexpr static auto hex_sign_exponent = state_type::hex_sign_exponent;
constexpr static auto hex_exponent = state_type::hex_exponent;
constexpr static auto sqstring_literal = state_type::sqstring_literal;
constexpr static auto sqstring_literal_quote = state_type::sqstring_literal_quote;
constexpr static auto sqstring_literal_escape = state_type::sqstring_literal_escape;
constexpr static auto sqstring_literal_escape_finish = state_type::sqstring_literal_escape_finish;
constexpr static auto dqstring_literal = state_type::dqstring_literal;
constexpr static auto dqstring_literal_quote = state_type::dqstring_literal_quote;
constexpr static auto dqstring_literal_escape = state_type::dqstring_literal_escape;
constexpr static auto dqstring_literal_escape_finish = state_type::dqstring_literal_escape_finish;
constexpr static auto btstring_literal = state_type::btstring_literal;
constexpr static auto btstring_literal_quote = state_type::btstring_literal_quote;
constexpr static auto btstring_literal_escape = state_type::btstring_literal_escape;
constexpr static auto btstring_literal_escape_finish = state_type::btstring_literal_escape_finish;
constexpr static auto slash = state_type::slash;
constexpr static auto slash_finish = state_type::slash_finish;
constexpr static auto line_comment = state_type::line_comment;
constexpr static auto slash_star_block_comment = state_type::slash_star_block_comment;
constexpr static auto slash_star_star = state_type::slash_star_star;
constexpr static auto identifier = state_type::identifier;
64
67 struct command_type {
71 state_type next_state = state_type::_size;
72
76 token_type emit_token = token_type::none;
77
81 char char_to_capture = no_capture;
82
85 uint8_t clear: 1 = 0;
86
89 uint8_t read: 1 = 0;
90 };
91
95 using transition_table_type = std::array<command_type,to_underlying(state_type::_size) * 128>;
96
97 transition_table_type _transition_table;
98
99public:
100
101 constexpr lexer() noexcept : _transition_table()
102 {
103 add_literal_numbers();
104 add_literal_string(
105 '\'', token_type::sqstring_literal, sqstring_literal, sqstring_literal_quote, sqstring_literal_escape, sqstring_literal_escape_finish);
106 add_literal_string(
107 '"', token_type::dqstring_literal, dqstring_literal, dqstring_literal_quote, dqstring_literal_escape, dqstring_literal_escape_finish);
108 add_literal_string(
109 '`', token_type::btstring_literal, btstring_literal, btstring_literal_quote, btstring_literal_escape, btstring_literal_escape_finish);
110 add_line_comment();
111 add_block_comment();
112 }
113
114 constexpr void add_literal_numbers() noexcept
115 {
116 add(idle, "0", zero);
117 add(idle, "123456789", dec_integer);
118
119 add(zero, idle, token_type::integer_literal, no_read);
120 add(zero, "bB", bin_integer);
121 add(zero, "oO", oct_integer);
122 add(zero, "dD", dec_integer);
123 add(zero, "xX", hex_integer);
124 if constexpr (zero_starts_octal) {
125 add(zero, "01234567", oct_integer);
126 } else {
127 add(zero, "0123456789", dec_integer);
128 }
129
130 if constexpr (number_group_separator != no_capture) {
131 // Don't capture number-group-separators.
132 add(zero, number_group_separator, zero_starts_octal ? oct_integer : dec_integer, no_capture);
133 add(bin_integer, number_group_separator, bin_integer, no_capture);
134 add(oct_integer, number_group_separator, oct_integer, no_capture);
135 add(dec_integer, number_group_separator, dec_integer, no_capture);
136 add(hex_integer, number_group_separator, hex_integer, no_capture);
137 add(dec_float, number_group_separator, dec_integer, no_capture);
138 add(hex_float, number_group_separator, dec_integer, no_capture);
139 add(dec_exponent, number_group_separator, dec_integer, no_capture);
140 add(hex_exponent, number_group_separator, dec_integer, no_capture);
141 }
142
143 // binary-integer
144 add(bin_integer, idle, token_type::integer_literal, no_read);
145 add(bin_integer, "01", bin_integer);
146
147 // octal-integer
148 add(oct_integer, idle, token_type::integer_literal, no_read);
149 add(oct_integer, "01234567", oct_integer);
150
151 // decimal-integer
152 add(dec_integer, idle, token_type::integer_literal, no_read);
153 add(dec_integer, "0123456789", dec_integer);
154 add(dec_integer, ".", dec_float);
155 add(dec_integer, "eE", dec_sign_exponent);
156
157 // hexadecimal-integer
158 add(hex_integer, idle, token_type::integer_literal, no_read);
159 add(hex_integer, "0123456789abcdefABCDEF", hex_integer);
160 add(hex_integer, ".", hex_float);
161 add(hex_integer, "pP", hex_sign_exponent);
162
163 // decimal-float
164 add(dec_float, idle, token_type::float_literal, no_read);
165 add(dec_float, "0123456789", dec_float);
166 add(dec_float, "eE", dec_sign_exponent);
167 add(dec_sign_exponent, idle, token_type::error_missing_exponent_number, no_read);
168 add(dec_sign_exponent, "0123456789", dec_exponent);
169 add(dec_sign_exponent, "+-", dec_exponent);
170 add(dec_exponent, idle, token_type::float_literal, no_read);
171 add(dec_exponent, "0123456789", dec_exponent);
172
173 // hexadecimal-float
174 add(hex_float, idle, token_type::float_literal, no_read);
175 add(hex_float, "0123456789abcdefABCDEF", hex_float);
176 add(hex_float, "pP", hex_sign_exponent);
177 add(hex_sign_exponent, idle, token_type::error_missing_exponent_number, no_read);
178 add(hex_sign_exponent, "0123456789abcdefABCDEF", hex_exponent);
179 add(hex_sign_exponent, "+-", hex_exponent);
180 add(hex_exponent, idle, token_type::float_literal, no_read);
181 add(hex_exponent, "0123456789abcdefABCDEF", hex_exponent);
182 }
183
184 constexpr void add_literal_string(
185 char c,
186 token_type token,
187 state_type literal,
188 state_type literal_quote,
189 state_type literal_escape,
190 state_type literal_escape_finish) noexcept
191 {
192 add(idle, c, literal, no_capture);
193 add(literal, literal);
194 if (escape_by_quote_doubling) {
195 add(literal, '"', literal_quote, no_capture);
196 add(literal_quote, idle, token_type::string_literal, no_capture);
197 add(literal_quote, '"', literal)
198 } else {
199 add(literal, '"', idle, token_type::string_literal, no_capture);
200 }
201 add(literal, '\\', literal_escape, no_capture);
202 add(literal_escape, literal_escape_finish, '\\', no_read);
203 add(literal_escape, '"', literal);
204 add(literal_escape, '\'', literal);
205 add(literal_escape, '?', literal);
206 add(literal_escape, 'a', literal, '\a');
207 add(literal_escape, 'b', literal, '\b');
208 add(literal_escape, 'f', literal, '\f');
209 add(literal_escape, 'n', literal, '\n');
210 add(literal_escape, 'r', literal, '\r');
211 add(literal_escape, 't', literal, '\t');
212 add(literal_escape, 'v', literal, '\v');
213 add(literal_escape_finish, literal);
214 }
215
216 constexpr void add_comment() noexcept
217 {
218 add(idle, '/', slash, no_read, no_capture);
219 add(slash, slash_finish, token_type::op, no_read);
220 add(slash_finish, idle, token_type::op);
221
222 if (cpp_comment) {
223 add(slash, '/', line_comment, no_capture);
224 }
225
226 if (c_comment) {
227 add(slash, '*', slash_star_block_comment, no_capture);
228 add(slash_star_block_comment, slash_star_block_comment);
229 add(slash_star_block_comment, '*', slash_star_star, no_capture);
230 add(slash_star_star, slash_start_block_comment, '*', no_read);
231 add(slash_star_star, '/', idle, no_capture);
232 }
233
234 if (sgml_comment) {
235 add(slash, '<', slash_star_block_comment, no_capture);
236 add(slash_star_block_comment, slash_star_block_comment);
237 add(slash_star_block_comment, '*', slash_star_star, no_capture);
238 add(slash_star_star, slash_start_block_comment, '*', no_read);
239 add(slash_star_star, '/', idle, no_capture);
240 }
241
242 if (semicolon_starts_comment) {
243 add(idle, ';', line_comment, no_capture);
244 }
245
246 if (hash_starts_comment) {
247 add(idle, '#', line_comment, no_capture);
248 }
249
250 add(line_comment, line_comment);
251 add(line_comment, "\n\f\v", idle, token_type::comment, no_read);
252 }
253
254 constexpr void add_identifier() noexcept
255 {
256 add(idle, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_", identifier);
257 add(identifier, identifier, token_type::identigier, no_read);
258 add(identifier, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789", identifier);
259 }
260
261 [[nodiscard]] constexpr static size_t make_index(state_type from, char c) noexcept
262 {
263 auto c_ = char_cast<size_t>(c);
264 return to_underlying(from) * 128 + index;
265 }
266
267 constexpr command_type &add(state_type from, char c, state_type to) noexcept
268 {
269 hilet i = make_index(from, c);
270 auto &command = transition_table[i];
271 command.next_state = to;
272 command.char_to_capture = c;
273 command.read = 1;
274 command.clear_capture = 0;
275 return command;
276 }
277
292 template<typename First, typename... Args>
293 constexpr command_type &add(state_type from, char c, state_type to, First first, Args const &...args) noexcept
294 {
295 auto &command = add(from, c, to, args...);
296 if constexpr (std::is_same_v<First, token_type>) {
297 command.reset = 1;
298 command.emit_token = first;
299
300 } else if constexpr (std::is_same_v<First, no_read_tag) {
301 command.read = 0;
302
303 } else if constexpr (std::is_same_v<First, no_capture_tag) {
304 command.char_to_capture = no_capture;
305
306 } else if constexpr (std::is_same_v<First, reset_tag) {
307 command.reset = 1;
308
309 } else if constexpr (std::is_same_v<First, char) {
310 command.char_to_capture = first;
311
312 } else {
314 }
315
316 return command;
317 }
318
319 template<size_t N, typename... Args>
320 constexpr void add(state_type from, char (&str)[N], state_type to, Args const &...args) noexcept
321 {
322 for (auto i = 0_uz; i != N; ++i) {
323 add(from, str[i], to, args...);
324 }
325 }
326
327 template<typename... Args>
328 constexpr void add(state_type from, state_type to, Args const &...args) noexcept
329 {
330 for (char c = 0; c != 127; ++c) {
331 add(from, c, to, args...);
332 }
333 }
334};
335
336} // detail
337
338template<lexer_config Config>
339constexpr auto lexer = detail::lexer<Config>();
340
341}}
342
#define hi_static_no_default(...)
This part of the code should not be reachable, unless there is a programming bug.
Definition assert.hpp:308
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
DOXYGEN BUG.
Definition algorithm.hpp:13
geometry/margins.hpp
Definition cache.hpp:11
Definition lexer.hpp:7
bool zero_starts_octal
A leading zero starts an octal number.
Definition lexer.hpp:13
bool escape_by_quote_doubling
Escaping quotes within a string may be done using quote doubling.
Definition lexer.hpp:27
char number_group_separator
The character used to separate groups of numbers.
Definition lexer.hpp:23
Definition lexer.hpp:33
constexpr command_type & add(state_type from, char c, state_type to, First first, Args const &...args) noexcept
Add a state change.
Definition lexer.hpp:293
Definition lexer.hpp:35