// Configuration values copied out of `Config` so the table-building code can refer to them by a short name.
// Selects whether a digit following a leading '0' continues as an octal or as a decimal integer.
39 constexpr static auto zero_starts_octal = Config.zero_starts_octal;
// Character that may appear between the digits of a number; it is compared against `no_capture`
// before any separator transitions are registered.
40 constexpr static auto number_group_separator = Config.number_group_separator;
43 enum class state_type: uint8_t {
// Tag type; the variadic add() tests for it with std::is_same_v and then stores `no_capture`
// in the command's `char_to_capture`.
56 struct no_capture_tag{};
// NOTE(review): this is declared as `char` but initialised from a `no_capture_tag` object, while the
// neighbouring constants use `auto`. It is also used as a char value (default of `char_to_capture`,
// comparison with `number_group_separator`). The intended type/value cannot be determined from here — confirm.
58 constexpr static char no_capture = no_capture_tag{};
// Tag values passed as trailing arguments to add(); `no_read_tag` and `clear_tag` are declared outside this view.
59 constexpr static auto no_read = no_read_tag{};
60 constexpr static auto clear = clear_tag{};
// Short aliases for the state enumerators so the transition registrations stay compact.
62 constexpr static auto idle = state_type::idle;
63 constexpr static auto zero = state_type::zero;
// Fields of one transition command (the enclosing struct header is outside this view).
// State to switch to; defaults to `state_type::_size` until add() assigns a target.
71 state_type next_state = state_type::_size;
// Token recorded by add() when a token_type argument is passed; `none` by default.
76 token_type emit_token = token_type::none;
// Character recorded for capture; add() stores the input character, a replacement char argument, or `no_capture`.
81 char char_to_capture = no_capture;
// Flat table holding 128 commands per state; make_index() computes `state * 128 + character`.
95 using transition_table_type =
std::array<command_type,to_underlying(state_type::_size) * 128>;
// The table instance; value-initialised by the constructor and filled by the add() overloads.
97 transition_table_type _transition_table;
101 constexpr lexer() noexcept : _transition_table()
103 add_literal_numbers();
105 '\'', token_type::sqstring_literal, sqstring_literal, sqstring_literal_quote, sqstring_literal_escape, sqstring_literal_escape_finish);
107 '"', token_type::dqstring_literal, dqstring_literal, dqstring_literal_quote, dqstring_literal_escape, dqstring_literal_escape_finish);
109 '`', token_type::btstring_literal, btstring_literal, btstring_literal_quote, btstring_literal_escape, btstring_literal_escape_finish);
114 constexpr void add_literal_numbers() noexcept
116 add(idle,
"0", zero);
117 add(idle,
"123456789", dec_integer);
119 add(zero, idle, token_type::integer_literal, no_read);
120 add(zero,
"bB", bin_integer);
121 add(zero,
"oO", oct_integer);
122 add(zero,
"dD", dec_integer);
123 add(zero,
"xX", hex_integer);
124 if constexpr (zero_starts_octal) {
125 add(zero,
"01234567", oct_integer);
127 add(zero,
"0123456789", dec_integer);
130 if constexpr (number_group_separator != no_capture) {
132 add(zero, number_group_separator, zero_starts_octal ? oct_integer : dec_integer, no_capture);
133 add(bin_integer, number_group_separator, bin_integer, no_capture);
134 add(oct_integer, number_group_separator, oct_integer, no_capture);
135 add(dec_integer, number_group_separator, dec_integer, no_capture);
136 add(hex_integer, number_group_separator, hex_integer, no_capture);
137 add(dec_float, number_group_separator, dec_integer, no_capture);
138 add(hex_float, number_group_separator, dec_integer, no_capture);
139 add(dec_exponent, number_group_separator, dec_integer, no_capture);
140 add(hex_exponent, number_group_separator, dec_integer, no_capture);
144 add(bin_integer, idle, token_type::integer_literal, no_read);
145 add(bin_integer,
"01", bin_integer);
148 add(oct_integer, idle, token_type::integer_literal, no_read);
149 add(oct_integer,
"01234567", oct_integer);
152 add(dec_integer, idle, token_type::integer_literal, no_read);
153 add(dec_integer,
"0123456789", dec_integer);
154 add(dec_integer,
".", dec_float);
155 add(dec_integer,
"eE", dec_sign_exponent);
158 add(hex_integer, idle, token_type::integer_literal, no_read);
159 add(hex_integer,
"0123456789abcdefABCDEF", hex_integer);
160 add(hex_integer,
".", hex_float);
161 add(hex_integer,
"pP", hex_sign_exponent);
164 add(dec_float, idle, token_type::float_literal, no_read);
165 add(dec_float,
"0123456789", dec_float);
166 add(dec_float,
"eE", dec_sign_exponent);
167 add(dec_sign_exponent, idle, token_type::error_missing_exponent_number, no_read);
168 add(dec_sign_exponent,
"0123456789", dec_exponent);
169 add(dec_sign_exponent,
"+-", dec_exponent);
170 add(dec_exponent, idle, token_type::float_literal, no_read);
171 add(dec_exponent,
"0123456789", dec_exponent);
174 add(hex_float, idle, token_type::float_literal, no_read);
175 add(hex_float,
"0123456789abcdefABCDEF", hex_float);
176 add(hex_float,
"pP", hex_sign_exponent);
177 add(hex_sign_exponent, idle, token_type::error_missing_exponent_number, no_read);
178 add(hex_sign_exponent,
"0123456789abcdefABCDEF", hex_exponent);
179 add(hex_sign_exponent,
"+-", hex_exponent);
180 add(hex_exponent, idle, token_type::float_literal, no_read);
181 add(hex_exponent,
"0123456789abcdefABCDEF", hex_exponent);
184 constexpr void add_literal_string(
188 state_type literal_quote,
189 state_type literal_escape,
190 state_type literal_escape_finish)
noexcept
192 add(idle, c, literal, no_capture);
193 add(literal, literal);
194 if (escape_by_quote_doubling) {
195 add(literal,
'"', literal_quote, no_capture);
196 add(literal_quote, idle, token_type::string_literal, no_capture);
197 add(literal_quote,
'"', literal)
199 add(literal,
'"', idle, token_type::string_literal, no_capture);
201 add(literal,
'\\', literal_escape, no_capture);
202 add(literal_escape, literal_escape_finish,
'\\', no_read);
203 add(literal_escape,
'"', literal);
204 add(literal_escape,
'\'', literal);
205 add(literal_escape,
'?', literal);
206 add(literal_escape,
'a', literal,
'\a');
207 add(literal_escape,
'b', literal,
'\b');
208 add(literal_escape,
'f', literal,
'\f');
209 add(literal_escape,
'n', literal,
'\n');
210 add(literal_escape,
'r', literal,
'\r');
211 add(literal_escape,
't', literal,
'\t');
212 add(literal_escape,
'v', literal,
'\v');
213 add(literal_escape_finish, literal);
216 constexpr void add_comment() noexcept
218 add(idle,
'/', slash, no_read, no_capture);
219 add(slash, slash_finish, token_type::op, no_read);
220 add(slash_finish, idle, token_type::op);
223 add(slash,
'/', line_comment, no_capture);
227 add(slash,
'*', slash_star_block_comment, no_capture);
228 add(slash_star_block_comment, slash_star_block_comment);
229 add(slash_star_block_comment,
'*', slash_star_star, no_capture);
230 add(slash_star_star, slash_start_block_comment,
'*', no_read);
231 add(slash_star_star,
'/', idle, no_capture);
235 add(slash,
'<', slash_star_block_comment, no_capture);
236 add(slash_star_block_comment, slash_star_block_comment);
237 add(slash_star_block_comment,
'*', slash_star_star, no_capture);
238 add(slash_star_star, slash_start_block_comment,
'*', no_read);
239 add(slash_star_star,
'/', idle, no_capture);
242 if (semicolon_starts_comment) {
243 add(idle,
';', line_comment, no_capture);
246 if (hash_starts_comment) {
247 add(idle,
'#', line_comment, no_capture);
250 add(line_comment, line_comment);
251 add(line_comment,
"\n\f\v", idle, token_type::comment, no_read);
254 constexpr void add_identifier() noexcept
256 add(idle,
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_", identifier);
257 add(identifier, identifier, token_type::identigier, no_read);
258 add(identifier,
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789", identifier);
261 [[nodiscard]]
constexpr static size_t make_index(state_type from,
char c)
noexcept
263 auto c_ = char_cast<size_t>(c);
264 return to_underlying(from) * 128 + index;
267 constexpr command_type &add(state_type from,
char c, state_type to)
noexcept
269 hilet i = make_index(from, c);
270 auto &command = transition_table[i];
271 command.next_state = to;
272 command.char_to_capture =
c;
274 command.clear_capture = 0;
292 template<
typename First,
typename... Args>
293 constexpr command_type &
add(state_type from,
char c, state_type to, First first, Args
const &...args)
noexcept
295 auto &command = add(from, c, to, args...);
296 if constexpr (std::is_same_v<First, token_type>) {
298 command.emit_token = first;
300 }
else if constexpr (std::is_same_v<First, no_read_tag) {
303 }
else if constexpr (std::is_same_v<First, no_capture_tag) {
304 command.char_to_capture = no_capture;
306 }
else if constexpr (std::is_same_v<First, reset_tag) {
309 }
else if constexpr (std::is_same_v<First, char) {
310 command.char_to_capture = first;
319 template<
size_t N,
typename... Args>
320 constexpr void add(state_type from,
char (&str)[N], state_type to, Args
const &...args)
noexcept
322 for (
auto i = 0_uz; i != N; ++i) {
323 add(from, str[i], to, args...);
327 template<
typename... Args>
328 constexpr void add(state_type from, state_type to, Args
const &...args)
noexcept
330 for (
char c = 0; c != 127; ++c) {
331 add(from, c, to, args...);