166 enum class state_type : uint8_t {
190 dec_integer_found_e_id,
191 dec_integer_found_E_id,
196 dec_float_found_e_id,
197 dec_float_found_E_id,
207 sqstring_literal_quote,
208 sqstring_literal_escape,
210 dqstring_literal_quote,
211 dqstring_literal_escape,
213 bqstring_literal_quote,
214 bqstring_literal_escape,
217 block_comment_found_star,
218 block_comment_found_dash,
219 block_comment_found_dash_dash,
220 block_comment_found_dash_dash_fin0,
// One cell of the transition table: what to do for a given (state, ASCII character) pair.
255 struct command_type {
// State the lexer switches to after this command is processed.
258 state_type next_state = state_type::idle;
// Token kind returned by process_command(); token::none is treated by the
// callers as "no token yet".
262 token::kind_type emit_token = token::none;
// Character appended to the token's capture; '\0' means nothing is captured.
266 char char_to_capture =
'\0';
// NOTE(review): presumably requests clearing the current capture (clear_tag);
// the code that sets and consumes this bit is not visible here — confirm.
270 uint8_t clear : 1 = 0;
// Set when the transition was added with the `advance` tag.
274 uint8_t advance : 1 = 0;
// Set once an add() overload has claimed this cell; used to assert against
// assigning the same state:char combination twice.
278 uint8_t assigned : 1 = 0;
// Set together with `advance` when the character is '\n', '\v' or '\f'.
282 uint8_t advance_line : 1 = 0;
// Set together with `advance` when the character is '\t'.
286 uint8_t advance_tab : 1 = 0;
291 struct advance_tag {};
292 struct capture_tag {};
294 class excluding_tag {
296 constexpr excluding_tag(
std::string exclusions) noexcept : _exclusions(
std::move(exclusions)) {}
298 [[nodiscard]]
constexpr bool contains(
char c)
const noexcept
300 return _exclusions.
find(c) != _exclusions.npos;
309 constexpr static auto capture = capture_tag{};
313 constexpr static auto advance = advance_tag{};
317 constexpr static auto clear = clear_tag{};
321 constexpr static auto any = any_tag{};
324 [[nodiscard]]
constexpr excluding_tag excluding(
char const (&exclusions)[N])
noexcept
326 return excluding_tag{
std::string(exclusions, N - 1)};
329 template<
typename First,
typename... Args>
330 [[nodiscard]]
constexpr static bool _has_advance_tag_argument()
noexcept
332 if constexpr (std::is_same_v<First, advance_tag>) {
334 }
else if constexpr (
sizeof...(Args) == 0) {
337 return _has_advance_tag_argument<Args...>();
341 template<
typename... Args>
342 [[nodiscard]]
constexpr static bool has_advance_tag_argument()
noexcept
344 if constexpr (
sizeof...(Args) == 0) {
347 return _has_advance_tag_argument<Args...>();
352 constexpr lexer() noexcept : _transition_table()
354 using enum state_type;
356 add(idle,
'/', found_slash, advance, capture);
357 add(idle,
'<', found_lt, advance, capture);
358 add(idle,
'#', found_hash, advance, capture);
359 add(idle,
'.', found_dot, advance, capture);
360 add(idle,
'=', found_eq, advance, capture);
361 add(idle,
':', found_colon, advance, capture);
363 add(found_slash, any, idle, token::other);
364 add(found_lt, any, idle, token::other);
365 add(found_hash, any, idle, token::other);
366 add(found_dot, any, idle, token::other);
367 add(found_eq, any, idle, token::other);
368 add(found_colon, any, idle, token::other);
371 add_string_literals();
374 add_number_literals();
380 add_ini_assignment();
384 for (uint8_t i = 0; i != 128; ++i) {
385 auto& command = get_command(idle, char_cast<char>(i));
386 if (not command.assigned) {
387 command.assigned = 1;
390 command.char_to_capture = char_cast<char>(i);
391 command.emit_token = token::error_unexepected_character;
392 command.next_state = idle;
397 [[nodiscard]]
constexpr command_type& get_command(state_type from,
char c)
noexcept
399 return _transition_table[std::to_underlying(from) * 128_uz + char_cast<size_t>(c)];
402 [[nodiscard]]
constexpr command_type
const& get_command(state_type from,
char c)
const noexcept
404 return _transition_table[std::to_underlying(from) * 128_uz + char_cast<size_t>(c)];
413 reference operator*()
const noexcept
419 template<
typename It, std::sentinel_for<It> ItEnd>
429 _lexer(
lexer), _first(first), _last(last), _it(first)
433 _token.kind = parse_token();
434 }
while (is_token_filtered(_token));
437 [[nodiscard]]
constexpr static bool is_token_filtered(
token x)
noexcept
439 return (Config.filter_white_space and x == token::ws) or (Config.filter_comment and x == token::lcomment) or
440 (Config.filter_comment and x == token::bcomment);
443 [[nodiscard]]
constexpr reference operator*()
const noexcept
448 [[nodiscard]]
constexpr pointer operator->()
const noexcept
453 constexpr iterator& operator++()
noexcept
455 hi_axiom(*
this != std::default_sentinel);
457 _token.kind = parse_token();
458 }
while (is_token_filtered(_token));
462 constexpr proxy operator++(
int)
noexcept
464 auto r =
proxy{**
this};
469 [[nodiscard]]
constexpr bool operator==(std::default_sentinel_t)
const noexcept
471 return _token.kind == token::none;
481 state_type _state = state_type::idle;
483 size_t _column_nr = 0;
489 constexpr void clear()
noexcept
491 _token.capture.
clear();
498 constexpr void capture(
char code_point)
noexcept
507 constexpr void capture(
char32_t code_point)
noexcept
509 hi_axiom(code_point < 0x7fff'ffff);
512 char_map<
"utf-8">{}.write(code_point, out_it);
515 constexpr void advance_counters() noexcept
// The first branch tests `_cp` for the line-terminating code points LF, VT, FF,
// NEL (U+0085), LS (U+2028) and PS (U+2029); the second branch tests for TAB.
//
// NEL is spelled as a char32_t literal on purpose: the narrow literal '\x85'
// has a negative value wherever plain `char` is signed, so after the usual
// arithmetic conversions it can never compare equal to a code point.
517 if (_cp == '\n' or _cp == '\v' or _cp == '\f' or _cp == U'\u0085' or _cp == U'\u2028' or _cp == U'\u2029') {
519 } else if (_cp == '\t') {
532 [[nodiscard]]
constexpr char32_t advance()
noexcept
538 auto const[code_point, valid] =
char_map<
"utf-8">{}.read(_it, _last);
542 [[nodiscard]]
constexpr token::kind_type parse_token_unicode_identifier()
noexcept
544 switch (ucd_get_lexical_class(_cp & 0x1f'ffff)) {
545 case unicode_lexical_class::id_start:
546 case unicode_lexical_class::id_continue:
553 if (Config.minus_in_identifier and _cp ==
'-') {
560 _state = state_type::idle;
566 [[nodiscard]]
constexpr token::kind_type parse_token_unicode_line_comment()
noexcept
568 auto const cp_ = _cp & 0x1f'ffff;
569 if (cp_ == U
'\u0085' or cp_ == U
'\u2028' or cp_ == U
'\u2029') {
570 _state = state_type::idle;
573 return token::lcomment;
583 [[nodiscard]]
constexpr token::kind_type parse_token_unicode_white_space()
noexcept
585 if (ucd_get_lexical_class(_cp & 0x1f'ffff) == unicode_lexical_class::white_space) {
592 _state = state_type::idle;
597 [[nodiscard]]
constexpr token::kind_type parse_token_unicode_idle()
noexcept
599 switch (ucd_get_lexical_class(_cp & 0x1f'ffff)) {
600 case unicode_lexical_class::id_start:
601 _state = state_type::identifier;
607 case unicode_lexical_class::white_space:
608 _state = state_type::white_space;
614 case unicode_lexical_class::syntax:
615 _state = state_type::idle;
625 return token::error_unexepected_character;
629 [[nodiscard]] hi_no_inline
constexpr token::kind_type parse_token_unicode()
noexcept
631 using enum state_type;
636 return parse_token_unicode_idle();
639 return parse_token_unicode_white_space();
642 return parse_token_unicode_line_comment();
645 return parse_token_unicode_identifier();
647 case dqstring_literal:
648 case sqstring_literal:
649 case bqstring_literal:
658 if (_cp == U
'\u0085' or _cp == U
'\u2028' or _cp == U
'\u2029') {
670 return process_command();
674 [[nodiscard]]
constexpr token::kind_type process_command(
char c =
'\0')
noexcept
676 auto const command = _lexer->get_command(_state, c);
677 _state = command.next_state;
683 if (command.char_to_capture !=
'\0') {
684 capture(command.char_to_capture);
687 if (command.advance) {
688 if (command.advance_line) {
691 }
else if (command.advance_tab) {
701 return command.emit_token;
704 [[nodiscard]]
constexpr token::kind_type parse_token()
noexcept
706 _token.line_nr = _line_nr;
707 _token.column_nr = _column_nr;
710 while (_cp <= 0x7fff'ffff) {
712 if (
auto token_kind = process_command(char_cast<char>(_cp)); token_kind != token::none) {
717 auto emit_token = parse_token_unicode();
718 if (emit_token != token::none) {
725 while (_state != state_type::idle) {
726 if (
auto token_kind = process_command(); token_kind != token::none) {
737 static_assert(std::movable<iterator<std::string::iterator, std::string::iterator>>);
739 std::is_same_v<std::iterator_traits<iterator<std::string::iterator, std::string::iterator>>::value_type,
token>);
740 static_assert(std::input_or_output_iterator<iterator<std::string::iterator, std::string::iterator>>);
741 static_assert(std::weakly_incrementable<iterator<std::string::iterator, std::string::iterator>>);
749 template<
typename It, std::sentinel_for<It> ItEnd>
760 [[nodiscard]]
constexpr auto parse(std::string_view str)
const noexcept
762 return parse(str.begin(), str.end());
769 using transition_table_type =
std::array<command_type, std::to_underlying(state_type::_size) * 128>;
771 transition_table_type _transition_table;
773 constexpr void add_string_literal(
775 token::kind_type string_token,
776 state_type string_literal,
777 state_type string_literal_quote,
778 state_type string_literal_escape)
noexcept
780 using enum state_type;
782 add(idle, c, string_literal, advance);
783 add(string_literal, any, idle, token::error_incomplete_string);
784 for (uint8_t i = 1; i != 128; ++i) {
785 if (char_cast<char>(i) != c and char_cast<char>(i) !=
'\\') {
786 add(string_literal, char_cast<char>(i), string_literal, advance, capture);
790 if constexpr (Config.escape_by_quote_doubling) {
792 add(string_literal, c, string_literal_quote, advance);
794 add(string_literal_quote, any, idle, string_token);
796 add(string_literal_quote, c, string_literal, advance, capture);
799 add(string_literal, c, idle, advance, string_token);
803 add(string_literal,
'\\', string_literal_escape, advance, capture);
804 add(string_literal_escape, any, idle, token::error_incomplete_string);
805 for (uint8_t i = 1; i != 128; ++i) {
806 add(string_literal_escape, char_cast<char>(i), string_literal, advance, capture);
810 constexpr void add_string_literals() noexcept
812 using enum state_type;
814 if constexpr (Config.has_single_quote_string_literal) {
815 add_string_literal(
'\'', token::sstr, sqstring_literal, sqstring_literal_quote, sqstring_literal_escape);
817 add(idle,
'\'', idle, token::other, advance, capture);
820 if constexpr (Config.has_double_quote_string_literal) {
821 add_string_literal(
'"', token::dstr, dqstring_literal, dqstring_literal_quote, dqstring_literal_escape);
823 add(idle,
'"', idle, token::other, advance, capture);
826 if constexpr (Config.has_back_quote_string_literal) {
827 add_string_literal(
'`', token::bstr, bqstring_literal, bqstring_literal_quote, bqstring_literal_escape);
829 add(idle,
'`', idle, token::other, advance, capture);
833 constexpr void add_number_literals() noexcept
835 using enum state_type;
837 add(idle,
"0", zero, advance, capture);
838 add(idle,
"123456789", dec_integer, advance, capture);
840 add(zero, any, idle, token::integer);
841 add(zero,
".", dec_float, advance, capture);
842 add(zero,
"b", zero_b, advance);
843 add(zero,
"B", zero_B, advance);
844 add(zero,
"o", zero_o, advance);
845 add(zero,
"O", zero_O, advance);
846 add(zero,
"d", zero_d, advance);
847 add(zero,
"D", zero_D, advance);
848 add(zero,
"x", zero_x, advance);
849 add(zero,
"X", zero_X, advance);
851 add(zero_b, any, zero_b_id, token::integer);
852 add(zero_B, any, zero_B_id, token::integer);
853 add(zero_o, any, zero_o_id, token::integer);
854 add(zero_O, any, zero_O_id, token::integer);
855 add(zero_d, any, zero_d_id, token::integer);
856 add(zero_D, any, zero_D_id, token::integer);
857 add(zero_x, any, zero_x_id, token::integer);
858 add(zero_X, any, zero_X_id, token::integer);
// First character after a radix prefix ("0b", "0o", "0d", "0x", either case).
// These transitions do not advance: they only capture the prefix letter that
// was skipped earlier and switch to the matching integer state, which then
// validates the character itself (e.g. bin_integer rejects '2'..'9').
//
// The hexadecimal entries must also accept the letters a-f/A-F; otherwise a
// literal such as "0xff" falls through to the `any` entry and is split into
// the integer "0" followed by the identifier "xff". The '.' lets hex_integer
// continue into hex_float.
859 add(zero_b, "0123456789", bin_integer, 'b');
860 add(zero_B, "0123456789", bin_integer, 'B');
861 add(zero_o, "0123456789", oct_integer, 'o');
862 add(zero_O, "0123456789", oct_integer, 'O');
863 add(zero_d, "0123456789", dec_integer, 'd');
864 add(zero_D, "0123456789", dec_integer, 'D');
865 add(zero_x, "0123456789abcdefABCDEF.", hex_integer, 'x');
866 add(zero_X, "0123456789abcdefABCDEF.", hex_integer, 'X');
868 add(zero_b_id, any, identifier,
'b');
869 add(zero_B_id, any, identifier,
'B');
870 add(zero_o_id, any, identifier,
'o');
871 add(zero_O_id, any, identifier,
'O');
872 add(zero_d_id, any, identifier,
'd');
873 add(zero_D_id, any, identifier,
'D');
874 add(zero_x_id, any, identifier,
'x');
875 add(zero_X_id, any, identifier,
'X');
877 if constexpr (Config.zero_starts_octal) {
878 add(zero,
"01234567", oct_integer, advance, capture);
879 add(zero,
"89", idle, token::error_invalid_digit);
881 add(zero,
"0123456789", dec_integer, advance, capture);
885 add(bin_integer, any, idle, token::integer);
886 add(bin_integer,
"01", bin_integer, advance, capture);
887 add(bin_integer,
"23456789", idle, token::error_invalid_digit);
890 add(oct_integer, any, idle, token::integer);
891 add(oct_integer,
"01234567", oct_integer, advance, capture);
892 add(oct_integer,
"89", idle, token::error_invalid_digit);
895 add(dec_integer, any, idle, token::integer);
896 add(dec_integer,
"0123456789", dec_integer, advance, capture);
897 add(dec_integer,
".", dec_float, advance, capture);
898 add(dec_integer,
"e", dec_integer_found_e, advance);
899 add(dec_integer,
"E", dec_integer_found_E, advance);
900 add(dec_integer_found_e, any, dec_integer_found_e_id, token::integer);
901 add(dec_integer_found_E, any, dec_integer_found_E_id, token::integer);
902 add(dec_integer_found_e,
"+-0123456789", dec_sign_exponent,
'e');
903 add(dec_integer_found_E,
"+-0123456789", dec_sign_exponent,
'E');
904 add(dec_integer_found_e_id, any, identifier,
'e');
905 add(dec_integer_found_E_id, any, identifier,
'E');
908 add(hex_integer, any, idle, token::integer);
909 add(hex_integer,
"0123456789abcdefABCDEF", hex_integer, advance, capture);
910 add(hex_integer,
".", hex_float, advance, capture);
911 add(hex_integer,
"pP", hex_sign_exponent, advance, capture);
// A lone '.' starts a floating point literal only when a decimal digit
// follows. 'e'/'E' must not be accepted here: with them ".E" enters the
// exponent states, so member access such as "obj.Exit" would emit the '.' as a
// `real` token instead of an `other` token.
914 add(found_dot, "0123456789", dec_float);
915 add(dec_float, any, idle, token::real);
916 add(dec_float,
"0123456789", dec_float, advance, capture);
917 add(dec_float,
"e", dec_float_found_e, advance);
918 add(dec_float,
"E", dec_float_found_E, advance);
919 add(dec_float_found_e, any, dec_float_found_e_id, token::real);
920 add(dec_float_found_E, any, dec_float_found_E_id, token::real);
921 add(dec_float_found_e,
"+-0123456789", dec_sign_exponent,
'e');
922 add(dec_float_found_E,
"+-0123456789", dec_sign_exponent,
'E');
923 add(dec_float_found_e_id, any, identifier,
'e');
924 add(dec_float_found_E_id, any, identifier,
'E');
// Decimal exponent, entered from the found_e/found_E states.
// dec_sign_exponent: a digit starts the exponent digits, a sign moves on to
// dec_exponent, anything else is an incomplete exponent.
926 add(dec_sign_exponent, any, idle, token::error_incomplete_exponent);
927 add(dec_sign_exponent,
"0123456789", dec_exponent_more, advance, capture);
928 add(dec_sign_exponent,
"+-", dec_exponent, advance, capture);
// dec_exponent: a sign was captured, at least one digit must follow.
929 add(dec_exponent, any, idle, token::error_incomplete_exponent);
930 add(dec_exponent,
"0123456789", dec_exponent_more, advance, capture);
// dec_exponent_more: keep capturing digits; any other character ends the
// literal and emits it as a real.
931 add(dec_exponent_more, any, idle, token::real);
932 add(dec_exponent_more,
"0123456789", dec_exponent_more, advance, capture);
935 add(hex_float, any, idle, token::real);
936 add(hex_float,
"0123456789abcdefABCDEF", hex_float, advance, capture);
937 add(hex_float,
"pP", hex_sign_exponent, advance, capture);
938 add(hex_sign_exponent, any, idle, token::error_incomplete_exponent);
939 add(hex_sign_exponent,
"0123456789abcdefABCDEF", hex_exponent_more, advance, capture);
940 add(hex_sign_exponent,
"+-", hex_exponent, advance, capture);
941 add(hex_exponent, any, idle, token::error_incomplete_exponent);
942 add(hex_exponent,
"0123456789abcdefABCDEF", hex_exponent_more, advance, capture);
943 add(hex_exponent_more, any, idle, token::real);
944 add(hex_exponent_more,
"0123456789abcdefABCDEF", hex_exponent_more, advance, capture);
946 if constexpr (Config.digit_separator !=
'\0') {
947 if constexpr (Config.zero_starts_octal) {
948 add(zero, Config.digit_separator, oct_integer, advance);
950 add(zero, Config.digit_separator, dec_integer, advance);
// A digit separator is consumed without being captured and must leave the
// lexer in the state it was found in. Jumping from a float or exponent state
// to dec_integer would make the rest of the literal lex as a decimal integer,
// so e.g. a float containing a separator would be emitted as token::integer.
952 add(bin_integer, Config.digit_separator, bin_integer, advance);
953 add(oct_integer, Config.digit_separator, oct_integer, advance);
954 add(dec_integer, Config.digit_separator, dec_integer, advance);
955 add(hex_integer, Config.digit_separator, hex_integer, advance);
956 add(dec_float, Config.digit_separator, dec_float, advance);
957 add(hex_float, Config.digit_separator, hex_float, advance);
958 add(dec_exponent, Config.digit_separator, dec_exponent, advance);
959 add(hex_exponent, Config.digit_separator, hex_exponent, advance);
963 constexpr void add_color_literal() noexcept
965 using enum state_type;
967 if constexpr (Config.has_color_literal) {
968 add(found_hash,
"0123456789abcdefABCDEF", color_literal, clear, capture, advance);
969 add(color_literal, any, idle, token::color);
970 add(color_literal,
"0123456789abcdefABCDEF", color_literal, advance, capture);
974 constexpr void add_ini_assignment() noexcept
976 using enum state_type;
978 if constexpr (Config.equal_is_ini_assignment) {
980 add(found_eq,
" \t", found_eq, advance);
981 add(found_eq,
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_", ini_string, token::other);
984 if constexpr (Config.colon_is_ini_assignment) {
986 add(found_colon,
" \t", found_colon, advance);
987 add(found_colon,
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_", ini_string, token::other);
990 add(ini_string, any, idle, token::istr);
991 add(ini_string, excluding(
"\n\v\f\r\0"), ini_string, advance, capture);
992 add(ini_string,
'\r', ini_string, advance);
995 constexpr void add_comments() noexcept
997 using enum state_type;
999 if constexpr (Config.has_double_slash_line_comment) {
1000 add(found_slash,
'/', line_comment, clear, advance);
1003 if constexpr (Config.has_semicolon_line_comment) {
1004 add(idle,
';', line_comment, advance);
1006 add(idle,
';', idle, token::other, capture, advance);
1009 if constexpr (Config.has_hash_line_comment) {
// '#' followed by anything but end-of-text starts a line comment. The '#'
// itself is cleared from the capture. The following character is neither
// consumed nor captured here, so that the line_comment state decides what to
// do with it: a line feed directly after '#' ends the (empty) comment instead
// of being swallowed together with the whole next line, and a '\r' is dropped
// like everywhere else in a line comment.
1010 add(found_hash, excluding("\0"), line_comment, clear);
1013 if constexpr (Config.has_c_block_comment) {
1014 add(found_slash,
'*', block_comment, advance, clear);
1017 if constexpr (Config.has_sgml_block_comment) {
1018 add(found_lt,
'!', found_lt_bang, advance);
1019 add(found_lt_bang, any, idle, token::error_after_lt_bang);
1020 add(found_lt_bang,
'-', found_lt_bang_dash, advance);
1021 add(found_lt_bang_dash, any, idle, token::error_after_lt_bang);
1022 add(found_lt_bang_dash,
'-', block_comment, advance);
1025 add(line_comment, any, idle, token::lcomment);
1026 add(line_comment, excluding(
"\r\n\f\v\0"), line_comment, advance, capture);
1028 add(line_comment,
'\r', line_comment, advance);
1029 add(line_comment,
"\n\f\v", idle, advance, token::lcomment);
1031 add(block_comment, any, idle, token::error_incomplete_comment);
1033 static_assert(Config.has_c_block_comment == 0 or Config.has_sgml_block_comment == 0);
1035 if constexpr (Config.has_c_block_comment) {
1036 add(block_comment, excluding(
"*\0"), block_comment, advance, capture);
1037 add(block_comment,
'*', block_comment_found_star, advance);
1038 add(block_comment_found_star, any, block_comment,
'*');
1039 add(block_comment_found_star,
'/', idle, advance, token::bcomment);
1041 }
else if constexpr (Config.has_sgml_block_comment) {
1042 add(block_comment, excluding(
"-\0"), block_comment, advance, capture);
1043 add(block_comment,
'-', block_comment_found_dash, advance);
1044 add(block_comment_found_dash, any, block_comment,
'-');
1045 add(block_comment_found_dash,
'-', block_comment_found_dash_dash, advance);
1046 add(block_comment_found_dash_dash, any, block_comment_found_dash_dash_fin0,
'-');
1047 add(block_comment_found_dash_dash_fin0, any, block_comment,
'-');
1048 add(block_comment_found_dash_dash,
'>', idle, advance, token::bcomment);
1052 constexpr void add_white_space() noexcept
1054 using enum state_type;
1056 add(idle,
'\r', white_space, advance);
1057 add(idle,
" \n\t\v\f", white_space, advance, capture);
1058 add(white_space, any, idle, token::ws);
1059 add(white_space,
'\r', white_space, advance);
1060 add(white_space,
" \n\t\v\f", white_space, advance, capture);
1063 constexpr void add_identifier() noexcept
1065 using enum state_type;
1067 add(idle,
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_", identifier, advance, capture);
1068 add(identifier, any, idle, token::id);
1069 add(identifier,
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789", identifier, advance, capture);
1070 if constexpr (Config.minus_in_identifier) {
1071 add(identifier,
'-', identifier, advance, capture);
1075 constexpr void add_others() noexcept
1077 using enum state_type;
1080 add(idle,
"()[]{},@$\\", idle, token::other, capture, advance);
1083 add(idle,
'+', found_plus, advance, capture);
1084 add(idle,
'-', found_minus, advance, capture);
1085 add(idle,
'*', found_star, advance, capture);
1086 add(idle,
'&', found_and, advance, capture);
1087 add(idle,
'|', found_vbar, advance, capture);
1088 add(idle,
'^', found_caret, advance, capture);
1089 add(idle,
'%', found_percent, advance, capture);
1090 add(idle,
'!', found_bang, advance, capture);
1091 add(idle,
'?', found_question, advance, capture);
1092 add(idle,
'~', found_tilde, advance, capture);
1093 add(idle,
'>', found_gt, advance, capture);
1095 add(found_plus, any, idle, token::other);
1096 add(found_minus, any, idle, token::other);
1097 add(found_star, any, idle, token::other);
1098 add(found_and, any, idle, token::other);
1099 add(found_vbar, any, idle, token::other);
1100 add(found_caret, any, idle, token::other);
1101 add(found_percent, any, idle, token::other);
1102 add(found_bang, any, idle, token::other);
1103 add(found_question, any, idle, token::other);
1104 add(found_tilde, any, idle, token::other);
1105 add(found_gt, any, idle, token::other);
1108 add(found_colon,
':', idle, advance, capture, token::other);
1109 if constexpr (Config.has_dot_star_operator) {
1110 add(found_dot,
'*', idle, advance, capture, token::other);
1112 if constexpr (Config.has_dot_dot_operator) {
1113 add(found_dot,
'.', found_dot_dot, advance, capture);
1115 add(found_plus,
"+=", idle, advance, capture, token::other);
1116 add(found_minus,
"-=", idle, advance, capture, token::other);
1117 add(found_minus,
'>', found_minus_gt, advance, capture);
1118 add(found_star,
"*=", idle, advance, capture, token::other);
1119 if constexpr (not Config.has_double_slash_line_comment) {
1120 add(found_slash,
'/', idle, advance, capture, token::other);
1122 add(found_slash,
'=', idle, advance, capture, token::other);
1123 add(found_and,
"&=+-*", idle, advance, capture, token::other);
1124 add(found_vbar,
"|=", idle, advance, capture, token::other);
1125 add(found_caret,
"^=", idle, advance, capture, token::other);
1126 add(found_percent,
"%=", idle, advance, capture, token::other);
1127 add(found_bang,
'=', idle, advance, capture, token::other);
1128 add(found_question,
"?=", idle, advance, capture, token::other);
1129 add(found_tilde,
'=', idle, advance, capture, token::other);
1130 add(found_lt,
'=', found_lt_eq, advance, capture);
1131 add(found_lt,
'<', found_lt_lt, advance, capture);
1132 add(found_gt,
'=', idle, advance, capture, token::other);
1133 add(found_gt,
'>', found_gt_gt, advance, capture);
1134 add(found_eq,
'=', found_eq_eq, advance, capture);
1136 add(found_minus_gt, any, idle, token::other);
1137 add(found_dot_dot, any, idle, token::other);
1138 add(found_lt_eq, any, idle, token::other);
1139 add(found_lt_lt, any, idle, token::other);
1140 add(found_gt_gt, any, idle, token::other);
1141 add(found_eq_eq, any, idle, token::other);
1144 add(found_minus_gt,
'*', idle, advance, capture, token::other);
1145 add(found_dot_dot,
".<", idle, advance, capture, token::other);
1146 add(found_lt_eq,
'>', idle, advance, capture, token::other);
1147 add(found_lt_lt,
'=', idle, advance, capture, token::other);
1148 add(found_gt_gt,
'=', idle, advance, capture, token::other);
1149 add(found_eq_eq,
'=', idle, advance, capture, token::other);
1152 constexpr command_type& _add(state_type from,
char c, state_type to)
noexcept
1154 auto& command = get_command(from, c);
1155 command.next_state = to;
1156 command.char_to_capture =
'\0';
1157 command.advance = 0;
1158 command.advance_line = 0;
1159 command.advance_tab = 0;
1161 command.emit_token = token::none;
1179 template<
typename First,
typename... Args>
1180 constexpr command_type& _add(state_type from,
char c, state_type to, First first, Args
const&...args)
noexcept
1182 auto& command = _add(from, c, to, args...);
1183 if constexpr (std::is_same_v<First, token::kind_type>) {
1184 command.emit_token = first;
1186 }
else if constexpr (std::is_same_v<First, advance_tag>) {
1187 command.advance = 1;
1188 if (c ==
'\n' or c ==
'\v' or c ==
'\f') {
1189 command.advance_line = 1;
1190 }
else if (c ==
'\t') {
1191 command.advance_tab = 1;
1194 }
else if constexpr (std::is_same_v<First, clear_tag>) {
1197 }
else if constexpr (std::is_same_v<First, capture_tag>) {
1198 command.char_to_capture = c;
1200 }
else if constexpr (std::is_same_v<First, char>) {
1201 command.char_to_capture = first;
1204 hi_static_no_default();
1210 template<
typename... Args>
1211 constexpr void add(state_type from,
char c, state_type to, Args
const&...args)
noexcept
1213 auto& command = _add(from, c, to, args...);
1214 hi_assert(not command.assigned,
"Overwriting an already assigned state:char combination.");
1215 command.assigned =
true;
1218 template<
typename... Args>
1219 constexpr void add(state_type from, std::string_view str, state_type to, Args
const&...args)
noexcept
1221 for (
auto c : str) {
1222 auto& command = _add(from, c, to, args...);
1223 hi_assert(not command.assigned,
"Overwriting an already assigned state:char combination.");
1224 command.assigned =
true;
1228 template<
typename... Args>
1229 constexpr void add(state_type from, any_tag, state_type to, Args
const&...args)
noexcept
1231 static_assert(not has_advance_tag_argument<Args...>(),
"any should not advance");
1233 for (uint8_t c = 0; c != 128; ++c) {
1234 auto const& command = _add(from, char_cast<char>(c), to, args...);
1235 hi_assert(not command.assigned,
"any should be added first to a state");
1239 template<
typename... Args>
1240 constexpr void add(state_type from, excluding_tag
const& exclusions, state_type to, Args
const&...args)
noexcept
1242 for (uint8_t c = 0; c != 128; ++c) {
1243 if (not exclusions.contains(char_cast<char>(c))) {
1244 auto& command = _add(from, char_cast<char>(c), to, args...);
1245 hi_assert(not command.assigned,
"Overwriting an already assigned state:char combination.");
1246 command.assigned =
true;