HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
tokenizer.hpp
1// Copyright Take Vos 2019-2020.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
#include "strings.hpp"
#include "small_vector.hpp"
#include "required.hpp"
#include "decimal.hpp"
#include "exception.hpp"
#include "parse_location.hpp"
#include "codec/UTF.hpp"
#include "charconv.hpp"
#include <array>
#include <charconv>
#include <chrono>
#include <memory>
#include <string>
#include <string_view>
#include <vector>
21
22namespace tt {
23
/** The kind of a token.
 *
 * Stored in `token_t::name`. `NotAssigned` is the kind of a default
 * constructed token. Judging by their names, the `Error*` enumerators
 * describe conditions under which the tokenizer could not produce a regular
 * token, and the remaining enumerators are the regular token categories.
 *
 * The enumerators are stored in a single byte; their order determines their
 * numeric value, so new enumerators should not be inserted in the middle
 * if the numeric values are relied upon anywhere.
 */
enum class tokenizer_name_t : uint8_t {
    /// Kind of a token that has not been given a kind.
    NotAssigned,

    // Error kinds.
    ErrorInvalidCharacter,
    ErrorEOTInBlockComment,
    ErrorEOTInString,
    ErrorLFInString,

    // Regular token kinds.
    Name,
    StringLiteral,
    IntegerLiteral,
    DateLiteral,
    TimeLiteral,
    FloatLiteral,
    /// Operator, or bracket, or other literal text.
    Operator,
    End
};
40
41constexpr char const *to_const_string(tokenizer_name_t name) noexcept
42{
43 switch (name) {
44 case tokenizer_name_t::NotAssigned: return "NotAssigned";
45 case tokenizer_name_t::ErrorInvalidCharacter: return "ErrorInvalidCharacter";
46 case tokenizer_name_t::ErrorEOTInBlockComment: return "ErrorEOTInBlockComment";
47 case tokenizer_name_t::ErrorEOTInString: return "ErrorEOTInString";
48 case tokenizer_name_t::ErrorLFInString: return "ErrorLFInString";
49 case tokenizer_name_t::Name: return "Name";
50 case tokenizer_name_t::StringLiteral: return "StringLiteral";
51 case tokenizer_name_t::IntegerLiteral: return "IntegerLiteral";
52 case tokenizer_name_t::DateLiteral: return "DateLiteral";
53 case tokenizer_name_t::TimeLiteral: return "TimeLiteral";
54 case tokenizer_name_t::FloatLiteral: return "FloatLiteral";
55 case tokenizer_name_t::Operator: return "Operator";
56 case tokenizer_name_t::End: return "End";
57 default: tt_no_default();
58 }
59}
60
61inline std::ostream &operator<<(std::ostream &lhs, tokenizer_name_t rhs)
62{
63 return lhs << to_const_string(rhs);
64}
65
} // namespace tt

/** Formatter for `tt::tokenizer_name_t`.
 *
 * Formats the token kind as the string returned by `tt::to_const_string()`,
 * reusing the format-spec handling of the `char const *` formatter.
 *
 * This specialization is declared at global scope: a specialization of
 * `std::formatter` may only be declared in a namespace that encloses `std`,
 * which `tt` does not. `namespace tt` is therefore closed above and
 * re-opened below.
 */
template<typename CharT>
struct std::formatter<tt::tokenizer_name_t, CharT> : std::formatter<char const *, CharT> {
    /** Format a token kind.
     *
     * @param t The token kind to format.
     * @param fc The format context to write into.
     * @return The value returned by the base `char const *` formatter.
     *
     * Const-qualified because the standard library invokes `format()` on a
     * const formatter.
     */
    auto format(tt::tokenizer_name_t const &t, auto &fc) const
    {
        return std::formatter<char const *, CharT>::format(tt::to_const_string(t), fc);
    }
};

namespace tt {
73
74struct token_t {
75 tokenizer_name_t name;
76 std::string value;
77 parse_location location;
78 bool is_binary;
79 int precedence;
80
81 token_t() noexcept : name(tokenizer_name_t::NotAssigned), value(), location(), is_binary(false), precedence(0) {}
82
83 token_t(tokenizer_name_t name, std::string value) noexcept :
84 name(name), value(std::move(value)), location(), is_binary(false), precedence(0)
85 {
86 }
87
88 token_t(token_t const &other) noexcept :
89 name(other.name), value(other.value), location(other.location), is_binary(other.is_binary), precedence(other.precedence)
90 {
91 tt_axiom(&other != this);
92 }
93
94 token_t(token_t &&other) noexcept :
95 name(other.name),
96 value(std::move(other.value)),
97 location(std::move(other.location)),
98 is_binary(other.is_binary),
99 precedence(other.precedence)
100 {
101 tt_axiom(&other != this);
102 }
103
104 token_t &operator=(token_t const &other) noexcept
105 {
106 tt_return_on_self_assignment(other);
107 name = other.name;
108 value = other.value;
109 location = other.location;
110 is_binary = other.is_binary;
111 precedence = other.precedence;
112 return *this;
113 }
114
115 token_t &operator=(token_t &&other) noexcept
116 {
117 // Self-assignment is allowed.
118 using std::move;
119 name = move(other.name);
120 value = move(other.value);
121 location = move(other.location);
122 is_binary = move(other.is_binary);
123 precedence = move(other.precedence);
124 return *this;
125 }
126
127 operator bool() const noexcept
128 {
129 return name != tokenizer_name_t::NotAssigned;
130 }
131
132 explicit operator long double() const
133 {
134 try {
135 return std::stold(value);
136
137 } catch (...) {
138 throw parse_error("Could not convert token {} to long double", *this);
139 }
140 }
141
142 explicit operator double() const
143 {
144 try {
145 return std::stod(value);
146
147 } catch (...) {
148 throw parse_error("Could not convert token {} to double", *this);
149 }
150 }
151
152 explicit operator float() const
153 {
154 try {
155 return std::stof(value);
156
157 } catch (...) {
158 throw parse_error("Could not convert token {} to float", *this);
159 }
160 }
161
162 template<std::integral T>
163 explicit operator T() const
164 {
165 try {
166 return tt::from_string<T>(value);
167
168 } catch (...) {
169 throw parse_error("Could not convert token {} to {}", *this, typeid(T).name());
170 }
171 }
172
173 explicit operator std::string() const noexcept
174 {
175 return utf8_to_utf8(value);
176 }
177
178 explicit operator decimal() const
179 {
180 return decimal{value};
181 }
182
183 explicit operator std::chrono::year_month_day() const
184 {
185 ttlet parts = split(value, '-');
186 if (parts.size() != 3) {
187 throw parse_error("Expect date to be in the format YYYY-MM-DD");
188 }
189
190 ttlet year = std::chrono::year{stoi(parts[0])};
191 ttlet month = std::chrono::month{narrow_cast<unsigned int>(stoi(parts[1]))};
192 ttlet day = std::chrono::day{narrow_cast<unsigned int>(stoi(parts[2]))};
193 return {year, month, day};
194 }
195
196 std::string repr() const noexcept
197 {
198 std::string r = to_const_string(name);
199 if (value.size() > 0) {
200 r += '\"';
201 r += value;
202 r += '\"';
203 }
204 return r;
205 }
206
207 friend inline std::ostream &operator<<(std::ostream &lhs, token_t const &rhs)
208 {
209 return lhs << rhs.repr();
210 }
211
212 [[nodiscard]] friend bool operator==(token_t const &lhs, token_t const &rhs) noexcept
213 {
214 return (lhs.name == rhs.name) && (lhs.value == rhs.value);
215 }
216
217 [[nodiscard]] friend bool operator==(token_t const &lhs, tokenizer_name_t const &rhs) noexcept
218 {
219 return lhs.name == rhs;
220 }
221
222 [[nodiscard]] friend bool operator!=(token_t const &lhs, tokenizer_name_t const &rhs) noexcept
223 {
224 return !(lhs == rhs);
225 }
226
227 [[nodiscard]] friend bool operator==(token_t const &lhs, const char *rhs) noexcept
228 {
229 return lhs.value == rhs;
230 }
231
232 [[nodiscard]] friend bool operator!=(token_t const &lhs, const char *rhs) noexcept
233 {
234 return !(lhs == rhs);
235 }
236};
237
239using token_iterator = typename token_vector::iterator;
240
241template<typename T>
243 bool found;
244 T value;
245 token_iterator next_token;
246
247 parse_result() noexcept : found(false), value(), next_token() {}
248
249 parse_result(T const &value, token_iterator next_token) : found(true), value(value), next_token(next_token) {}
250
251 operator bool() const noexcept
252 {
253 return found;
254 }
255
256 T const &operator*() const noexcept
257 {
258 return value;
259 }
260};
261
279[[nodiscard]] std::vector<token_t> parseTokens(std::string_view text) noexcept;
280
281[[nodiscard]] std::vector<token_t>
282parseTokens(std::string_view::const_iterator first, std::string_view::const_iterator last) noexcept;
283
284} // namespace tt
285
namespace std {

/** Formatter for `tt::token_t`.
 *
 * Formats the token as the string returned by `tt::token_t::repr()`,
 * reusing the format-spec handling of the `std::string_view` formatter.
 */
template<typename CharT>
struct std::formatter<tt::token_t, CharT> : std::formatter<std::string_view, CharT> {
    /** Format a token.
     *
     * @param t The token to format.
     * @param fc The format context to write into.
     * @return The value returned by the base `std::string_view` formatter.
     *
     * Const-qualified because the standard library invokes `format()` on a
     * const formatter.
     */
    auto format(tt::token_t const &t, auto &fc) const
    {
        // The temporary string from repr() lives until the end of this full-expression,
        // which is long enough for the base formatter to copy it to the output.
        return std::formatter<std::string_view, CharT>::format(t.repr(), fc);
    }
};

} // namespace std
STL namespace.
Definition decimal.hpp:18
Exception thrown during parsing on an error.
Definition exception.hpp:26
Definition parse_location.hpp:17
Definition tokenizer.hpp:74
Definition tokenizer.hpp:242
T move(T... args)
T size(T... args)
T stold(T... args)