HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
tokenizer.hpp
1// Copyright Take Vos 2019-2020.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
#include "strings.hpp"
#include "small_vector.hpp"
#include "required.hpp"
#include "decimal.hpp"
#include "exception.hpp"
#include "parse_location.hpp"
#include "codec/UTF.hpp"
#include "charconv.hpp"
#include <date/date.h>
#include <memory>
#include <string>
#include <string_view>
#include <charconv>
#include <array>
#include <vector>
21
22namespace tt {
23
/** Classification of a token.
 *
 * The enumerators are stored in a single byte. The first group
 * (`NotAssigned` and the `Error*` values) precedes the group of
 * regular token kinds; `to_string()` returns the identifier of each
 * enumerator verbatim.
 */
enum class tokenizer_name_t : uint8_t {
    /// Value of a default-constructed token; such a token converts to `false`.
    NotAssigned,
    ErrorInvalidCharacter,
    ErrorEOTInBlockComment,
    ErrorEOTInString,
    ErrorLFInString,

    Name,
    StringLiteral,
    IntegerLiteral,
    DateLiteral,
    TimeLiteral,
    FloatLiteral,
    /// Operator, or bracket, or other literal text.
    Operator,
    End
};
40
41constexpr char const *to_string(tokenizer_name_t name) noexcept
42{
43 switch (name) {
44 case tokenizer_name_t::NotAssigned: return "NotAssigned";
45 case tokenizer_name_t::ErrorInvalidCharacter: return "ErrorInvalidCharacter";
46 case tokenizer_name_t::ErrorEOTInBlockComment: return "ErrorEOTInBlockComment";
47 case tokenizer_name_t::ErrorEOTInString: return "ErrorEOTInString";
48 case tokenizer_name_t::ErrorLFInString: return "ErrorLFInString";
49 case tokenizer_name_t::Name: return "Name";
50 case tokenizer_name_t::StringLiteral: return "StringLiteral";
51 case tokenizer_name_t::IntegerLiteral: return "IntegerLiteral";
52 case tokenizer_name_t::DateLiteral: return "DateLiteral";
53 case tokenizer_name_t::TimeLiteral: return "TimeLiteral";
54 case tokenizer_name_t::FloatLiteral: return "FloatLiteral";
55 case tokenizer_name_t::Operator: return "Operator";
56 case tokenizer_name_t::End: return "End";
57 default: tt_no_default();
58 }
59}
60
61inline std::ostream &operator<<(std::ostream &lhs, tokenizer_name_t rhs)
62{
63 return lhs << to_string(rhs);
64}
65
66struct token_t {
67 tokenizer_name_t name;
68 std::string value;
69 parse_location location;
70 bool is_binary;
71 int precedence;
72
73 token_t() noexcept : name(tokenizer_name_t::NotAssigned), value(), location(), is_binary(false), precedence(0) {}
74
75 token_t(tokenizer_name_t name, std::string value) noexcept :
76 name(name), value(std::move(value)), location(), is_binary(false), precedence(0)
77 {
78 }
79
80 token_t(token_t const &other) noexcept :
81 name(other.name), value(other.value), location(other.location), is_binary(other.is_binary), precedence(other.precedence)
82 {
83 }
84
85 token_t(token_t &&other) noexcept :
86 name(other.name),
87 value(std::move(other.value)),
88 location(std::move(other.location)),
89 is_binary(other.is_binary),
90 precedence(other.precedence)
91 {
92 }
93
94 token_t &operator=(token_t const &other) noexcept
95 {
96 if (this != &other) {
97 name = other.name;
98 value = other.value;
99 location = other.location;
100 is_binary = other.is_binary;
101 precedence = other.precedence;
102 }
103 return *this;
104 }
105
106 token_t &operator=(token_t &&other) noexcept
107 {
108 using std::move;
109 name = move(other.name);
110 value = move(other.value);
111 location = move(other.location);
112 is_binary = move(other.is_binary);
113 precedence = move(other.precedence);
114 return *this;
115 }
116
117 operator bool() const noexcept
118 {
119 return name != tokenizer_name_t::NotAssigned;
120 }
121
122 explicit operator long double() const
123 {
124 try {
125 return std::stold(value);
126
127 } catch (...) {
128 throw parse_error("Could not convert token {} to long double", *this);
129 }
130 }
131
132 explicit operator double() const
133 {
134 try {
135 return std::stod(value);
136
137 } catch (...) {
138 throw parse_error("Could not convert token {} to double", *this);
139 }
140 }
141
142 explicit operator float() const
143 {
144 try {
145 return std::stof(value);
146
147 } catch (...) {
148 throw parse_error("Could not convert token {} to float", *this);
149 }
150 }
151
152 template<std::integral T>
153 explicit operator T() const
154 {
155 try {
156 return tt::from_string<T>(value);
157
158 } catch (...) {
159 throw parse_error("Could not convert token {} to {}", *this, typeid(T).name());
160 }
161 }
162
163 explicit operator std::string() const noexcept
164 {
165 return value;
166 }
167
168 explicit operator std::u8string() const noexcept
169 {
170 return sanitize_u8string(make_u8string(value));
171 }
172
173 explicit operator decimal() const
174 {
175 return decimal{value};
176 }
177
178 explicit operator date::year_month_day() const
179 {
180 ttlet parts = split(value, '-');
181 if (parts.size() != 3) {
182 throw parse_error("Expect date to be in the format YYYY-MM-DD");
183 }
184
185 ttlet year = date::year{stoi(parts[0])};
186 ttlet month = date::month{narrow_cast<unsigned int>(stoi(parts[1]))};
187 ttlet day = date::day{narrow_cast<unsigned int>(stoi(parts[2]))};
188 return {year, month, day};
189 }
190
191 std::string repr() const noexcept
192 {
193 std::string r = to_string(name);
194 if (value.size() > 0) {
195 r += '\"';
196 r += value;
197 r += '\"';
198 }
199 return r;
200 }
201
202 friend inline std::ostream &operator<<(std::ostream &lhs, token_t const &rhs)
203 {
204 return lhs << rhs.repr();
205 }
206
207 [[nodiscard]] friend bool operator==(token_t const &lhs, token_t const &rhs) noexcept
208 {
209 return (lhs.name == rhs.name) && (lhs.value == rhs.value);
210 }
211
212 [[nodiscard]] friend bool operator==(token_t const &lhs, tokenizer_name_t const &rhs) noexcept
213 {
214 return lhs.name == rhs;
215 }
216
217 [[nodiscard]] friend bool operator!=(token_t const &lhs, tokenizer_name_t const &rhs) noexcept
218 {
219 return !(lhs == rhs);
220 }
221
222 [[nodiscard]] friend bool operator==(token_t const &lhs, const char *rhs) noexcept
223 {
224 return lhs.value == rhs;
225 }
226
227 [[nodiscard]] friend bool operator!=(token_t const &lhs, const char *rhs) noexcept
228 {
229 return !(lhs == rhs);
230 }
231};
232
234using token_iterator = typename token_vector::iterator;
235
236template<typename T>
238 bool found;
239 T value;
240 token_iterator next_token;
241
242 parse_result() noexcept : found(false), value(), next_token() {}
243
244 parse_result(T const &value, token_iterator next_token) : found(true), value(value), next_token(next_token) {}
245
246 operator bool() const noexcept
247 {
248 return found;
249 }
250
251 T const &operator*() const noexcept
252 {
253 return value;
254 }
255};
256
274[[nodiscard]] std::vector<token_t> parseTokens(std::string_view text) noexcept;
275
276[[nodiscard]] std::vector<token_t>
277parseTokens(std::string_view::const_iterator first, std::string_view::const_iterator last) noexcept;
278
279} // namespace tt
Definition decimal.hpp:19
Exception thrown during parsing on an error.
Definition exception.hpp:21
Definition parse_location.hpp:16
Definition tokenizer.hpp:66
Definition tokenizer.hpp:237
T move(T... args)
T size(T... args)
T stold(T... args)
T to_string(T... args)