HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
glob.hpp
1// Copyright Take Vos 2019-2020.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "required.hpp"
8#include "algorithm.hpp"
9#include <vector>
10#include <string>
11#include <string_view>
12#include <ostream>
13
14namespace hi::inline v1 {
15
16enum class glob_token_type_t {
17 String,
18 StringList,
19 CharacterList,
20 InverseCharacterList,
21 Separator,
22 AnyString,
23 AnyCharacter,
24 AnyDirectory
25};
26
27inline std::ostream &operator<<(std::ostream &lhs, glob_token_type_t const &rhs)
28{
29 switch (rhs) {
30 case glob_token_type_t::String: lhs << "String"; break;
31 case glob_token_type_t::StringList: lhs << "StringList"; break;
32 case glob_token_type_t::CharacterList: lhs << "CharacterList"; break;
33 case glob_token_type_t::InverseCharacterList: lhs << "InverseCharacterList"; break;
34 case glob_token_type_t::Separator: lhs << "Separator"; break;
35 case glob_token_type_t::AnyString: lhs << "AnyString"; break;
36 case glob_token_type_t::AnyCharacter: lhs << "AnyCharacter"; break;
37 case glob_token_type_t::AnyDirectory: lhs << "AnyDirectory"; break;
38 default: hi_no_default();
39 }
40 return lhs;
41}
42
44 glob_token_type_t type;
45 std::string value;
47
48 glob_token_t(glob_token_type_t type) : type(type), value(), values() {}
49 glob_token_t(glob_token_type_t type, std::string value) : type(type), value(value), values() {}
50 glob_token_t(glob_token_type_t type, std::vector<std::string> values) : type(type), value(), values(values) {}
51};
52
54using glob_token_iterator = glob_token_list_t::iterator;
55using glob_token_const_iterator = glob_token_list_t::const_iterator;
56
57inline bool operator==(glob_token_t const &lhs, glob_token_t const &rhs) noexcept
58{
59 return lhs.type == rhs.type && lhs.value == rhs.value && lhs.values == rhs.values;
60}
61
62inline std::ostream &operator<<(std::ostream &lhs, glob_token_t const &rhs)
63{
64 lhs << rhs.type;
65 if (rhs.value.size() > 0) {
66 lhs << ":" << rhs.value;
67 } else if (rhs.values.size() > 0) {
68 lhs << ":{";
69 for (std::size_t i = 0; i < rhs.values.size(); i++) {
70 if (i != 0) {
71 lhs << ",";
72 }
73 lhs << rhs.values[i];
74 }
75 lhs << "}";
76 }
77 return lhs;
78}
79
99inline glob_token_list_t parseGlob(std::string_view glob)
100{
101 enum class state_t {
102 Idle,
103 FoundText,
104 FoundSlash,
105 FoundEscape,
106 FoundSlashStar,
107 FoundSlashDoubleStar,
108 FoundBracket,
109 FoundBrace,
110 };
111 state_t state = state_t::Idle;
112
113 glob_token_list_t r;
114 std::string tmpString;
115 std::vector<std::string> tmpStringList;
116 bool isInverse = false;
117 bool isFirstCharacter = false;
118 bool isRange = false;
119
120 auto i = glob.begin();
121 while (true) {
122 auto c = (i != glob.end()) ? *i : '\0';
123
124 switch (state) {
125 case state_t::Idle:
126 switch (c) {
127 case '/': state = state_t::FoundSlash; break;
128 case '?': r.emplace_back(glob_token_type_t::AnyCharacter); break;
129 case '*': r.emplace_back(glob_token_type_t::AnyString); break;
130 case '[':
131 isInverse = false;
132 isFirstCharacter = true;
133 isRange = false;
134 state = state_t::FoundBracket;
135 break;
136 case '{': state = state_t::FoundBrace; break;
137 case '\\': state = state_t::FoundEscape; break;
138 case '\0': return r;
139 default: state = state_t::FoundText; continue;
140 }
141 break;
142
143 case state_t::FoundText:
144 if (c == '/' || c == '?' || c == '*' || c == '[' || c == '{' || c == '\0') {
145 r.emplace_back(glob_token_type_t::String, tmpString);
146 tmpString.clear();
147 state = state_t::Idle;
148 continue; // Don't increment the iterator.
149 } else if (c == '\\') {
150 state = state_t::FoundEscape;
151 } else {
152 tmpString += c;
153 }
154 break;
155
156 case state_t::FoundEscape:
157 if (c == '\0') {
158 r.emplace_back(glob_token_type_t::String, tmpString);
159 state = state_t::Idle;
160 continue; // Don't increment the iterator.
161 } else {
162 tmpString += c;
163 state = state_t::FoundText;
164 }
165 break;
166
167 case state_t::FoundSlash:
168 if (c == '*') {
169 state = state_t::FoundSlashStar;
170 } else {
171 r.emplace_back(glob_token_type_t::Separator);
172 state = state_t::Idle;
173 continue;
174 }
175 break;
176
177 case state_t::FoundSlashStar:
178 if (c == '*') {
179 state = state_t::FoundSlashDoubleStar;
180 } else {
181 r.emplace_back(glob_token_type_t::Separator);
182 r.emplace_back(glob_token_type_t::AnyString);
183 state = state_t::Idle;
184 continue;
185 }
186 break;
187
188 case state_t::FoundSlashDoubleStar:
189 if (c == '/') {
190 r.emplace_back(glob_token_type_t::AnyDirectory);
191 r.emplace_back(glob_token_type_t::Separator);
192 state = state_t::Idle;
193
194 } else {
195 // Fallback to AnyString, as if there was only a single '*'.
196 r.emplace_back(glob_token_type_t::Separator);
197 r.emplace_back(glob_token_type_t::AnyString);
198 state = state_t::Idle;
199 continue; // Don't increment the iterator.
200 }
201 break;
202
203 case state_t::FoundBracket:
204 switch (c) {
205 case '^':
206 if (isFirstCharacter) {
207 isInverse = true;
208 tmpString += '/';
209 } else {
210 tmpString += c;
211 }
212 break;
213
214 case ']':
215 if (isFirstCharacter) {
216 tmpString += c;
217 } else {
218 if (isRange) {
219 tmpString += '-';
220 }
221
222 if (isInverse) {
223 r.emplace_back(glob_token_type_t::InverseCharacterList, tmpString);
224 } else {
225 r.emplace_back(glob_token_type_t::CharacterList, tmpString);
226 }
227
228 tmpString.clear();
229 state = state_t::Idle;
230 }
231 isFirstCharacter = false;
232 break;
233
234 case '-':
235 if (isFirstCharacter) {
236 tmpString += '-';
237 } else {
238 isRange = true;
239 }
240 isFirstCharacter = false;
241 break;
242
243 case '\0':
244 if (isRange) {
245 tmpString += '-';
246 }
247
248 if (isInverse) {
249 r.emplace_back(glob_token_type_t::InverseCharacterList, tmpString);
250 } else {
251 r.emplace_back(glob_token_type_t::CharacterList, tmpString);
252 }
253 state = state_t::Idle;
254 continue; // Don't increment the iterator.
255
256 default:
257 if (isRange) {
258 hilet firstCharacter = static_cast<uint8_t>(tmpString.back());
259 hilet lastCharacter = static_cast<uint8_t>(c);
260 for (uint8_t tmp_c = firstCharacter + 1; tmp_c <= lastCharacter; tmp_c++) {
261 tmpString += static_cast<char>(tmp_c);
262 }
263 } else {
264 tmpString += c;
265 }
266 isRange = false;
267 isFirstCharacter = false;
268 break;
269 }
270 break;
271
272 case state_t::FoundBrace:
273 switch (c) {
274 case '}':
275 tmpStringList.push_back(tmpString);
276 tmpString.clear();
277 r.emplace_back(glob_token_type_t::StringList, tmpStringList);
278 tmpStringList.clear();
279 state = state_t::Idle;
280 break;
281 case ',':
282 tmpStringList.push_back(tmpString);
283 tmpString.clear();
284 break;
285 case '\0':
286 tmpStringList.push_back(tmpString);
287 r.emplace_back(glob_token_type_t::StringList, tmpStringList);
288 state = state_t::Idle;
289 continue; // Don't increment the iterator.
290 default: tmpString += c; break;
291 }
292 break;
293
294 default: hi_no_default();
295 }
296
297 i++;
298 }
299}
300
301enum class glob_match_result_t { No, Partial, Match };
302
303inline glob_match_result_t matchGlob(glob_token_const_iterator index, glob_token_const_iterator end, std::string_view str)
304{
305 if (index == end) {
306 return (str.size() == 0) ? glob_match_result_t::Match : glob_match_result_t::No;
307
308 } else if (str.size() == 0) {
309 switch (index->type) {
310 case glob_token_type_t::Separator: return glob_match_result_t::Partial;
311 case glob_token_type_t::AnyDirectory: return glob_match_result_t::Partial;
312 case glob_token_type_t::AnyString: return matchGlob(index + 1, end, str);
313 default: return glob_match_result_t::No;
314 }
315 }
316
317#define MATCH_GLOB_RECURSE(out, next, end, str) \
318 switch (hilet tmp = matchGlob(next, end, str)) { \
319 case glob_match_result_t::No: break; \
320 case glob_match_result_t::Match: return tmp; \
321 case glob_match_result_t::Partial: out = tmp; break; \
322 default: hi_no_default(); \
323 }
324
325 // result may be assigned Partial by MATCH_GLOB_RECURSE.
326 auto result = glob_match_result_t::No;
327 bool found_slash = false;
328 hilet next_index = index + 1;
329
330 switch (index->type) {
331 case glob_token_type_t::String:
332 if (str.starts_with(index->value)) {
333 MATCH_GLOB_RECURSE(result, next_index, end, str.substr(index->value.size()));
334 }
335 return result;
336
337 case glob_token_type_t::StringList:
338 for (hilet &value : index->values) {
339 if (str.starts_with(value)) {
340 MATCH_GLOB_RECURSE(result, next_index, end, str.substr(value.size()));
341 }
342 }
343 return result;
344
345 case glob_token_type_t::CharacterList:
346 if (index->value.find(str.front()) != std::string::npos) {
347 MATCH_GLOB_RECURSE(result, next_index, end, str.substr(1));
348 }
349 return result;
350
351 case glob_token_type_t::InverseCharacterList:
352 if (index->value.find(str.front()) == std::string::npos) {
353 MATCH_GLOB_RECURSE(result, next_index, end, str.substr(1));
354 }
355 return result;
356
357 case glob_token_type_t::Separator:
358 if (str.front() == '/') {
359 return matchGlob(next_index, end, str.substr(1));
360 } else {
361 return glob_match_result_t::No;
362 }
363
364 case glob_token_type_t::AnyCharacter:
365 if (str.front() != '/') {
366 return matchGlob(next_index, end, str.substr(1));
367 } else {
368 return glob_match_result_t::No;
369 }
370
371 case glob_token_type_t::AnyString:
372 // Loop through each character in the string, including the end.
373 for (std::size_t i = 0; i <= str.size(); i++) {
374 MATCH_GLOB_RECURSE(result, next_index, end, str.substr(i));
375
376 // Don't continue beyond a slash.
377 if (i < str.size() && str[i] == '/') {
378 break;
379 }
380 }
381 return result;
382
383 case glob_token_type_t::AnyDirectory:
384 // Recurse after each slash.
385 found_slash = false;
386 for (std::size_t i = 0; i <= str.size(); i++) {
387 if (i == str.size() || str[i] == '/') {
388 MATCH_GLOB_RECURSE(result, next_index, end, str.substr(i));
389 }
390 }
391 return result;
392
393 default: hi_no_default();
394 }
395#undef MATCH_GLOB_RECURSE
396}
397
398inline glob_match_result_t matchGlob(glob_token_list_t const &glob, std::string_view str)
399{
400 return matchGlob(glob.begin(), glob.end(), str);
401}
402
403inline glob_match_result_t matchGlob(std::string_view glob, std::string_view str)
404{
405 return matchGlob(parseGlob(glob), str);
406}
407
408inline std::string basePathOfGlob(glob_token_const_iterator first, glob_token_const_iterator last)
409{
410 if (first == last) {
411 return "";
412 }
413
414 // Find the first place holder and don't include it as a token.
415 auto endOfBase = std::find_if_not(first, last, [](hilet &x) {
416 return x.type == glob_token_type_t::String || x.type == glob_token_type_t::Separator;
417 });
418
419 if (endOfBase != last) {
420 // Backtrack until the last separator, and remove it.
421 // Except when we included everything in the first loop because in that case there
422 // are no placeholders at all and we want to include the filename.
423 endOfBase = rfind_if(first, endOfBase, [](hilet &x) {
424 return x.type == glob_token_type_t::Separator;
425 });
426 }
427
428 // Add back the leading slash.
429 if (endOfBase == first && first->type == glob_token_type_t::Separator) {
430 endOfBase++;
431 }
432
433 std::string r;
434 for (auto index = first; index != endOfBase; index++) {
435 switch (index->type) {
436 case glob_token_type_t::String: r += index->value; break;
437 case glob_token_type_t::Separator: r += '/'; break;
438 default: hi_no_default();
439 }
440 }
441 return r;
442}
443
444inline std::string basePathOfGlob(glob_token_list_t const &glob)
445{
446 return basePathOfGlob(glob.begin(), glob.end());
447}
448
449inline std::string basePathOfGlob(std::string_view glob)
450{
451 return basePathOfGlob(parseGlob(glob));
452}
453
454} // namespace hi::inline v1
This file includes required definitions.
#define hilet
Invariant should be the default for variables.
Definition required.hpp:23
Definition glob.hpp:43
T back(T... args)
T clear(T... args)
T find_if_not(T... args)
T push_back(T... args)