HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
glob.hpp
1// Copyright Take Vos 2019-2020.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "required.hpp"
8#include "algorithm.hpp"
9#include <vector>
10#include <string>
11#include <string_view>
12#include <ostream>
13
14namespace tt {
15
/** The kinds of token a glob pattern is split into by parseGlob().
 */
enum class glob_token_type_t {
    /// Literal text.
    String,
    /// A brace list of alternative strings, written as `{a,b}`.
    StringList,
    /// A bracket list of accepted characters, written as `[abc]`.
    CharacterList,
    /// A bracket list of rejected characters, written as `[^abc]`.
    InverseCharacterList,
    /// A single `/`.
    Separator,
    /// A `*` wildcard.
    AnyString,
    /// A `?` wildcard.
    AnyCharacter,
    /// The double-star wildcard found between two slashes.
    AnyDirectory,
};
26
27inline std::ostream &operator<<(std::ostream &lhs, glob_token_type_t const &rhs) {
28 switch (rhs) {
29 case glob_token_type_t::String: lhs << "String"; break;
30 case glob_token_type_t::StringList: lhs << "StringList"; break;
31 case glob_token_type_t::CharacterList: lhs << "CharacterList"; break;
32 case glob_token_type_t::InverseCharacterList: lhs << "InverseCharacterList"; break;
33 case glob_token_type_t::Separator: lhs << "Separator"; break;
34 case glob_token_type_t::AnyString: lhs << "AnyString"; break;
35 case glob_token_type_t::AnyCharacter: lhs << "AnyCharacter"; break;
36 case glob_token_type_t::AnyDirectory: lhs << "AnyDirectory"; break;
37 default: tt_no_default();
38 }
39 return lhs;
40}
41
43 glob_token_type_t type;
44 std::string value;
46
47 glob_token_t(glob_token_type_t type) : type(type), value(), values() {}
48 glob_token_t(glob_token_type_t type, std::string value) : type(type), value(value), values() {}
49 glob_token_t(glob_token_type_t type, std::vector<std::string> values) : type(type), value(), values(values) {}
50};
51
// Iterator aliases for walking a glob_token_list_t: the first is the list's
// mutable iterator, the second its const iterator.
53using glob_token_iterator = glob_token_list_t::iterator;
54using glob_token_const_iterator = glob_token_list_t::const_iterator;
55
56
57inline bool operator==(glob_token_t const &lhs, glob_token_t const &rhs) noexcept {
58 return lhs.type == rhs.type && lhs.value == rhs.value && lhs.values == rhs.values;
59}
60
61inline std::ostream &operator<<(std::ostream &lhs, glob_token_t const &rhs) {
62 lhs << rhs.type;
63 if (rhs.value.size() > 0) {
64 lhs << ":" << rhs.value;
65 } else if (rhs.values.size() > 0) {
66 lhs << ":{";
67 for (size_t i = 0; i < rhs.values.size(); i++) {
68 if (i != 0) {
69 lhs << ",";
70 }
71 lhs << rhs.values[i];
72 }
73 lhs << "}";
74 }
75 return lhs;
76}
77
97inline glob_token_list_t parseGlob(std::string_view glob)
98{
99 enum class state_t {
100 Idle,
101 FoundText,
102 FoundSlash,
103 FoundEscape,
104 FoundSlashStar,
105 FoundSlashDoubleStar,
106 FoundBracket,
107 FoundBrace,
108 };
109 state_t state = state_t::Idle;
110
111 glob_token_list_t r;
112 std::string tmpString;
113 std::vector<std::string> tmpStringList;
114 bool isInverse = false;
115 bool isFirstCharacter = false;
116 bool isRange = false;
117
118 auto i = glob.begin();
119 while (true) {
120 auto c = (i != glob.end()) ? *i : '\0';
121
122 switch (state) {
123 case state_t::Idle:
124 switch (c) {
125 case '/': state = state_t::FoundSlash; break;
126 case '?': r.emplace_back(glob_token_type_t::AnyCharacter); break;
127 case '*': r.emplace_back(glob_token_type_t::AnyString); break;
128 case '[':
129 isInverse = false;
130 isFirstCharacter = true;
131 isRange = false;
132 state = state_t::FoundBracket;
133 break;
134 case '{': state = state_t::FoundBrace; break;
135 case '\\': state = state_t::FoundEscape; break;
136 case '\0': return r;
137 default: state = state_t::FoundText; continue;
138 }
139 break;
140
141 case state_t::FoundText:
142 if (c == '/' || c == '?' || c == '*' || c == '[' || c == '{' || c == '\0') {
143 r.emplace_back(glob_token_type_t::String, tmpString);
144 tmpString.clear();
145 state = state_t::Idle;
146 continue; // Don't increment the iterator.
147 } else if (c == '\\') {
148 state = state_t::FoundEscape;
149 } else {
150 tmpString += c;
151 }
152 break;
153
154 case state_t::FoundEscape:
155 if (c == '\0') {
156 r.emplace_back(glob_token_type_t::String, tmpString);
157 state = state_t::Idle;
158 continue; // Don't increment the iterator.
159 } else {
160 tmpString += c;
161 state = state_t::FoundText;
162 }
163 break;
164
165 case state_t::FoundSlash:
166 if (c == '*') {
167 state = state_t::FoundSlashStar;
168 } else {
169 r.emplace_back(glob_token_type_t::Separator);
170 state = state_t::Idle;
171 continue;
172 }
173 break;
174
175 case state_t::FoundSlashStar:
176 if (c == '*') {
177 state = state_t::FoundSlashDoubleStar;
178 } else {
179 r.emplace_back(glob_token_type_t::Separator);
180 r.emplace_back(glob_token_type_t::AnyString);
181 state = state_t::Idle;
182 continue;
183 }
184 break;
185
186 case state_t::FoundSlashDoubleStar:
187 if (c == '/') {
188 r.emplace_back(glob_token_type_t::AnyDirectory);
189 r.emplace_back(glob_token_type_t::Separator);
190 state = state_t::Idle;
191
192 } else {
193 // Fallback to AnyString, as if there was only a single '*'.
194 r.emplace_back(glob_token_type_t::Separator);
195 r.emplace_back(glob_token_type_t::AnyString);
196 state = state_t::Idle;
197 continue; // Don't increment the iterator.
198 }
199 break;
200
201 case state_t::FoundBracket:
202 switch (c) {
203 case '^':
204 if (isFirstCharacter) {
205 isInverse = true;
206 tmpString += '/';
207 } else {
208 tmpString += c;
209 }
210 break;
211
212 case ']':
213 if (isFirstCharacter) {
214 tmpString += c;
215 } else {
216 if (isRange) {
217 tmpString += '-';
218 }
219
220 if (isInverse) {
221 r.emplace_back(glob_token_type_t::InverseCharacterList, tmpString);
222 } else {
223 r.emplace_back(glob_token_type_t::CharacterList, tmpString);
224 }
225
226 tmpString.clear();
227 state = state_t::Idle;
228 }
229 isFirstCharacter = false;
230 break;
231
232 case '-':
233 if (isFirstCharacter) {
234 tmpString += '-';
235 } else {
236 isRange = true;
237 }
238 isFirstCharacter = false;
239 break;
240
241 case '\0':
242 if (isRange) {
243 tmpString += '-';
244 }
245
246 if (isInverse) {
247 r.emplace_back(glob_token_type_t::InverseCharacterList, tmpString);
248 } else {
249 r.emplace_back(glob_token_type_t::CharacterList, tmpString);
250 }
251 state = state_t::Idle;
252 continue; // Don't increment the iterator.
253
254 default:
255 if (isRange) {
256 ttlet firstCharacter = static_cast<uint8_t>(tmpString.back());
257 ttlet lastCharacter = static_cast<uint8_t>(c);
258 for (uint8_t character = firstCharacter + 1; character <= lastCharacter; character++) {
259 tmpString += static_cast<char>(character);
260 }
261 } else {
262 tmpString += c;
263 }
264 isRange = false;
265 isFirstCharacter = false;
266 break;
267 }
268 break;
269
270 case state_t::FoundBrace:
271 switch (c) {
272 case '}':
273 tmpStringList.push_back(tmpString);
274 tmpString.clear();
275 r.emplace_back(glob_token_type_t::StringList, tmpStringList);
276 tmpStringList.clear();
277 state = state_t::Idle;
278 break;
279 case ',':
280 tmpStringList.push_back(tmpString);
281 tmpString.clear();
282 break;
283 case '\0':
284 tmpStringList.push_back(tmpString);
285 r.emplace_back(glob_token_type_t::StringList, tmpStringList);
286 state = state_t::Idle;
287 continue; // Don't increment the iterator.
288 default:
289 tmpString += c;
290 break;
291 }
292 break;
293
294 default:
295 tt_no_default();
296 }
297
298 i++;
299 }
300}
301
/** The outcome of matching a string against a glob pattern.
 */
enum class glob_match_result_t {
    /// The string does not match the pattern.
    No,
    /// The string ran out where the pattern continues with a separator or
    /// an any-directory token.
    Partial,
    /// The string matches the whole pattern.
    Match,
};
307
308inline glob_match_result_t matchGlob(glob_token_const_iterator index, glob_token_const_iterator end, std::string_view str)
309{
310 if (index == end) {
311 return (str.size() == 0) ?
312 glob_match_result_t::Match :
313 glob_match_result_t::No;
314
315 } else if (str.size() == 0) {
316 switch (index->type) {
317 case glob_token_type_t::Separator:
318 return glob_match_result_t::Partial;
319 case glob_token_type_t::AnyDirectory:
320 return glob_match_result_t::Partial;
321 case glob_token_type_t::AnyString:
322 return matchGlob(index + 1, end, str);
323 default:
324 return glob_match_result_t::No;
325 }
326 }
327
328#define MATCH_GLOB_RECURSE(out, next, end, str)\
329 switch (ttlet tmp = matchGlob(next, end, str)) {\
330 case glob_match_result_t::No: break;\
331 case glob_match_result_t::Match: return tmp;\
332 case glob_match_result_t::Partial: out = tmp; break;\
333 default: tt_no_default();\
334 }
335
336 // result may be assigned Partial by MATCH_GLOB_RECURSE.
337 auto result = glob_match_result_t::No;
338 bool found_slash = false;
339 ttlet next_index = index + 1;
340
341 switch (index->type) {
342 case glob_token_type_t::String:
343 if (str.starts_with(index->value)) {
344 MATCH_GLOB_RECURSE(result, next_index, end, str.substr(index->value.size()));
345 }
346 return result;
347
348 case glob_token_type_t::StringList:
349 for (ttlet &value: index->values) {
350 if (str.starts_with(value)) {
351 MATCH_GLOB_RECURSE(result, next_index, end, str.substr(value.size()));
352 }
353 }
354 return result;
355
356 case glob_token_type_t::CharacterList:
357 if (index->value.find(str.front()) != std::string::npos) {
358 MATCH_GLOB_RECURSE(result, next_index, end, str.substr(1));
359 }
360 return result;
361
362 case glob_token_type_t::InverseCharacterList:
363 if (index->value.find(str.front()) == std::string::npos) {
364 MATCH_GLOB_RECURSE(result, next_index, end, str.substr(1));
365 }
366 return result;
367
368 case glob_token_type_t::Separator:
369 if (str.front() == '/') {
370 return matchGlob(next_index, end, str.substr(1));
371 } else {
372 return glob_match_result_t::No;
373 }
374
375 case glob_token_type_t::AnyCharacter:
376 if (str.front() != '/') {
377 return matchGlob(next_index, end, str.substr(1));
378 } else {
379 return glob_match_result_t::No;
380 }
381
382 case glob_token_type_t::AnyString:
383 // Loop through each character in the string, including the end.
384 for (size_t i = 0; i <= str.size(); i++) {
385 MATCH_GLOB_RECURSE(result, next_index, end, str.substr(i));
386
387 // Don't continue beyond a slash.
388 if (i < str.size() && str[i] == '/') {
389 break;
390 }
391 }
392 return result;
393
394 case glob_token_type_t::AnyDirectory:
395 // Recurse after each slash.
396 found_slash = false;
397 for (size_t i = 0; i <= str.size(); i++) {
398 if (i == str.size() || str[i] == '/') {
399 MATCH_GLOB_RECURSE(result, next_index, end, str.substr(i));
400 }
401 //found_slash = i < str.size() && ';
402 }
403 return result;
404
405 default:
406 tt_no_default();
407 }
408#undef MATCH_GLOB_RECURSE
409}
410
411inline glob_match_result_t matchGlob(glob_token_list_t const &glob, std::string_view str)
412{
413 return matchGlob(glob.begin(), glob.end(), str);
414}
415
416inline glob_match_result_t matchGlob(std::string_view glob, std::string_view str)
417{
418 return matchGlob(parseGlob(glob), str);
419}
420
421inline std::string basePathOfGlob(glob_token_const_iterator first, glob_token_const_iterator last) {
422 if (first == last) {
423 return "";
424 }
425
426 // Find the first place holder and don't include it as a token.
427 auto endOfBase = std::find_if_not(first, last, [](ttlet &x) {
428 return x.type == glob_token_type_t::String || x.type == glob_token_type_t::Separator;
429 });
430
431 if (endOfBase != last) {
432 // Backtrack until the last separator, and remove it.
433 // Except when we included everything in the first loop because in that case there
434 // are no placeholders at all and we want to include the filename.
435 endOfBase = rfind_if(first, endOfBase, [](ttlet &x) {
436 return x.type == glob_token_type_t::Separator;
437 });
438 }
439
440 // Add back the leading slash.
441 if (endOfBase == first && first->type == glob_token_type_t::Separator) {
442 endOfBase++;
443 }
444
445 std::string r;
446 for (auto index = first; index != endOfBase; index++) {
447 switch (index->type) {
448 case glob_token_type_t::String:
449 r += index->value;
450 break;
451 case glob_token_type_t::Separator:
452 r += '/';
453 break;
454 default:
455 tt_no_default();
456 }
457 }
458 return r;
459}
460
461inline std::string basePathOfGlob(glob_token_list_t const &glob)
462{
463 return basePathOfGlob(glob.begin(), glob.end());
464}
465
466inline std::string basePathOfGlob(std::string_view glob)
467{
468 return basePathOfGlob(parseGlob(glob));
469}
470
471}
Definition glob.hpp:42
T back(T... args)
T clear(T... args)
T find_if_not(T... args)
T push_back(T... args)