HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
glob.hpp
1// Copyright 2019 Pokitec
2// All rights reserved.
3
4#pragma once
5
6#include "TTauri/Foundation/required.hpp"
7#include "TTauri/Foundation/algorithm.hpp"
8#include <vector>
9#include <string>
10#include <string_view>
11#include <ostream>
12
13namespace tt {
14
15enum class glob_token_type_t {
16 String,
17 StringList,
18 CharacterList,
19 InverseCharacterList,
20 Separator,
21 AnyString,
22 AnyCharacter,
23 AnyDirectory
24};
25
26inline std::ostream &operator<<(std::ostream &lhs, glob_token_type_t const &rhs) {
27 switch (rhs) {
28 case glob_token_type_t::String: lhs << "String"; break;
29 case glob_token_type_t::StringList: lhs << "StringList"; break;
30 case glob_token_type_t::CharacterList: lhs << "CharacterList"; break;
31 case glob_token_type_t::InverseCharacterList: lhs << "InverseCharacterList"; break;
32 case glob_token_type_t::Separator: lhs << "Separator"; break;
33 case glob_token_type_t::AnyString: lhs << "AnyString"; break;
34 case glob_token_type_t::AnyCharacter: lhs << "AnyCharacter"; break;
35 case glob_token_type_t::AnyDirectory: lhs << "AnyDirectory"; break;
36 default: tt_no_default;
37 }
38 return lhs;
39}
40
42 glob_token_type_t type;
43 std::string value;
45
46 glob_token_t(glob_token_type_t type) : type(type), value(), values() {}
47 glob_token_t(glob_token_type_t type, std::string value) : type(type), value(value), values() {}
48 glob_token_t(glob_token_type_t type, std::vector<std::string> values) : type(type), value(), values(values) {}
49};
50
52using glob_token_iterator = glob_token_list_t::iterator;
53using glob_token_const_iterator = glob_token_list_t::const_iterator;
54
55
56inline bool operator==(glob_token_t const &lhs, glob_token_t const &rhs) noexcept {
57 return lhs.type == rhs.type && lhs.value == rhs.value && lhs.values == rhs.values;
58}
59
60inline std::ostream &operator<<(std::ostream &lhs, glob_token_t const &rhs) {
61 lhs << rhs.type;
62 if (rhs.value.size() > 0) {
63 lhs << ":" << rhs.value;
64 } else if (rhs.values.size() > 0) {
65 lhs << ":{";
66 for (size_t i = 0; i < rhs.values.size(); i++) {
67 if (i != 0) {
68 lhs << ",";
69 }
70 lhs << rhs.values[i];
71 }
72 lhs << "}";
73 }
74 return lhs;
75}
76
96inline glob_token_list_t parseGlob(std::string_view glob)
97{
98 enum class state_t {
99 Idle,
100 FoundText,
101 FoundSlash,
102 FoundEscape,
103 FoundSlashStar,
104 FoundSlashDoubleStar,
105 FoundBracket,
106 FoundBrace,
107 };
108 state_t state = state_t::Idle;
109
110 glob_token_list_t r;
111 std::string tmpString;
112 std::vector<std::string> tmpStringList;
113 bool isInverse = false;
114 bool isFirstCharacter = false;
115 bool isRange = false;
116
117 auto i = glob.begin();
118 while (true) {
119 auto c = (i != glob.end()) ? *i : '\0';
120
121 switch (state) {
122 case state_t::Idle:
123 switch (c) {
124 case '/': state = state_t::FoundSlash; break;
125 case '?': r.emplace_back(glob_token_type_t::AnyCharacter); break;
126 case '*': r.emplace_back(glob_token_type_t::AnyString); break;
127 case '[':
128 isInverse = false;
129 isFirstCharacter = true;
130 isRange = false;
131 state = state_t::FoundBracket;
132 break;
133 case '{': state = state_t::FoundBrace; break;
134 case '\\': state = state_t::FoundEscape; break;
135 case '\0': return r;
136 default: state = state_t::FoundText; continue;
137 }
138 break;
139
140 case state_t::FoundText:
141 if (c == '/' || c == '?' || c == '*' || c == '[' || c == '{' || c == '\0') {
142 r.emplace_back(glob_token_type_t::String, tmpString);
143 tmpString.clear();
144 state = state_t::Idle;
145 continue; // Don't increment the iterator.
146 } else if (c == '\\') {
147 state = state_t::FoundEscape;
148 } else {
149 tmpString += c;
150 }
151 break;
152
153 case state_t::FoundEscape:
154 if (c == '\0') {
155 r.emplace_back(glob_token_type_t::String, tmpString);
156 state = state_t::Idle;
157 continue; // Don't increment the iterator.
158 } else {
159 tmpString += c;
160 state = state_t::FoundText;
161 }
162 break;
163
164 case state_t::FoundSlash:
165 if (c == '*') {
166 state = state_t::FoundSlashStar;
167 } else {
168 r.emplace_back(glob_token_type_t::Separator);
169 state = state_t::Idle;
170 continue;
171 }
172 break;
173
174 case state_t::FoundSlashStar:
175 if (c == '*') {
176 state = state_t::FoundSlashDoubleStar;
177 } else {
178 r.emplace_back(glob_token_type_t::Separator);
179 r.emplace_back(glob_token_type_t::AnyString);
180 state = state_t::Idle;
181 continue;
182 }
183 break;
184
185 case state_t::FoundSlashDoubleStar:
186 if (c == '/') {
187 r.emplace_back(glob_token_type_t::AnyDirectory);
188 r.emplace_back(glob_token_type_t::Separator);
189 state = state_t::Idle;
190
191 } else {
192 // Fallback to AnyString, as if there was only a single '*'.
193 r.emplace_back(glob_token_type_t::Separator);
194 r.emplace_back(glob_token_type_t::AnyString);
195 state = state_t::Idle;
196 continue; // Don't increment the iterator.
197 }
198 break;
199
200 case state_t::FoundBracket:
201 switch (c) {
202 case '^':
203 if (isFirstCharacter) {
204 isInverse = true;
205 tmpString += '/';
206 } else {
207 tmpString += c;
208 }
209 break;
210
211 case ']':
212 if (isFirstCharacter) {
213 tmpString += c;
214 } else {
215 if (isRange) {
216 tmpString += '-';
217 }
218
219 if (isInverse) {
220 r.emplace_back(glob_token_type_t::InverseCharacterList, tmpString);
221 } else {
222 r.emplace_back(glob_token_type_t::CharacterList, tmpString);
223 }
224
225 tmpString.clear();
226 state = state_t::Idle;
227 }
228 isFirstCharacter = false;
229 break;
230
231 case '-':
232 if (isFirstCharacter) {
233 tmpString += '-';
234 } else {
235 isRange = true;
236 }
237 isFirstCharacter = false;
238 break;
239
240 case '\0':
241 if (isRange) {
242 tmpString += '-';
243 }
244
245 if (isInverse) {
246 r.emplace_back(glob_token_type_t::InverseCharacterList, tmpString);
247 } else {
248 r.emplace_back(glob_token_type_t::CharacterList, tmpString);
249 }
250 state = state_t::Idle;
251 continue; // Don't increment the iterator.
252
253 default:
254 if (isRange) {
255 ttlet firstCharacter = static_cast<uint8_t>(tmpString.back());
256 ttlet lastCharacter = static_cast<uint8_t>(c);
257 for (uint8_t character = firstCharacter + 1; character <= lastCharacter; character++) {
258 tmpString += static_cast<char>(character);
259 }
260 } else {
261 tmpString += c;
262 }
263 isRange = false;
264 isFirstCharacter = false;
265 break;
266 }
267 break;
268
269 case state_t::FoundBrace:
270 switch (c) {
271 case '}':
272 tmpStringList.push_back(tmpString);
273 tmpString.clear();
274 r.emplace_back(glob_token_type_t::StringList, tmpStringList);
275 tmpStringList.clear();
276 state = state_t::Idle;
277 break;
278 case ',':
279 tmpStringList.push_back(tmpString);
280 tmpString.clear();
281 break;
282 case '\0':
283 tmpStringList.push_back(tmpString);
284 r.emplace_back(glob_token_type_t::StringList, tmpStringList);
285 state = state_t::Idle;
286 continue; // Don't increment the iterator.
287 default:
288 tmpString += c;
289 break;
290 }
291 break;
292
293 default:
294 tt_no_default;
295 }
296
297 i++;
298 }
299}
300
301enum class glob_match_result_t {
302 No,
303 Partial,
304 Match
305};
306
307inline glob_match_result_t matchGlob(glob_token_const_iterator index, glob_token_const_iterator end, std::string_view str)
308{
309 if (index == end) {
310 return (str.size() == 0) ?
311 glob_match_result_t::Match :
312 glob_match_result_t::No;
313
314 } else if (str.size() == 0) {
315 switch (index->type) {
316 case glob_token_type_t::Separator:
317 return glob_match_result_t::Partial;
318 case glob_token_type_t::AnyDirectory:
319 return glob_match_result_t::Partial;
320 case glob_token_type_t::AnyString:
321 return matchGlob(index + 1, end, str);
322 default:
323 return glob_match_result_t::No;
324 }
325 }
326
327#define MATCH_GLOB_RECURSE(out, next, end, str)\
328 switch (ttlet tmp = matchGlob(next, end, str)) {\
329 case glob_match_result_t::No: break;\
330 case glob_match_result_t::Match: return tmp;\
331 case glob_match_result_t::Partial: out = tmp; break;\
332 default: tt_no_default;\
333 }
334
335 // result may be assigned Partial by MATCH_GLOB_RECURSE.
336 auto result = glob_match_result_t::No;
337 bool found_slash = false;
338 ttlet next_index = index + 1;
339
340 switch (index->type) {
341 case glob_token_type_t::String:
342 if (starts_with(str, index->value)) {
343 MATCH_GLOB_RECURSE(result, next_index, end, str.substr(index->value.size()));
344 }
345 return result;
346
347 case glob_token_type_t::StringList:
348 for (ttlet &value: index->values) {
349 if (starts_with(str, value)) {
350 MATCH_GLOB_RECURSE(result, next_index, end, str.substr(value.size()));
351 }
352 }
353 return result;
354
355 case glob_token_type_t::CharacterList:
356 if (index->value.find(str.front()) != std::string::npos) {
357 MATCH_GLOB_RECURSE(result, next_index, end, str.substr(1));
358 }
359 return result;
360
361 case glob_token_type_t::InverseCharacterList:
362 if (index->value.find(str.front()) == std::string::npos) {
363 MATCH_GLOB_RECURSE(result, next_index, end, str.substr(1));
364 }
365 return result;
366
367 case glob_token_type_t::Separator:
368 if (str.front() == '/') {
369 return matchGlob(next_index, end, str.substr(1));
370 } else {
371 return glob_match_result_t::No;
372 }
373
374 case glob_token_type_t::AnyCharacter:
375 if (str.front() != '/') {
376 return matchGlob(next_index, end, str.substr(1));
377 } else {
378 return glob_match_result_t::No;
379 }
380
381 case glob_token_type_t::AnyString:
382 // Loop through each character in the string, including the end.
383 for (size_t i = 0; i <= str.size(); i++) {
384 MATCH_GLOB_RECURSE(result, next_index, end, str.substr(i));
385
386 // Don't continue beyond a slash.
387 if (i < str.size() && str[i] == '/') {
388 break;
389 }
390 }
391 return result;
392
393 case glob_token_type_t::AnyDirectory:
394 // Recurse after each slash.
395 found_slash = false;
396 for (size_t i = 0; i <= str.size(); i++) {
397 if (i == str.size() || str[i] == '/') {
398 MATCH_GLOB_RECURSE(result, next_index, end, str.substr(i));
399 }
400 //found_slash = i < str.size() && ';
401 }
402 return result;
403
404 default:
405 tt_no_default;
406 }
407#undef MATCH_GLOB_RECURSE
408}
409
410inline glob_match_result_t matchGlob(glob_token_list_t const &glob, std::string_view str)
411{
412 return matchGlob(glob.begin(), glob.end(), str);
413}
414
415inline glob_match_result_t matchGlob(std::string_view glob, std::string_view str)
416{
417 return matchGlob(parseGlob(glob), str);
418}
419
420inline std::string basePathOfGlob(glob_token_const_iterator first, glob_token_const_iterator last) {
421 if (first == last) {
422 return "";
423 }
424
425 // Find the first place holder and don't include it as a token.
426 auto endOfBase = std::find_if_not(first, last, [](ttlet &x) {
427 return x.type == glob_token_type_t::String || x.type == glob_token_type_t::Separator;
428 });
429
430 if (endOfBase != last) {
431 // Backtrack until the last separator, and remove it.
432 // Except when we included everything in the first loop because in that case there
433 // are no placeholders at all and we want to include the filename.
434 endOfBase = rfind_if(first, endOfBase, [](ttlet &x) {
435 return x.type == glob_token_type_t::Separator;
436 });
437 }
438
439 // Add back the leading slash.
440 if (endOfBase == first && first->type == glob_token_type_t::Separator) {
441 endOfBase++;
442 }
443
444 std::string r;
445 for (auto index = first; index != endOfBase; index++) {
446 switch (index->type) {
447 case glob_token_type_t::String:
448 r += index->value;
449 break;
450 case glob_token_type_t::Separator:
451 r += '/';
452 break;
453 default:
454 tt_no_default;
455 }
456 }
457 return r;
458}
459
460inline std::string basePathOfGlob(glob_token_list_t const &glob)
461{
462 return basePathOfGlob(glob.begin(), glob.end());
463}
464
465inline std::string basePathOfGlob(std::string_view glob)
466{
467 return basePathOfGlob(parseGlob(glob));
468}
469
470}
Definition glob.hpp:41
T back(T... args)
T clear(T... args)
T find_if_not(T... args)
T push_back(T... args)