HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
glob.hpp
1// Copyright Take Vos 2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "utility.hpp"
8#include <vector>
9#include <string>
10#include <string_view>
11#include <filesystem>
12#include <variant>
13#include <type_traits>
14#include <regex>
15
16namespace hi { inline namespace v1 {
17
19public:
// Default construction, copy/move construction and copy/move assignment are all
// compiler-generated; each one is declared constexpr and noexcept.
20 constexpr glob_pattern() noexcept = default;
21 constexpr glob_pattern(glob_pattern const&) noexcept = default;
22 constexpr glob_pattern(glob_pattern&&) noexcept = default;
23 constexpr glob_pattern& operator=(glob_pattern const&) noexcept = default;
24 constexpr glob_pattern& operator=(glob_pattern&&) noexcept = default;
25
26 glob_pattern(std::string_view str) : _tokens(parse(str)), _regex()
27 {
28 _regex = make_regex(_tokens);
29 }
30
31 glob_pattern(std::string const& str) : glob_pattern(std::string_view{str}) {}
32 glob_pattern(char const *str) : glob_pattern(std::string_view{str}) {}
33 glob_pattern(std::filesystem::path const& path) : glob_pattern(path.generic_string()) {}
34
35 [[nodiscard]] constexpr friend std::string to_string(glob_pattern const& rhs) noexcept
36 {
37 auto r = std::string{};
38
39 for (hilet& token : rhs._tokens) {
40 if (auto string_ = std::get_if<string_type>(&token)) {
41 r += *string_;
42 } else if (std::holds_alternative<separator_type>(token)) {
43 r += '/';
44 } else if (auto strings_ = std::get_if<strings_type>(&token)) {
45 r += '{';
46 for (hilet& s : *strings_) {
47 r += s;
48 r += ',';
49 }
50 r += '}';
51 } else if (std::holds_alternative<any_char_type>(token)) {
52 r += '?';
53 } else if (std::holds_alternative<any_string_type>(token)) {
54 r += '*';
55 } else if (std::holds_alternative<any_directory_type>(token)) {
56 r += "/**";
57 } else {
58 hi_no_default();
59 }
60 }
61 return r;
62 }
63
64 [[nodiscard]] constexpr friend std::string to_regex_string(glob_pattern const& rhs) noexcept
65 {
66 return make_regex_string(rhs._tokens);
67 }
68
69 [[nodiscard]] constexpr std::string base_generic_string() const noexcept
70 {
71 auto r = std::string{};
72
73 for (hilet& token : _tokens) {
74 if (auto string_ = std::get_if<string_type>(&token)) {
75 r += *string_;
76 } else if (std::holds_alternative<separator_type>(token)) {
77 r += '/';
78 } else if (std::holds_alternative<any_directory_type>(token)) {
79 r += "/";
80 return r;
81 } else {
82 return r;
83 }
84 }
85 return r;
86 }
87
88 [[nodiscard]] std::filesystem::path base_path() const noexcept
89 {
90 return std::filesystem::path{base_generic_string()};
91 }
92
93 [[nodiscard]] bool matches(std::string_view path) const noexcept
94 {
95 return std::regex_match(path.begin(), path.end(), _regex);
96 }
97
98 [[nodiscard]] bool matches(std::string const& path) const noexcept
99 {
100 return matches(std::string_view{path});
101 }
102
103 [[nodiscard]] bool matches(char const *path) const noexcept
104 {
105 return matches(std::string_view{path});
106 }
107
108 [[nodiscard]] bool matches(std::filesystem::path const& path) const noexcept
109 {
110 return matches(path.generic_string());
111 }
112
113private:
// Token kinds produced by parse().
//
// Literal text.
114 using string_type = std::string;
// A single '/' that is not part of "/**/".
115 struct separator_type {};
// A list of alternatives; produced for both "{a,b}" and "[abc]" (one string per character).
116 using strings_type = std::vector<std::string>;
// '*': becomes the regex "[^/]*".
117 struct any_string_type {};
// '?': becomes the regex "[^/]".
118 struct any_char_type {};
// "/**/": becomes the regex "(/.*)?/".
119 struct any_directory_type {};
120
121 using token_type =
122 std::variant<string_type, separator_type, strings_type, any_string_type, any_char_type, any_directory_type>;
123
124 using tokens_type = std::vector<token_type>;
125
// The parsed pattern.
126 tokens_type _tokens;
// The regular expression compiled from _tokens; used by matches().
127 std::regex _regex;
128
/** Tokenize glob pattern text.
 *
 * Recognized syntax:
 *  - `/`      a separator,
 *  - `?`      any single character,
 *  - `*`      any string,
 *  - `/` `**` `/` any directory; a pattern may also end directly after the stars,
 *  - `[abc]`  alternatives of one character each,
 *  - `{a,b}`  alternatives of whole strings,
 *  - anything else is literal text.
 *
 * @param first Iterator to the first character of the pattern.
 * @param last Iterator one beyond the last character of the pattern.
 * @return The tokens of the pattern.
 * @throws parse_error On a double star that is not preceded by a slash, on a
 *         slash followed by a double star that is not followed by a slash,
 *         and on an unclosed '[' or '{'.
 */
[[nodiscard]] constexpr static tokens_type parse(auto first, auto last)
{
    enum class state_type { idle, star, slash, slash_star, slash_star_star, bracket, brace };
    using enum state_type;

    static_assert(std::is_same_v<std::decay_t<decltype(*first)>, char>);

    auto tokens = tokens_type{};
    auto text = string_type{};
    auto alternatives = strings_type{};
    auto state = idle;

    // Emit the literal text collected so far, if there is any.
    auto flush_text = [&tokens, &text] {
        if (not text.empty()) {
            tokens.emplace_back(std::move(text));
            text.clear();
        }
    };

    // Emit the alternatives collected so far as a single token.
    auto flush_alternatives = [&tokens, &alternatives] {
        tokens.emplace_back(std::move(alternatives));
        alternatives.clear();
    };

    auto it = first;
    while (it != last) {
        auto c = *it;

        // Cleared when a state only emits its pending token; the same
        // character is then handled again in the idle state.
        auto consumed = true;

        switch (state) {
        case idle:
            switch (c) {
            case '/':
                flush_text();
                state = slash;
                break;
            case '?':
                flush_text();
                tokens.emplace_back(any_char_type{});
                break;
            case '*':
                flush_text();
                state = star;
                break;
            case '[':
                flush_text();
                state = bracket;
                break;
            case '{':
                flush_text();
                state = brace;
                break;
            default:
                text += c;
            }
            break;

        case star:
            if (c == '*') {
                throw parse_error("Double /**/ is only allowed between slashes.");
            }
            tokens.emplace_back(any_string_type{});
            state = idle;
            consumed = false;
            break;

        case slash:
            if (c == '*') {
                state = slash_star;
            } else {
                tokens.emplace_back(separator_type{});
                state = idle;
                consumed = false;
            }
            break;

        case slash_star:
            if (c == '*') {
                state = slash_star_star;
            } else {
                tokens.emplace_back(separator_type{});
                tokens.emplace_back(any_string_type{});
                state = idle;
                consumed = false;
            }
            break;

        case slash_star_star:
            if (c != '/') {
                throw parse_error(
                    std::format("'/**' must end in a slash in glob pattern '{}'", std::string_view{first, last}));
            }
            // The closing slash is part of the any-directory token.
            tokens.emplace_back(any_directory_type{});
            state = idle;
            break;

        case bracket:
            if (c == ']') {
                flush_alternatives();
                state = idle;
            } else {
                // Every character between the brackets is its own alternative.
                alternatives.emplace_back(1, c);
            }
            break;

        case brace:
            if (c == '}') {
                // An empty alternative directly before the closing brace is dropped.
                if (not text.empty()) {
                    alternatives.push_back(std::move(text));
                    text.clear();
                }
                flush_alternatives();
                state = idle;

            } else if (c == ',') {
                alternatives.push_back(std::move(text));
                text.clear();

            } else {
                text += c;
            }
            break;

        default:
            hi_no_default();
        }

        if (consumed) {
            ++it;
        }
    }

    // Emit whatever is still pending at the end of the pattern.
    switch (state) {
    case idle:
        flush_text();
        break;

    case star:
        tokens.emplace_back(any_string_type{});
        break;

    case slash:
        tokens.emplace_back(separator_type{});
        break;

    case slash_star:
        tokens.emplace_back(separator_type{});
        tokens.emplace_back(any_string_type{});
        break;

    case slash_star_star:
        tokens.emplace_back(any_directory_type{});
        break;

    case bracket:
        throw parse_error("Unclosed bracket '[' found in glob pattern.");

    case brace:
        throw parse_error("Unclosed brace '{' found in glob pattern.");
    }

    return tokens;
}
292
293 [[nodiscard]] constexpr static tokens_type parse(auto&& range)
294 {
295 return parse(std::ranges::begin(range), std::ranges::end(range));
296 }
297
298 [[nodiscard]] constexpr static std::string make_regex_string(tokens_type const& tokens) noexcept
299 {
300 auto r = std::string{};
301
302 for (hilet& token : tokens) {
303 if (auto string_ = std::get_if<string_type>(&token)) {
304 r += *string_;
305 } else if (std::holds_alternative<separator_type>(token)) {
306 r += '/';
307 } else if (auto strings_ = std::get_if<strings_type>(&token)) {
308 r += '(';
309 auto first = true;
310 for (hilet& s : *strings_) {
311 if (not std::exchange(first, false)) {
312 r += '|';
313 }
314 r += s;
315 }
316 r += ')';
317 } else if (std::holds_alternative<any_char_type>(token)) {
318 r += "[^/]";
319 } else if (std::holds_alternative<any_string_type>(token)) {
320 r += "[^/]*";
321 } else if (std::holds_alternative<any_directory_type>(token)) {
322 r += "(/.*)?/";
323 } else {
324 hi_no_default();
325 }
326 }
327 return r;
328 }
329
330 [[nodiscard]] static std::regex make_regex(tokens_type const& tokens) noexcept
331 {
332 return std::regex{make_regex_string(tokens), std::regex_constants::ECMAScript | std::regex_constants::optimize};
333 }
334};
335
336[[nodiscard]] inline generator<std::filesystem::path> glob(glob_pattern const& pattern)
337{
338 auto path = pattern.base_path();
339
340 hilet first = std::filesystem::recursive_directory_iterator(path);
341 hilet last = std::filesystem::recursive_directory_iterator();
342 for (auto it = first; it != last; ++it) {
343 if (pattern.matches(it->path())) {
344 co_yield it->path();
345 }
346 }
347}
348
349[[nodiscard]] inline generator<std::filesystem::path> glob(std::string_view pattern)
350{
351 return glob(glob_pattern{pattern});
352}
353
354[[nodiscard]] inline generator<std::filesystem::path> glob(std::string const& pattern)
355{
356 return glob(glob_pattern{pattern});
357}
358
359[[nodiscard]] inline generator<std::filesystem::path> glob(char const *pattern)
360{
361 return glob(glob_pattern{pattern});
362}
363
364[[nodiscard]] inline generator<std::filesystem::path> glob(std::filesystem::path const& pattern)
365{
366 return glob(glob_pattern{pattern});
367}
368
369[[nodiscard]] inline generator<std::filesystem::path> glob(URL const& pattern)
370{
371 return glob(glob_pattern{pattern.filesystem_path()});
372}
373
374}} // namespace hi::v1
Utilities used by the HikoGUI library itself.
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
DOXYGEN BUG.
Definition algorithm.hpp:15
The HikoGUI namespace.
Definition ascii.hpp:19
Exception thrown during parsing on an error.
Definition exception.hpp:58
Definition glob.hpp:18
T move(T... args)
T regex_match(T... args)