HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
base_n.hpp
1// Copyright Take Vos 2020-2021.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "../byte_string.hpp"
8#include "../required.hpp"
9#include "../assert.hpp"
10#include "../cast.hpp"
11#include "../check.hpp"
12#include <span>
13#include <cstdint>
14#include <array>
15#include <string>
16#include <string_view>
17
18#pragma once
19
20namespace hi::inline v1 {
21namespace detail {
22
24 long long radix;
25 bool case_insensitive;
26 char padding_char;
27 std::array<int8_t, 256> int_from_char_table = {};
28 std::array<char, 127> char_from_int_table = {};
29
35 template<std::size_t StringLength>
36 constexpr base_n_alphabet(
37 char const (&str)[StringLength],
38 bool case_insensitive = StringLength <= 33,
39 char padding_char = '\0') noexcept :
40 radix(narrow_cast<long long>(StringLength - 1)), case_insensitive(case_insensitive), padding_char(padding_char)
41 {
42 static_assert(StringLength < 128);
43
44 // Mark the int_from_char_table to have invalid characters.
45 for (long long i = 0; i != 256; ++i) {
46 int_from_char_table[i] = -2;
47 }
48
49 // Mark white-space in the int_from_char_table as white-space.
50 int_from_char_table[static_cast<std::size_t>(' ')] = -1;
51 int_from_char_table[static_cast<std::size_t>('\t')] = -1;
52 int_from_char_table[static_cast<std::size_t>('\r')] = -1;
53 int_from_char_table[static_cast<std::size_t>('\n')] = -1;
54 int_from_char_table[static_cast<std::size_t>('\f')] = -1;
55
56 if (padding_char != 0) {
57 int_from_char_table[static_cast<std::size_t>(padding_char)] = -1;
58 }
59
60 for (long long i = 0; i != radix; ++i) {
61 auto c = str[i];
62 char_from_int_table[i] = c;
63
64 int_from_char_table[static_cast<std::size_t>(c)] = narrow_cast<int8_t>(i);
65 if constexpr (StringLength <= 33) {
66 // Add an extra entry for case folded form.
67 if (c >= 'a' && c <= 'z') {
68 int_from_char_table[static_cast<std::size_t>((c - 'a') + 'A')] = narrow_cast<int8_t>(i);
69 } else if (c >= 'A' && c <= 'Z') {
70 int_from_char_table[static_cast<std::size_t>((c - 'A') + 'a')] = narrow_cast<int8_t>(i);
71 }
72 }
73 }
74 }
75
79 constexpr char char_from_int(int8_t x) const noexcept
80 {
81 hi_axiom(x < radix);
82 return char_from_int_table[x];
83 }
84
85 constexpr int8_t int_from_char(char c) const noexcept
86 {
87 return int_from_char_table[static_cast<std::size_t>(c)];
88 }
89};
90
91constexpr auto base2_alphabet = base_n_alphabet{"01"};
92
93constexpr auto base8_alphabet = base_n_alphabet{"01234567"};
94
95constexpr auto base10_alphabet = base_n_alphabet{"0123456789"};
96
97constexpr auto base16_alphabet = base_n_alphabet{"0123456789ABCDEF"};
98
99constexpr auto base32_rfc4648_alphabet = base_n_alphabet{"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"};
100
101constexpr auto base32hex_rfc4648_alphabet = base_n_alphabet{"0123456789ABCDEFGHIJKLMNOPQRSTUV"};
102
103constexpr auto base64_rfc4648_alphabet =
104 base_n_alphabet{"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", false, '='};
105
106constexpr auto base64url_rfc4648_alphabet =
107 base_n_alphabet{"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", false, '='};
108
109constexpr auto base85_rfc1924_alphabet =
110 base_n_alphabet{"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~"};
111
112constexpr auto base85_btoa_alphabet =
113 base_n_alphabet{"!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstu"};
114
115} // namespace detail
116
117template<detail::base_n_alphabet Alphabet, int CharsPerBlock, int BytesPerBlock>
118class base_n {
119public:
120 static constexpr detail::base_n_alphabet alphabet = Alphabet;
121 static constexpr char padding_char = alphabet.padding_char;
122 static constexpr long long radix = alphabet.radix;
123 static constexpr long long bytes_per_block = BytesPerBlock;
124 static constexpr long long chars_per_block = CharsPerBlock;
125 static_assert(bytes_per_block != 0, "radix must be 16, 32, 64 or 85");
126 static_assert(chars_per_block != 0, "radix must be 16, 32, 64 or 85");
127
128 template<typename T>
129 static constexpr T int_from_char(char c) noexcept
130 {
131 return narrow_cast<T>(alphabet.int_from_char(c));
132 }
133
134 template<typename T>
135 static constexpr char char_from_int(T x) noexcept
136 {
137 return alphabet.char_from_int(narrow_cast<int8_t>(x));
138 }
139
146 template<typename ItIn, typename ItOut>
147 static constexpr void encode(ItIn ptr, ItIn last, ItOut output)
148 {
149 long long byte_index_in_block = 0;
150 long long block = 0;
151
152 while (ptr != last) {
153 // Construct a block in big endian.
154 hilet shift = 8 * ((bytes_per_block - 1) - byte_index_in_block);
155 block |= static_cast<long long>(*(ptr++)) << shift;
156
157 if (++byte_index_in_block == bytes_per_block) {
158 encode_block(block, bytes_per_block, output);
159 block = 0;
160 byte_index_in_block = 0;
161 }
162 }
163
164 if (byte_index_in_block != 0) {
165 encode_block(block, byte_index_in_block, output);
166 }
167 }
168
175 template<typename ItIn>
176 static std::string encode(ItIn first, ItIn last) noexcept
177 {
178 std::string r;
179 encode(first, last, std::back_inserter(r));
180 return r;
181 }
182
188 static constexpr std::string encode(std::span<std::byte const> bytes) noexcept
189 {
190 return encode(begin(bytes), end(bytes));
191 }
192
200 template<typename ItIn, typename ItOut>
201 static constexpr ItIn decode(ItIn ptr, ItIn last, ItOut output)
202 {
203 int char_index_in_block = 0;
204 long long block = 0;
205
206 for (; ptr != last; ++ptr) {
207 hilet digit = int_from_char<long long>(*ptr);
208 if (digit == -1) {
209 // Whitespace is ignored.
210 continue;
211
212 } else if (digit == -2) {
213 // Other character means end
214 return ptr;
215
216 } else {
217 block *= radix;
218 block += digit;
219
220 if (++char_index_in_block == chars_per_block) {
221 decode_block(block, chars_per_block, output);
222 block = 0;
223 char_index_in_block = 0;
224 }
225 }
226 }
227
228 if (char_index_in_block != 0) {
229 // pad the block with zeros.
230 for (auto i = char_index_in_block; i != chars_per_block; ++i) {
231 block *= radix;
232 }
233 decode_block(block, char_index_in_block, output);
234 }
235 return ptr;
236 }
237
238 static bstring decode(std::string_view str)
239 {
240 auto r = bstring{};
241 auto i = decode(begin(str), end(str), std::back_inserter(r));
242 hi_parse_check(i == end(str), "base-n encoded string not completely decoded");
243 return r;
244 }
245
246private:
247 template<typename ItOut>
248 static void encode_block(long long block, long long nr_bytes, ItOut output) noexcept
249 {
250 hilet padding = bytes_per_block - nr_bytes;
251
252 // Construct a block in little-endian, using easy division/modulo.
253 auto char_block = std::string{};
254 for (long long i = 0; i != chars_per_block; ++i) {
255 hilet v = block % radix;
256 block /= radix;
257
258 if (i < padding) {
259 hi_assume(v != 0);
260 if (padding_char != 0) {
261 char_block += padding_char;
262 }
263 } else {
264 char_block += char_from_int(v);
265 }
266 }
267
268 // A block should be output as a big-endian radix-number.
269 std::copy(rbegin(char_block), rend(char_block), output);
270 }
271
272 template<typename ItOut>
273 static constexpr void decode_block(long long block, long long nr_chars, ItOut output)
274 {
275 hilet padding = chars_per_block - nr_chars;
276
277 if (block and bytes_per_block == padding) {
278 throw parse_error("Invalid number of character to decode.");
279 }
280
281 // Construct a block in little-endian, using easy division/modulo.
282 for (long long i = 0; i != (bytes_per_block - padding); ++i) {
283 hilet shift = 8 * ((bytes_per_block - 1) - i);
284 hilet byte = static_cast<std::byte>((block >> shift) & 0xff);
285
286 *(output++) = byte;
287 }
288
289 // The output data will not contain the padding.
290 }
291};
292
293// Alphabet, CharsPerBlock, BytesPerBlock
294using base2 = base_n<detail::base2_alphabet, 8, 1>;
295using base8 = base_n<detail::base8_alphabet, 8, 3>;
296using base16 = base_n<detail::base16_alphabet, 2, 1>;
297using base32 = base_n<detail::base32_rfc4648_alphabet, 8, 5>;
298using base32hex = base_n<detail::base32hex_rfc4648_alphabet, 8, 5>;
299using base64 = base_n<detail::base64_rfc4648_alphabet, 4, 3>;
300using base64url = base_n<detail::base64url_rfc4648_alphabet, 4, 3>;
301using base85 = base_n<detail::base85_rfc1924_alphabet, 5, 4>;
302using ascii85 = base_n<detail::base85_btoa_alphabet, 5, 4>;
303
304} // namespace hi::inline v1
This file includes required definitions.
#define hilet
Invariant should be the default for variables.
Definition required.hpp:23
#define hi_assume(condition)
Mark an expression as true.
Definition architecture.hpp:222
Definition base_n.hpp:23
constexpr base_n_alphabet(char const (&str)[StringLength], bool case_insensitive=StringLength<=33, char padding_char='\0') noexcept
Construct an alphabet.
Definition base_n.hpp:36
constexpr char char_from_int(int8_t x) const noexcept
Get a character from an integer.
Definition base_n.hpp:79
Definition base_n.hpp:118
static constexpr void encode(ItIn ptr, ItIn last, ItOut output)
Encode bytes into a string.
Definition base_n.hpp:147
static std::string encode(ItIn first, ItIn last) noexcept
Encode bytes into a string.
Definition base_n.hpp:176
static constexpr std::string encode(std::span< std::byte const > bytes) noexcept
Encode bytes into a string.
Definition base_n.hpp:188
static constexpr ItIn decode(ItIn ptr, ItIn last, ItOut output)
Decodes a base-n encoded UTF-8 string into bytes.
Definition base_n.hpp:201
T back_inserter(T... args)
T copy(T... args)