HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
base_n.hpp
1// Copyright Take Vos 2020-2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "../byte_string.hpp"
8#include "../utility/module.hpp"
9#include <span>
10#include <cstdint>
11#include <array>
12#include <string>
13#include <string_view>
14#include <bit>
15
16#pragma once
17
18namespace hi::inline v1 {
19namespace detail {
20
22 long long radix;
23 bool case_insensitive;
24 char padding_char;
25 std::array<int8_t, 256> int_from_char_table = {};
26 std::array<char, 127> char_from_int_table = {};
27
33 template<std::size_t StringLength>
34 constexpr base_n_alphabet(
35 char const (&str)[StringLength],
36 bool case_insensitive = StringLength <= 33,
37 char padding_char = '\0') noexcept :
38 radix(narrow_cast<long long>(StringLength - 1)), case_insensitive(case_insensitive), padding_char(padding_char)
39 {
40 static_assert(StringLength < 128);
41
42 // Mark the int_from_char_table to have invalid characters.
43 for (long long i = 0; i != 256; ++i) {
44 int_from_char_table[i] = -2;
45 }
46
47 // Mark white-space in the int_from_char_table as white-space.
48 int_from_char_table[std::bit_cast<uint8_t>(' ')] = -1;
49 int_from_char_table[std::bit_cast<uint8_t>('\t')] = -1;
50 int_from_char_table[std::bit_cast<uint8_t>('\r')] = -1;
51 int_from_char_table[std::bit_cast<uint8_t>('\n')] = -1;
52 int_from_char_table[std::bit_cast<uint8_t>('\f')] = -1;
53
54 if (padding_char != 0) {
55 int_from_char_table[std::bit_cast<uint8_t>(padding_char)] = -1;
56 }
57
58 for (long long i = 0; i != radix; ++i) {
59 auto c = str[i];
60 char_from_int_table[i] = c;
61
62 int_from_char_table[std::bit_cast<uint8_t>(c)] = narrow_cast<int8_t>(i);
63 if constexpr (StringLength <= 33) {
64 // Add an extra entry for case folded form.
65 if (c >= 'a' && c <= 'z') {
66 int_from_char_table[narrow_cast<uint8_t>((c - 'a') + 'A')] = narrow_cast<int8_t>(i);
67 } else if (c >= 'A' && c <= 'Z') {
68 int_from_char_table[narrow_cast<uint8_t>((c - 'A') + 'a')] = narrow_cast<int8_t>(i);
69 }
70 }
71 }
72 }
73
77 constexpr char char_from_int(int8_t x) const noexcept
78 {
79 hi_axiom(x < radix);
80 return char_from_int_table[x];
81 }
82
83 constexpr int8_t int_from_char(char c) const noexcept
84 {
85 return int_from_char_table[std::bit_cast<uint8_t>(c)];
86 }
87};
88
89constexpr auto base2_alphabet = base_n_alphabet{"01"};
90
91constexpr auto base8_alphabet = base_n_alphabet{"01234567"};
92
93constexpr auto base10_alphabet = base_n_alphabet{"0123456789"};
94
95constexpr auto base16_alphabet = base_n_alphabet{"0123456789ABCDEF"};
96
97constexpr auto base32_rfc4648_alphabet = base_n_alphabet{"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"};
98
99constexpr auto base32hex_rfc4648_alphabet = base_n_alphabet{"0123456789ABCDEFGHIJKLMNOPQRSTUV"};
100
101constexpr auto base64_rfc4648_alphabet =
102 base_n_alphabet{"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", false, '='};
103
104constexpr auto base64url_rfc4648_alphabet =
105 base_n_alphabet{"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", false, '='};
106
107constexpr auto base85_rfc1924_alphabet =
108 base_n_alphabet{"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~"};
109
110constexpr auto base85_btoa_alphabet =
111 base_n_alphabet{"!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstu"};
112
113} // namespace detail
114
115template<detail::base_n_alphabet Alphabet, int CharsPerBlock, int BytesPerBlock>
116class base_n {
117public:
118 static constexpr detail::base_n_alphabet alphabet = Alphabet;
119 static constexpr char padding_char = alphabet.padding_char;
120 static constexpr long long radix = alphabet.radix;
121 static constexpr long long bytes_per_block = BytesPerBlock;
122 static constexpr long long chars_per_block = CharsPerBlock;
123 static_assert(bytes_per_block != 0, "radix must be 16, 32, 64 or 85");
124 static_assert(chars_per_block != 0, "radix must be 16, 32, 64 or 85");
125
126 template<typename T>
127 static constexpr T int_from_char(char c) noexcept
128 {
129 return narrow_cast<T>(alphabet.int_from_char(c));
130 }
131
132 template<typename T>
133 static constexpr char char_from_int(T x) noexcept
134 {
135 return alphabet.char_from_int(narrow_cast<int8_t>(x));
136 }
137
144 template<typename ItIn, typename ItOut>
145 static constexpr void encode(ItIn ptr, ItIn last, ItOut output)
146 {
147 long long byte_index_in_block = 0;
148 long long block = 0;
149
150 while (ptr != last) {
151 // Construct a block in big endian.
152 hilet shift = 8 * ((bytes_per_block - 1) - byte_index_in_block);
153 block |= static_cast<long long>(*(ptr++)) << shift;
154
155 if (++byte_index_in_block == bytes_per_block) {
156 encode_block(block, bytes_per_block, output);
157 block = 0;
158 byte_index_in_block = 0;
159 }
160 }
161
162 if (byte_index_in_block != 0) {
163 encode_block(block, byte_index_in_block, output);
164 }
165 }
166
173 template<typename ItIn>
174 static std::string encode(ItIn first, ItIn last) noexcept
175 {
176 std::string r;
177 encode(first, last, std::back_inserter(r));
178 return r;
179 }
180
186 static constexpr std::string encode(std::span<std::byte const> bytes) noexcept
187 {
188 return encode(begin(bytes), end(bytes));
189 }
190
198 template<typename ItIn, typename ItOut>
199 static constexpr ItIn decode(ItIn ptr, ItIn last, ItOut output)
200 {
201 int char_index_in_block = 0;
202 long long block = 0;
203
204 for (; ptr != last; ++ptr) {
205 hilet digit = int_from_char<long long>(*ptr);
206 if (digit == -1) {
207 // Whitespace is ignored.
208 continue;
209
210 } else if (digit == -2) {
211 // Other character means end
212 return ptr;
213
214 } else {
215 block *= radix;
216 block += digit;
217
218 if (++char_index_in_block == chars_per_block) {
219 decode_block(block, chars_per_block, output);
220 block = 0;
221 char_index_in_block = 0;
222 }
223 }
224 }
225
226 if (char_index_in_block != 0) {
227 // pad the block with zeros.
228 for (auto i = char_index_in_block; i != chars_per_block; ++i) {
229 block *= radix;
230 }
231 decode_block(block, char_index_in_block, output);
232 }
233 return ptr;
234 }
235
236 static bstring decode(std::string_view str)
237 {
238 auto r = bstring{};
239 auto i = decode(begin(str), end(str), std::back_inserter(r));
240 hi_check(i == end(str), "base-n encoded string not completely decoded");
241 return r;
242 }
243
244private:
245 template<typename ItOut>
246 static void encode_block(long long block, long long nr_bytes, ItOut output) noexcept
247 {
248 hilet padding = bytes_per_block - nr_bytes;
249
250 // Construct a block in little-endian, using easy division/modulo.
251 auto char_block = std::string{};
252 for (long long i = 0; i != chars_per_block; ++i) {
253 hilet v = block % radix;
254 block /= radix;
255
256 if (i < padding) {
257 hi_assume(v != 0);
258 if (padding_char != 0) {
259 char_block += padding_char;
260 }
261 } else {
262 char_block += char_from_int(v);
263 }
264 }
265
266 // A block should be output as a big-endian radix-number.
267 std::copy(rbegin(char_block), rend(char_block), output);
268 }
269
270 template<typename ItOut>
271 static constexpr void decode_block(long long block, long long nr_chars, ItOut output)
272 {
273 hilet padding = chars_per_block - nr_chars;
274
275 if (block and bytes_per_block == padding) {
276 throw parse_error("Invalid number of character to decode.");
277 }
278
279 // Construct a block in little-endian, using easy division/modulo.
280 for (long long i = 0; i != (bytes_per_block - padding); ++i) {
281 hilet shift = 8 * ((bytes_per_block - 1) - i);
282 hilet byte = static_cast<std::byte>((block >> shift) & 0xff);
283
284 *(output++) = byte;
285 }
286
287 // The output data will not contain the padding.
288 }
289};
290
291// Alphabet, CharsPerBlock, BytesPerBlock
292using base2 = base_n<detail::base2_alphabet, 8, 1>;
293using base8 = base_n<detail::base8_alphabet, 8, 3>;
294using base16 = base_n<detail::base16_alphabet, 2, 1>;
295using base32 = base_n<detail::base32_rfc4648_alphabet, 8, 5>;
296using base32hex = base_n<detail::base32hex_rfc4648_alphabet, 8, 5>;
297using base64 = base_n<detail::base64_rfc4648_alphabet, 4, 3>;
298using base64url = base_n<detail::base64url_rfc4648_alphabet, 4, 3>;
299using base85 = base_n<detail::base85_rfc1924_alphabet, 5, 4>;
300using ascii85 = base_n<detail::base85_btoa_alphabet, 5, 4>;
301
302} // namespace hi::inline v1
#define hi_check(expression, message,...)
Check if the expression is valid, or throw a parse_error.
Definition assert.hpp:110
#define hi_axiom(expression,...)
Specify an axiom; an expression that is true.
Definition assert.hpp:253
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
DOXYGEN BUG.
Definition algorithm.hpp:13
@ shift
The shift key is being held.
Definition base_n.hpp:21
constexpr base_n_alphabet(char const (&str)[StringLength], bool case_insensitive=StringLength<=33, char padding_char='\0') noexcept
Construct an alphabet.
Definition base_n.hpp:34
constexpr char char_from_int(int8_t x) const noexcept
Get a character from an integer.
Definition base_n.hpp:77
Definition base_n.hpp:116
static constexpr void encode(ItIn ptr, ItIn last, ItOut output)
Encode bytes into a string.
Definition base_n.hpp:145
static std::string encode(ItIn first, ItIn last) noexcept
Encode bytes into a string.
Definition base_n.hpp:174
static constexpr std::string encode(std::span< std::byte const > bytes) noexcept
Encode bytes into a string.
Definition base_n.hpp:186
static constexpr ItIn decode(ItIn ptr, ItIn last, ItOut output)
Decodes a base-n encoded string into bytes.
Definition base_n.hpp:199
T back_inserter(T... args)
T copy(T... args)