HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
inflate.hpp
1// Copyright Take Vos 2020.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "../utility/utility.hpp"
8#include "../container/container.hpp"
9#include "../parser/parser.hpp"
10#include "../macros.hpp"
11#include "huffman.hpp"
12#include <span>
13
14hi_export_module(hikogui.codec.inflate);
15
16hi_export namespace hi { inline namespace v1 {
17namespace detail {
18
19inline void inflate_copy_block(std::span<std::byte const> bytes, std::size_t& bit_offset, std::size_t max_size, bstring& r)
20{
21 auto offset = (bit_offset + 7) / 8;
22
23 auto LEN = **make_placement_ptr<little_uint16_buf_t>(bytes, offset);
24 [[maybe_unused]] auto NLEN = **make_placement_ptr<little_uint16_buf_t>(bytes, offset);
25
26 hi_check((offset + LEN) <= bytes.size(), "input buffer overrun");
27 hi_check((r.size() + LEN) <= max_size, "output buffer overrun");
28 r.append(&bytes[offset], LEN);
29
30 bit_offset = offset * 8;
31}
32
33[[nodiscard]] inline std::size_t
34inflate_decode_length(std::span<std::byte const> bytes, std::size_t& bit_offset, std::size_t symbol)
35{
36 switch (symbol) {
37 case 257:
38 return 3;
39 case 258:
40 return 4;
41 case 259:
42 return 5;
43 case 260:
44 return 6;
45 case 261:
46 return 7;
47 case 262:
48 return 8;
49 case 263:
50 return 9;
51 case 264:
52 return 10;
53 case 265:
54 return get_bits(bytes, bit_offset, 1) + 11;
55 case 266:
56 return get_bits(bytes, bit_offset, 1) + 13;
57 case 267:
58 return get_bits(bytes, bit_offset, 1) + 15;
59 case 268:
60 return get_bits(bytes, bit_offset, 1) + 17;
61 case 269:
62 return get_bits(bytes, bit_offset, 2) + 19;
63 case 270:
64 return get_bits(bytes, bit_offset, 2) + 23;
65 case 271:
66 return get_bits(bytes, bit_offset, 2) + 27;
67 case 272:
68 return get_bits(bytes, bit_offset, 2) + 31;
69 case 273:
70 return get_bits(bytes, bit_offset, 3) + 35;
71 case 274:
72 return get_bits(bytes, bit_offset, 3) + 43;
73 case 275:
74 return get_bits(bytes, bit_offset, 3) + 51;
75 case 276:
76 return get_bits(bytes, bit_offset, 3) + 59;
77 case 277:
78 return get_bits(bytes, bit_offset, 4) + 67;
79 case 278:
80 return get_bits(bytes, bit_offset, 4) + 83;
81 case 279:
82 return get_bits(bytes, bit_offset, 4) + 99;
83 case 280:
84 return get_bits(bytes, bit_offset, 4) + 115;
85 case 281:
86 return get_bits(bytes, bit_offset, 5) + 131;
87 case 282:
88 return get_bits(bytes, bit_offset, 5) + 163;
89 case 283:
90 return get_bits(bytes, bit_offset, 5) + 195;
91 case 284:
92 return get_bits(bytes, bit_offset, 5) + 227;
93 case 285:
94 return 258;
95 default:
96 throw parse_error(std::format("Literal/Length symbol out of range {}", symbol));
97 }
98}
99
100[[nodiscard]] inline std::size_t
101inflate_decode_distance(std::span<std::byte const> bytes, std::size_t& bit_offset, std::size_t symbol)
102{
103 switch (symbol) {
104 case 0:
105 return 1;
106 case 1:
107 return 2;
108 case 2:
109 return 3;
110 case 3:
111 return 4;
112 case 4:
113 return get_bits(bytes, bit_offset, 1) + 5;
114 case 5:
115 return get_bits(bytes, bit_offset, 1) + 7;
116 case 6:
117 return get_bits(bytes, bit_offset, 2) + 9;
118 case 7:
119 return get_bits(bytes, bit_offset, 2) + 13;
120 case 8:
121 return get_bits(bytes, bit_offset, 3) + 17;
122 case 9:
123 return get_bits(bytes, bit_offset, 3) + 25;
124 case 10:
125 return get_bits(bytes, bit_offset, 4) + 33;
126 case 11:
127 return get_bits(bytes, bit_offset, 4) + 49;
128 case 12:
129 return get_bits(bytes, bit_offset, 5) + 65;
130 case 13:
131 return get_bits(bytes, bit_offset, 5) + 97;
132 case 14:
133 return get_bits(bytes, bit_offset, 6) + 129;
134 case 15:
135 return get_bits(bytes, bit_offset, 6) + 193;
136 case 16:
137 return get_bits(bytes, bit_offset, 7) + 257;
138 case 17:
139 return get_bits(bytes, bit_offset, 7) + 385;
140 case 18:
141 return get_bits(bytes, bit_offset, 8) + 513;
142 case 19:
143 return get_bits(bytes, bit_offset, 8) + 769;
144 case 20:
145 return get_bits(bytes, bit_offset, 9) + 1025;
146 case 21:
147 return get_bits(bytes, bit_offset, 9) + 1537;
148 case 22:
149 return get_bits(bytes, bit_offset, 10) + 2049;
150 case 23:
151 return get_bits(bytes, bit_offset, 10) + 3073;
152 case 24:
153 return get_bits(bytes, bit_offset, 11) + 4097;
154 case 25:
155 return get_bits(bytes, bit_offset, 11) + 6145;
156 case 26:
157 return get_bits(bytes, bit_offset, 12) + 8193;
158 case 27:
159 return get_bits(bytes, bit_offset, 12) + 12289;
160 case 28:
161 return get_bits(bytes, bit_offset, 13) + 16385;
162 case 29:
163 return get_bits(bytes, bit_offset, 13) + 24577;
164 default:
165 throw parse_error(std::format("Distance symbol out of range {}", symbol));
166 }
167}
168
169inline void inflate_block(
170 std::span<std::byte const> bytes,
171 std::size_t& bit_offset,
172 std::size_t max_size,
173 huffman_tree<int16_t> const& literal_tree,
174 huffman_tree<int16_t> const& distance_tree,
175 bstring& r)
176{
177 while (true) {
178 // Test only every get_symbol, the trailer is at least 32 bits (Checksum)
179 // - 15 bits maximum huffman code.
180 // - 5 bits extra length.
181 // - 7 bits rounding up to byte.
182 hi_check(((bit_offset + 27) >> 3) <= bytes.size(), "Input buffer overrun");
183
184 auto const literal_symbol = literal_tree.get_symbol(bytes, bit_offset);
185
186 if (literal_symbol <= 255) {
187 hi_check(r.size() < max_size, "Output buffer overrun");
188 r.push_back(static_cast<std::byte>(literal_symbol));
189
190 } else if (literal_symbol == 256) {
191 // End-of-block.
192 return;
193
194 } else {
195 auto const length = inflate_decode_length(bytes, bit_offset, literal_symbol);
196 hi_check(r.size() + length <= max_size, "Output buffer overrun");
197
198 // Test only every get_symbol, the trailer is at least 32 bits (Checksum)
199 // - 15 bits maximum huffman code.
200 // - 7 bits rounding up to byte.
201 hi_check(((bit_offset + 22) >> 3) <= bytes.size(), "Input buffer overrun");
202 auto const distance_symbol = distance_tree.get_symbol(bytes, bit_offset);
203
204 // Test only every inflate_decode_distance, the trailer is at least 32 bits (Checksum)
205 // - 13 bits extra length.
206 // - 7 bits rounding up to byte.
207 hi_check(((bit_offset + 20) >> 3) <= bytes.size(), "Input buffer overrun");
208 auto const distance = inflate_decode_distance(bytes, bit_offset, distance_symbol);
209
210 hi_check(distance <= r.size(), "Distance beyond start of decompressed data");
211 auto src_i = r.size() - distance;
212 for (auto i = 0; i != length; ++i) {
213 r.push_back(r[src_i++]);
214 }
215 }
216 }
217}
218
219inline huffman_tree<int16_t> deflate_fixed_literal_tree = []() {
220 std::vector<uint8_t> lengths;
221
222 for (int i = 0; i <= 143; ++i) {
223 lengths.push_back(8);
224 }
225 for (int i = 144; i <= 255; ++i) {
226 lengths.push_back(9);
227 }
228 for (int i = 256; i <= 279; ++i) {
229 lengths.push_back(7);
230 }
231 for (int i = 280; i <= 287; ++i) {
232 lengths.push_back(8);
233 }
234
236}();
237
238inline huffman_tree<int16_t> deflate_fixed_distance_tree = []() {
239 std::vector<uint8_t> lengths;
240
241 for (int i = 0; i <= 31; ++i) {
242 lengths.push_back(5);
243 }
244
246}();
247
248inline void inflate_fixed_block(std::span<std::byte const> bytes, std::size_t& bit_offset, std::size_t max_size, bstring& r)
249{
250 inflate_block(bytes, bit_offset, max_size, deflate_fixed_literal_tree, deflate_fixed_distance_tree, r);
251}
252
253[[nodiscard]] inline huffman_tree<int16_t>
254inflate_code_lengths(std::span<std::byte const> bytes, std::size_t& bit_offset, std::size_t nr_symbols)
255{
256 // The symbols are in different order in the table.
257 constexpr auto symbols = std::array<int16_t, 19>{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
258
259 hi_check(((bit_offset + (3 * static_cast<std::size_t>(nr_symbols)) + 7) >> 3) <= bytes.size(), "Input buffer overrun");
260
261 auto lengths = std::vector<uint8_t>(symbols.size(), 0);
262 for (auto i = 0_uz; i != nr_symbols; ++i) {
263 auto const symbol = symbols[i];
264 lengths[symbol] = narrow_cast<uint8_t>(get_bits(bytes, bit_offset, 3));
265 }
267}
268
269inline std::vector<uint8_t> inflate_lengths(
270 std::span<std::byte const> bytes,
271 std::size_t& bit_offset,
272 std::size_t nr_symbols,
273 huffman_tree<int16_t> const& code_length_tree)
274{
275 auto r = std::vector<uint8_t>{};
276 r.reserve(nr_symbols);
277
278 auto prev_length = 0_uz;
279 while (r.size() < nr_symbols) {
280 // Test only every get_symbol, the trailer is at least 32 bits (Checksum)
281 // - 7 bits maximum huffman code.
282 // - 7 bits extra length.
283 // - 7 bits rounding up to byte.
284 hi_check(((bit_offset + 21) >> 3) <= bytes.size(), "Input buffer overrun");
285 auto const symbol = code_length_tree.get_symbol(bytes, bit_offset);
286
287 switch (symbol) {
288 case 16:
289 {
290 auto copy_length = get_bits(bytes, bit_offset, 2) + 3;
291 while (copy_length--) {
292 r.push_back(static_cast<uint8_t>(prev_length));
293 }
294 }
295 break;
296 case 17:
297 {
298 auto copy_length = get_bits(bytes, bit_offset, 3) + 3;
299 while (copy_length--) {
300 r.push_back(0);
301 }
302 }
303 break;
304 case 18:
305 {
306 auto copy_length = get_bits(bytes, bit_offset, 7) + 11;
307 while (copy_length--) {
308 r.push_back(0);
309 }
310 }
311 break;
312 default:
313 r.push_back(static_cast<uint8_t>(prev_length = symbol));
314 }
315 }
316
317 return r;
318}
319
320inline void inflate_dynamic_block(std::span<std::byte const> bytes, std::size_t& bit_offset, std::size_t max_size, bstring& r)
321{
322 // Test all lengths, the trailer is at least 32 bits (Checksum)
323 // - 14 bits lengths
324 // - 7 bits rounding up to byte.
325 hi_check(((bit_offset + 21) >> 3) <= bytes.size(), "Input buffer overrun");
326 auto const HLIT = get_bits(bytes, bit_offset, 5);
327 auto const HDIST = get_bits(bytes, bit_offset, 5);
328 auto const HCLEN = get_bits(bytes, bit_offset, 4);
329
330 auto const code_length_tree = inflate_code_lengths(bytes, bit_offset, HCLEN + 4);
331
332 auto const lengths = inflate_lengths(bytes, bit_offset, HLIT + HDIST + 258, code_length_tree);
333 hi_check(lengths[256] != 0, "The end-of-block symbol must be in the table");
334
335 auto const lengths_ptr = lengths.data();
336 hi_assert_not_null(lengths_ptr);
337 auto const literal_tree = huffman_tree<int16_t>::from_lengths(lengths_ptr, HLIT + 257);
338 auto const distance_tree = huffman_tree<int16_t>::from_lengths(&lengths_ptr[HLIT + 257], HDIST + 1);
339
340 inflate_block(bytes, bit_offset, max_size, literal_tree, distance_tree, r);
341}
342
343} // namespace detail
344
357hi_export [[nodiscard]] inline bstring
358inflate(std::span<std::byte const> bytes, std::size_t& offset, std::size_t max_size = 0x0100'0000)
359{
360 std::size_t bit_offset = offset * 8;
361
362 auto r = bstring{};
363
364 auto BFINAL = false;
365 do {
366 // Test all lengths, the trailer is at least 32 bits (Checksum)
367 // - 3 bits header
368 // - 7 bits rounding up to byte.
369 hi_check(((bit_offset + 10) >> 3) <= bytes.size(), "Input buffer overrun");
370
371 BFINAL = get_bit(bytes, bit_offset);
372 auto const BTYPE = get_bits(bytes, bit_offset, 2);
373
374 switch (BTYPE) {
375 case 0:
376 detail::inflate_copy_block(bytes, bit_offset, max_size, r);
377 break;
378 case 1:
379 detail::inflate_fixed_block(bytes, bit_offset, max_size, r);
380 break;
381 case 2:
382 detail::inflate_dynamic_block(bytes, bit_offset, max_size, r);
383 break;
384 default:
385 throw parse_error("Reserved block type");
386 }
387
388 } while (!BFINAL);
389
390 offset = (bit_offset + 7) / 8;
391 return r;
392}
393
394}} // namespace hi::v1
The HikoGUI namespace.
Definition array_generic.hpp:20
std::size_t get_bits(std::span< std::byte const > buffer, std::size_t &index, std::size_t length) noexcept
Read a bits from of span of bytes Bits are ordered LSB first.
Definition bits.hpp:55
bool get_bit(std::span< std::byte const > buffer, std::size_t &index) noexcept
Read a single bit from span of bytes Bits are ordered LSB first.
Definition bits.hpp:27
hi_export bstring inflate(std::span< std::byte const > bytes, std::size_t &offset, std::size_t max_size=0x0100 '0000)
Inflate compressed data using the deflate algorithm bytes should include at least 32 bit of trailer,...
Definition inflate.hpp:358
DOXYGEN BUG.
Definition algorithm_misc.hpp:20
static huffman_tree from_lengths(uint8_t const *lengths, std::size_t nr_symbols)
Build a canonical-huffman table from a set of lengths.
Definition huffman.hpp:122
Exception thrown during parsing on an error.
Definition exception_intf.hpp:48
T data(T... args)
T distance(T... args)
T move(T... args)
T push_back(T... args)
T reserve(T... args)