HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
otype_cmap.hpp
1// Copyright Take Vos 2023.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "otype_utilities.hpp"
8#include "font_char_map.hpp"
9#include "../macros.hpp"
10
11hi_export_module(hikogui.font.otype_cmap);
12
13hi_export namespace hi { inline namespace v1 {
14
15[[nodiscard]] inline std::span<std::byte const>
16otype_cmap_find(std::span<std::byte const> bytes, uint16_t platform_id, uint16_t platform_specific_id)
17{
18 struct header_type {
19 big_uint16_buf_t version;
20 big_uint16_buf_t num_tables;
21 };
22
23 struct entry_type {
24 big_uint16_buf_t platform_id;
25 big_uint16_buf_t platform_specific_id;
26 big_uint32_buf_t offset;
27 };
28
29 std::size_t offset = 0;
30
31 auto const& header = implicit_cast<header_type>(offset, bytes);
32 hi_check(*header.version == 0, "CMAP version is not 0");
33
34 auto const entries = implicit_cast<entry_type>(offset, bytes, *header.num_tables);
35
36 auto key = (truncate<uint32_t>(platform_id) << 16) | truncate<uint32_t>(platform_specific_id);
37 if (auto const entry = fast_binary_search_eq<std::endian::big>(entries, key)) {
38 return hi_check_subspan(bytes, *entry->offset);
39 } else {
40 return {};
41 }
42}
43
44[[nodiscard]] inline font_char_map otype_cmap_parse_map_4(std::span<std::byte const> over_sized_bytes)
45{
46 struct header_type {
47 big_uint16_buf_t format;
48 big_uint16_buf_t length;
49 big_uint16_buf_t language;
50 big_uint16_buf_t seg_count_x2;
51 big_uint16_buf_t search_range;
52 big_uint16_buf_t entry_selector;
53 big_uint16_buf_t range_shift;
54 };
55
56 auto offset = 0_uz;
57 auto const& header = implicit_cast<header_type>(offset, over_sized_bytes);
58 hi_axiom(*header.format == 4);
59 auto const length = *header.length;
60 auto const bytes = hi_check_subspan(over_sized_bytes, 0, length);
61
62 auto const seg_count = *header.seg_count_x2 / 2;
63
64 auto const end_codes = implicit_cast<big_uint16_buf_t>(offset, bytes, seg_count);
65 offset += sizeof(uint16_t); // reservedPad
66 auto const start_codes = implicit_cast<big_uint16_buf_t>(offset, bytes, seg_count);
67
68 auto const id_deltas = implicit_cast<big_uint16_buf_t>(offset, bytes, seg_count);
69
70 auto const id_range_offsets = implicit_cast<big_uint16_buf_t>(offset, bytes, seg_count);
71
72 auto const glyph_id_array_count = (bytes.size() - offset) / sizeof(big_uint16_buf_t);
73 auto const glyph_id_array = implicit_cast<big_uint16_buf_t>(offset, bytes, glyph_id_array_count);
74
75 auto r = font_char_map{};
76 r.reserve(seg_count);
77 auto prev_end_code_point = char32_t{0};
78 for (auto i = 0_uz; i != seg_count; ++i) {
79 auto const end_code_point = char_cast<char32_t>(*end_codes[i]);
80 auto const start_code_point = char_cast<char32_t>(*start_codes[i]);
81
82 hi_check(start_code_point <= end_code_point, "'cmap' subtable 4, start code-point must come before end code-point.");
83 hi_check(
84 i == 0 or prev_end_code_point < start_code_point,
85 "'cmap' subtable 4, all entries must be non-overlapping and ordered.");
86
87 if (start_code_point == 0xffff and end_code_point == 0xffff) {
88 // The last entry is a single character explicit terminator and does not need to be added.
89 break;
90 }
91
92 auto id_range_offset = wide_cast<size_t>(*id_range_offsets[i]);
93 if (id_range_offset == 0) {
94 // Simple modulo 65536 delta on the start_code_point to get a glyph_id.
95 auto start_glyph_id = *id_deltas[i];
96 start_glyph_id += char_cast<uint16_t>(start_code_point);
97
98 hi_check(
99 start_glyph_id + (end_code_point - start_code_point) + 1_uz < 0xffff,
100 "'cmap' subtable 4, glyph_id must be in range 0 to 0xfffe.");
101 r.add(start_code_point, end_code_point, start_glyph_id);
102
103 } else {
104 // The formula from the specification:
105 // `glyphIndex = *( &idRangeOffset[i] + idRangeOffset[i] / 2 + (c - startCode[i]) )`
106
107 // Get the offset to the glyph_id_array.
108 id_range_offset /= sizeof(big_uint16_buf_t);
109 // Get the index in the glyph_index_table. By subtracting the rest of the id_range_offsets table.
110 id_range_offset -= seg_count - i;
111
112 // When using the glyph_index_table, add glyphs one by one.
113 auto const code_point_count = end_code_point - start_code_point + 1;
114 for (auto j = 0_uz; j != code_point_count; ++j) {
115 auto const code_point = char_cast<char32_t>(start_code_point + j);
116
117 auto const glyph_id = *hi_check_at(glyph_id_array, id_range_offset + j);
118 hi_check(glyph_id < 0xfffe, "'cmap' subtable 4, glyph_id must be in range 0 to 0xfffe.");
119 r.add(code_point, code_point, glyph_id);
120 }
121 }
122
123 prev_end_code_point = end_code_point;
124 }
125
126 r.prepare();
127 return r;
128}
129
130[[nodiscard]] inline font_char_map otype_cmap_parse_map_6(std::span<std::byte const> over_sized_bytes)
131{
132 struct header_type {
133 big_uint16_buf_t format;
134 big_uint16_buf_t length;
135 big_uint16_buf_t language;
136 big_uint16_buf_t first_code;
137 big_uint16_buf_t entry_count;
138 };
139
140 auto offset = 0_uz;
141 auto const& header = implicit_cast<header_type>(offset, over_sized_bytes);
142 hi_axiom(*header.format == 6);
143 auto const bytes = hi_check_subspan(over_sized_bytes, 0, *header.length);
144
145 auto const entry_count = *header.entry_count;
146 auto const entries = implicit_cast<big_uint16_buf_t>(offset, bytes, entry_count);
147
148 auto r = font_char_map{};
149 r.reserve(entry_count);
150 auto code_point = char_cast<char32_t>(*header.first_code);
151 for (auto i = 0_uz; i != entry_count; ++i, ++code_point) {
152 auto const glyph_id = *entries[i];
153 hi_check(glyph_id < 0xfffe, "'cmap' subtable 6, glyph_id must be in range 0 to 0xfffe.");
154 r.add(code_point, code_point, glyph_id);
155 }
156
157 // Add terminator to the table.
158 r.prepare();
159 return r;
160}
161
162[[nodiscard]] inline font_char_map otype_cmap_parse_map_12(std::span<std::byte const> over_sized_bytes)
163{
164 struct header_type {
165 big_uint16_buf_t format;
166 big_uint16_buf_t reserved;
167 big_uint32_buf_t length;
168 big_uint32_buf_t language;
169 big_uint32_buf_t num_groups;
170 };
171
172 struct entry_type {
173 big_uint32_buf_t start_char_code;
174 big_uint32_buf_t end_char_code;
175 big_uint32_buf_t start_glyph_id;
176 };
177
178 auto offset = 0_uz;
179 auto const& header = implicit_cast<header_type>(offset, over_sized_bytes);
180 hi_axiom(*header.format == 12);
181 auto const bytes = hi_check_subspan(over_sized_bytes, 0, *header.length);
182
183 auto const entries = implicit_cast<entry_type>(offset, bytes, *header.num_groups);
184
185 auto r = font_char_map{};
186 r.reserve(*header.num_groups);
187 for (auto const& entry : entries) {
188 auto const start_code_point = char_cast<char32_t>(*entry.start_char_code);
189 auto const end_code_point = char_cast<char32_t>(*entry.end_char_code);
190 hi_check(start_code_point <= end_code_point, "'cmap' subtable 12, has invalid code-point range.");
191
192 auto const start_glyph_id = *entry.start_glyph_id;
193 hi_check(
194 start_glyph_id + (end_code_point - start_code_point) + 1_uz < 0xffff,
195 "'cmap' subtable 12, glyph_id must be in range 0 to 0xfffe.");
196 r.add(start_code_point, end_code_point, narrow_cast<uint16_t>(start_glyph_id));
197 }
198
199 // Add terminator to the table.
200 r.prepare();
201 return r;
202}
203
204[[nodiscard]] inline font_char_map otype_cmap_parse_map(std::span<std::byte const> bytes)
205{
206 // The first 16 bits of a cmap sub-table always contain the format.
207 auto const format = *implicit_cast<big_uint16_buf_t>(bytes);
208
209 switch (format) {
210 case 4:
211 return otype_cmap_parse_map_4(bytes);
212 case 6:
213 return otype_cmap_parse_map_6(bytes);
214 case 12:
215 return otype_cmap_parse_map_12(bytes);
216 default:
217 // Unknown format, let otype_cmap_parse try the next sub-table.
218 return {};
219 }
220}
221
222[[nodiscard]] inline font_char_map otype_cmap_parse(std::span<std::byte const> bytes)
223{
224 constexpr auto search_order = std::array{
225 std::pair{uint16_t{0}, uint16_t{4}}, // Unicode - Unicode 2.0 non-BMP
226 std::pair{uint16_t{0}, uint16_t{3}}, // Unicode - Unicode 2.0 BMP-only
227 std::pair{uint16_t{0}, uint16_t{2}}, // Unicode - ISO 10646 1993
228 std::pair{uint16_t{0}, uint16_t{1}}, // Unicode - Version 1.1
229 std::pair{uint16_t{3}, uint16_t{10}}, // Microsoft Windows - Unicode 32-bit
230 std::pair{uint16_t{3}, uint16_t{1}}, // Microsoft Windows - Unicode 16-bit
231 std::pair{uint16_t{3}, uint16_t{0}} // Microsoft Windows - Symbol.
232 };
233
234 for (auto const[platform_id, platform_specific_id] : search_order) {
235 if (auto const map_bytes = otype_cmap_find(bytes, platform_id, platform_specific_id); not map_bytes.empty()) {
236 if (auto r = otype_cmap_parse_map(map_bytes); not r.empty()) {
237 return r;
238 }
239 }
240 }
241
242 throw parse_error("'cmap' no compatible character map found.");
243}
244
245}} // namespace hi::v1
Defined font_char_map type.
The HikoGUI namespace.
Definition array_generic.hpp:20
DOXYGEN BUG.
Definition algorithm_misc.hpp:20