HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
char_converter.hpp
Go to the documentation of this file.
1// Copyright Take Vos 2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
9#pragma once
10
11#include "../utility/utility.hpp"
12#include "../macros.hpp"
13#include <string>
14#include <string_view>
15#include <bit>
16#include <compare>
17#include <array>
18#if defined(HI_HAS_SSE2)
19#include <emmintrin.h>
20#endif
21
22hi_export_module(hikogui.char_maps.char_converter);
23
24hi_export namespace hi { inline namespace v1 {
25
88template<fixed_string Encoding>
89struct char_map;
90
97template<fixed_string From, fixed_string To>
99public:
100 using from_encoder_type = char_map<From>;
101 using to_encoder_type = char_map<To>;
102 using from_char_type = from_encoder_type::char_type;
103 using to_char_type = to_encoder_type::char_type;
104 using from_string_type = std::basic_string<from_char_type>;
105 using to_string_type = std::basic_string<to_char_type>;
106
113 template<typename OutRange, typename InRange>
114 [[nodiscard]] constexpr OutRange convert(InRange&& src) const noexcept
115 {
116 using std::cbegin;
117 using std::cend;
118 using std::begin;
119 using std::end;
120
121 auto const[size, valid] = _size(cbegin(src), cend(src));
122
123 auto r = OutRange{};
124 if constexpr (From == To and std::is_same_v<InRange, OutRange>) {
125 if (valid) {
126 r = std::forward<InRange>(src);
127 // If and identity conversion is requested and the src is valid, then shortcut by return the src.
128 return r;
129 }
130 }
131
132 if (size == 0) {
133 return r;
134 }
135
136 r.resize(size);
137 if (From == To and valid) {
138 hi_axiom(size != 0);
139
140 using std::size;
141 std::memcpy(std::addressof(*begin(r)), std::addressof(*cbegin(src)), size(src) * sizeof(from_char_type));
142 } else {
143 _convert(cbegin(src), cend(src), begin(r));
144 }
145 return r;
146 }
147
155 template<typename OutRange, typename It, typename EndIt>
156 [[nodiscard]] constexpr OutRange convert(It first, EndIt last) const noexcept
157 {
158 using std::begin;
159
160 auto const[size, valid] = _size(first, last);
161 auto r = OutRange{};
162 if (size == 0) {
163 return r;
164 }
165
166 r.resize(size);
167 if (From == To and valid) {
168 hi_axiom(size != 0);
169
170 std::memcpy(std::addressof(*begin(r)), std::addressof(*first), std::distance(first, last) * sizeof(from_char_type));
171 } else {
172 _convert(first, last, begin(r));
173 }
174 return r;
175 }
176
185 template<typename OutRange = std::basic_string<to_char_type>>
186 [[nodiscard]] OutRange read(void const *ptr, size_t size, std::endian endian = std::endian::native) noexcept
187 {
188 hi_assert_not_null(ptr);
189
190 auto const num_chars = size / sizeof(from_char_type);
191
192 endian = from_encoder_type{}.guess_endian(ptr, size, endian);
193 if (endian == std::endian::native) {
194 if (floor(ptr, sizeof(from_char_type)) == ptr) {
195 return convert<OutRange>(
196 reinterpret_cast<from_char_type const *>(ptr), reinterpret_cast<from_char_type const *>(ptr) + num_chars);
197 } else {
199 tmp.resize(num_chars);
200 std::memcpy(std::addressof(*tmp.begin()), ptr, num_chars * sizeof(from_char_type));
201 return convert<OutRange>(std::move(tmp));
202 }
203 } else {
205 tmp.resize(num_chars);
206 std::memcpy(std::addressof(*tmp.begin()), ptr, num_chars * sizeof(from_char_type));
207 for (auto& c : tmp) {
208 c = std::byteswap(c);
209 }
210 return convert<OutRange>(std::move(tmp));
211 }
212 }
213
219 template<typename InRange>
220 [[nodiscard]] constexpr to_string_type operator()(InRange&& src) const noexcept
221 {
222 return convert<to_string_type>(std::forward<InRange>(src));
223 }
224
225private:
// Type of a 16-byte chunk: the SSE2 integer vector when SSE2 is available,
// otherwise a placeholder.
#ifdef HI_HAS_SSE2
using chunk16_type = __m128i;
#else
using chunk16_type = void;
#endif

// Compile-time switches checked by `_size_ascii()` and `_convert_ascii()` before
// they use the encoders' chunked ASCII read/write operations.
static constexpr bool _has_read_ascii_chunk16 = true;
static constexpr bool _has_write_ascii_chunk16 = true;
234
235 template<typename It, typename EndIt>
236 constexpr void _size_ascii(It& it, EndIt last, size_t& count) const noexcept
237 {
238 if (not std::is_constant_evaluated()) {
239#if defined(HI_HAS_SSE2)
240 if constexpr (_has_read_ascii_chunk16 and _has_write_ascii_chunk16) {
241 while (std::distance(it, last) >= 16) {
242 auto const chunk = from_encoder_type{}.read_ascii_chunk16(it);
243 auto const ascii_mask = _mm_movemask_epi8(chunk);
244 if (ascii_mask) {
245 // This chunk contains non-ASCII characters.
246 auto partial_count = std::countr_zero(truncate<uint16_t>(ascii_mask));
247 it += partial_count;
248 count += partial_count;
249 break;
250 }
251 it += 16;
252 count += 16;
253 }
254 }
255#endif
256 }
257 }
258
259 template<typename SrcIt, typename SrcEndIt, typename DstIt>
260 void _convert_ascii(SrcIt& src, SrcEndIt src_last, DstIt& dst) const noexcept
261 {
262 if (not std::is_constant_evaluated()) {
263#if defined(HI_HAS_SSE2)
264 if constexpr (_has_read_ascii_chunk16 and _has_write_ascii_chunk16) {
265 while (std::distance(src, src_last) >= 16) {
266 auto const chunk = from_encoder_type{}.read_ascii_chunk16(src);
267 auto const ascii_mask = _mm_movemask_epi8(chunk);
268 if (ascii_mask) {
269 // This chunk contains non-ASCII characters.
270 break;
271 }
272 // The complete chunk only contains ASCII characters.
273 to_encoder_type{}.write_ascii_chunk16(chunk, dst);
274 src += 16;
275 dst += 16;
276 }
277 }
278#endif
279 }
280 }
281
282 template<typename It, typename EndIt>
283 [[nodiscard]] constexpr std::pair<size_t, bool> _size(It it, EndIt last) const noexcept
284 {
285 auto count = 0_uz;
286 auto valid = true;
287 while (true) {
288 // This loop toggles between converting chunks of ASCII characters and converting
289 // a single non-ASCII character.
290 _size_ascii(it, last, count);
291
292 if (it == last) {
293 break;
294 }
295
296 auto const[code_point, read_valid] = from_encoder_type{}.read(it, last);
297 valid &= read_valid;
298
299 auto const[write_count, write_valid] = to_encoder_type{}.size(code_point);
300 count += write_count;
301 valid &= write_valid;
302 }
303
304 return {count, valid};
305 }
306
307 template<typename SrcIt, typename SrcEndIt, typename DstIt>
308 void _convert(SrcIt src, SrcEndIt src_last, DstIt dst) const noexcept
309 {
310 while (true) {
311 // This loop toggles between converting chunks of ASCII characters and converting
312 // a single non-ASCII character.
313 _convert_ascii(src, src_last, dst);
314
315 if (src == src_last) {
316 break;
317 }
318
319 auto const[code_point, from_valid] = from_encoder_type{}.read(src, src_last);
320 to_encoder_type{}.write(code_point, dst);
321 }
322 }
323};
324
325}} // namespace hi::v1
@ begin
Start from the beginning of the file.
The HikoGUI namespace.
Definition array_generic.hpp:20
DOXYGEN BUG.
Definition algorithm_misc.hpp:20
Character encoder/decoder template.
Definition char_converter.hpp:89
A converter between character encodings.
Definition char_converter.hpp:98
constexpr to_string_type operator()(InRange &&src) const noexcept
Convert text between the given encodings.
Definition char_converter.hpp:220
constexpr OutRange convert(InRange &&src) const noexcept
Convert text between the given encodings.
Definition char_converter.hpp:114
constexpr OutRange convert(It first, EndIt last) const noexcept
Convert text between the given encodings.
Definition char_converter.hpp:156
OutRange read(void const *ptr, size_t size, std::endian endian=std::endian::native) noexcept
Read text from a byte array.
Definition char_converter.hpp:186
T addressof(T... args)
T begin(T... args)
T count(T... args)
T distance(T... args)
T end(T... args)
T memcpy(T... args)
T move(T... args)
T resize(T... args)