HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
char_converter.hpp
Go to the documentation of this file.
1// Copyright Take Vos 2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
9#pragma once
10
11#include "../utility/module.hpp"
12#include <string>
13#include <string_view>
14#if defined(HI_HAS_SSE2)
15#include <emmintrin.h>
16#endif
17
18namespace hi { inline namespace v1 {
19
82template<fixed_string Encoding>
83struct char_map;
84
91template<fixed_string From, fixed_string To>
93public:
94 using from_encoder_type = char_map<From>;
95 using to_encoder_type = char_map<To>;
96 using from_char_type = from_encoder_type::char_type;
97 using to_char_type = to_encoder_type::char_type;
98 using from_string_type = std::basic_string<from_char_type>;
99 using to_string_type = std::basic_string<to_char_type>;
100
107 template<typename OutRange, typename InRange>
108 [[nodiscard]] constexpr OutRange convert(InRange&& src) const noexcept
109 {
110 using std::cbegin;
111 using std::cend;
112 using std::begin;
113 using std::end;
114
115 hilet[size, valid] = _size(cbegin(src), cend(src));
116
117 auto r = OutRange{};
118 if constexpr (From == To and std::is_same_v<InRange, OutRange>) {
119 if (valid) {
120 r = std::forward<InRange>(src);
121 // If and identity conversion is requested and the src is valid, then shortcut by return the src.
122 return r;
123 }
124 }
125
126 if (size == 0) {
127 return r;
128 }
129
130 r.resize(size);
131 if (From == To and valid) {
132 hi_axiom(size != 0);
133
134 using std::size;
135 std::memcpy(std::addressof(*begin(r)), std::addressof(*cbegin(src)), size(src) * sizeof(from_char_type));
136 } else {
137 _convert(cbegin(src), cend(src), begin(r));
138 }
139 return r;
140 }
141
149 template<typename OutRange, typename It, typename EndIt>
150 [[nodiscard]] constexpr OutRange convert(It first, EndIt last) const noexcept
151 {
152 using std::begin;
153
154 hilet[size, valid] = _size(first, last);
155 auto r = OutRange{};
156 if (size == 0) {
157 return r;
158 }
159
160 r.resize(size);
161 if (From == To and valid) {
162 hi_axiom(size != 0);
163
164 std::memcpy(std::addressof(*begin(r)), std::addressof(*first), std::distance(first, last) * sizeof(from_char_type));
165 } else {
166 _convert(first, last, begin(r));
167 }
168 return r;
169 }
170
179 template<typename OutRange = std::basic_string<to_char_type>>
180 [[nodiscard]] OutRange read(void const *ptr, size_t size, std::endian endian = std::endian::native) noexcept
181 {
183
184 hilet num_chars = size / sizeof(from_char_type);
185
186 endian = from_encoder_type{}.guess_endian(ptr, size, endian);
187 if (endian == std::endian::native) {
188 if (floor(ptr, sizeof(from_char_type)) == ptr) {
189 return convert<OutRange>(
190 reinterpret_cast<from_char_type const *>(ptr), reinterpret_cast<from_char_type const *>(ptr) + num_chars);
191 } else {
193 tmp.resize(num_chars);
194 std::memcpy(std::addressof(*tmp.begin()), ptr, num_chars * sizeof(from_char_type));
195 return convert<OutRange>(std::move(tmp));
196 }
197 } else {
199 tmp.resize(num_chars);
200 std::memcpy(std::addressof(*tmp.begin()), ptr, num_chars * sizeof(from_char_type));
201 for (auto& c : tmp) {
202 c = byte_swap(c);
203 }
204 return convert<OutRange>(std::move(tmp));
205 }
206 }
207
213 template<typename InRange>
214 [[nodiscard]] constexpr to_string_type operator()(InRange&& src) const noexcept
215 {
216 return convert<to_string_type>(std::forward<InRange>(src));
217 }
218
219private:
/** The type of a chunk of 16 characters: an SSE2 register when SSE2 is available, otherwise
 * a `void` placeholder.
 */
#ifdef HI_HAS_SSE2
using chunk16_type = __m128i;
#else
using chunk16_type = void;
#endif

/** Enables the 16-characters-at-a-time ASCII paths of `_size_ascii()` and `_convert_ascii()`;
 * both flags must be true for either path to be compiled in.
 */
static constexpr bool _has_read_ascii_chunk16 = true;
static constexpr bool _has_write_ascii_chunk16 = true;
228
229 template<typename It, typename EndIt>
230 [[nodiscard]] constexpr void _size_ascii(It& it, EndIt last, size_t& count) const noexcept
231 {
232 if (not std::is_constant_evaluated()) {
233#if defined(HI_HAS_SSE2)
234 if constexpr (_has_read_ascii_chunk16 and _has_write_ascii_chunk16) {
235 while (std::distance(it, last) >= 16) {
236 hilet chunk = from_encoder_type{}.read_ascii_chunk16(it);
237 hilet ascii_mask = _mm_movemask_epi8(chunk);
238 if (ascii_mask) {
239 // This chunk contains non-ASCII characters.
240 auto partial_count = std::countr_zero(truncate<uint16_t>(ascii_mask));
241 it += partial_count;
242 count += partial_count;
243 break;
244 }
245 it += 16;
246 count += 16;
247 }
248 }
249#endif
250 }
251 }
252
253 template<typename SrcIt, typename SrcEndIt, typename DstIt>
254 void _convert_ascii(SrcIt& src, SrcEndIt src_last, DstIt& dst) const noexcept
255 {
256 if (not std::is_constant_evaluated()) {
257#if defined(HI_HAS_SSE2)
258 if constexpr (_has_read_ascii_chunk16 and _has_write_ascii_chunk16) {
259 while (std::distance(src, src_last) >= 16) {
260 hilet chunk = from_encoder_type{}.read_ascii_chunk16(src);
261 hilet ascii_mask = _mm_movemask_epi8(chunk);
262 if (ascii_mask) {
263 // This chunk contains non-ASCII characters.
264 break;
265 }
266 // The complete chunk only contains ASCII characters.
267 to_encoder_type{}.write_ascii_chunk16(chunk, dst);
268 src += 16;
269 dst += 16;
270 }
271 }
272#endif
273 }
274 }
275
276 template<typename It, typename EndIt>
277 [[nodiscard]] constexpr std::pair<size_t, bool> _size(It it, EndIt last) const noexcept
278 {
279 auto count = 0_uz;
280 auto valid = true;
281 while (true) {
282 // This loop toggles between converting chunks of ASCII characters and converting
283 // a single non-ASCII character.
284 _size_ascii(it, last, count);
285
286 if (it == last) {
287 break;
288 }
289
290 hilet[code_point, read_valid] = from_encoder_type{}.read(it, last);
291 valid &= read_valid;
292
293 hilet[write_count, write_valid] = to_encoder_type{}.size(code_point);
294 count += write_count;
295 valid &= write_valid;
296 }
297
298 return {count, valid};
299 }
300
301 template<typename SrcIt, typename SrcEndIt, typename DstIt>
302 void _convert(SrcIt src, SrcEndIt src_last, DstIt dst) const noexcept
303 {
304 while (true) {
305 // This loop toggles between converting chunks of ASCII characters and converting
306 // a single non-ASCII character.
307 _convert_ascii(src, src_last, dst);
308
309 if (src == src_last) {
310 break;
311 }
312
313 hilet[code_point, from_valid] = from_encoder_type{}.read(src, src_last);
314 to_encoder_type{}.write(code_point, dst);
315 }
316 }
317};
318
319}} // namespace hi::v1
#define hi_axiom(expression,...)
Specify an axiom; an expression that is true.
Definition assert.hpp:238
#define hi_assert_not_null(x,...)
Assert that an expression is not nullptr.
Definition assert.hpp:223
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
@ begin
Start from the beginning of the file.
DOXYGEN BUG.
Definition algorithm.hpp:13
geometry/margins.hpp
Definition cache.hpp:11
Character encoder/decoder template.
Definition char_converter.hpp:83
A converter between character encodings.
Definition char_converter.hpp:92
constexpr to_string_type operator()(InRange &&src) const noexcept
Convert text between the given encodings.
Definition char_converter.hpp:214
constexpr OutRange convert(InRange &&src) const noexcept
Convert text between the given encodings.
Definition char_converter.hpp:108
constexpr OutRange convert(It first, EndIt last) const noexcept
Convert text between the given encodings.
Definition char_converter.hpp:150
OutRange read(void const *ptr, size_t size, std::endian endian=std::endian::native) noexcept
Read text from a byte array.
Definition char_converter.hpp:180
T addressof(T... args)
T begin(T... args)
T count(T... args)
T distance(T... args)
T end(T... args)
T memcpy(T... args)
T move(T... args)
T resize(T... args)