HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
char_converter.hpp
Go to the documentation of this file.
1// Copyright Take Vos 2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
9#pragma once
10
11#include "../fixed_string.hpp"
12#include "../memory.hpp"
13#include "../endian.hpp"
14#include <string>
15#include <string_view>
16#if defined(HI_HAS_SSE2)
17#include <emmintrin.h>
18#endif
19
20namespace hi { inline namespace v1 {
21
84template<fixed_string Encoding>
85struct char_map;
86
93template<fixed_string From, fixed_string To>
95public:
96 using from_encoder_type = char_map<From>;
97 using to_encoder_type = char_map<To>;
98 using from_char_type = from_encoder_type::char_type;
99 using to_char_type = to_encoder_type::char_type;
100 using from_string_type = std::basic_string<from_char_type>;
101 using to_string_type = std::basic_string<to_char_type>;
102
109 template<typename OutRange, typename InRange>
110 [[nodiscard]] constexpr OutRange convert(InRange&& src) const noexcept
111 {
112 using std::cbegin;
113 using std::cend;
114 using std::begin;
115 using std::end;
116
117 hilet[size, valid] = _size(cbegin(src), cend(src));
118
119 auto r = OutRange{};
120 if constexpr (From == To and std::is_same_v<InRange, OutRange>) {
121 if (valid) {
122 r = std::forward<InRange>(src);
123 // If and identity conversion is requested and the src is valid, then shortcut by return the src.
124 return r;
125 }
126 }
127
128 if (size == 0) {
129 return r;
130 }
131
132 r.resize(size);
133 if (From == To and valid) {
134 hi_axiom(size != 0);
135
136 using std::size;
137 std::memcpy(std::addressof(*begin(r)), std::addressof(*cbegin(src)), size(src) * sizeof(from_char_type));
138 } else {
139 _convert(cbegin(src), cend(src), begin(r));
140 }
141 return r;
142 }
143
151 template<typename OutRange, typename It, typename EndIt>
152 [[nodiscard]] constexpr OutRange convert(It first, EndIt last) const noexcept
153 {
154 using std::begin;
155
156 hilet[size, valid] = _size(first, last);
157 auto r = OutRange{};
158 if (size == 0) {
159 return r;
160 }
161
162 r.resize(size);
163 if (From == To and valid) {
164 hi_axiom(size != 0);
165
166 std::memcpy(std::addressof(*begin(r)), std::addressof(*first), std::distance(first, last) * sizeof(from_char_type));
167 } else {
168 _convert(first, last, begin(r));
169 }
170 return r;
171 }
172
181 template<typename OutRange = std::basic_string<to_char_type>>
182 [[nodiscard]] OutRange read(void const *ptr, size_t size, std::endian endian = std::endian::native) noexcept
183 {
184 hi_axiom(ptr != nullptr);
185
186 hilet num_chars = size / sizeof(from_char_type);
187
188 endian = from_encoder_type{}.guess_endian(ptr, size, endian);
189 if (endian == std::endian::native) {
190 if (floor(ptr, sizeof(from_char_type)) == ptr) {
191 return convert<OutRange>(
192 reinterpret_cast<from_char_type const *>(ptr), reinterpret_cast<from_char_type const *>(ptr) + num_chars);
193 } else {
195 tmp.resize(num_chars);
196 std::memcpy(std::addressof(*tmp.begin()), ptr, num_chars * sizeof(from_char_type));
197 return convert<OutRange>(std::move(tmp));
198 }
199 } else {
201 tmp.resize(num_chars);
202 std::memcpy(std::addressof(*tmp.begin()), ptr, num_chars * sizeof(from_char_type));
203 for (auto& c : tmp) {
204 c = byte_swap(c);
205 }
206 return convert<OutRange>(std::move(tmp));
207 }
208 }
209
215 template<typename InRange>
216 [[nodiscard]] constexpr to_string_type operator()(InRange&& src) const noexcept
217 {
218 return convert<to_string_type>(std::forward<InRange>(src));
219 }
220
221private:
// Type used to carry a block of 16 code-units through the ASCII fast path.
#ifdef HI_HAS_SSE2
using chunk16_type = __m128i;
#else
using chunk16_type = void;
#endif

// Both flags must be true for the 16-at-a-time ASCII fast path to be compiled in.
static constexpr bool _has_read_ascii_chunk16 = true;
static constexpr bool _has_write_ascii_chunk16 = true;
230
231 template<typename It, typename EndIt>
232 [[nodiscard]] constexpr void _size_ascii(It& it, EndIt last, size_t& count) const noexcept
233 {
234 if (not std::is_constant_evaluated()) {
235#if defined(HI_HAS_SSE2)
236 if constexpr (_has_read_ascii_chunk16 and _has_write_ascii_chunk16) {
237 while (std::distance(it, last) >= 16) {
238 hilet chunk = from_encoder_type{}.read_ascii_chunk16(it);
239 hilet ascii_mask = _mm_movemask_epi8(chunk);
240 if (ascii_mask) {
241 // This chunk contains non-ASCII characters.
242 auto partial_count = std::countr_zero(truncate<uint16_t>(ascii_mask));
243 it += partial_count;
244 count += partial_count;
245 break;
246 }
247 it += 16;
248 count += 16;
249 }
250 }
251#endif
252 }
253 }
254
255 template<typename SrcIt, typename SrcEndIt, typename DstIt>
256 void _convert_ascii(SrcIt& src, SrcEndIt src_last, DstIt& dst) const noexcept
257 {
258 if (not std::is_constant_evaluated()) {
259#if defined(HI_HAS_SSE2)
260 if constexpr (_has_read_ascii_chunk16 and _has_write_ascii_chunk16) {
261 while (std::distance(src, src_last) >= 16) {
262 hilet chunk = from_encoder_type{}.read_ascii_chunk16(src);
263 hilet ascii_mask = _mm_movemask_epi8(chunk);
264 if (ascii_mask) {
265 // This chunk contains non-ASCII characters.
266 break;
267 }
268 // The complete chunk only contains ASCII characters.
269 to_encoder_type{}.write_ascii_chunk16(chunk, dst);
270 src += 16;
271 dst += 16;
272 }
273 }
274#endif
275 }
276 }
277
278 template<typename It, typename EndIt>
279 [[nodiscard]] constexpr std::pair<size_t, bool> _size(It it, EndIt last) const noexcept
280 {
281 auto count = 0_uz;
282 auto valid = true;
283 while (true) {
284 // This loop toggles between converting chunks of ASCII characters and converting
285 // a single non-ASCII character.
286 _size_ascii(it, last, count);
287
288 if (it == last) {
289 break;
290 }
291
292 hilet[code_point, read_valid] = from_encoder_type{}.read(it, last);
293 valid &= read_valid;
294
295 hilet[write_count, write_valid] = to_encoder_type{}.size(code_point);
296 count += write_count;
297 valid &= write_valid;
298 }
299
300 return {count, valid};
301 }
302
303 template<typename SrcIt, typename SrcEndIt, typename DstIt>
304 void _convert(SrcIt src, SrcEndIt src_last, DstIt dst) const noexcept
305 {
306 while (true) {
307 // This loop toggles between converting chunks of ASCII characters and converting
308 // a single non-ASCII character.
309 _convert_ascii(src, src_last, dst);
310
311 if (src == src_last) {
312 break;
313 }
314
315 hilet[code_point, from_valid] = from_encoder_type{}.read(src, src_last);
316 to_encoder_type{}.write(code_point, dst);
317 }
318 }
319};
320
321}} // namespace hi::v1
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
DOXYGEN BUG.
Definition algorithm.hpp:15
The HikoGUI namespace.
Definition ascii.hpp:19
Character encoder/decoder template.
Definition char_converter.hpp:85
A converter between character encodings.
Definition char_converter.hpp:94
constexpr to_string_type operator()(InRange &&src) const noexcept
Convert text between the given encodings.
Definition char_converter.hpp:216
constexpr OutRange convert(InRange &&src) const noexcept
Convert text between the given encodings.
Definition char_converter.hpp:110
constexpr OutRange convert(It first, EndIt last) const noexcept
Convert text between the given encodings.
Definition char_converter.hpp:152
OutRange read(void const *ptr, size_t size, std::endian endian=std::endian::native) noexcept
Read text from a byte array.
Definition char_converter.hpp:182
T addressof(T... args)
T begin(T... args)
T count(T... args)
T distance(T... args)
T end(T... args)
T memcpy(T... args)
T move(T... args)
T resize(T... args)