HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
char_converter.hpp
Go to the documentation of this file.
// Copyright Take Vos 2022.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
9#pragma once
10
11#include "../utility/utility.hpp"
12#include "../macros.hpp"
13#include <string>
14#include <string_view>
15#if defined(HI_HAS_SSE2)
16#include <emmintrin.h>
17#endif
18
19
20
21namespace hi { inline namespace v1 {
22
85template<fixed_string Encoding>
86struct char_map;
87
94template<fixed_string From, fixed_string To>
96public:
97 using from_encoder_type = char_map<From>;
98 using to_encoder_type = char_map<To>;
99 using from_char_type = from_encoder_type::char_type;
100 using to_char_type = to_encoder_type::char_type;
101 using from_string_type = std::basic_string<from_char_type>;
102 using to_string_type = std::basic_string<to_char_type>;
103
110 template<typename OutRange, typename InRange>
111 [[nodiscard]] constexpr OutRange convert(InRange&& src) const noexcept
112 {
113 using std::cbegin;
114 using std::cend;
115 using std::begin;
116 using std::end;
117
118 hilet[size, valid] = _size(cbegin(src), cend(src));
119
120 auto r = OutRange{};
121 if constexpr (From == To and std::is_same_v<InRange, OutRange>) {
122 if (valid) {
123 r = std::forward<InRange>(src);
124 // If and identity conversion is requested and the src is valid, then shortcut by return the src.
125 return r;
126 }
127 }
128
129 if (size == 0) {
130 return r;
131 }
132
133 r.resize(size);
134 if (From == To and valid) {
135 hi_axiom(size != 0);
136
137 using std::size;
138 std::memcpy(std::addressof(*begin(r)), std::addressof(*cbegin(src)), size(src) * sizeof(from_char_type));
139 } else {
140 _convert(cbegin(src), cend(src), begin(r));
141 }
142 return r;
143 }
144
152 template<typename OutRange, typename It, typename EndIt>
153 [[nodiscard]] constexpr OutRange convert(It first, EndIt last) const noexcept
154 {
155 using std::begin;
156
157 hilet[size, valid] = _size(first, last);
158 auto r = OutRange{};
159 if (size == 0) {
160 return r;
161 }
162
163 r.resize(size);
164 if (From == To and valid) {
165 hi_axiom(size != 0);
166
167 std::memcpy(std::addressof(*begin(r)), std::addressof(*first), std::distance(first, last) * sizeof(from_char_type));
168 } else {
169 _convert(first, last, begin(r));
170 }
171 return r;
172 }
173
182 template<typename OutRange = std::basic_string<to_char_type>>
183 [[nodiscard]] OutRange read(void const *ptr, size_t size, std::endian endian = std::endian::native) noexcept
184 {
185 hi_assert_not_null(ptr);
186
187 hilet num_chars = size / sizeof(from_char_type);
188
189 endian = from_encoder_type{}.guess_endian(ptr, size, endian);
190 if (endian == std::endian::native) {
191 if (floor(ptr, sizeof(from_char_type)) == ptr) {
192 return convert<OutRange>(
193 reinterpret_cast<from_char_type const *>(ptr), reinterpret_cast<from_char_type const *>(ptr) + num_chars);
194 } else {
197 std::memcpy(std::addressof(*tmp.begin()), ptr, num_chars * sizeof(from_char_type));
199 }
200 } else {
203 std::memcpy(std::addressof(*tmp.begin()), ptr, num_chars * sizeof(from_char_type));
204 for (auto& c : tmp) {
205 c = std::byteswap(c);
206 }
208 }
209 }
210
216 template<typename InRange>
217 [[nodiscard]] constexpr to_string_type operator()(InRange&& src) const noexcept
218 {
219 return convert<to_string_type>(std::forward<InRange>(src));
220 }
221
222private:
223#if defined(HI_HAS_SSE2)
224 using chunk16_type = __m128i;
225#else
226 using chunk16_type = void;
227#endif
228
229 constexpr static bool _has_read_ascii_chunk16 = true;
230 constexpr static bool _has_write_ascii_chunk16 = true;
231
232 template<typename It, typename EndIt>
233 [[nodiscard]] constexpr void _size_ascii(It& it, EndIt last, size_t& count) const noexcept
234 {
235 if (not std::is_constant_evaluated()) {
236#if defined(HI_HAS_SSE2)
237 if constexpr (_has_read_ascii_chunk16 and _has_write_ascii_chunk16) {
238 while (std::distance(it, last) >= 16) {
239 hilet chunk = from_encoder_type{}.read_ascii_chunk16(it);
241 if (ascii_mask) {
242 // This chunk contains non-ASCII characters.
243 auto partial_count = std::countr_zero(truncate<uint16_t>(ascii_mask));
244 it += partial_count;
245 count += partial_count;
246 break;
247 }
248 it += 16;
249 count += 16;
250 }
251 }
252#endif
253 }
254 }
255
256 template<typename SrcIt, typename SrcEndIt, typename DstIt>
257 void _convert_ascii(SrcIt& src, SrcEndIt src_last, DstIt& dst) const noexcept
258 {
259 if (not std::is_constant_evaluated()) {
260#if defined(HI_HAS_SSE2)
261 if constexpr (_has_read_ascii_chunk16 and _has_write_ascii_chunk16) {
262 while (std::distance(src, src_last) >= 16) {
263 hilet chunk = from_encoder_type{}.read_ascii_chunk16(src);
265 if (ascii_mask) {
266 // This chunk contains non-ASCII characters.
267 break;
268 }
269 // The complete chunk only contains ASCII characters.
270 to_encoder_type{}.write_ascii_chunk16(chunk, dst);
271 src += 16;
272 dst += 16;
273 }
274 }
275#endif
276 }
277 }
278
279 template<typename It, typename EndIt>
280 [[nodiscard]] constexpr std::pair<size_t, bool> _size(It it, EndIt last) const noexcept
281 {
282 auto count = 0_uz;
283 auto valid = true;
284 while (true) {
285 // This loop toggles between converting chunks of ASCII characters and converting
286 // a single non-ASCII character.
287 _size_ascii(it, last, count);
288
289 if (it == last) {
290 break;
291 }
292
293 hilet[code_point, read_valid] = from_encoder_type{}.read(it, last);
294 valid &= read_valid;
295
296 hilet[write_count, write_valid] = to_encoder_type{}.size(code_point);
298 valid &= write_valid;
299 }
300
301 return {count, valid};
302 }
303
304 template<typename SrcIt, typename SrcEndIt, typename DstIt>
305 void _convert(SrcIt src, SrcEndIt src_last, DstIt dst) const noexcept
306 {
307 while (true) {
308 // This loop toggles between converting chunks of ASCII characters and converting
309 // a single non-ASCII character.
310 _convert_ascii(src, src_last, dst);
311
312 if (src == src_last) {
313 break;
314 }
315
316 hilet[code_point, from_valid] = from_encoder_type{}.read(src, src_last);
317 to_encoder_type{}.write(code_point, dst);
318 }
319 }
320};
321
322}} // namespace hi::v1
@ begin
Start from the beginning of the file.
DOXYGEN BUG.
Definition algorithm.hpp:16
geometry/margins.hpp
Definition lookahead_iterator.hpp:5
constexpr Out narrow_cast(In const &rhs) noexcept
Cast numeric values without loss of precision.
Definition cast.hpp:377
Character encoder/decoder template.
Definition char_converter.hpp:86
A converter between character encodings.
Definition char_converter.hpp:95
constexpr to_string_type operator()(InRange &&src) const noexcept
Convert text between the given encodings.
Definition char_converter.hpp:217
constexpr OutRange convert(InRange &&src) const noexcept
Convert text between the given encodings.
Definition char_converter.hpp:111
constexpr OutRange convert(It first, EndIt last) const noexcept
Convert text between the given encodings.
Definition char_converter.hpp:153
OutRange read(void const *ptr, size_t size, std::endian endian=std::endian::native) noexcept
Read text from a byte array.
Definition char_converter.hpp:183
T addressof(T... args)
T begin(T... args)
T count(T... args)
T distance(T... args)
T end(T... args)
T memcpy(T... args)
T move(T... args)
T resize(T... args)