HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
char_converter.hpp
1// Copyright Take Vos 2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "../fixed_string.hpp"
8#include "../memory.hpp"
9#include "../endian.hpp"
10#include <string>
11#include <string_view>
12#if defined(HI_HAS_SSE2)
13#include <emmintrin.h>
14#endif
15
16namespace hi::inline v1 {
17
// Character encoder/decoder template, parameterized on the name of an encoding.
// NOTE(review): the body is empty in this view; presumably per-encoding
// specializations supply the actual members -- confirm against the full header.
33template<basic_fixed_string Encoding>
34struct char_map {
72};
73
74template<basic_fixed_string From, basic_fixed_string To>
76public:
// Character type of the source side: the `char_type` member of `from_encoder_type`.
79 using from_char_type = from_encoder_type::char_type;
// Character type of the destination side: the `char_type` member of `to_encoder_type`.
80 using to_char_type = to_encoder_type::char_type;
83
84 template<typename OutRange, typename InRange>
85 [[nodiscard]] constexpr OutRange convert(InRange&& src) const noexcept
86 {
87 using std::cbegin;
88 using std::cend;
89 using std::begin;
90 using std::end;
91
92 hilet[size, valid] = _size(cbegin(src), cend(src));
93
94 auto r = OutRange{};
95 if constexpr (From == To and std::is_same_v<InRange, OutRange>) {
96 if (valid) {
97 r = std::forward<InRange>(src);
98 // If and identity conversion is requested and the src is valid, then shortcut by return the src.
99 return r;
100 }
101 }
102
103 if (size == 0) {
104 return r;
105 }
106
107 r.resize(size);
108 if (From == To and valid) {
109 hi_axiom(size != 0);
110
111 using std::size;
112 std::memcpy(std::addressof(*begin(r)), std::addressof(*cbegin(src)), size(src) * sizeof(from_char_type));
113 } else {
114 _convert(cbegin(src), cend(src), begin(r));
115 }
116 return r;
117 }
118
119 template<typename OutRange, typename It, typename EndIt>
120 [[nodiscard]] constexpr OutRange convert(It first, EndIt last) const noexcept
121 {
122 using std::begin;
123
124 hilet[size, valid] = _size(first, last);
125 auto r = OutRange{};
126 if (size == 0) {
127 return r;
128 }
129
130 r.resize(size);
131 if (From == To and valid) {
132 hi_axiom(size != 0);
133
134 std::memcpy(std::addressof(*begin(r)), std::addressof(*first), std::distance(first, last) * sizeof(from_char_type));
135 } else {
136 _convert(first, last, begin(r));
137 }
138 return r;
139 }
140
141 template<typename OutRange = std::basic_string<to_char_type>>
142 [[nodiscard]] OutRange read(void const *ptr, size_t size, std::endian endian = std::endian::native) noexcept
143 {
144 hi_axiom(ptr != nullptr);
145
146 hilet num_chars = size / sizeof(from_char_type);
147
148 endian = from_encoder_type{}.guess_endian(ptr, size, endian);
149 if (endian == std::endian::native) {
150 if (floor(ptr, sizeof(from_char_type)) == ptr) {
151 return convert<OutRange>(
152 reinterpret_cast<from_char_type const *>(ptr), reinterpret_cast<from_char_type const *>(ptr) + num_chars);
153 } else {
155 tmp.resize(num_chars);
156 std::memcpy(std::addressof(*tmp.begin()), ptr, num_chars * sizeof(from_char_type));
157 return convert<OutRange>(std::move(tmp));
158 }
159 } else {
161 tmp.resize(num_chars);
162 std::memcpy(std::addressof(*tmp.begin()), ptr, num_chars * sizeof(from_char_type));
163 for (auto &c: tmp) {
164 c = byte_swap(c);
165 }
166 return convert<OutRange>(std::move(tmp));
167 }
168 }
169
170 template<typename InRange>
171 [[nodiscard]] constexpr to_string_type operator()(InRange&& src) const noexcept
172 {
173 return convert<to_string_type>(std::forward<InRange>(src));
174 }
175
176private:
// Type holding one chunk of 16 characters for the ASCII fast path: the SSE2
// register type `__m128i` when HI_HAS_SSE2 is defined, `void` otherwise.
177#if defined(HI_HAS_SSE2)
178 using chunk16_type = __m128i;
179#else
180 using chunk16_type = void;
181#endif
182
// Both flags are hard-coded to true; `_size_ascii()` and `_convert_ascii()`
// test them with `if constexpr` before using the encoders' chunk16 functions.
183 constexpr static bool _has_read_ascii_chunk16 = true;
184 constexpr static bool _has_write_ascii_chunk16 = true;
185
186 template<typename It, typename EndIt>
187 [[nodiscard]] constexpr void _size_ascii(It& it, EndIt last, size_t& count) const noexcept
188 {
189 if (not std::is_constant_evaluated()) {
190#if defined(HI_HAS_SSE2)
191 if constexpr (_has_read_ascii_chunk16 and _has_write_ascii_chunk16) {
192 while (std::distance(it, last) >= 16) {
193 hilet chunk = from_encoder_type{}.read_ascii_chunk16(it);
194 hilet ascii_mask = _mm_movemask_epi8(chunk);
195 if (ascii_mask) {
196 // This chunk contains non-ASCII characters.
197 auto partial_count = std::countr_zero(truncate<uint16_t>(ascii_mask));
198 it += partial_count;
199 count += partial_count;
200 break;
201 }
202 it += 16;
203 count += 16;
204 }
205 }
206#endif
207 }
208 }
209
210 template<typename SrcIt, typename SrcEndIt, typename DstIt>
211 void _convert_ascii(SrcIt& src, SrcEndIt src_last, DstIt& dst) const noexcept
212 {
213 if (not std::is_constant_evaluated()) {
214#if defined(HI_HAS_SSE2)
215 if constexpr (_has_read_ascii_chunk16 and _has_write_ascii_chunk16) {
216 while (std::distance(src, src_last) >= 16) {
217 hilet chunk = from_encoder_type{}.read_ascii_chunk16(src);
218 hilet ascii_mask = _mm_movemask_epi8(chunk);
219 if (ascii_mask) {
220 // This chunk contains non-ASCII characters.
221 break;
222 }
223 // The complete chunk only contains ASCII characters.
224 to_encoder_type{}.write_ascii_chunk16(chunk, dst);
225 src += 16;
226 dst += 16;
227 }
228 }
229#endif
230 }
231 }
232
233 template<typename It, typename EndIt>
234 [[nodiscard]] constexpr std::pair<size_t, bool> _size(It it, EndIt last) const noexcept
235 {
236 auto count = 0_uz;
237 auto valid = true;
238 while (true) {
239 // This loop toggles between converting chunks of ASCII characters and converting
240 // a single non-ASCII character.
241 _size_ascii(it, last, count);
242
243 if (it == last) {
244 break;
245 }
246
247 hilet[code_point, read_valid] = from_encoder_type{}.read(it, last);
248 valid &= read_valid;
249
250 hilet[write_count, write_valid] = to_encoder_type{}.size(code_point);
251 count += write_count;
252 valid &= write_valid;
253 }
254
255 return {count, valid};
256 }
257
258 template<typename SrcIt, typename SrcEndIt, typename DstIt>
259 void _convert(SrcIt src, SrcEndIt src_last, DstIt dst) const noexcept
260 {
261 while (true) {
262 // This loop toggles between converting chunks of ASCII characters and converting
263 // a single non-ASCII character.
264 _convert_ascii(src, src_last, dst);
265
266 if (src == src_last) {
267 break;
268 }
269
270 hilet[code_point, from_valid] = from_encoder_type{}.read(src, src_last);
271 to_encoder_type{}.write(code_point, dst);
272 }
273 }
274};
275
276} // namespace hi::inline v1
#define hilet
Invariant should be the default for variables.
Definition required.hpp:23
Character encoder/decoder template.
Definition char_converter.hpp:34
Definition char_converter.hpp:75
T addressof(T... args)
T begin(T... args)
T distance(T... args)
T end(T... args)
T memcpy(T... args)
T move(T... args)
T resize(T... args)