// Character type used by this map: plain `char`.
37 using char_type = char;
// Encoder for the "cp-1252" character map; the alias name indicates it is
// the fallback used when the primary decoding cannot be applied.
38 using fallback_encoder_type =
char_map<
"cp-1252">;
// Character type declared by the fallback encoder.
39 using fallback_char_type = fallback_encoder_type::char_type;
// Report a byte order for the buffer described by `ptr` and `size`.
//
// The visible body does not inspect `ptr`, `size` or the `endian` argument:
// it unconditionally returns std::endian::native.
// NOTE(review): the braces around the body are missing from this listing.
41 [[nodiscard]]
constexpr std::endian guess_endian(
void const *ptr,
size_t size, std::endian endian)
const noexcept
43 return std::endian::native;
// NOTE(review): the signature that introduces `cu` is not visible in this
// listing. The calls `read_fallback(char_cast<char>(first_cu))` further down
// suggest these statements are the body of read_fallback — confirm.
//
// View the single character `cu` as a one-element string so it can be fed
// to the iterator-based fallback reader.
48 auto const str = std::string_view(&cu, 1_uz);
49 auto first = str.begin();
// Decode that one character with the fallback encoder. `valid` is bound
// here but not used by the visible return statement.
50 auto const[code_point, valid] = fallback_encoder_type{}.read(first, str.end());
// Hand back the fallback's code point, always flagged `false`.
51 return {code_point,
false};
// Decode a multi-code-unit sequence that begins with `first_cu`.
//
// NOTE(review): only part of this function is visible in this listing; the
// signature, the closing braces and several statements (including the
// declarations of `cu` and `valid`) are missing. The comments below describe
// the visible statements only.
54 template<
typename It,
typename EndIt>
// No further input, or the first code unit has the bit pattern 10xxxxxx
// (a UTF-8 continuation unit, which cannot start a sequence): decode the
// first code unit with read_fallback instead.
57 if (it == last or (first_cu & 0xc0) == 0x80) {
60 return read_fallback(char_cast<char>(first_cu));
// The number of leading one bits in the first code unit is taken as the
// length of the sequence in code units.
63 auto const length = narrow_cast<uint8_t>(std::countl_one(char_cast<uint8_t>(first_cu)));
// Stated assumption: the sequence is at least two code units long.
64 hi_axiom(length >= 2);
// Start accumulating the code point from `cu`.
70 auto cp = char_cast<char32_t>(cu);
// Fetch the next code unit from the input.
73 cu = char_cast<char8_t>(*it);
// The second code unit must have the pattern 10xxxxxx; if it does not, the
// first code unit is decoded with read_fallback.
77 if ((cu & 0xc0) != 0x80) {
80 return read_fallback(char_cast<char>(first_cu));
// Process the remaining code units: the counter starts at 2 (two units
// already handled) and the loop stops once it reaches `length`.
87 for (uint8_t actual_length = 2; actual_length != length; ++actual_length) {
// Yields U+FFFD flagged `false`.
// NOTE(review): the condition guarding this return is not visible here.
90 return {0xfffd,
false};
93 cu = char_cast<char8_t>(*it);
// Every further code unit must also match 10xxxxxx; otherwise the result is
// U+FFFD flagged `false`.
94 if ((cu & 0b11'000000) != 0b10'000000) {
96 return {0xfffd,
false};
// Merge the low six payload bits of this code unit into the code point.
103 cp |= cu & 0b00'111111;
// The result stays valid only if the code point is below 0x110000 ...
108 valid &= cp < 0x11'0000;
// ... lies outside the range 0xd800..0xdfff ...
110 valid &= cp < 0xd800 or cp >= 0xe000;
// ... and `length` equals the count derived from the code point's magnitude
// (1 up to 0x7f, 2 up to 0x7ff, 3 up to 0xffff, otherwise 4), so a sequence
// of any other length for this code point is flagged invalid.
112 valid &= length == narrow_cast<uint8_t>((cp > 0x7f) + (cp > 0x7ff) + (cp > 0xffff) + 1);
// Yields U+FFFD flagged `false`.
// NOTE(review): the condition guarding this return is not visible here.
114 return {0xfffd,
false};
// Decode one code point starting at `it`.
//
// NOTE(review): the signature and braces of this function are not visible
// in this listing; the comments describe the visible statements only.
120 template<
typename It,
typename EndIt>
// Stated assumption: there is at least one code unit to read.
123 hi_axiom(it != last);
// Fetch the first code unit.
125 auto cu = char_cast<char8_t>(*it);
// High bit clear: the code unit is a complete code point on its own. This
// branch is annotated as the likely one.
127 if (not to_bool(cu & 0x80)) [[likely]] {
129 return {char_cast<char32_t>(cu),
true};
// Hand the first code unit to the three-argument read for further decoding.
// NOTE(review): presumably reached only when the high bit is set — the lines
// between the two returns are not visible here.
132 return read(it, last, cu);
// NOTE(review): the signature that introduces `code_point` is not visible in
// this listing; these statements appear to compute how many code units are
// needed to encode `code_point` — confirm against the full file.
//
// Stated assumptions: the code point is below 0x110000 and is not in the
// range 0xd800..0xdfff.
138 hi_axiom(code_point < 0x11'0000);
139 hi_axiom(not(code_point >= 0xd800 and code_point < 0xe000));
// Each comparison contributes 1 when true, so the first element is 1 for
// code points up to 0x7f, 2 up to 0x7ff, 3 up to 0xffff and 4 above that.
// The second element is always `true`.
141 return {narrow_cast<uint8_t>((code_point > 0x7f) + (code_point > 0x7ff) + (code_point > 0xffff) + 1),
true};
// Encode `code_point` as one or more code units written through `dst`.
//
// NOTE(review): the braces, the statements that store `cu` through `dst`
// and the statements that update `shift` are not visible in this listing;
// the comments describe the visible statements only.
144 template<
typename It>
145 constexpr void write(
char32_t code_point, It& dst)
const noexcept
// Stated assumptions: the code point is below 0x110000 and is not in the
// range 0xd800..0xdfff.
147 hi_axiom(code_point < 0x11'0000);
148 hi_axiom(not(code_point >= 0xd800 and code_point < 0xe000));
// Count of thresholds exceeded: 0 for code points up to 0x7f, 1 up to
// 0x7ff, 2 up to 0xffff, 3 above that.
150 auto const num_cu = truncate<uint8_t>((code_point > 0x7f) + (code_point > 0x7ff) + (code_point > 0xffff));
// Start from the bit pattern 0x80 held in a signed 8-bit value and shift it
// right by `num_cu`.
// NOTE(review): presumably relies on the right shift of a negative value
// replicating the sign bit to build the run of leading one bits — confirm
// for the targeted language standard / compilers.
152 auto leading_ones = char_cast<int8_t>(uint8_t{0x80});
153 leading_ones >>= num_cu;
// Six bits of shift per counted code unit.
158 auto shift = num_cu * 6;
// First code unit: the bits of the code point above `shift`, combined with
// the leading-ones pattern.
160 auto cu = truncate<uint8_t>(code_point >> shift);
161 cu |= truncate<uint8_t>(leading_ones);
// A later code unit is again taken from the code point shifted by `shift`.
172 cu = truncate<uint8_t>(code_point >> shift);
184#if defined(HI_HAS_SSE2)
// Load a 128-bit chunk starting at the address of the element `it` refers
// to, using the unaligned SSE2 load intrinsic.
//
// The address of `*it` is reinterpreted as a pointer to __m128i, so 16 bytes
// are read from that address.
// NOTE(review): presumably the caller guarantees 16 readable contiguous
// bytes at `it` — confirm against the call sites.
185 template<
typename It>
186 hi_force_inline __m128i read_ascii_chunk16(It it)
const noexcept
188 return _mm_loadu_si128(
reinterpret_cast<__m128i
const *
>(
std::addressof(*it)));
// Store the 128-bit `chunk` at the address of the element `dst` refers to,
// using the unaligned SSE2 store intrinsic.
//
// The address of `*dst` is reinterpreted as a pointer to __m128i, so 16
// bytes are written starting at that address.
// NOTE(review): presumably the caller guarantees 16 writable contiguous
// bytes at `dst` — confirm against the call sites.
191 template<
typename It>
192 hi_force_inline
void write_ascii_chunk16(__m128i chunk, It dst)
const noexcept
194 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(
std::addressof(*dst)), chunk);