HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
utf_16.hpp
Go to the documentation of this file.
1// Copyright Take Vos 2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
8#pragma once
9
10#include "char_converter.hpp"
11
12hi_warning_push();
13// C26490: Don't use reinterpret_cast.
14// Needed for SIMD intrinsics.
15hi_warning_ignore_msvc(26490);
16
17namespace hi { inline namespace v1 {
18
23template<>
24struct char_map<"utf-16"> {
25 using char_type = char16_t;
26
27 [[nodiscard]] std::endian guess_endian(void const *ptr, size_t size, std::endian endian) const noexcept
28 {
30 auto *ptr_ = static_cast<uint8_t const *>(ptr);
32
33 if (size < 2) {
34 return std::endian::native;
35 } else {
36 // Check for BOM.
37 if (ptr_[0] == 0xfe and ptr_[1] == 0xff) {
38 return std::endian::big;
39 } else if (ptr_[0] == 0xff and ptr_[1] == 0xfe) {
40 return std::endian::little;
41 }
42
43 // Check for sequences of zeros.
44 auto count = std::array<size_t, 2>{};
45 for (auto i = 0; i != size; ++i) {
46 count[i % 2] = ptr_[i] == 0 ? count[i % 2] + 1 : 0;
47 if (count[i % 2] >= 8) {
48 return i % 2 == 0 ? std::endian::big : std::endian::little;
49 }
50 }
51
52 return endian;
53 }
54 }
55
56 template<typename It, typename EndIt>
57 [[nodiscard]] constexpr std::pair<char32_t, bool> read(It& it, EndIt last) const noexcept
58 {
59 hi_axiom(it != last);
60
61 if (auto cu = *it++; cu < 0xd800) {
62 return {char_cast<char32_t>(cu), true};
63
64 } else if (cu < 0xdc00) {
65 if (it == last) {
66 // first surrogate at end of string.
67 return {0xfffd, false};
68
69 } else {
70 auto cp = char_cast<char32_t>(cu & 0x03ff);
71 cu = *it;
72 if (cu >= 0xdc00 and cu < 0xe000) {
73 ++it;
74 cp <<= 10;
75 cp |= cu & 0x03ff;
76 cp += 0x01'0000;
77 return {cp, true};
78
79 } else {
80 // unpaired surrogate.
81 return {0xfffd, false};
82 }
83 }
84
85 } else if (cu < 0xe000) {
86 // Invalid low surrogate.
87 return {0xfffd, false};
88
89 } else {
90 return {cu, true};
91 }
92 }
93
94 [[nodiscard]] constexpr std::pair<uint8_t, bool> size(char32_t code_point) const noexcept
95 {
96 hi_axiom(code_point < 0x11'0000);
97 hi_axiom(not(code_point >= 0xd800 and code_point < 0xe000));
98 return {truncate<uint8_t>((code_point >= 0x01'0000) + 1), true};
99 }
100
101 template<typename It>
102 constexpr void write(char32_t code_point, It& dst) const noexcept
103 {
104 hi_axiom(code_point <= 0x10'ffff);
105 hi_axiom(not(code_point >= 0xd800 and code_point < 0xe000));
106
107 if (hilet tmp = truncate<int32_t>(code_point) - 0x1'0000; tmp >= 0) {
108 *dst++ = char_cast<char16_t>((tmp >> 10) + 0xd800);
109 *dst++ = char_cast<char16_t>((tmp & 0x3ff) + 0xdc00);
110
111 } else {
112 *dst++ = char_cast<char16_t>(code_point);
113 }
114 }
115
116#if defined(HI_HAS_SSE2)
117 template<typename It>
118 hi_force_inline __m128i read_ascii_chunk16(It it) const noexcept
119 {
120 // Load the UTF-16 data.
121 hilet lo = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
122 it += 8;
123 hilet hi = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
124
125 // To get _mm_packus_epi16() to work we need to prepare the data as follows:
126 // - bit 15 must be '0'.
127 // - if bit 15 was originally set than we need to set any of bits [14:8].
128
129 // Positive numbers -> 0b0000'0000
130 // Negative numbers -> 0b1000'0000
131 hilet sign_lo = _mm_srai_epi16(lo, 15);
132 hilet sign_hi = _mm_srai_epi16(hi, 15);
133 hilet sign = _mm_packs_epi16(sign_lo, sign_hi);
134
135 // ASCII -> 0b0ccc'cccc
136 // positive numbers -> 0b1???'????
137 // negative numbers -> 0b0000'0000
138 hilet chunk = _mm_packus_epi16(lo, hi);
139
140 // ASCII -> 0b0ccc'cccc
141 // positive numbers -> 0b1???'????
142 // negative numbers -> 0b1000'0000
143 return _mm_or_si128(chunk, sign);
144 }
145
146 template<typename It>
147 hi_force_inline void write_ascii_chunk16(__m128i chunk, It dst) const noexcept
148 {
149 hilet zero = _mm_setzero_si128();
150 hilet lo = _mm_unpacklo_epi8(chunk, zero);
151 hilet hi = _mm_unpackhi_epi8(chunk, zero);
152
153 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), lo);
154 dst += 8;
155 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), hi);
156 }
157#endif
158};
159
160}} // namespace hi::v1
161
162hi_warning_pop();
Definition of the char_converter<From,To> functor.
#define hi_axiom(expression,...)
Specify an axiom; an expression that is true.
Definition assert.hpp:238
#define hi_assert_not_null(x,...)
Assert that an expression is not nullptr.
Definition assert.hpp:223
#define hi_axiom_not_null(expression,...)
Assert that an expression is not nullptr.
Definition assert.hpp:257
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
@ read
Allow read access to a file.
@ write
Allow write access to a file.
DOXYGEN BUG.
Definition algorithm.hpp:13
geometry/margins.hpp
Definition cache.hpp:11
Character encoder/decoder template.
Definition char_converter.hpp:83
T addressof(T... args)