HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
utf_16.hpp
Go to the documentation of this file.
1// Copyright Take Vos 2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
8#pragma once
9
10#include "char_converter.hpp"
11
12namespace hi { inline namespace v1 {
13
18template<>
19struct char_map<"utf-16"> {
20 using char_type = char16_t;
21
22 [[nodiscard]] std::endian guess_endian(void const *ptr, size_t size, std::endian endian) const noexcept
23 {
24 hi_axiom(ptr != nullptr);
25 auto *ptr_ = reinterpret_cast<uint8_t const *>(ptr);
26
27 if (size < 2) {
28 return std::endian::native;
29 } else {
30 // Check for BOM.
31 if (ptr_[0] == 0xfe and ptr_[1] == 0xff) {
32 return std::endian::big;
33 } else if (ptr_[0] == 0xff and ptr_[1] == 0xfe) {
34 return std::endian::little;
35 }
36
37 // Check for sequences of zeros.
38 auto count = std::array<size_t, 2>{};
39 for (auto i = 0; i != size; ++i) {
40 count[i % 2] = ptr_[i] == 0 ? count[i % 2] + 1 : 0;
41 if (count[i % 2] >= 8) {
42 return i % 2 == 0 ? std::endian::big : std::endian::little;
43 }
44 }
45
46 return endian;
47 }
48 }
49
50 template<typename It, typename EndIt>
51 [[nodiscard]] constexpr std::pair<char32_t, bool> read(It& it, EndIt last) const noexcept
52 {
53 hi_axiom(it != last);
54
55 if (auto cu = *it++; cu < 0xd800) {
56 return {char_cast<char32_t>(cu), true};
57
58 } else if (cu < 0xdc00) {
59 if (it == last) {
60 // first surrogate at end of string.
61 return {0xfffd, false};
62
63 } else {
64 auto cp = char_cast<char32_t>(cu & 0x03ff);
65 cu = *it;
66 if (cu >= 0xdc00 and cu < 0xe000) {
67 ++it;
68 cp <<= 10;
69 cp |= cu & 0x03ff;
70 cp += 0x01'0000;
71 return {cp, true};
72
73 } else {
74 // unpaired surrogate.
75 return {0xfffd, false};
76 }
77 }
78
79 } else if (cu < 0xe000) {
80 // Invalid low surrogate.
81 return {0xfffd, false};
82
83 } else {
84 return {cu, true};
85 }
86 }
87
88 [[nodiscard]] constexpr std::pair<uint8_t, bool> size(char32_t code_point) const noexcept
89 {
90 hi_axiom(code_point < 0x11'0000);
91 hi_axiom(not(code_point >= 0xd800 and code_point < 0xe000));
92 return {truncate<uint8_t>((code_point >= 0x01'0000) + 1), true};
93 }
94
95 template<typename It>
96 constexpr void write(char32_t code_point, It& dst) const noexcept
97 {
98 hi_axiom(code_point <= 0x10'ffff);
99 hi_axiom(not(code_point >= 0xd800 and code_point < 0xe000));
100
101 if (auto tmp = truncate<int32_t>(code_point) - 0x1'0000; tmp >= 0) {
102 *dst++ = char_cast<char16_t>((tmp >> 10) + 0xd800);
103 *dst++ = char_cast<char16_t>((tmp & 0x3ff) + 0xdc00);
104
105 } else {
106 *dst++ = char_cast<char16_t>(code_point);
107 }
108 }
109
110#if defined(HI_HAS_SSE2)
111 template<typename It>
112 hi_force_inline __m128i read_ascii_chunk16(It it) const noexcept
113 {
114 // Load the UTF-16 data.
115 auto lo = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
116 it += 8;
117 auto hi = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
118
119 // To get _mm_packus_epi16() to work we need to prepare the data as follows:
120 // - bit 15 must be '0'.
121 // - if bit 15 was originally set than we need to set any of bits [14:8].
122
123 // Positive numbers -> 0b0000'0000
124 // Negative numbers -> 0b1000'0000
125 auto sign_lo = _mm_srai_epi16(lo, 15);
126 auto sign_hi = _mm_srai_epi16(hi, 15);
127 auto sign = _mm_packs_epi16(sign_lo, sign_hi);
128
129 // ASCII -> 0b0ccc'cccc
130 // positive numbers -> 0b1???'????
131 // negative numbers -> 0b0000'0000
132 auto chunk = _mm_packus_epi16(lo, hi);
133
134 // ASCII -> 0b0ccc'cccc
135 // positive numbers -> 0b1???'????
136 // negative numbers -> 0b1000'0000
137 return _mm_or_si128(chunk, sign);
138 }
139
140 template<typename It>
141 hi_force_inline void write_ascii_chunk16(__m128i chunk, It dst) const noexcept
142 {
143 auto zero = _mm_setzero_si128();
144 auto lo = _mm_unpacklo_epi8(chunk, zero);
145 auto hi = _mm_unpackhi_epi8(chunk, zero);
146
147 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), lo);
148 dst += 8;
149 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), hi);
150 }
151#endif
152};
153
154}} // namespace hi::v1
Definition of the char_converter<From,To> functor.
@ read
Allow read access to a file.
@ write
Allow write access to a file.
DOXYGEN BUG.
Definition algorithm.hpp:15
The HikoGUI namespace.
Definition ascii.hpp:19
Character encoder/decoder template.
Definition char_converter.hpp:85
T addressof(T... args)