HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
utf_16.hpp
// Copyright Take Vos 2022.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "char_converter.hpp"
8
9namespace hi::inline v1 {
10
11template<>
12struct char_map<"utf-16"> {
13 using char_type = char16_t;
14
15 [[nodiscard]] std::endian guess_endian(void const *ptr, size_t size, std::endian endian) const noexcept
16 {
17 hi_axiom(ptr != nullptr);
18 auto *ptr_ = reinterpret_cast<uint8_t const *>(ptr);
19
20 if (size < 2) {
21 return std::endian::native;
22 } else {
23 // Check for BOM.
24 if (ptr_[0] == 0xfe and ptr_[1] == 0xff) {
25 return std::endian::big;
26 } else if (ptr_[0] == 0xff and ptr_[1] == 0xfe) {
27 return std::endian::little;
28 }
29
30 // Check for sequences of zeros.
31 auto count = std::array<size_t, 2>{};
32 for (auto i = 0; i != size; ++i) {
33 count[i % 2] = ptr_[i] == 0 ? count[i % 2] + 1 : 0;
34 if (count[i % 2] >= 8) {
35 return i % 2 == 0 ? std::endian::big : std::endian::little;
36 }
37 }
38
39 return endian;
40 }
41 }
42
43 template<typename It, typename EndIt>
44 [[nodiscard]] constexpr std::pair<char32_t, bool> read(It &it, EndIt last) const noexcept
45 {
46 hi_axiom(it != last);
47
48 if (auto cu = *it++; cu < 0xd800) {
49 return {char_cast<char32_t>(cu), true};
50
51 } else if (cu < 0xdc00) {
52 if (it == last) {
53 // first surrogate at end of string.
54 return {0xfffd, false};
55
56 } else {
57 auto cp = char_cast<char32_t>(cu & 0x03ff);
58 cu = *it;
59 if (cu >= 0xdc00 and cu < 0xe000) {
60 ++it;
61 cp <<= 10;
62 cp |= cu & 0x03ff;
63 cp += 0x01'0000;
64 return {cp, true};
65
66 } else {
67 // unpaired surrogate.
68 return {0xfffd, false};
69 }
70 }
71
72 } else if (cu < 0xe000) {
73 // Invalid low surrogate.
74 return {0xfffd, false};
75
76 } else {
77 return {cu, true};
78 }
79 }
80
81 [[nodiscard]] constexpr std::pair<uint8_t, bool> size(char32_t code_point) const noexcept
82 {
83 hi_axiom(code_point < 0x11'0000);
84 hi_axiom(not(code_point >= 0xd800 and code_point < 0xe000));
85 return {truncate<uint8_t>((code_point >= 0x01'0000) + 1), true};
86 }
87
88 template<typename It>
89 constexpr void write(char32_t code_point, It& dst) const noexcept
90 {
91 hi_axiom(code_point <= 0x10'ffff);
92 hi_axiom(not(code_point >= 0xd800 and code_point < 0xe000));
93
94 if (auto tmp = truncate<int32_t>(code_point) - 0x1'0000; tmp >= 0) {
95 *dst++ = char_cast<char16_t>((tmp >> 10) + 0xd800);
96 *dst++ = char_cast<char16_t>((tmp & 0x3ff) + 0xdc00);
97
98 } else {
99 *dst++ = char_cast<char16_t>(code_point);
100 }
101 }
102
103#if defined(HI_HAS_SSE2)
104 template<typename It>
105 hi_force_inline __m128i read_ascii_chunk16(It it) const noexcept
106 {
107 // Load the UTF-16 data.
108 auto lo = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
109 it += 8;
110 auto hi = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
111
112 // To get _mm_packus_epi16() to work we need to prepare the data as follows:
113 // - bit 15 must be '0'.
114 // - if bit 15 was originally set than we need to set any of bits [14:8].
115
116 // Positive numbers -> 0b0000'0000
117 // Negative numbers -> 0b1000'0000
118 auto sign_lo = _mm_srai_epi16(lo, 15);
119 auto sign_hi = _mm_srai_epi16(hi, 15);
120 auto sign = _mm_packs_epi16(sign_lo, sign_hi);
121
122 // ASCII -> 0b0ccc'cccc
123 // positive numbers -> 0b1???'????
124 // negative numbers -> 0b0000'0000
125 auto chunk = _mm_packus_epi16(lo, hi);
126
127 // ASCII -> 0b0ccc'cccc
128 // positive numbers -> 0b1???'????
129 // negative numbers -> 0b1000'0000
130 return _mm_or_si128(chunk, sign);
131 }
132
133 template<typename It>
134 hi_force_inline void write_ascii_chunk16(__m128i chunk, It dst) const noexcept
135 {
136 auto zero = _mm_setzero_si128();
137 auto lo = _mm_unpacklo_epi8(chunk, zero);
138 auto hi = _mm_unpackhi_epi8(chunk, zero);
139
140 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), lo);
141 dst += 8;
142 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), hi);
143 }
144#endif
145};
146
147} // namespace hi::inline v1
Character encoder/decoder template.
Definition char_converter.hpp:34
T addressof(T... args)