HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
utf_16.hpp
Go to the documentation of this file.
1// Copyright Take Vos 2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
8#pragma once
9
10#include "char_converter.hpp"
11#include "../utility/utility.hpp"
12#include "../macros.hpp"
13#include <bit>
14#include <cstdint>
15#include <utility>
16#include <compare>
17#if defined(HI_HAS_SSE2)
18#include <emmintrin.h>
19#endif
20
21hi_export_module(hikogui.char_maps.utf_16);
22
23hi_warning_push();
24// C26490: Don't use reinterpret_cast.
25// Needed for SIMD intrinsics.
26hi_warning_ignore_msvc(26490);
27
28hi_export namespace hi { inline namespace v1 {
29
34template<>
35struct char_map<"utf-16"> {
36 using char_type = char16_t;
37
38 [[nodiscard]] std::endian guess_endian(void const *ptr, size_t size, std::endian endian) const noexcept
39 {
40 hi_assert_not_null(ptr);
41 auto *ptr_ = static_cast<uint8_t const *>(ptr);
42 hi_axiom_not_null(ptr_);
43
44 if (size < 2) {
45 return std::endian::native;
46 } else {
47 // Check for BOM.
48 if (ptr_[0] == 0xfe and ptr_[1] == 0xff) {
49 return std::endian::big;
50 } else if (ptr_[0] == 0xff and ptr_[1] == 0xfe) {
51 return std::endian::little;
52 }
53
54 // Check for sequences of zeros.
55 auto count = std::array<size_t, 2>{};
56 for (auto i = 0; i != size; ++i) {
57 count[i % 2] = ptr_[i] == 0 ? count[i % 2] + 1 : 0;
58 if (count[i % 2] >= 8) {
59 return i % 2 == 0 ? std::endian::big : std::endian::little;
60 }
61 }
62
63 return endian;
64 }
65 }
66
67 template<typename It, typename EndIt>
68 [[nodiscard]] constexpr std::pair<char32_t, bool> read(It& it, EndIt last) const noexcept
69 {
70 hi_axiom(it != last);
71
72 if (auto cu = *it++; cu < 0xd800) {
73 return {char_cast<char32_t>(cu), true};
74
75 } else if (cu < 0xdc00) {
76 if (it == last) {
77 // first surrogate at end of string.
78 return {0xfffd, false};
79
80 } else {
81 auto cp = char_cast<char32_t>(cu & 0x03ff);
82 cu = *it;
83 if (cu >= 0xdc00 and cu < 0xe000) {
84 ++it;
85 cp <<= 10;
86 cp |= cu & 0x03ff;
87 cp += 0x01'0000;
88 return {cp, true};
89
90 } else {
91 // unpaired surrogate.
92 return {0xfffd, false};
93 }
94 }
95
96 } else if (cu < 0xe000) {
97 // Invalid low surrogate.
98 return {0xfffd, false};
99
100 } else {
101 return {cu, true};
102 }
103 }
104
105 [[nodiscard]] constexpr std::pair<uint8_t, bool> size(char32_t code_point) const noexcept
106 {
107 hi_axiom(code_point < 0x11'0000);
108 hi_axiom(not(code_point >= 0xd800 and code_point < 0xe000));
109 return {truncate<uint8_t>((code_point >= 0x01'0000) + 1), true};
110 }
111
112 template<typename It>
113 constexpr void write(char32_t code_point, It& dst) const noexcept
114 {
115 hi_axiom(code_point <= 0x10'ffff);
116 hi_axiom(not(code_point >= 0xd800 and code_point < 0xe000));
117
118 if (auto const tmp = truncate<int32_t>(code_point) - 0x1'0000; tmp >= 0) {
119 *dst++ = char_cast<char16_t>((tmp >> 10) + 0xd800);
120 *dst++ = char_cast<char16_t>((tmp & 0x3ff) + 0xdc00);
121
122 } else {
123 *dst++ = char_cast<char16_t>(code_point);
124 }
125 }
126
127#if defined(HI_HAS_SSE2)
128 template<typename It>
129 hi_force_inline __m128i read_ascii_chunk16(It it) const noexcept
130 {
131 // Load the UTF-16 data.
132 auto const lo = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
133 it += 8;
134 auto const hi = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
135
136 // To get _mm_packus_epi16() to work we need to prepare the data as follows:
137 // - bit 15 must be '0'.
138 // - if bit 15 was originally set than we need to set any of bits [14:8].
139
140 // Positive numbers -> 0b0000'0000
141 // Negative numbers -> 0b1000'0000
142 auto const sign_lo = _mm_srai_epi16(lo, 15);
143 auto const sign_hi = _mm_srai_epi16(hi, 15);
144 auto const sign = _mm_packs_epi16(sign_lo, sign_hi);
145
146 // ASCII -> 0b0ccc'cccc
147 // positive numbers -> 0b1???'????
148 // negative numbers -> 0b0000'0000
149 auto const chunk = _mm_packus_epi16(lo, hi);
150
151 // ASCII -> 0b0ccc'cccc
152 // positive numbers -> 0b1???'????
153 // negative numbers -> 0b1000'0000
154 return _mm_or_si128(chunk, sign);
155 }
156
157 template<typename It>
158 hi_force_inline void write_ascii_chunk16(__m128i chunk, It dst) const noexcept
159 {
160 auto const zero = _mm_setzero_si128();
161 auto const lo = _mm_unpacklo_epi8(chunk, zero);
162 auto const hi = _mm_unpackhi_epi8(chunk, zero);
163
164 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), lo);
165 dst += 8;
166 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), hi);
167 }
168#endif
169};
170
171}} // namespace hi::v1
172
173hi_warning_pop();
Definition of the char_converter<From,To> functor.
@ read
Allow read access to a file.
@ write
Allow write access to a file.
The HikoGUI namespace.
Definition array_generic.hpp:20
@ zero
The number was zero, and this means something in the current language.
DOXYGEN BUG.
Definition algorithm_misc.hpp:20
Character encoder/decoder template.
Definition char_converter.hpp:89
T addressof(T... args)