HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
utf_32.hpp
1// Copyright Take Vos 2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "char_converter.hpp"
8
9namespace hi::inline v1 {
10
11template<>
12struct char_map<"utf-32"> {
13 using char_type = char32_t;
14
15 [[nodiscard]] std::endian guess_endian(void const *ptr, size_t size, std::endian endian) const noexcept
16 {
17 hi_axiom(ptr != nullptr);
18 auto *ptr_ = reinterpret_cast<uint8_t const *>(ptr);
19
20 if (size < 4) {
21 return std::endian::native;
22 } else {
23 // Check for BOM.
24 if (ptr_[0] == 0x00 and ptr_[1] == 0x00 and ptr_[2] == 0xfe and ptr_[3] == 0xff) {
25 return std::endian::big;
26 } else if (ptr_[0] == 0xff and ptr_[1] == 0xfe and ptr_[2] == 0x00 and ptr_[3] == 0x00) {
27 return std::endian::little;
28 }
29
30 // Check for sequences of zeros.
31 auto count = std::array<size_t,4>{};
32 for (auto i = 0; i != size; ++i) {
33 count[i % 4] = ptr_[i] == 0 ? count[i % 4] + 1 : 0;
34
35 if (i % 4 == 0 and count[0] >= 8) {
36 return std::endian::big;
37 } else if (i % 4 == 3 and count[3] >= 8) {
38 return std::endian::little;
39 }
40 }
41
42 return endian;
43 }
44 }
45
46 template<typename It, typename EndIt>
47 [[nodiscard]] constexpr std::pair<char32_t, bool> read(It& it, EndIt last) const noexcept
48 {
49 hi_axiom(it != last);
50
51 if (auto cu = *it++; cu < 0xd800) {
52 return {cu, true};
53
54 } else if (cu < 0xe000) {
55 // Surrogates
56 return {0xfffd, false};
57
58 } else if (cu < 0x11'0000) {
59 return {cu, true};
60
61 } else {
62 // Out-of-range
63 return {0xfffd, false};
64 }
65 }
66
67 [[nodiscard]] constexpr std::pair<uint8_t, bool> size(char32_t code_point) const noexcept
68 {
69 hi_axiom(code_point < 0x11'0000);
70 hi_axiom(not(code_point >= 0xd800 and code_point < 0xe000));
71 return {uint8_t{1}, true};
72 }
73
74 template<typename It>
75 constexpr void write(char32_t code_point, It& dst) const noexcept
76 {
77 hi_axiom(code_point < 0x11'0000);
78 hi_axiom(not(code_point >= 0xd800 and code_point < 0xe000));
79
80 *dst++ = code_point;
81 }
82
83#if defined(HI_HAS_SSE2)
84 template<typename It>
85 hi_force_inline __m128i read_ascii_chunk16(It it) const noexcept
86 {
87 // Load the UTF-16 data.
88 hilet c0 = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
89 it += 4;
90 hilet c1 = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
91 it += 4;
92 hilet c2 = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
93 it += 4;
94 hilet c3 = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
95
96 hilet lo = _mm_packs_epi32(c0, c1);
97 hilet hi = _mm_packs_epi32(c2, c3);
98
99 // To get _mm_packus_epi16() to work we need to prepare the data as follows:
100 // - bit 15 must be '0'.
101 // - if bit 15 was originally set than we need to set any of bits [14:8].
102
103 // Positive numbers -> 0b0000'0000
104 // Negative numbers -> 0b1000'0000
105 hilet sign_lo = _mm_srai_epi16(lo, 15);
106 hilet sign_hi = _mm_srai_epi16(hi, 15);
107 hilet sign = _mm_packs_epi16(sign_lo, sign_hi);
108
109 // ASCII -> 0b0ccc'cccc
110 // positive numbers -> 0b1???'????
111 // negative numbers -> 0b0000'0000
112 hilet chunk = _mm_packus_epi16(lo, hi);
113
114 // ASCII -> 0b0ccc'cccc
115 // positive numbers -> 0b1???'????
116 // negative numbers -> 0b1000'0000
117 return _mm_or_si128(chunk, sign);
118 }
119
120 template<typename It>
121 hi_force_inline void write_ascii_chunk16(__m128i chunk, It dst) const noexcept
122 {
123 hilet zero = _mm_setzero_si128();
124 hilet lo = _mm_unpacklo_epi8(chunk, zero);
125 hilet hi = _mm_unpackhi_epi8(chunk, zero);
126
127 hilet c0 = _mm_unpacklo_epi8(lo, zero);
128 hilet c1 = _mm_unpackhi_epi8(lo, zero);
129 hilet c2 = _mm_unpacklo_epi8(hi, zero);
130 hilet c3 = _mm_unpackhi_epi8(hi, zero);
131
132 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), c0);
133 dst += 4;
134 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), c1);
135 dst += 4;
136 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), c2);
137 dst += 4;
138 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), c3);
139 }
140#endif
141};
142
143} // namespace hi::inline v1
#define hilet
Invariant should be the default for variables.
Definition required.hpp:23
Character encoder/decoder template.
Definition char_converter.hpp:34
T addressof(T... args)