HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
utf_32.hpp
Go to the documentation of this file.
1// Copyright Take Vos 2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
9#pragma once
10
11#include "char_converter.hpp"
12
13namespace hi { inline namespace v1 {
14
19template<>
20struct char_map<"utf-32"> {
21 using char_type = char32_t;
22
23 [[nodiscard]] std::endian guess_endian(void const *ptr, size_t size, std::endian endian) const noexcept
24 {
26 auto *ptr_ = reinterpret_cast<uint8_t const *>(ptr);
27
28 if (size < 4) {
29 return std::endian::native;
30 } else {
31 // Check for BOM.
32 if (ptr_[0] == 0x00 and ptr_[1] == 0x00 and ptr_[2] == 0xfe and ptr_[3] == 0xff) {
33 return std::endian::big;
34 } else if (ptr_[0] == 0xff and ptr_[1] == 0xfe and ptr_[2] == 0x00 and ptr_[3] == 0x00) {
35 return std::endian::little;
36 }
37
38 // Check for sequences of zeros.
39 auto count = std::array<size_t, 4>{};
40 for (auto i = 0; i != size; ++i) {
41 count[i % 4] = ptr_[i] == 0 ? count[i % 4] + 1 : 0;
42
43 if (i % 4 == 0 and count[0] >= 8) {
44 return std::endian::big;
45 } else if (i % 4 == 3 and count[3] >= 8) {
46 return std::endian::little;
47 }
48 }
49
50 return endian;
51 }
52 }
53
54 template<typename It, typename EndIt>
55 [[nodiscard]] constexpr std::pair<char32_t, bool> read(It& it, EndIt last) const noexcept
56 {
57 hi_axiom(it != last);
58
59 if (auto cu = *it++; cu < 0xd800) {
60 return {cu, true};
61
62 } else if (cu < 0xe000) {
63 // Surrogates
64 return {0xfffd, false};
65
66 } else if (cu < 0x11'0000) {
67 return {cu, true};
68
69 } else {
70 // Out-of-range
71 return {0xfffd, false};
72 }
73 }
74
75 [[nodiscard]] constexpr std::pair<uint8_t, bool> size(char32_t code_point) const noexcept
76 {
77 hi_axiom(code_point < 0x11'0000);
78 hi_axiom(not(code_point >= 0xd800 and code_point < 0xe000));
79 return {uint8_t{1}, true};
80 }
81
82 template<typename It>
83 constexpr void write(char32_t code_point, It& dst) const noexcept
84 {
85 hi_axiom(code_point < 0x11'0000);
86 hi_axiom(not(code_point >= 0xd800 and code_point < 0xe000));
87
88 *dst++ = code_point;
89 }
90
91#if defined(HI_HAS_SSE2)
92 template<typename It>
93 hi_force_inline __m128i read_ascii_chunk16(It it) const noexcept
94 {
95 // Load the UTF-16 data.
96 hilet c0 = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
97 it += 4;
98 hilet c1 = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
99 it += 4;
100 hilet c2 = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
101 it += 4;
102 hilet c3 = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
103
104 hilet lo = _mm_packs_epi32(c0, c1);
105 hilet hi = _mm_packs_epi32(c2, c3);
106
107 // To get _mm_packus_epi16() to work we need to prepare the data as follows:
108 // - bit 15 must be '0'.
109 // - if bit 15 was originally set than we need to set any of bits [14:8].
110
111 // Positive numbers -> 0b0000'0000
112 // Negative numbers -> 0b1000'0000
113 hilet sign_lo = _mm_srai_epi16(lo, 15);
114 hilet sign_hi = _mm_srai_epi16(hi, 15);
115 hilet sign = _mm_packs_epi16(sign_lo, sign_hi);
116
117 // ASCII -> 0b0ccc'cccc
118 // positive numbers -> 0b1???'????
119 // negative numbers -> 0b0000'0000
120 hilet chunk = _mm_packus_epi16(lo, hi);
121
122 // ASCII -> 0b0ccc'cccc
123 // positive numbers -> 0b1???'????
124 // negative numbers -> 0b1000'0000
125 return _mm_or_si128(chunk, sign);
126 }
127
128 template<typename It>
129 hi_force_inline void write_ascii_chunk16(__m128i chunk, It dst) const noexcept
130 {
131 hilet zero = _mm_setzero_si128();
132 hilet lo = _mm_unpacklo_epi8(chunk, zero);
133 hilet hi = _mm_unpackhi_epi8(chunk, zero);
134
135 hilet c0 = _mm_unpacklo_epi8(lo, zero);
136 hilet c1 = _mm_unpackhi_epi8(lo, zero);
137 hilet c2 = _mm_unpacklo_epi8(hi, zero);
138 hilet c3 = _mm_unpackhi_epi8(hi, zero);
139
140 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), c0);
141 dst += 4;
142 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), c1);
143 dst += 4;
144 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), c2);
145 dst += 4;
146 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), c3);
147 }
148#endif
149};
150
151}} // namespace hi::v1
#define hi_axiom(expression,...)
Specify an axiom; an expression that is true.
Definition assert.hpp:133
#define hi_assert_not_null(x,...)
Assert if an expression is not nullptr.
Definition assert.hpp:118
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
Definition of the char_converter<From,To> functor.
@ read
Allow read access to a file.
@ write
Allow write access to a file.
DOXYGEN BUG.
Definition algorithm.hpp:15
geometry/margins.hpp
Definition assert.hpp:18
Character encoder/decoder template.
Definition char_converter.hpp:85
T addressof(T... args)