HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
utf_32.hpp
Go to the documentation of this file.
1// Copyright Take Vos 2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
9#pragma once
10
11#include "char_converter.hpp"
12#include "../macros.hpp"
13
14hi_warning_push();
15// C26490: Don't use reinterpret_cast.
16// Needed for SIMD intrinsics.
17hi_warning_ignore_msvc(26490);
18
19namespace hi { inline namespace v1 {
20
25template<>
26struct char_map<"utf-32"> {
27 using char_type = char32_t;
28
29 [[nodiscard]] std::endian guess_endian(void const *ptr, size_t size, std::endian endian) const noexcept
30 {
31 hi_assert_not_null(ptr);
32 auto *ptr_ = static_cast<uint8_t const *>(ptr);
33 hi_axiom_not_null(ptr_);
34
35 if (size < 4) {
36 return std::endian::native;
37 } else {
38 // Check for BOM.
39 if (ptr_[0] == 0x00 and ptr_[1] == 0x00 and ptr_[2] == 0xfe and ptr_[3] == 0xff) {
40 return std::endian::big;
41 } else if (ptr_[0] == 0xff and ptr_[1] == 0xfe and ptr_[2] == 0x00 and ptr_[3] == 0x00) {
42 return std::endian::little;
43 }
44
45 // Check for sequences of zeros.
46 auto count = std::array<size_t, 4>{};
47 for (auto i = 0; i != size; ++i) {
48 count[i % 4] = ptr_[i] == 0 ? count[i % 4] + 1 : 0;
49
50 if (i % 4 == 0 and count[0] >= 8) {
51 return std::endian::big;
52 } else if (i % 4 == 3 and count[3] >= 8) {
53 return std::endian::little;
54 }
55 }
56
57 return endian;
58 }
59 }
60
61 template<typename It, typename EndIt>
62 [[nodiscard]] constexpr std::pair<char32_t, bool> read(It& it, EndIt last) const noexcept
63 {
64 hi_axiom(it != last);
65
66 if (auto cu = *it++; cu < 0xd800) {
67 return {cu, true};
68
69 } else if (cu < 0xe000) {
70 // Surrogates
71 return {0xfffd, false};
72
73 } else if (cu < 0x11'0000) {
74 return {cu, true};
75
76 } else {
77 // Out-of-range
78 return {0xfffd, false};
79 }
80 }
81
82 [[nodiscard]] constexpr std::pair<uint8_t, bool> size(char32_t code_point) const noexcept
83 {
84 hi_axiom(code_point < 0x11'0000);
85 hi_axiom(not(code_point >= 0xd800 and code_point < 0xe000));
86 return {uint8_t{1}, true};
87 }
88
89 template<typename It>
90 constexpr void write(char32_t code_point, It& dst) const noexcept
91 {
92 hi_axiom(code_point < 0x11'0000);
93 hi_axiom(not(code_point >= 0xd800 and code_point < 0xe000));
94
95 *dst++ = code_point;
96 }
97
98#if defined(HI_HAS_SSE2)
99 template<typename It>
100 hi_force_inline __m128i read_ascii_chunk16(It it) const noexcept
101 {
102 // Load the UTF-16 data.
103 hilet c0 = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
104 it += 4;
105 hilet c1 = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
106 it += 4;
107 hilet c2 = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
108 it += 4;
109 hilet c3 = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
110
111 hilet lo = _mm_packs_epi32(c0, c1);
112 hilet hi = _mm_packs_epi32(c2, c3);
113
114 // To get _mm_packus_epi16() to work we need to prepare the data as follows:
115 // - bit 15 must be '0'.
116 // - if bit 15 was originally set than we need to set any of bits [14:8].
117
118 // Positive numbers -> 0b0000'0000
119 // Negative numbers -> 0b1000'0000
120 hilet sign_lo = _mm_srai_epi16(lo, 15);
121 hilet sign_hi = _mm_srai_epi16(hi, 15);
123
124 // ASCII -> 0b0ccc'cccc
125 // positive numbers -> 0b1???'????
126 // negative numbers -> 0b0000'0000
127 hilet chunk = _mm_packus_epi16(lo, hi);
128
129 // ASCII -> 0b0ccc'cccc
130 // positive numbers -> 0b1???'????
131 // negative numbers -> 0b1000'0000
132 return _mm_or_si128(chunk, sign);
133 }
134
135 template<typename It>
136 hi_force_inline void write_ascii_chunk16(__m128i chunk, It dst) const noexcept
137 {
138 hilet zero = _mm_setzero_si128();
139 hilet lo = _mm_unpacklo_epi8(chunk, zero);
140 hilet hi = _mm_unpackhi_epi8(chunk, zero);
141
142 hilet c0 = _mm_unpacklo_epi8(lo, zero);
143 hilet c1 = _mm_unpackhi_epi8(lo, zero);
144 hilet c2 = _mm_unpacklo_epi8(hi, zero);
145 hilet c3 = _mm_unpackhi_epi8(hi, zero);
146
147 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), c0);
148 dst += 4;
149 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), c1);
150 dst += 4;
151 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), c2);
152 dst += 4;
153 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), c3);
154 }
155#endif
156};
157
158}} // namespace hi::v1
159
160hi_warning_pop();
Definition of the char_converter<From,To> functor.
@ read
Allow read access to a file.
@ write
Allow write access to a file.
DOXYGEN BUG.
Definition algorithm.hpp:16
geometry/margins.hpp
Definition lookahead_iterator.hpp:5
@ zero
The number was zero, and this means something in the current language.
constexpr Out narrow_cast(In const &rhs) noexcept
Cast numeric values without loss of precision.
Definition cast.hpp:377
Character encoder/decoder template.
Definition char_converter.hpp:86
T addressof(T... args)