HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
utf_32.hpp
Go to the documentation of this file.
1// Copyright Take Vos 2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
9#pragma once
10
11#include "char_converter.hpp"
12#include "../utility/utility.hpp"
13#include "../macros.hpp"
14#include <bit>
15#include <cstdint>
16#include <utility>
17#include <compare>
18#if defined(HI_HAS_SSE2)
19#include <emmintrin.h>
20#endif
21
22hi_export_module(hikogui.char_maps.utf_32);
23
24hi_warning_push();
25// C26490: Don't use reinterpret_cast.
26// Needed for SIMD intrinsics.
27hi_warning_ignore_msvc(26490);
28
29hi_export namespace hi { inline namespace v1 {
30
35template<>
36struct char_map<"utf-32"> {
37 using char_type = char32_t;
38
39 [[nodiscard]] std::endian guess_endian(void const *ptr, size_t size, std::endian endian) const noexcept
40 {
41 hi_assert_not_null(ptr);
42 auto *ptr_ = static_cast<uint8_t const *>(ptr);
43 hi_axiom_not_null(ptr_);
44
45 if (size < 4) {
46 return std::endian::native;
47 } else {
48 // Check for BOM.
49 if (ptr_[0] == 0x00 and ptr_[1] == 0x00 and ptr_[2] == 0xfe and ptr_[3] == 0xff) {
50 return std::endian::big;
51 } else if (ptr_[0] == 0xff and ptr_[1] == 0xfe and ptr_[2] == 0x00 and ptr_[3] == 0x00) {
52 return std::endian::little;
53 }
54
55 // Check for sequences of zeros.
56 auto count = std::array<size_t, 4>{};
57 for (auto i = 0; i != size; ++i) {
58 count[i % 4] = ptr_[i] == 0 ? count[i % 4] + 1 : 0;
59
60 if (i % 4 == 0 and count[0] >= 8) {
61 return std::endian::big;
62 } else if (i % 4 == 3 and count[3] >= 8) {
63 return std::endian::little;
64 }
65 }
66
67 return endian;
68 }
69 }
70
71 template<typename It, typename EndIt>
72 [[nodiscard]] constexpr std::pair<char32_t, bool> read(It& it, EndIt last) const noexcept
73 {
74 hi_axiom(it != last);
75
76 if (auto cu = *it++; cu < 0xd800) {
77 return {cu, true};
78
79 } else if (cu < 0xe000) {
80 // Surrogates
81 return {0xfffd, false};
82
83 } else if (cu < 0x11'0000) {
84 return {cu, true};
85
86 } else {
87 // Out-of-range
88 return {0xfffd, false};
89 }
90 }
91
92 [[nodiscard]] constexpr std::pair<uint8_t, bool> size(char32_t code_point) const noexcept
93 {
94 hi_axiom(code_point < 0x11'0000);
95 hi_axiom(not(code_point >= 0xd800 and code_point < 0xe000));
96 return {uint8_t{1}, true};
97 }
98
99 template<typename It>
100 constexpr void write(char32_t code_point, It& dst) const noexcept
101 {
102 hi_axiom(code_point < 0x11'0000);
103 hi_axiom(not(code_point >= 0xd800 and code_point < 0xe000));
104
105 *dst++ = code_point;
106 }
107
108#if defined(HI_HAS_SSE2)
109 template<typename It>
110 hi_force_inline __m128i read_ascii_chunk16(It it) const noexcept
111 {
112 // Load the UTF-16 data.
113 auto const c0 = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
114 it += 4;
115 auto const c1 = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
116 it += 4;
117 auto const c2 = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
118 it += 4;
119 auto const c3 = _mm_loadu_si128(reinterpret_cast<__m128i const *>(std::addressof(*it)));
120
121 auto const lo = _mm_packs_epi32(c0, c1);
122 auto const hi = _mm_packs_epi32(c2, c3);
123
124 // To get _mm_packus_epi16() to work we need to prepare the data as follows:
125 // - bit 15 must be '0'.
126 // - if bit 15 was originally set than we need to set any of bits [14:8].
127
128 // Positive numbers -> 0b0000'0000
129 // Negative numbers -> 0b1000'0000
130 auto const sign_lo = _mm_srai_epi16(lo, 15);
131 auto const sign_hi = _mm_srai_epi16(hi, 15);
132 auto const sign = _mm_packs_epi16(sign_lo, sign_hi);
133
134 // ASCII -> 0b0ccc'cccc
135 // positive numbers -> 0b1???'????
136 // negative numbers -> 0b0000'0000
137 auto const chunk = _mm_packus_epi16(lo, hi);
138
139 // ASCII -> 0b0ccc'cccc
140 // positive numbers -> 0b1???'????
141 // negative numbers -> 0b1000'0000
142 return _mm_or_si128(chunk, sign);
143 }
144
145 template<typename It>
146 hi_force_inline void write_ascii_chunk16(__m128i chunk, It dst) const noexcept
147 {
148 auto const zero = _mm_setzero_si128();
149 auto const lo = _mm_unpacklo_epi8(chunk, zero);
150 auto const hi = _mm_unpackhi_epi8(chunk, zero);
151
152 auto const c0 = _mm_unpacklo_epi8(lo, zero);
153 auto const c1 = _mm_unpackhi_epi8(lo, zero);
154 auto const c2 = _mm_unpacklo_epi8(hi, zero);
155 auto const c3 = _mm_unpackhi_epi8(hi, zero);
156
157 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), c0);
158 dst += 4;
159 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), c1);
160 dst += 4;
161 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), c2);
162 dst += 4;
163 _mm_storeu_si128(reinterpret_cast<__m128i *>(std::addressof(*dst)), c3);
164 }
165#endif
166};
167
168}} // namespace hi::v1
169
170hi_warning_pop();
Definition of the char_converter<From,To> functor.
@ read
Allow read access to a file.
@ write
Allow write access to a file.
The HikoGUI namespace.
Definition array_generic.hpp:20
@ zero
The number was zero, and this means something in the current language.
DOXYGEN BUG.
Definition algorithm_misc.hpp:20
Character encoder/decoder template.
Definition char_converter.hpp:89
T addressof(T... args)