HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
native_i8x16_sse2.hpp
1// Copyright Take Vos 2022, 2023.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "native_simd_utility.hpp"
8#include "../utility/utility.hpp"
9#include "../macros.hpp"
10#include <array>
11#include <ostream>
12
13
14
15hi_warning_push();
16// C26472: Don't use a static_cast for arithmetic conversions.
17// This is a low level type.
18hi_warning_ignore_msvc(26472);
19
20namespace hi { inline namespace v1 {
21
22#ifdef HI_HAS_SSE2
23
39struct native_i8x16 {
40 using value_type = int8_t;
41 constexpr static size_t size = 4;
42 using register_type = __m128i;
43 using array_type = std::array<value_type, size>;
44
46
47 native_i8x16(native_i8x16 const&) noexcept = default;
51
55
56 [[nodiscard]] explicit native_i8x16(register_type other) noexcept : v(other) {}
57
58 [[nodiscard]] explicit operator register_type() const noexcept
59 {
60 return v;
61 }
62
83 value_type a,
84 value_type b = value_type{0},
85 value_type c = value_type{0},
86 value_type d = value_type{0},
87 value_type e = value_type{0},
88 value_type f = value_type{0},
89 value_type g = value_type{0},
90 value_type h = value_type{0},
91 value_type i = value_type{0},
92 value_type j = value_type{0},
93 value_type k = value_type{0},
94 value_type l = value_type{0},
95 value_type m = value_type{0},
96 value_type n = value_type{0},
97 value_type o = value_type{0},
98 value_type p = value_type{0}
99 ) noexcept :
// _mm_set_epi8() takes its arguments from the highest lane down to the
// lowest, so the parameters are passed in reverse order: `a` ends up in
// lane 0 and `p` in lane 15. Every parameter after `a` defaults to zero.
100 v(_mm_set_epi8(p, o, n, m, l, k, j, i, h, g, f, e, d, c, b, a))
101 {
102 }
103
104 [[nodiscard]] explicit native_i8x16(value_type const *other) noexcept :
105 v(_mm_loadu_si128(reinterpret_cast<register_type const *>(other)))
106 {
107 }
108
109 void store(value_type *out) const noexcept
110 {
111 hi_axiom_not_null(out);
112 _mm_storeu_si128(reinterpret_cast<register_type *>(out), v);
113 }
114
115 [[nodiscard]] explicit native_i8x16(void const *other) noexcept : v(_mm_loadu_si128(static_cast<register_type const *>(other)))
116 {
117 }
118
119 void store(void *out) const noexcept
120 {
121 hi_axiom_not_null(out);
122 _mm_storeu_si128(static_cast<register_type *>(out), v);
123 }
124
125 [[nodiscard]] explicit native_i8x16(std::span<value_type const> other) noexcept
126 {
127 hi_axiom(other.size() >= size);
128 v = _mm_loadu_si128(reinterpret_cast<register_type const *>(other.data()));
129 }
130
131 void store(std::span<value_type> out) const noexcept
132 {
133 hi_axiom(out.size() >= size);
134 _mm_storeu_si128(reinterpret_cast<register_type *>(out.data()), v);
135 }
136
137 [[nodiscard]] explicit native_i8x16(array_type other) noexcept :
138 v(_mm_loadu_si128(reinterpret_cast<register_type const *>(other.data())))
139 {
140 }
141
142 [[nodiscard]] explicit operator array_type() const noexcept
143 {
144 auto r = array_type{};
145 _mm_storeu_si128(reinterpret_cast<register_type *>(r.data()), v);
146 return r;
147 }
148
149#ifdef AVX512F
150 [[nodiscard]] explicit native_i8x16(native_f32x16 const& a) noexcept;
151 [[nodiscard]] explicit native_i8x16(native_u32x16 const& a) noexcept;
152#endif
153
163 [[nodiscard]] static native_i8x16 broadcast(value_type a) noexcept
164 {
165 return native_i8x16{_mm_set1_epi8(a)};
166 }
167
/** Broadcast the first byte of a vector.
 *
 * One of three implementations is selected at compile time from
 * HI_HAS_AVX2 and HI_HAS_SSSE3; the last branch uses only SSE2 intrinsics.
 *
 * NOTE(review): the first test is `#ifdef` (is the macro defined) while the
 * second is `#elif` (is the macro's value non-zero) - confirm both macros are
 * defined in a way that makes these tests equivalent.
 *
 * @param a The vector to read from.
 * @return A new vector built from `a`.
 */
189 [[nodiscard]] static native_i8x16 broadcast(native_i8x16 a) noexcept
190 {
191#ifdef HI_HAS_AVX2
193#elif HI_HAS_SSSE3
195#else
196 // Create a mask for 1 byte each 32 bit word, AND it with a.v.
197 auto tmp = _mm_undefined_si128();
199 tmp = _mm_slli_epi32(tmp, 24);
200 tmp = _mm_and_si128(tmp, a.v);
201
202 // Broadcast the first byte to all the bytes in the first 32 bit word.
205
206 // Broadcast the first 32 bit word to all 4 32 bit words.
// A shuffle control of all zeros selects word 0 for every destination word.
207 tmp = _mm_shuffle_epi32(tmp, 0b00'00'00'00);
208 return native_i8x16{tmp};
209#endif
210 }
211
213 {
214 hilet tmp = _mm_undefined_si128();
216 }
217
220 [[nodiscard]] size_t mask() const noexcept
221 {
223 }
224
225 [[nodiscard]] friend bool equal(native_i8x16 a, native_i8x16 b) noexcept
226 {
227 return (a == b).mask() == 0b1111'1111'1111'1111;
228 }
229
230 [[nodiscard]] friend native_i8x16 operator==(native_i8x16 a, native_i8x16 b) noexcept
231 {
232 return native_i8x16{_mm_cmpeq_epi8(a.v, b.v)};
233 }
234
236 {
237 return ~(a == b);
238 }
239
240 [[nodiscard]] friend native_i8x16 operator<(native_i8x16 a, native_i8x16 b) noexcept
241 {
242 return native_i8x16{_mm_cmplt_epi8(a.v, b.v)};
243 }
244
246 {
247 return native_i8x16{_mm_cmpgt_epi8(a.v, b.v)};
248 }
249
251 {
252 return ~(a > b);
253 }
254
256 {
257 return ~(a < b);
258 }
259
/** Unary plus.
 *
 * @param a The operand.
 * @return `a`, unchanged.
 */
260 [[nodiscard]] friend native_i8x16 operator+(native_i8x16 a) noexcept
261 {
262 return a;
263 }
264
265 [[nodiscard]] friend native_i8x16 operator-(native_i8x16 a) noexcept
266 {
267 return native_i8x16{} - a;
268 }
269
270 [[nodiscard]] friend native_i8x16 operator+(native_i8x16 a, native_i8x16 b) noexcept
271 {
272 return native_i8x16{_mm_add_epi8(a.v, b.v)};
273 }
274
275 [[nodiscard]] friend native_i8x16 operator-(native_i8x16 a, native_i8x16 b) noexcept
276 {
277 return native_i8x16{_mm_sub_epi8(a.v, b.v)};
278 }
279
280 [[nodiscard]] friend native_i8x16 operator&(native_i8x16 a, native_i8x16 b) noexcept
281 {
282 return native_i8x16{_mm_and_si128(a.v, b.v)};
283 }
284
285 [[nodiscard]] friend native_i8x16 operator|(native_i8x16 a, native_i8x16 b) noexcept
286 {
287 return native_i8x16{_mm_or_si128(a.v, b.v)};
288 }
289
290 [[nodiscard]] friend native_i8x16 operator^(native_i8x16 a, native_i8x16 b) noexcept
291 {
292 return native_i8x16{_mm_xor_si128(a.v, b.v)};
293 }
294
/** Bitwise NOT of a vector.
 *
 * @param a The operand.
 * @return The result of `(~a.v) & ones`.
 */
295 [[nodiscard]] friend native_i8x16 operator~(native_i8x16 a) noexcept
296 {
297 auto ones = _mm_undefined_si128();
// _mm_andnot_si128(x, y) computes (~x) & y.
// NOTE(review): this only yields ~a.v when `ones` has every bit set at this
// point - confirm that it is filled in before being used here.
299 return native_i8x16{_mm_andnot_si128(a.v, ones)};
300 }
301
302 [[nodiscard]] friend native_i8x16 min(native_i8x16 a, native_i8x16 b) noexcept
303 {
304#if HI_HAS_SSE4_1
305 return native_i8x16{_mm_min_epi8(a.v, b.v)};
306#else
307 hilet mask = a < b;
308 return (mask & a) | not_and(mask, b);
309#endif
310 }
311
312 [[nodiscard]] friend native_i8x16 max(native_i8x16 a, native_i8x16 b) noexcept
313 {
314#if HI_HAS_SSE4_1
315 return native_i8x16{_mm_max_epi8(a.v, b.v)};
316#else
317 hilet mask = a > b;
318 return (mask & a) | not_and(mask, b);
319#endif
320 }
321
322 [[nodiscard]] friend native_i8x16 abs(native_i8x16 a) noexcept
323 {
324#if HI_HAS_SSSE3
325 return native_i8x16{_mm_abs_epi8(a.v)};
326#else
327 hilet mask = a > native_i8x16{};
328 return (mask & a) | not_and(mask, -a);
329#endif
330 }
331
/** Clear selected parts of a vector.
 *
 * NOTE(review): the bound below allows only four mask bits, while equal()
 * compares mask() against sixteen bits for this type - confirm whether the
 * limit should be 0b1111'1111'1111'1111.
 *
 * @tparam Mask Selects what to clear; passed on to from_mask<Mask>().
 * @param a The vector to clear parts of.
 * @return In the fallback branch: `a` with the bits set in from_mask<Mask>()
 *         cleared.
 */
338 template<size_t Mask>
339 [[nodiscard]] friend native_i8x16 set_zero(native_i8x16 a) noexcept
340 {
341 static_assert(Mask <= 0b1111);
342#ifdef HI_HAS_SSE4_1
344#else
// not_and(mask, a) computes (~mask) & a.
345 hilet mask = from_mask<Mask>();
346 return not_and(mask, a);
347#endif
348 }
349
357 template<size_t Index>
358 [[nodiscard]] friend native_i8x16 insert(native_i8x16 a, value_type b) noexcept
359 {
360 static_assert(Index < 4);
361
362#ifdef HI_HAS_SSE4_1
363 return native_i8x16{_mm_insert_epi8(a.v, b, Index)};
364#else
365 hilet mask = from_mask<1_uz << Index>();
366 return not_and(mask, a) | (mask & broadcast(b));
367#endif
368 }
369
376 template<size_t Index>
377 [[nodiscard]] friend value_type get(native_i8x16 a) noexcept
378 {
379#ifdef HI_HAS_SSE4_1
380 return static_cast<value_type>(_mm_extract_epi8(a.v, Index));
381#else
382 auto r = static_cast<array_type>(a);
383 return std::get<Index>(r);
384#endif
385 }
386
392 [[nodiscard]] friend native_i8x16 not_and(native_i8x16 a, native_i8x16 b) noexcept
393 {
394 return native_i8x16{_mm_andnot_si128(a.v, b.v)};
395 }
396
397 friend std::ostream& operator<<(std::ostream& a, native_i8x16 b) noexcept
398 {
399 return a << "(" << get<0>(b) << ", " << get<1>(b) << ", " << get<2>(b) << ", " << get<3>(b) << ")";
400 }
401};
402
403#endif
404
405}} // namespace hi::v1
406
407hi_warning_pop();
@ other
The gui_event does not have associated data.
DOXYGEN BUG.
Definition algorithm.hpp:16
geometry/margins.hpp
Definition lookahead_iterator.hpp:5
constexpr Out narrow_cast(In const &rhs) noexcept
Cast numeric values without loss of precision.
Definition cast.hpp:377
T equal(T... args)
T max(T... args)
T min(T... args)
T operator!=(T... args)