HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
simd.hpp
1// Copyright Take Vos 2020-2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "native_f32x4_sse.hpp"
8#include "native_f64x4_avx.hpp"
9#include "native_i32x4_sse2.hpp"
10#include "native_i64x4_avx2.hpp"
11#include "native_u32x4_sse2.hpp"
12#include "native_simd_conversions_x86.hpp"
13
14#include "../utility/utility.hpp"
15#include "../macros.hpp"
16#include <cstdint>
17#include <ostream>
18#include <string>
19#include <array>
20#include <type_traits>
21#include <concepts>
22#include <bit>
23#include <climits>
24#include <utility>
25
26
27
28hi_warning_push();
29// C4702 unreachable code: Suppressed due to intrinsics and std::is_constant_evaluated()
30hi_warning_ignore_msvc(4702);
31// C26467: Converting from floating point to unsigned integral types results in non-portable code if...
32// We are trying to get SIMD intrinsics to work the same as scalar functions.
33hi_warning_ignore_msvc(26467);
34// C26472: Don't use static_cast for arithmetic conversions.
35// We are trying to get SIMD intrinsics to work the same as scalar functions.
36hi_warning_ignore_msvc(26472)
37
38#define HI_X_runtime_evaluate_if_valid(...) \
39 do { \
40 if (not std::is_constant_evaluated()) { \
41 if constexpr (requires { __VA_ARGS__; }) { \
42 return __VA_ARGS__; \
43 } \
44 } \
45 } while (false)
46
47#define HI_X_accessor(index, name) \
48 [[nodiscard]] constexpr T name() const noexcept \
49 requires(N > index) \
50 { \
51 HI_X_runtime_evaluate_if_valid(get<index>(reg())); \
52 return std::get<index>(v); \
53 } \
54\
55 [[nodiscard]] constexpr T& name() noexcept \
56 requires(N > index) \
57 { \
58 return std::get<index>(v); \
59 }
60
61#define HI_X_binary_math_op(op) \
62 [[nodiscard]] friend constexpr simd operator op(simd lhs, simd rhs) noexcept \
63 requires(requires(value_type a, value_type b) { a op b; }) \
64 { \
65 HI_X_runtime_evaluate_if_valid(simd{lhs.reg() op rhs.reg()}); \
66\
67 auto r = simd{}; \
68 for (std::size_t i = 0; i != N; ++i) { \
69 r[i] = lhs[i] op rhs[i]; \
70 } \
71 return r; \
72 }
73
74#define HI_X_binary_bit_op(op) \
75 [[nodiscard]] friend constexpr simd operator op(simd lhs, simd rhs) noexcept \
76 { \
77 HI_X_runtime_evaluate_if_valid(simd{lhs.reg() op rhs.reg()}); \
78\
79 auto r = simd{}; \
80 for (std::size_t i = 0; i != N; ++i) { \
81 hilet lhs_vi = std::bit_cast<unsigned_type>(lhs[i]); \
82 hilet rhs_vi = std::bit_cast<unsigned_type>(rhs[i]); \
83 hilet r_vi = static_cast<unsigned_type>(lhs_vi op rhs_vi); \
84 r[i] = std::bit_cast<value_type>(r_vi); \
85 } \
86 return r; \
87 }
88
89#define HI_X_binary_shift_op(op) \
90 [[nodiscard]] friend constexpr simd operator op(simd const& lhs, unsigned int rhs) noexcept \
91 { \
92 HI_X_runtime_evaluate_if_valid(simd{lhs.reg() op rhs}); \
93\
94 auto r = simd{}; \
95 for (std::size_t i = 0; i != N; ++i) { \
96 r.v[i] = lhs.v[i] op rhs; \
97 } \
98 return r; \
99 }
100
101#define HI_X_binary_cmp_op(op) \
102 [[nodiscard]] friend constexpr simd operator op(simd lhs, simd rhs) noexcept \
103 requires(requires(value_type a, value_type b) { a op b; }) \
104 { \
105 HI_X_runtime_evaluate_if_valid(simd{lhs.reg() op rhs.reg()}); \
106\
107 auto r = simd{}; \
108 for (std::size_t i = 0; i != N; ++i) { \
109 r[i] = lhs[i] op rhs[i] ? ones_value : zero_value; \
110 } \
111 return r; \
112 }
113
114#define HI_X_binary_op_broadcast(op) \
115 [[nodiscard]] friend constexpr auto operator op(value_type lhs, simd rhs) noexcept \
116 requires(requires(value_type a, value_type b) { a op b; }) \
117 { \
118 return broadcast(lhs) op rhs; \
119 } \
120\
121 [[nodiscard]] friend constexpr auto operator op(simd lhs, value_type rhs) noexcept \
122 requires(requires(value_type a, value_type b) { a op b; }) \
123 { \
124 return lhs op broadcast(rhs); \
125 }
126
127#define HI_X_inplace_op(long_op, short_op) \
128 constexpr simd& operator long_op(auto rhs) noexcept \
129 requires(requires { *this short_op rhs; }) \
130 { \
131 return *this = *this short_op rhs; \
132 }
133
134namespace hi::inline v1 {
135
136template<numeric_limited T, std::size_t N>
137struct simd {
138 using value_type = T;
139 constexpr static size_t size = N;
140
141 using unsigned_type = make_uintxx_t<sizeof(value_type) * CHAR_BIT>;
142
143 constexpr static bool has_native_type = requires { typename native_simd<value_type, size>::value_type; };
144 using native_type = native_simd<value_type, size>;
145
146 using array_type = std::array<value_type, size>;
147 using size_type = typename array_type::size_type;
148 using difference_type = typename array_type::difference_type;
149 using reference = typename array_type::reference;
150 using const_reference = typename array_type::const_reference;
151 using pointer = typename array_type::pointer;
152 using const_pointer = typename array_type::const_pointer;
153 using iterator = typename array_type::iterator;
154 using const_iterator = typename array_type::const_iterator;
155
156 constexpr static value_type zero_value = value_type{};
157 constexpr static value_type ones_value = std::bit_cast<value_type>(std::numeric_limits<unsigned_type>::max());
158
159 array_type v;
160
161 constexpr simd() noexcept
162 {
163 if (not std::is_constant_evaluated()) {
164 if constexpr (requires { *this = simd{native_type{}}; }) {
165 *this = simd{native_type{}};
166 }
167 }
168 v = array_type{};
169 }
170
171 constexpr simd(simd const& rhs) noexcept = default;
172 constexpr simd(simd&& rhs) noexcept = default;
173 constexpr simd& operator=(simd const& rhs) noexcept = default;
174 constexpr simd& operator=(simd&& rhs) noexcept = default;
175
176 template<numeric_limited U>
177 [[nodiscard]] constexpr explicit simd(simd<U, N> const& other) noexcept
178 {
179 if (not std::is_constant_evaluated()) {
180 if constexpr (requires { *this = simd{native_type{other.reg()}}; }) {
181 *this = simd{native_type{other.reg()}};
182 return;
183 }
184 }
185
186 for (std::size_t i = 0; i != N; ++i) {
187 if constexpr (std::is_integral_v<T> and std::is_floating_point_v<U>) {
188 // SSE conversion round floats before converting to integer.
189 v[i] = static_cast<value_type>(std::round(other[i]));
190 } else {
191 v[i] = static_cast<value_type>(other[i]);
192 }
193 }
194 }
195
196 template<numeric_limited U>
197 [[nodiscard]] constexpr explicit simd(simd<U, size / 2> const& a, simd<U, size / 2> const& b) noexcept
198 {
199 if (not std::is_constant_evaluated()) {
200 if constexpr (requires { simd{native_type{a.reg(), b.reg()}}; }) {
201 *this = simd{native_type{a.reg(), b.reg()}};
202 return;
203 }
204 }
205
206 for (std::size_t i = 0; i != size; ++i) {
207 hilet tmp = i < (size / 2) ? a[i] : b[i];
208 if constexpr (std::is_integral_v<T> and std::is_floating_point_v<U>) {
209 // SSE conversion round floats before converting to integer.
210 v[i] = static_cast<value_type>(std::round(tmp));
211 } else {
212 v[i] = static_cast<value_type>(tmp);
213 }
214 }
215 }
216
217 template<std::convertible_to<value_type>... Args>
218 [[nodiscard]] constexpr explicit simd(value_type first, Args... args) noexcept
219 {
220 if (not std::is_constant_evaluated()) {
221 if constexpr (requires { simd{native_type{first, static_cast<value_type>(args)...}}; }) {
222 *this = simd{native_type{first, static_cast<value_type>(args)...}};
223 return;
224 }
225 }
226
227 v = array_type{first, static_cast<value_type>(args)...};
228 }
229
230 [[nodiscard]] constexpr static simd broadcast(T rhs) noexcept
231 {
232 HI_X_runtime_evaluate_if_valid(simd{native_type::broadcast(rhs)});
233
234 auto r = simd{};
235 for (std::size_t i = 0; i != N; ++i) {
236 r[i] = rhs;
237 }
238 return r;
239 }
240
241 [[nodiscard]] constexpr static simd epsilon() noexcept
242 {
243 if constexpr (std::is_floating_point_v<T>) {
244 return broadcast(std::numeric_limits<T>::epsilon());
245 } else {
246 return broadcast(T{0});
247 }
248 }
249
250 [[nodiscard]] simd(std::array<T, N> const& rhs) noexcept : v(rhs) {}
251
252 simd& operator=(std::array<T, N> const& rhs) noexcept
253 {
254 v = rhs;
255 return *this;
256 }
257
258 [[nodiscard]] operator std::array<T, N>() const noexcept
259 {
260 return v;
261 }
262
263 [[nodiscard]] explicit simd(native_type rhs) noexcept
264 requires(requires { typename native_type::value_type; })
265 : v(static_cast<array_type>(rhs))
266 {
267 }
268
269 [[nodiscard]] auto reg() const noexcept
270 requires(requires { typename native_type::value_type; })
271 {
272 return native_type{v};
273 }
274
275 template<numeric_limited O, size_t M>
276 [[nodiscard]] constexpr static simd cast_from(simd<O, M> const& rhs) noexcept
277 requires(sizeof(simd<O, M>) == sizeof(simd))
278 {
279 HI_X_runtime_evaluate_if_valid(simd{native_type::cast_from(rhs.reg())});
280
281 return std::bit_cast<simd>(rhs);
282 }
283
288 template<std::size_t S>
289 [[nodiscard]] constexpr static simd load(std::byte const *ptr) noexcept
290 {
291 HI_X_runtime_evaluate_if_valid(simd{native_type{ptr}});
292
293 auto r = simd{};
294 std::memcpy(&r, ptr, S);
295 return r;
296 }
297
302 [[nodiscard]] constexpr static simd load(std::byte const *ptr) noexcept
303 {
304 HI_X_runtime_evaluate_if_valid(simd{native_type{ptr}});
305
306 auto r = simd{};
307 std::memcpy(&r, ptr, sizeof(r));
308 return r;
309 }
310
315 [[nodiscard]] constexpr static simd load(T const *ptr) noexcept
316 {
317 HI_X_runtime_evaluate_if_valid(simd{native_type{ptr}});
318
319 auto r = simd{};
320 std::memcpy(&r, ptr, sizeof(r));
321 return r;
322 }
323
324 template<std::size_t S>
325 constexpr void store(std::byte *ptr) const noexcept
326 {
327 HI_X_runtime_evaluate_if_valid(reg().store(ptr));
328 std::memcpy(ptr, this, S);
329 }
330
334 constexpr void store(std::byte *ptr) const noexcept
335 {
336 HI_X_runtime_evaluate_if_valid(reg().store(ptr));
337 store<sizeof(*this)>(ptr);
338 }
339
340 [[nodiscard]] constexpr size_t mask() const noexcept
341 {
342 HI_X_runtime_evaluate_if_valid(reg().mask());
343
344 auto r = 0_uz;
345 for (auto i = N; i != 0; --i) {
346 r <<= 1;
347 r |= std::bit_cast<unsigned_type>(v[i - 1]) >> (sizeof(unsigned_type) * CHAR_BIT - 1);
348 }
349 return r;
350 }
351
352 [[nodiscard]] constexpr T const& operator[](std::size_t i) const noexcept
353 {
354 static_assert(std::endian::native == std::endian::little, "Indices need to be reversed on big endian machines");
355 hi_axiom(i < N);
356 return v[i];
357 }
358
359 [[nodiscard]] constexpr T& operator[](std::size_t i) noexcept
360 {
361 static_assert(std::endian::native == std::endian::little, "Indices need to be reversed on big endian machines");
362 hi_axiom(i < N);
363 return v[i];
364 }
365
366 [[nodiscard]] constexpr reference front() noexcept
367 {
368 return v.front();
369 }
370
371 [[nodiscard]] constexpr const_reference front() const noexcept
372 {
373 return v.front();
374 }
375
376 [[nodiscard]] constexpr reference back() noexcept
377 {
378 return v.back();
379 }
380
381 [[nodiscard]] constexpr const_reference back() const noexcept
382 {
383 return v.back();
384 }
385
386 [[nodiscard]] constexpr pointer data() noexcept
387 {
388 return v.data();
389 }
390
391 [[nodiscard]] constexpr const_pointer data() const noexcept
392 {
393 return v.data();
394 }
395
396 [[nodiscard]] constexpr iterator begin() noexcept
397 {
398 return v.begin();
399 }
400
401 [[nodiscard]] constexpr const_iterator begin() const noexcept
402 {
403 return v.begin();
404 }
405
406 [[nodiscard]] constexpr const_iterator cbegin() const noexcept
407 {
408 return v.cbegin();
409 }
410
411 [[nodiscard]] constexpr iterator end() noexcept
412 {
413 return v.end();
414 }
415
416 [[nodiscard]] constexpr const_iterator end() const noexcept
417 {
418 return v.end();
419 }
420
421 [[nodiscard]] constexpr const_iterator cend() const noexcept
422 {
423 return v.cend();
424 }
425
426 [[nodiscard]] constexpr bool empty() const noexcept
427 {
428 return v.empty();
429 }
430
431 HI_X_accessor(0, x);
432 HI_X_accessor(1, y);
433 HI_X_accessor(2, z);
434 HI_X_accessor(3, w);
435 HI_X_accessor(0, r);
436 HI_X_accessor(1, g);
437 HI_X_accessor(2, b);
438 HI_X_accessor(3, a);
439 HI_X_accessor(0, width);
440 HI_X_accessor(1, height);
441 HI_X_accessor(2, depth);
442
443 HI_X_binary_math_op(+);
444 HI_X_binary_math_op(-);
445 HI_X_binary_math_op(*);
446 HI_X_binary_math_op(/);
447 HI_X_binary_math_op(%);
448
449 HI_X_binary_cmp_op(==);
450 HI_X_binary_cmp_op(!=);
451 HI_X_binary_cmp_op(<);
452 HI_X_binary_cmp_op(>);
453 HI_X_binary_cmp_op(<=);
454 HI_X_binary_cmp_op(>=);
455
456 HI_X_binary_bit_op(^);
457 HI_X_binary_bit_op(&);
458 HI_X_binary_bit_op(|);
459 HI_X_binary_shift_op(<<);
460 HI_X_binary_shift_op(>>);
461
462 [[nodiscard]] friend constexpr bool equal(simd lhs, simd rhs) noexcept
463 {
464 HI_X_runtime_evaluate_if_valid(equal(lhs.reg(), rhs.reg()));
465
466 for (auto i = 0_uz; i != N; ++i) {
467 if (lhs.v[i] != rhs.v[i]) {
468 return false;
469 }
470 }
471 return true;
472 }
473
474 HI_X_binary_op_broadcast(==);
475 HI_X_binary_op_broadcast(!=);
476 HI_X_binary_op_broadcast(<);
477 HI_X_binary_op_broadcast(>);
478 HI_X_binary_op_broadcast(<=);
479 HI_X_binary_op_broadcast(>=);
480 HI_X_binary_op_broadcast(+);
481 HI_X_binary_op_broadcast(-);
482 HI_X_binary_op_broadcast(*);
483 HI_X_binary_op_broadcast(/);
484 HI_X_binary_op_broadcast(%);
485 HI_X_binary_op_broadcast(&);
486 HI_X_binary_op_broadcast(|);
487 HI_X_binary_op_broadcast(^);
488
489 HI_X_inplace_op(+=, +);
490 HI_X_inplace_op(-=, -);
491 HI_X_inplace_op(*=, *);
492 HI_X_inplace_op(/=, /);
493 HI_X_inplace_op(%=, %);
494 HI_X_inplace_op(|=, |);
495 HI_X_inplace_op(&=, &);
496 HI_X_inplace_op(^=, ^);
497 HI_X_inplace_op(<<=, <<);
498 HI_X_inplace_op(>>=, >>);
499
504 template<std::size_t I>
505 [[nodiscard]] friend constexpr T& get(simd& rhs) noexcept
506 {
507 static_assert(I < N, "Index out of bounds");
508 return std::get<I>(rhs.v);
509 }
510
515 template<std::size_t I>
516 [[nodiscard]] friend constexpr T get(simd const& rhs) noexcept
517 {
518 static_assert(I < N, "Index out of bounds");
519 HI_X_runtime_evaluate_if_valid(get<I>(rhs.reg()));
520 return std::get<I>(rhs.v);
521 }
522
530 template<std::size_t I>
531 [[nodiscard]] constexpr friend simd insert(simd const& lhs, value_type rhs) noexcept
532 {
533 static_assert(I < size);
534 HI_X_runtime_evaluate_if_valid(simd{insert<I>(lhs.reg(), rhs)});
535
536 auto r = lhs;
537 std::get<I>(r.v) = rhs;
538 return r;
539 }
540
545 template<std::size_t Mask = ~std::size_t{0}>
546 [[nodiscard]] friend constexpr simd set_zero(simd rhs) noexcept
547 {
548 HI_X_runtime_evaluate_if_valid(simd{set_zero<Mask>(rhs.reg())});
549
550 auto r = simd{};
551 for (std::size_t i = 0; i != N; ++i) {
552 if (to_bool((Mask >> i) & 1)) {
553 r.v[i] = T{0};
554 } else {
555 r.v[i] = rhs.v[i];
556 }
557 }
558 return r;
559 }
560
568 template<std::size_t Mask>
569 [[nodiscard]] friend constexpr simd blend(simd const& lhs, simd const& rhs) noexcept
570 {
571 HI_X_runtime_evaluate_if_valid(simd{blend<Mask>(lhs.reg(), rhs.reg())});
572
573 auto r = simd{};
574 for (std::size_t i = 0; i != N; ++i) {
575 r[i] = to_bool((Mask >> i) & 1) ? rhs[i] : lhs[i];
576 }
577 return r;
578 }
579
582 [[nodiscard]] friend constexpr simd blend(simd const& a, simd const& b, simd const& mask)
583 {
584 HI_X_runtime_evaluate_if_valid(simd{blend(a.reg(), b.reg(), mask.reg())});
585
586 auto r = simd{};
587 for (std::size_t i = 0; i != N; ++i) {
588 r[i] = mask[i] != T{0} ? b[i] : a[i];
589 }
590 return r;
591 }
592
597 template<std::size_t Mask>
598 [[nodiscard]] friend constexpr simd neg(simd rhs) noexcept
599 {
600 return blend<Mask>(rhs, -rhs);
601 }
602
603 [[nodiscard]] friend constexpr simd operator-(simd const& rhs) noexcept
604 {
605 HI_X_runtime_evaluate_if_valid(simd{-rhs.reg()});
606 return T{0} - rhs;
607 }
608
609 [[nodiscard]] friend constexpr simd abs(simd const& rhs) noexcept
610 {
611 HI_X_runtime_evaluate_if_valid(simd{abs(rhs.reg())});
612 return max(rhs, -rhs);
613 }
614
615 [[nodiscard]] friend constexpr simd rcp(simd const& rhs) noexcept
616 {
617 HI_X_runtime_evaluate_if_valid(simd{rcp(rhs.reg())});
618 return T{1} / rhs;
619 }
620
621 [[nodiscard]] friend constexpr simd sqrt(simd const& rhs) noexcept
622 {
623 HI_X_runtime_evaluate_if_valid(simd{sqrt(rhs.reg())});
624
625 auto r = simd{};
626 for (std::size_t i = 0; i != N; ++i) {
627 r[i] = std::sqrt(rhs.v[i]);
628 }
629 return r;
630 }
631
632 [[nodiscard]] friend constexpr simd rcp_sqrt(simd const& rhs) noexcept
633 {
634 HI_X_runtime_evaluate_if_valid(simd{rcp_sqrt(rhs.reg())});
635 return rcp(sqrt(rhs));
636 }
637
638 [[nodiscard]] friend constexpr simd floor(simd const& rhs) noexcept
639 requires(std::is_floating_point_v<value_type>)
640 {
641 HI_X_runtime_evaluate_if_valid(simd{floor(rhs.reg())});
642
643 auto r = simd{};
644 for (std::size_t i = 0; i != N; ++i) {
645 r[i] = std::floor(rhs.v[i]);
646 }
647 return r;
648 }
649
650 [[nodiscard]] friend constexpr simd ceil(simd const& rhs) noexcept
651 requires(std::is_floating_point_v<value_type>)
652 {
653 HI_X_runtime_evaluate_if_valid(simd{ceil(rhs.reg())});
654
655 auto r = simd{};
656 for (std::size_t i = 0; i != N; ++i) {
657 r[i] = std::ceil(rhs.v[i]);
658 }
659 return r;
660 }
661
662 [[nodiscard]] friend constexpr simd round(simd const& rhs) noexcept
663 requires(std::is_floating_point_v<value_type>)
664 {
665 HI_X_runtime_evaluate_if_valid(simd{round(rhs.reg())});
666
667 auto r = simd{};
668 for (std::size_t i = 0; i != N; ++i) {
669 r[i] = std::round(rhs.v[i]);
670 }
671 return r;
672 }
673
681 template<std::size_t Mask>
682 [[nodiscard]] hi_force_inline friend constexpr T dot(simd const& lhs, simd const& rhs) noexcept
683 {
684 HI_X_runtime_evaluate_if_valid(get<0>(dot<Mask>(lhs.reg(), rhs.reg())));
685
686 auto r = T{};
687 for (std::size_t i = 0; i != N; ++i) {
688 if (to_bool(Mask & (1_uz << i))) {
689 r += lhs.v[i] * rhs.v[i];
690 }
691 }
692 return r;
693 }
694
701 template<std::size_t Mask>
702 [[nodiscard]] friend T hypot(simd const& rhs) noexcept
703 requires(std::is_floating_point_v<value_type>)
704 {
705 HI_X_runtime_evaluate_if_valid(get<0>(sqrt(dot<Mask>(rhs.reg(), rhs.reg()))));
706 return std::sqrt(dot<Mask>(rhs, rhs));
707 }
708
715 template<std::size_t Mask>
716 [[nodiscard]] hi_force_inline friend constexpr T squared_hypot(simd const& rhs) noexcept
717 {
718 HI_X_runtime_evaluate_if_valid(get<0>(dot<Mask>(rhs.reg(), rhs.reg())));
719 return dot<Mask>(rhs, rhs);
720 }
721
727 template<std::size_t Mask>
728 [[nodiscard]] friend constexpr T rcp_hypot(simd const& rhs) noexcept
729 {
730 HI_X_runtime_evaluate_if_valid(get<0>(rcp_sqrt(dot<Mask>(rhs.reg(), rhs.reg()))));
731 return 1.0f / hypot<Mask>(rhs);
732 }
733
741 template<std::size_t Mask>
742 [[nodiscard]] friend constexpr simd normalize(simd const& rhs) noexcept
743 {
744 HI_X_runtime_evaluate_if_valid(simd{rhs * rcp_sqrt(dot<Mask>(rhs.reg(), rhs.reg()))});
745
746 hilet rcp_hypot_ = rcp_hypot<Mask>(rhs);
747
748 auto r = simd{};
749 for (std::size_t i = 0; i != N; ++i) {
750 if (to_bool(Mask & (1_uz << i))) {
751 r.v[i] = rhs.v[i] * rcp_hypot_;
752 }
753 }
754 return r;
755 }
756
761 [[nodiscard]] friend constexpr simd rotl(simd const& lhs, unsigned int rhs) noexcept
762 {
763 hi_axiom(rhs > 0 and rhs < sizeof(value_type) * CHAR_BIT);
764
765 hilet remainder = narrow_cast<unsigned int>(sizeof(value_type) * CHAR_BIT - rhs);
766
767 return (lhs << rhs) | (lhs >> remainder);
768 }
769
774 [[nodiscard]] friend constexpr simd rotr(simd const& lhs, unsigned int rhs) noexcept
775 {
776 hi_axiom(rhs > 0 and rhs < sizeof(value_type) * CHAR_BIT);
777
778 hilet remainder = narrow_cast<unsigned int>(sizeof(value_type) * CHAR_BIT - rhs);
779
780 return (lhs >> rhs) | (lhs << remainder);
781 }
782
783 [[nodiscard]] friend constexpr simd min(simd const& lhs, simd const& rhs) noexcept
784 {
785 HI_X_runtime_evaluate_if_valid(simd{min(lhs.reg(), rhs.reg())});
786
787 auto r = simd{};
788 for (std::size_t i = 0; i != N; ++i) {
789 r.v[i] = std::min(lhs.v[i], rhs.v[i]);
790 }
791 return r;
792 }
793
794 [[nodiscard]] friend constexpr simd max(simd const& lhs, simd const& rhs) noexcept
795 {
796 HI_X_runtime_evaluate_if_valid(simd{max(lhs.reg(), rhs.reg())});
797
798 auto r = simd{};
799 for (std::size_t i = 0; i != N; ++i) {
800 r.v[i] = std::max(lhs.v[i], rhs.v[i]);
801 }
802 return r;
803 }
804
805 [[nodiscard]] friend constexpr simd clamp(simd const& lhs, simd const& low, simd const& high) noexcept
806 {
807 return min(max(lhs, low), high);
808 }
809
810 [[nodiscard]] friend constexpr simd hadd(simd const& lhs, simd const& rhs) noexcept
811 {
812 HI_X_runtime_evaluate_if_valid(simd{horizontal_add(lhs.reg(), rhs.reg())});
813
814 hi_axiom(N % 2 == 0);
815
816 auto r = simd{};
817
818 std::size_t src_i = 0;
819 std::size_t dst_i = 0;
820 while (src_i != N) {
821 auto tmp = lhs[src_i++];
822 tmp += lhs[src_i++];
823 r.v[dst_i++] = tmp;
824 }
825
826 src_i = 0;
827 while (src_i != N) {
828 auto tmp = rhs[src_i++];
829 tmp += rhs[src_i++];
830 r.v[dst_i++] = tmp;
831 }
832 return r;
833 }
834
835 [[nodiscard]] friend constexpr simd hsub(simd const& lhs, simd const& rhs) noexcept
836 {
837 HI_X_runtime_evaluate_if_valid(simd{horizontal_sub(lhs.reg(), rhs.reg())});
838
839 hi_axiom(N % 2 == 0);
840
841 auto r = simd{};
842
843 std::size_t src_i = 0;
844 std::size_t dst_i = 0;
845 while (src_i != N) {
846 auto tmp = lhs[src_i++];
847 tmp -= lhs[src_i++];
848 r.v[dst_i++] = tmp;
849 }
850
851 src_i = 0;
852 while (src_i != N) {
853 auto tmp = rhs[src_i++];
854 tmp -= rhs[src_i++];
855 r.v[dst_i++] = tmp;
856 }
857 return r;
858 }
859
864 template<std::size_t Mask>
865 [[nodiscard]] friend constexpr simd addsub(simd const& lhs, simd const& rhs) noexcept
866 {
867 constexpr std::size_t not_mask = (1 << N) - 1;
868 return lhs + neg<Mask ^ not_mask>(rhs);
869 }
870
873 [[nodiscard]] friend constexpr simd cross_2D(simd const& rhs) noexcept
874 requires(N >= 2)
875 {
876 return simd{-rhs.y(), rhs.x()};
877 }
878
881 [[nodiscard]] friend constexpr simd normal_2D(simd const& rhs) noexcept
882 requires(N >= 2)
883 {
884 return normalize<0b0011>(cross_2D(rhs));
885 }
886
890 [[nodiscard]] friend constexpr float cross_2D(simd const& lhs, simd const& rhs) noexcept
891 requires(N >= 2)
892 {
893 hilet tmp1 = rhs.yxwz();
894 hilet tmp2 = lhs * tmp1;
895 hilet tmp3 = hsub(tmp2, tmp2);
896 return get<0>(tmp3);
897 }
898
899 // x=a.y*b.z - a.z*b.y
900 // y=a.z*b.x - a.x*b.z
901 // z=a.x*b.y - a.y*b.x
902 // w=a.w*b.w - a.w*b.w
903 [[nodiscard]] constexpr friend simd cross_3D(simd const& lhs, simd const& rhs) noexcept
904 requires(N == 4)
905 {
906 hilet a_left = lhs.yzxw();
907 hilet b_left = rhs.zxyw();
908 hilet left = a_left * b_left;
909
910 hilet a_right = lhs.zxyw();
911 hilet b_right = rhs.yzxw();
912 hilet right = a_right * b_right;
913 return left - right;
914 }
915
916 [[nodiscard]] constexpr static simd byte_srl_shuffle_indices(unsigned int rhs)
917 requires(std::is_same_v<value_type, int8_t> and size == 16)
918 {
919 static_assert(std::endian::native == std::endian::little);
920
921 auto r = simd{};
922 for (auto i = 0; i != 16; ++i) {
923 if ((i + rhs) < 16) {
924 r[i] = narrow_cast<int8_t>(i + rhs);
925 } else {
926 // Indices set to -1 result in a zero after a byte shuffle.
927 r[i] = -1;
928 }
929 }
930 return r;
931 }
932
933 [[nodiscard]] constexpr static simd byte_sll_shuffle_indices(unsigned int rhs)
934 requires(std::is_same_v<value_type, int8_t> and size == 16)
935 {
936 static_assert(std::endian::native == std::endian::little);
937
938 auto r = simd{};
939 for (auto i = 0; i != 16; ++i) {
940 if ((i - rhs) >= 0) {
941 r[i] = narrow_cast<int8_t>(i - rhs);
942 } else {
943 // Indices set to -1 result in a zero after a byte shuffle.
944 r[i] = -1;
945 }
946 }
947 return r;
948 }
949
952 [[nodiscard]] friend constexpr simd permute(simd const& lhs, simd const& rhs) noexcept
953 requires(std::is_integral_v<value_type>)
954 {
955 HI_X_runtime_evaluate_if_valid(simd{permute(lhs.reg(), rhs.reg())});
956
957 auto r = simd{};
958 for (std::size_t i = 0; i != N; ++i) {
959 if (rhs[i] >= 0) {
960 r[i] = lhs[rhs[i] & 0xf];
961 } else {
962 r[i] = 0;
963 }
964 }
965
966 return r;
967 }
968
971 [[nodiscard]] friend constexpr simd midpoint(simd const& p1, simd const& p2) noexcept
972 {
973 return (p1 + p2) * 0.5f;
974 }
975
978 [[nodiscard]] friend constexpr simd reflect_point(simd const& p, simd const anchor) noexcept
979 {
980 return anchor - (p - anchor);
981 }
982
983 hi_warning_push();
984 // C26494 Variable '...' is uninitialized. Always initialize an object (type.5).
985 // Internal to _MM_TRANSPOSE4_PS
986 hi_warning_ignore_msvc(26494);
987 template<typename... Columns>
988 [[nodiscard]] friend constexpr std::array<simd, size> transpose(Columns const&...columns) noexcept
989 {
990 static_assert(sizeof...(Columns) == size, "Can only transpose square matrices");
991
992 if (not std::is_constant_evaluated()) {
993 if constexpr (requires { transpose(columns.reg()...); }) {
994 hilet tmp = transpose(columns.reg()...);
995 auto r = std::array<simd, size>{};
996 for (auto i = 0_uz; i != size; ++i) {
997 r[i] = simd{tmp[i]};
998 }
999 return r;
1000 }
1001 }
1002
1003 auto r = std::array<simd, N>{};
1004 auto f = [&r, &columns... ]<std::size_t... Ints>(std::index_sequence<Ints...>)
1005 {
1006 auto tf = [&r](auto i, auto v) {
1007 for (std::size_t j = 0; j != N; ++j) {
1008 r[j][i] = v[j];
1009 }
1010 return 0;
1011 };
1012 static_cast<void>((tf(Ints, columns) + ...));
1013 };
1014 f(std::make_index_sequence<sizeof...(columns)>{});
1015 return r;
1016 }
1017 hi_warning_pop();
1018
1019 [[nodiscard]] constexpr friend simd composit(simd const& under, simd const& over) noexcept
1020 requires(N == 4 && std::is_floating_point_v<T>)
1021 {
1022 if (get<3>(over) <= value_type{0}) {
1023 // fully transparent.
1024 return under;
1025 }
1026 if (get<3>(over) >= value_type{1}) {
1027 // fully opaque;
1028 return over;
1029 }
1030
1031 hilet over_alpha = over.wwww();
1032 hilet under_alpha = under.wwww();
1033
1034 hilet over_color = over.xyz1();
1035 hilet under_color = under.xyz1();
1036
1037 hilet output_color = over_color * over_alpha + under_color * under_alpha * (T{1} - over_alpha);
1038
1039 return output_color / output_color.www1();
1040 }
1041
1042 [[nodiscard]] constexpr friend simd composit(simd const& under, simd const& over) noexcept
1043 requires(std::is_same_v<value_type, float16> and size == 4)
1044 {
1045 return simd{composit(static_cast<simd<float, 4>>(under), static_cast<simd<float, 4>>(over))};
1046 }
1047
/** Format a simd as text.
 *
 * @param rhs The simd to format.
 * @return The elements formatted with "{}", separated by "; " and wrapped
 *         in parentheses.
 */
[[nodiscard]] friend std::string to_string(simd const& rhs) noexcept
{
    auto text = std::string{};
    text += '(';

    for (std::size_t i = 0; i != N; ++i) {
        // The separator goes between elements, never in front of the first.
        if (i != 0) {
            text += "; ";
        }
        text += std::format("{}", rhs[i]);
    }

    text += ')';
    return text;
}
1062
1063 friend std::ostream& operator<<(std::ostream& lhs, simd const& rhs)
1064 {
1065 return lhs << to_string(rhs);
1066 }
1067
1072 template<std::size_t FromElement, std::size_t ToElement>
1073 [[nodiscard]] constexpr friend simd insert(simd const& lhs, simd const& rhs)
1074 {
1075 HI_X_runtime_evaluate_if_valid(simd{insert<FromElement, ToElement>(lhs.reg(), rhs.reg())});
1076
1077 auto r = simd{};
1078 for (std::size_t i = 0; i != N; ++i) {
1079 r[i] = (i == ToElement) ? rhs[FromElement] : lhs[i];
1080 }
1081
1082 return r;
1083 }
1084
1092 template<fixed_string Order>
1093 [[nodiscard]] constexpr simd swizzle() const
1094 {
1095 static_assert(Order.size() <= N);
1096
1097 HI_X_runtime_evaluate_if_valid(simd{reg().swizzle<Order>()});
1098
1099 auto r = simd{};
1100 swizzle_detail<0, Order>(r);
1101 return r;
1102 }
1103
1104#define SWIZZLE(name, str) \
1105 [[nodiscard]] constexpr simd name() const noexcept \
1106 requires(sizeof(str) - 1 <= N) \
1107 { \
1108 return swizzle<str>(); \
1109 }
1110
1111#define SWIZZLE_4D(name, str) \
1112 SWIZZLE(name##0, str "0") \
1113 SWIZZLE(name##1, str "1") \
1114 SWIZZLE(name##x, str "a") \
1115 SWIZZLE(name##y, str "b") \
1116 SWIZZLE(name##z, str "c") \
1117 SWIZZLE(name##w, str "d")
1118
1119#define SWIZZLE_3D(name, str) \
1120 SWIZZLE_4D(name##0, str "0") \
1121 SWIZZLE_4D(name##1, str "1") \
1122 SWIZZLE_4D(name##x, str "a") \
1123 SWIZZLE_4D(name##y, str "b") \
1124 SWIZZLE_4D(name##z, str "c") \
1125 SWIZZLE_4D(name##w, str "d") \
1126 SWIZZLE(name##0, str "0") \
1127 SWIZZLE(name##1, str "1") \
1128 SWIZZLE(name##x, str "a") \
1129 SWIZZLE(name##y, str "b") \
1130 SWIZZLE(name##z, str "c") \
1131 SWIZZLE(name##w, str "d")
1132
1133#define SWIZZLE_2D(name, str) \
1134 SWIZZLE_3D(name##0, str "0") \
1135 SWIZZLE_3D(name##1, str "1") \
1136 SWIZZLE_3D(name##x, str "a") \
1137 SWIZZLE_3D(name##y, str "b") \
1138 SWIZZLE_3D(name##z, str "c") \
1139 SWIZZLE_3D(name##w, str "d") \
1140 SWIZZLE(name##0, str "0") \
1141 SWIZZLE(name##1, str "1") \
1142 SWIZZLE(name##x, str "a") \
1143 SWIZZLE(name##y, str "b") \
1144 SWIZZLE(name##z, str "c") \
1145 SWIZZLE(name##w, str "d")
1146
1147 SWIZZLE_2D(_0, "0")
1148 SWIZZLE_2D(_1, "1")
1149 SWIZZLE_2D(x, "a")
1150 SWIZZLE_2D(y, "b")
1151 SWIZZLE_2D(z, "c")
1152 SWIZZLE_2D(w, "d")
1153
1154#undef SWIZZLE
1155#undef SWIZZLE_2D
1156#undef SWIZZLE_3D
1157#undef SWIZZLE_4D
1158
1159 template<size_t I, fixed_string Order>
1160 constexpr void swizzle_detail(simd& r) const noexcept
1161 {
1162 static_assert(I < size);
1163
1164 // Get the source element, or '0'.
1165 constexpr char c = I < Order.size() ? get<I>(Order) : '0';
1166
1167 if constexpr (c == '1') {
1168 r = insert<I>(r, value_type{1});
1169
1170 } else if constexpr (c == '0') {
1171 r = insert<I>(r, value_type{0});
1172
1173 } else if constexpr (c >= 'a' and c <= 'v') {
1174 constexpr size_t src_index = c - 'a';
1175 static_assert(src_index < size);
1176
1177 r = insert<I>(r, get<src_index>(*this));
1178
1179 } else if constexpr (c >= 'w' and c <= 'z') {
1180 constexpr size_t src_index = c == 'x' ? 0 : c == 'y' ? 1 : c == 'z' ? 2 : 3;
1181 static_assert(src_index < size);
1182
1183 r = insert<I>(r, get<src_index>(*this));
1184
1185 } else {
1186 hi_static_no_default();
1187 }
1188
1189 if constexpr (I + 1 < size) {
1190 swizzle_detail<I + 1, Order>(r);
1191 }
1192 }
1193};
1194
1195using i8x1 = simd<int8_t, 1>;
1196using i8x2 = simd<int8_t, 2>;
1197using i8x4 = simd<int8_t, 4>;
1198using i8x8 = simd<int8_t, 8>;
1199using i8x16 = simd<int8_t, 16>;
1200using i8x32 = simd<int8_t, 32>;
1201using i8x64 = simd<int8_t, 64>;
1202
1203using u8x1 = simd<uint8_t, 1>;
1204using u8x2 = simd<uint8_t, 2>;
1205using u8x4 = simd<uint8_t, 4>;
1206using u8x8 = simd<uint8_t, 8>;
1207using u8x16 = simd<uint8_t, 16>;
1208using u8x32 = simd<uint8_t, 32>;
1209using u8x64 = simd<uint8_t, 64>;
1210
1211using i16x1 = simd<int16_t, 1>;
1212using i16x2 = simd<int16_t, 2>;
1213using i16x4 = simd<int16_t, 4>;
1214using i16x8 = simd<int16_t, 8>;
1217
1218using u16x1 = simd<uint16_t, 1>;
1219using u16x2 = simd<uint16_t, 2>;
1220using u16x4 = simd<uint16_t, 4>;
1221using u16x8 = simd<uint16_t, 8>;
1224
1225using f16x4 = simd<float16, 4>;
1226
1227using i32x1 = simd<int32_t, 1>;
1228using i32x2 = simd<int32_t, 2>;
1229using i32x4 = simd<int32_t, 4>;
1230using i32x8 = simd<int32_t, 8>;
1232
1233using u32x1 = simd<uint32_t, 1>;
1234using u32x2 = simd<uint32_t, 2>;
1235using u32x4 = simd<uint32_t, 4>;
1236using u32x8 = simd<uint32_t, 8>;
1238
1239using f32x1 = simd<float, 1>;
1240using f32x2 = simd<float, 2>;
1241using f32x4 = simd<float, 4>;
1242using f32x8 = simd<float, 8>;
1243using f32x16 = simd<float, 16>;
1244
1245using i64x1 = simd<int64_t, 1>;
1246using i64x2 = simd<int64_t, 2>;
1247using i64x4 = simd<int64_t, 4>;
1248using i64x8 = simd<int64_t, 8>;
1249
1250using u64x1 = simd<uint64_t, 1>;
1251using u64x2 = simd<uint64_t, 2>;
1252using u64x4 = simd<uint64_t, 4>;
1253using u64x8 = simd<uint64_t, 8>;
1254
1255using f64x1 = simd<double, 1>;
1256using f64x2 = simd<double, 2>;
1257using f64x4 = simd<double, 4>;
1258using f64x8 = simd<double, 8>;
1259
1260} // namespace hi::inline v1
1261
1262template<class T, std::size_t N>
1263struct std::tuple_size<hi::simd<T, N>> : std::integral_constant<std::size_t, N> {};
1264
1265template<std::size_t I, class T, std::size_t N>
1266struct std::tuple_element<I, hi::simd<T, N>> {
1267 using type = T;
1268};
1269
1270template<typename T, size_t N>
1271struct std::equal_to<::hi::simd<T, N>> {
1272 constexpr bool operator()(::hi::simd<T, N> const& lhs, ::hi::simd<T, N> const& rhs) const noexcept
1273 {
1274 return equal(lhs, rhs);
1275 }
1276};
1277
1278// Add equality operator to Google-test internal namespace so that ASSERT_EQ() work.
1279template<typename T, size_t N>
1280inline bool operator==(::hi::simd<T, N> lhs, ::hi::simd<T, N> rhs) noexcept
1281{
1282 return std::equal_to{}(lhs, rhs);
1283}
1284
1285// Add equality operator to Google-test internal namespace so that ASSERT_NE() work.
1286template<typename T, size_t N>
1287inline bool operator!=(::hi::simd<T, N> lhs, ::hi::simd<T, N> rhs) noexcept
1288{
1289 return not std::equal_to<::hi::simd<T, N>>{}(lhs, rhs);
1290}
1291
1292#undef HI_X_accessor
1293#undef HI_X_binary_cmp_op
1294#undef HI_X_binary_math_op
1295#undef HI_X_binary_bit_op
1296#undef HI_X_binary_shift_op
1297#undef HI_X_binary_op_broadcast
1298#undef HI_X_inplace_op
1299#undef HI_X_runtime_evaluate_if_valid
1300
1301hi_warning_pop();
@ other
The gui_event does not have associated data.
STL namespace.
DOXYGEN BUG.
Definition algorithm.hpp:16
geometry/margins.hpp
Definition lookahead_iterator.hpp:5
hi_export void composit(pixmap_span< sfloat_rgba16 > dst, hi::color color, graphic_path const &mask) noexcept
Composit color onto the destination image where the mask is solid.
Definition graphic_path.hpp:667
constexpr Out load(In const *src) noexcept
Unaligned Load of a numeric value from an array.
Definition endian.hpp:93
constexpr Out narrow_cast(In const &rhs) noexcept
Cast numeric values without loss of precision.
Definition cast.hpp:377
T begin(T... args)
T ceil(T... args)
T end(T... args)
T equal(T... args)
T floor(T... args)
T hypot(T... args)
T left(T... args)
T max(T... args)
T memcpy(T... args)
T min(T... args)
T operator!=(T... args)
T remainder(T... args)
T round(T... args)
T sqrt(T... args)
T to_string(T... args)