HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
simd.hpp
1// Copyright Take Vos 2020-2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "native_f32x4_sse.hpp"
8#include "native_f64x4_avx.hpp"
9#include "native_i32x4_sse2.hpp"
10#include "native_i64x4_avx2.hpp"
11#include "native_u32x4_sse2.hpp"
12#include "native_simd_conversions_x86.hpp"
13
14#include "../utility/module.hpp"
15#include <cstdint>
16#include <ostream>
17#include <string>
18#include <array>
19#include <type_traits>
20#include <concepts>
21#include <bit>
22#include <climits>
23#include <utility>
24
25hi_warning_push();
26// C4702 unreachable code: Suppressed due to intrinsics and std::is_constant_evaluated()
27hi_warning_ignore_msvc(4702);
28// C26467: Converting from floating point to unsigned integral types results in non-portable code if...
29// We are trying to get SIMD intrinsics to work the same as scalar functions.
30hi_warning_ignore_msvc(26467);
31// C26472: Don't use static_cast for arithmetic conversions.
32// We are trying to get SIMD intrinsics to work the same as scalar functions.
33hi_warning_ignore_msvc(26472)
34
35#define HI_X_runtime_evaluate_if_valid(...) \
36 do { \
37 if (not std::is_constant_evaluated()) { \
38 if constexpr (requires { __VA_ARGS__; }) { \
39 return __VA_ARGS__; \
40 } \
41 } \
42 } while (false)
43
44#define HI_X_accessor(index, name) \
45 [[nodiscard]] constexpr T name() const noexcept \
46 requires(N > index) \
47 { \
48 HI_X_runtime_evaluate_if_valid(get<index>(reg())); \
49 return std::get<index>(v); \
50 } \
51\
52 [[nodiscard]] constexpr T& name() noexcept \
53 requires(N > index) \
54 { \
55 return std::get<index>(v); \
56 }
57
58#define HI_X_binary_math_op(op) \
59 [[nodiscard]] friend constexpr simd operator op(simd lhs, simd rhs) noexcept \
60 requires(requires(value_type a, value_type b) { a op b; }) \
61 { \
62 HI_X_runtime_evaluate_if_valid(simd{lhs.reg() op rhs.reg()}); \
63\
64 auto r = simd{}; \
65 for (std::size_t i = 0; i != N; ++i) { \
66 r[i] = lhs[i] op rhs[i]; \
67 } \
68 return r; \
69 }
70
71#define HI_X_binary_bit_op(op) \
72 [[nodiscard]] friend constexpr simd operator op(simd lhs, simd rhs) noexcept \
73 { \
74 HI_X_runtime_evaluate_if_valid(simd{lhs.reg() op rhs.reg()}); \
75\
76 auto r = simd{}; \
77 for (std::size_t i = 0; i != N; ++i) { \
78 hilet lhs_vi = std::bit_cast<unsigned_type>(lhs[i]); \
79 hilet rhs_vi = std::bit_cast<unsigned_type>(rhs[i]); \
80 hilet r_vi = static_cast<unsigned_type>(lhs_vi op rhs_vi); \
81 r[i] = std::bit_cast<value_type>(r_vi); \
82 } \
83 return r; \
84 }
85
86#define HI_X_binary_shift_op(op) \
87 [[nodiscard]] friend constexpr simd operator op(simd const& lhs, unsigned int rhs) noexcept \
88 { \
89 HI_X_runtime_evaluate_if_valid(simd{lhs.reg() op rhs}); \
90\
91 auto r = simd{}; \
92 for (std::size_t i = 0; i != N; ++i) { \
93 r.v[i] = lhs.v[i] op rhs; \
94 } \
95 return r; \
96 }
97
98#define HI_X_binary_cmp_op(op) \
99 [[nodiscard]] friend constexpr simd operator op(simd lhs, simd rhs) noexcept \
100 requires(requires(value_type a, value_type b) { a op b; }) \
101 { \
102 HI_X_runtime_evaluate_if_valid(simd{lhs.reg() op rhs.reg()}); \
103\
104 auto r = simd{}; \
105 for (std::size_t i = 0; i != N; ++i) { \
106 r[i] = lhs[i] op rhs[i] ? ones_value : zero_value; \
107 } \
108 return r; \
109 }
110
111#define HI_X_binary_op_broadcast(op) \
112 [[nodiscard]] friend constexpr auto operator op(value_type lhs, simd rhs) noexcept \
113 requires(requires(value_type a, value_type b) { a op b; }) \
114 { \
115 return broadcast(lhs) op rhs; \
116 } \
117\
118 [[nodiscard]] friend constexpr auto operator op(simd lhs, value_type rhs) noexcept \
119 requires(requires(value_type a, value_type b) { a op b; }) \
120 { \
121 return lhs op broadcast(rhs); \
122 }
123
124#define HI_X_inplace_op(long_op, short_op) \
125 constexpr simd& operator long_op(auto rhs) noexcept \
126 requires(requires { *this short_op rhs; }) \
127 { \
128 return *this = *this short_op rhs; \
129 }
130
131namespace hi::inline v1 {
132
133template<numeric_limited T, std::size_t N>
134struct simd {
135 using value_type = T;
136 constexpr static size_t size = N;
137
138 using unsigned_type = make_uintxx_t<sizeof(value_type) * CHAR_BIT>;
139
140 constexpr static bool has_native_type = requires { typename native_simd<value_type, size>::value_type; };
141 using native_type = native_simd<value_type, size>;
142
143 using array_type = std::array<value_type, size>;
144 using size_type = typename array_type::size_type;
145 using difference_type = typename array_type::difference_type;
146 using reference = typename array_type::reference;
147 using const_reference = typename array_type::const_reference;
148 using pointer = typename array_type::pointer;
149 using const_pointer = typename array_type::const_pointer;
150 using iterator = typename array_type::iterator;
151 using const_iterator = typename array_type::const_iterator;
152
153 constexpr static value_type zero_value = value_type{};
154 constexpr static value_type ones_value = std::bit_cast<value_type>(std::numeric_limits<unsigned_type>::max());
155
156 array_type v;
157
158 constexpr simd() noexcept
159 {
160 if (not std::is_constant_evaluated()) {
161 if constexpr (requires { *this = simd{native_type{}}; }) {
162 *this = simd{native_type{}};
163 }
164 }
165 v = array_type{};
166 }
167
168 constexpr simd(simd const& rhs) noexcept = default;
169 constexpr simd(simd&& rhs) noexcept = default;
170 constexpr simd& operator=(simd const& rhs) noexcept = default;
171 constexpr simd& operator=(simd&& rhs) noexcept = default;
172
173 template<numeric_limited U>
174 [[nodiscard]] constexpr explicit simd(simd<U, N> const& other) noexcept
175 {
176 if (not std::is_constant_evaluated()) {
177 if constexpr (requires { *this = simd{native_type{other.reg()}}; }) {
178 *this = simd{native_type{other.reg()}};
179 return;
180 }
181 }
182
183 for (std::size_t i = 0; i != N; ++i) {
184 if constexpr (std::is_integral_v<T> and std::is_floating_point_v<U>) {
185 // SSE conversion round floats before converting to integer.
186 v[i] = static_cast<value_type>(std::round(other[i]));
187 } else {
188 v[i] = static_cast<value_type>(other[i]);
189 }
190 }
191 }
192
193 template<numeric_limited U>
194 [[nodiscard]] constexpr explicit simd(simd<U, size / 2> const& a, simd<U, size / 2> const& b) noexcept
195 {
196 if (not std::is_constant_evaluated()) {
197 if constexpr (requires { simd{native_type{a.reg(), b.reg()}}; }) {
198 *this = simd{native_type{a.reg(), b.reg()}};
199 return;
200 }
201 }
202
203 for (std::size_t i = 0; i != size; ++i) {
204 hilet tmp = i < (size / 2) ? a[i] : b[i];
205 if constexpr (std::is_integral_v<T> and std::is_floating_point_v<U>) {
206 // SSE conversion round floats before converting to integer.
207 v[i] = static_cast<value_type>(std::round(tmp));
208 } else {
209 v[i] = static_cast<value_type>(tmp);
210 }
211 }
212 }
213
214 template<std::convertible_to<value_type>... Args>
215 [[nodiscard]] constexpr explicit simd(value_type first, Args... args) noexcept
216 {
217 if (not std::is_constant_evaluated()) {
218 if constexpr (requires { simd{native_type{first, static_cast<value_type>(args)...}}; }) {
219 *this = simd{native_type{first, static_cast<value_type>(args)...}};
220 return;
221 }
222 }
223
224 v = array_type{first, static_cast<value_type>(args)...};
225 }
226
227 [[nodiscard]] static constexpr simd broadcast(T rhs) noexcept
228 {
229 HI_X_runtime_evaluate_if_valid(simd{native_type::broadcast(rhs)});
230
231 auto r = simd{};
232 for (std::size_t i = 0; i != N; ++i) {
233 r[i] = rhs;
234 }
235 return r;
236 }
237
238 [[nodiscard]] static constexpr simd epsilon() noexcept
239 {
240 if constexpr (std::is_floating_point_v<T>) {
241 return broadcast(std::numeric_limits<T>::epsilon());
242 } else {
243 return broadcast(T{0});
244 }
245 }
246
247 [[nodiscard]] simd(std::array<T, N> const& rhs) noexcept : v(rhs) {}
248
249 simd& operator=(std::array<T, N> const& rhs) noexcept
250 {
251 v = rhs;
252 return *this;
253 }
254
255 [[nodiscard]] operator std::array<T, N>() const noexcept
256 {
257 return v;
258 }
259
260 [[nodiscard]] explicit simd(native_type rhs) noexcept
261 requires(requires { typename native_type::value_type; })
262 : v(static_cast<array_type>(rhs))
263 {
264 }
265
266 [[nodiscard]] auto reg() const noexcept
267 requires(requires { typename native_type::value_type; })
268 {
269 return native_type{v};
270 }
271
272 template<numeric_limited O, size_t M>
273 [[nodiscard]] constexpr static simd cast_from(simd<O, M> const& rhs) noexcept
274 requires(sizeof(simd<O, M>) == sizeof(simd))
275 {
276 HI_X_runtime_evaluate_if_valid(simd{native_type::cast_from(rhs.reg())});
277
278 return std::bit_cast<simd>(rhs);
279 }
280
285 template<std::size_t S>
286 [[nodiscard]] static constexpr simd load(std::byte const *ptr) noexcept
287 {
288 HI_X_runtime_evaluate_if_valid(simd{native_type{ptr}});
289
290 auto r = simd{};
291 std::memcpy(&r, ptr, S);
292 return r;
293 }
294
299 [[nodiscard]] static constexpr simd load(std::byte const *ptr) noexcept
300 {
301 HI_X_runtime_evaluate_if_valid(simd{native_type{ptr}});
302
303 auto r = simd{};
304 std::memcpy(&r, ptr, sizeof(r));
305 return r;
306 }
307
312 [[nodiscard]] static constexpr simd load(T const *ptr) noexcept
313 {
314 HI_X_runtime_evaluate_if_valid(simd{native_type{ptr}});
315
316 auto r = simd{};
317 std::memcpy(&r, ptr, sizeof(r));
318 return r;
319 }
320
321 template<std::size_t S>
322 constexpr void store(std::byte *ptr) const noexcept
323 {
324 HI_X_runtime_evaluate_if_valid(reg().store(ptr));
325 std::memcpy(ptr, this, S);
326 }
327
331 constexpr void store(std::byte *ptr) const noexcept
332 {
333 HI_X_runtime_evaluate_if_valid(reg().store(ptr));
334 store<sizeof(*this)>(ptr);
335 }
336
337 [[nodiscard]] constexpr size_t mask() const noexcept
338 {
339 HI_X_runtime_evaluate_if_valid(reg().mask());
340
341 auto r = 0_uz;
342 for (auto i = N; i != 0; --i) {
343 r <<= 1;
344 r |= std::bit_cast<unsigned_type>(v[i - 1]) >> (sizeof(unsigned_type) * CHAR_BIT - 1);
345 }
346 return r;
347 }
348
349 [[nodiscard]] constexpr T const& operator[](std::size_t i) const noexcept
350 {
351 static_assert(std::endian::native == std::endian::little, "Indices need to be reversed on big endian machines");
352 hi_axiom(i < N);
353 return v[i];
354 }
355
356 [[nodiscard]] constexpr T& operator[](std::size_t i) noexcept
357 {
358 static_assert(std::endian::native == std::endian::little, "Indices need to be reversed on big endian machines");
359 hi_axiom(i < N);
360 return v[i];
361 }
362
363 [[nodiscard]] constexpr reference front() noexcept
364 {
365 return v.front();
366 }
367
368 [[nodiscard]] constexpr const_reference front() const noexcept
369 {
370 return v.front();
371 }
372
373 [[nodiscard]] constexpr reference back() noexcept
374 {
375 return v.back();
376 }
377
378 [[nodiscard]] constexpr const_reference back() const noexcept
379 {
380 return v.back();
381 }
382
383 [[nodiscard]] constexpr pointer data() noexcept
384 {
385 return v.data();
386 }
387
388 [[nodiscard]] constexpr const_pointer data() const noexcept
389 {
390 return v.data();
391 }
392
393 [[nodiscard]] constexpr iterator begin() noexcept
394 {
395 return v.begin();
396 }
397
398 [[nodiscard]] constexpr const_iterator begin() const noexcept
399 {
400 return v.begin();
401 }
402
403 [[nodiscard]] constexpr const_iterator cbegin() const noexcept
404 {
405 return v.cbegin();
406 }
407
408 [[nodiscard]] constexpr iterator end() noexcept
409 {
410 return v.end();
411 }
412
413 [[nodiscard]] constexpr const_iterator end() const noexcept
414 {
415 return v.end();
416 }
417
418 [[nodiscard]] constexpr const_iterator cend() const noexcept
419 {
420 return v.cend();
421 }
422
423 [[nodiscard]] constexpr bool empty() const noexcept
424 {
425 return v.empty();
426 }
427
428 HI_X_accessor(0, x);
429 HI_X_accessor(1, y);
430 HI_X_accessor(2, z);
431 HI_X_accessor(3, w);
432 HI_X_accessor(0, r);
433 HI_X_accessor(1, g);
434 HI_X_accessor(2, b);
435 HI_X_accessor(3, a);
436 HI_X_accessor(0, width);
437 HI_X_accessor(1, height);
438 HI_X_accessor(2, depth);
439
440 HI_X_binary_math_op(+);
441 HI_X_binary_math_op(-);
442 HI_X_binary_math_op(*);
443 HI_X_binary_math_op(/);
444 HI_X_binary_math_op(%);
445
446 HI_X_binary_cmp_op(==);
447 HI_X_binary_cmp_op(!=);
448 HI_X_binary_cmp_op(<);
449 HI_X_binary_cmp_op(>);
450 HI_X_binary_cmp_op(<=);
451 HI_X_binary_cmp_op(>=);
452
453 HI_X_binary_bit_op(^);
454 HI_X_binary_bit_op(&);
455 HI_X_binary_bit_op(|);
456 HI_X_binary_shift_op(<<);
457 HI_X_binary_shift_op(>>);
458
459 [[nodiscard]] friend constexpr bool equal(simd lhs, simd rhs) noexcept
460 {
461 HI_X_runtime_evaluate_if_valid(equal(lhs.reg(), rhs.reg()));
462
463 for (auto i = 0_uz; i != N; ++i) {
464 if (lhs.v[i] != rhs.v[i]) {
465 return false;
466 }
467 }
468 return true;
469 }
470
471 HI_X_binary_op_broadcast(==);
472 HI_X_binary_op_broadcast(!=);
473 HI_X_binary_op_broadcast(<);
474 HI_X_binary_op_broadcast(>);
475 HI_X_binary_op_broadcast(<=);
476 HI_X_binary_op_broadcast(>=);
477 HI_X_binary_op_broadcast(+);
478 HI_X_binary_op_broadcast(-);
479 HI_X_binary_op_broadcast(*);
480 HI_X_binary_op_broadcast(/);
481 HI_X_binary_op_broadcast(%);
482 HI_X_binary_op_broadcast(&);
483 HI_X_binary_op_broadcast(|);
484 HI_X_binary_op_broadcast(^);
485
486 HI_X_inplace_op(+=, +);
487 HI_X_inplace_op(-=, -);
488 HI_X_inplace_op(*=, *);
489 HI_X_inplace_op(/=, /);
490 HI_X_inplace_op(%=, %);
491 HI_X_inplace_op(|=, |);
492 HI_X_inplace_op(&=, &);
493 HI_X_inplace_op(^=, ^);
494 HI_X_inplace_op(<<=, <<);
495 HI_X_inplace_op(>>=, >>);
496
501 template<std::size_t I>
502 [[nodiscard]] friend constexpr T& get(simd& rhs) noexcept
503 {
504 static_assert(I < N, "Index out of bounds");
505 return std::get<I>(rhs.v);
506 }
507
512 template<std::size_t I>
513 [[nodiscard]] friend constexpr T get(simd const& rhs) noexcept
514 {
515 static_assert(I < N, "Index out of bounds");
516 HI_X_runtime_evaluate_if_valid(get<I>(rhs.reg()));
517 return std::get<I>(rhs.v);
518 }
519
527 template<std::size_t I>
528 [[nodiscard]] constexpr friend simd insert(simd const& lhs, value_type rhs) noexcept
529 {
530 static_assert(I < size);
531 HI_X_runtime_evaluate_if_valid(simd{insert<I>(lhs.reg(), rhs)});
532
533 auto r = lhs;
534 std::get<I>(r.v) = rhs;
535 return r;
536 }
537
542 template<std::size_t Mask = ~std::size_t{0}>
543 [[nodiscard]] friend constexpr simd set_zero(simd rhs) noexcept
544 {
545 HI_X_runtime_evaluate_if_valid(simd{set_zero<Mask>(rhs.reg())});
546
547 auto r = simd{};
548 for (std::size_t i = 0; i != N; ++i) {
549 if (to_bool((Mask >> i) & 1)) {
550 r.v[i] = T{0};
551 } else {
552 r.v[i] = rhs.v[i];
553 }
554 }
555 return r;
556 }
557
565 template<std::size_t Mask>
566 [[nodiscard]] friend constexpr simd blend(simd const& lhs, simd const& rhs) noexcept
567 {
568 HI_X_runtime_evaluate_if_valid(simd{blend<Mask>(lhs.reg(), rhs.reg())});
569
570 auto r = simd{};
571 for (std::size_t i = 0; i != N; ++i) {
572 r[i] = to_bool((Mask >> i) & 1) ? rhs[i] : lhs[i];
573 }
574 return r;
575 }
576
579 [[nodiscard]] friend constexpr simd blend(simd const& a, simd const& b, simd const& mask)
580 {
581 HI_X_runtime_evaluate_if_valid(simd{blend(a.reg(), b.reg(), mask.reg())});
582
583 auto r = simd{};
584 for (std::size_t i = 0; i != N; ++i) {
585 r[i] = mask[i] != T{0} ? b[i] : a[i];
586 }
587 return r;
588 }
589
594 template<std::size_t Mask>
595 [[nodiscard]] friend constexpr simd neg(simd rhs) noexcept
596 {
597 return blend<Mask>(rhs, -rhs);
598 }
599
600 [[nodiscard]] friend constexpr simd operator-(simd const& rhs) noexcept
601 {
602 HI_X_runtime_evaluate_if_valid(simd{-rhs.reg()});
603 return T{0} - rhs;
604 }
605
606 [[nodiscard]] friend constexpr simd abs(simd const& rhs) noexcept
607 {
608 HI_X_runtime_evaluate_if_valid(simd{abs(rhs.reg())});
609 return max(rhs, -rhs);
610 }
611
612 [[nodiscard]] friend constexpr simd rcp(simd const& rhs) noexcept
613 {
614 HI_X_runtime_evaluate_if_valid(simd{rcp(rhs.reg())});
615 return T{1} / rhs;
616 }
617
618 [[nodiscard]] friend constexpr simd sqrt(simd const& rhs) noexcept
619 {
620 HI_X_runtime_evaluate_if_valid(simd{sqrt(rhs.reg())});
621
622 auto r = simd{};
623 for (std::size_t i = 0; i != N; ++i) {
624 r[i] = std::sqrt(rhs.v[i]);
625 }
626 return r;
627 }
628
629 [[nodiscard]] friend constexpr simd rcp_sqrt(simd const& rhs) noexcept
630 {
631 HI_X_runtime_evaluate_if_valid(simd{rcp_sqrt(rhs.reg())});
632 return rcp(sqrt(rhs));
633 }
634
635 [[nodiscard]] friend constexpr simd floor(simd const& rhs) noexcept
636 requires(std::is_floating_point_v<value_type>)
637 {
638 HI_X_runtime_evaluate_if_valid(simd{floor(rhs.reg())});
639
640 auto r = simd{};
641 for (std::size_t i = 0; i != N; ++i) {
642 r[i] = std::floor(rhs.v[i]);
643 }
644 return r;
645 }
646
647 [[nodiscard]] friend constexpr simd ceil(simd const& rhs) noexcept
648 requires(std::is_floating_point_v<value_type>)
649 {
650 HI_X_runtime_evaluate_if_valid(simd{ceil(rhs.reg())});
651
652 auto r = simd{};
653 for (std::size_t i = 0; i != N; ++i) {
654 r[i] = std::ceil(rhs.v[i]);
655 }
656 return r;
657 }
658
659 [[nodiscard]] friend constexpr simd round(simd const& rhs) noexcept
660 requires(std::is_floating_point_v<value_type>)
661 {
662 HI_X_runtime_evaluate_if_valid(simd{round(rhs.reg())});
663
664 auto r = simd{};
665 for (std::size_t i = 0; i != N; ++i) {
666 r[i] = std::round(rhs.v[i]);
667 }
668 return r;
669 }
670
678 template<std::size_t Mask>
679 [[nodiscard]] hi_force_inline friend constexpr T dot(simd const& lhs, simd const& rhs) noexcept
680 {
681 HI_X_runtime_evaluate_if_valid(get<0>(dot<Mask>(lhs.reg(), rhs.reg())));
682
683 auto r = T{};
684 for (std::size_t i = 0; i != N; ++i) {
685 if (to_bool(Mask & (1_uz << i))) {
686 r += lhs.v[i] * rhs.v[i];
687 }
688 }
689 return r;
690 }
691
698 template<std::size_t Mask>
699 [[nodiscard]] friend T hypot(simd const& rhs) noexcept
700 requires(std::is_floating_point_v<value_type>)
701 {
702 HI_X_runtime_evaluate_if_valid(get<0>(sqrt(dot<Mask>(rhs.reg(), rhs.reg()))));
703 return std::sqrt(dot<Mask>(rhs, rhs));
704 }
705
712 template<std::size_t Mask>
713 [[nodiscard]] hi_force_inline friend constexpr T squared_hypot(simd const& rhs) noexcept
714 {
715 HI_X_runtime_evaluate_if_valid(get<0>(dot<Mask>(rhs.reg(), rhs.reg())));
716 return dot<Mask>(rhs, rhs);
717 }
718
724 template<std::size_t Mask>
725 [[nodiscard]] friend constexpr T rcp_hypot(simd const& rhs) noexcept
726 {
727 HI_X_runtime_evaluate_if_valid(get<0>(rcp_sqrt(dot<Mask>(rhs.reg(), rhs.reg()))));
728 return 1.0f / hypot<Mask>(rhs);
729 }
730
738 template<std::size_t Mask>
739 [[nodiscard]] friend constexpr simd normalize(simd const& rhs) noexcept
740 {
741 HI_X_runtime_evaluate_if_valid(simd{rhs * rcp_sqrt(dot<Mask>(rhs.reg(), rhs.reg()))});
742
743 hilet rcp_hypot_ = rcp_hypot<Mask>(rhs);
744
745 auto r = simd{};
746 for (std::size_t i = 0; i != N; ++i) {
747 if (to_bool(Mask & (1_uz << i))) {
748 r.v[i] = rhs.v[i] * rcp_hypot_;
749 }
750 }
751 return r;
752 }
753
758 [[nodiscard]] friend constexpr simd rotl(simd const& lhs, unsigned int rhs) noexcept
759 {
760 hi_axiom(rhs > 0 and rhs < sizeof(value_type) * CHAR_BIT);
761
762 hilet remainder = narrow_cast<unsigned int>(sizeof(value_type) * CHAR_BIT - rhs);
763
764 return (lhs << rhs) | (lhs >> remainder);
765 }
766
771 [[nodiscard]] friend constexpr simd rotr(simd const& lhs, unsigned int rhs) noexcept
772 {
773 hi_axiom(rhs > 0 and rhs < sizeof(value_type) * CHAR_BIT);
774
775 hilet remainder = narrow_cast<unsigned int>(sizeof(value_type) * CHAR_BIT - rhs);
776
777 return (lhs >> rhs) | (lhs << remainder);
778 }
779
780 [[nodiscard]] friend constexpr simd min(simd const& lhs, simd const& rhs) noexcept
781 {
782 HI_X_runtime_evaluate_if_valid(simd{min(lhs.reg(), rhs.reg())});
783
784 auto r = simd{};
785 for (std::size_t i = 0; i != N; ++i) {
786 r.v[i] = std::min(lhs.v[i], rhs.v[i]);
787 }
788 return r;
789 }
790
791 [[nodiscard]] friend constexpr simd max(simd const& lhs, simd const& rhs) noexcept
792 {
793 HI_X_runtime_evaluate_if_valid(simd{max(lhs.reg(), rhs.reg())});
794
795 auto r = simd{};
796 for (std::size_t i = 0; i != N; ++i) {
797 r.v[i] = std::max(lhs.v[i], rhs.v[i]);
798 }
799 return r;
800 }
801
802 [[nodiscard]] friend constexpr simd clamp(simd const& lhs, simd const& low, simd const& high) noexcept
803 {
804 return min(max(lhs, low), high);
805 }
806
807 [[nodiscard]] friend constexpr simd hadd(simd const& lhs, simd const& rhs) noexcept
808 {
809 HI_X_runtime_evaluate_if_valid(simd{horizontal_add(lhs.reg(), rhs.reg())});
810
811 hi_axiom(N % 2 == 0);
812
813 auto r = simd{};
814
815 std::size_t src_i = 0;
816 std::size_t dst_i = 0;
817 while (src_i != N) {
818 auto tmp = lhs[src_i++];
819 tmp += lhs[src_i++];
820 r.v[dst_i++] = tmp;
821 }
822
823 src_i = 0;
824 while (src_i != N) {
825 auto tmp = rhs[src_i++];
826 tmp += rhs[src_i++];
827 r.v[dst_i++] = tmp;
828 }
829 return r;
830 }
831
832 [[nodiscard]] friend constexpr simd hsub(simd const& lhs, simd const& rhs) noexcept
833 {
834 HI_X_runtime_evaluate_if_valid(simd{horizontal_sub(lhs.reg(), rhs.reg())});
835
836 hi_axiom(N % 2 == 0);
837
838 auto r = simd{};
839
840 std::size_t src_i = 0;
841 std::size_t dst_i = 0;
842 while (src_i != N) {
843 auto tmp = lhs[src_i++];
844 tmp -= lhs[src_i++];
845 r.v[dst_i++] = tmp;
846 }
847
848 src_i = 0;
849 while (src_i != N) {
850 auto tmp = rhs[src_i++];
851 tmp -= rhs[src_i++];
852 r.v[dst_i++] = tmp;
853 }
854 return r;
855 }
856
861 template<std::size_t Mask>
862 [[nodiscard]] friend constexpr simd addsub(simd const& lhs, simd const& rhs) noexcept
863 {
864 constexpr std::size_t not_mask = (1 << N) - 1;
865 return lhs + neg<Mask ^ not_mask>(rhs);
866 }
867
870 [[nodiscard]] friend constexpr simd cross_2D(simd const& rhs) noexcept
871 requires(N >= 2)
872 {
873 return simd{-rhs.y(), rhs.x()};
874 }
875
878 [[nodiscard]] friend constexpr simd normal_2D(simd const& rhs) noexcept
879 requires(N >= 2)
880 {
881 return normalize<0b0011>(cross_2D(rhs));
882 }
883
887 [[nodiscard]] friend constexpr float cross_2D(simd const& lhs, simd const& rhs) noexcept
888 requires(N >= 2)
889 {
890 hilet tmp1 = rhs.yxwz();
891 hilet tmp2 = lhs * tmp1;
892 hilet tmp3 = hsub(tmp2, tmp2);
893 return get<0>(tmp3);
894 }
895
896 // x=a.y*b.z - a.z*b.y
897 // y=a.z*b.x - a.x*b.z
898 // z=a.x*b.y - a.y*b.x
899 // w=a.w*b.w - a.w*b.w
900 [[nodiscard]] constexpr friend simd cross_3D(simd const& lhs, simd const& rhs) noexcept
901 requires(N == 4)
902 {
903 hilet a_left = lhs.yzxw();
904 hilet b_left = rhs.zxyw();
905 hilet left = a_left * b_left;
906
907 hilet a_right = lhs.zxyw();
908 hilet b_right = rhs.yzxw();
909 hilet right = a_right * b_right;
910 return left - right;
911 }
912
913 [[nodiscard]] static constexpr simd byte_srl_shuffle_indices(unsigned int rhs)
914 requires(std::is_same_v<value_type, int8_t> and size == 16)
915 {
916 static_assert(std::endian::native == std::endian::little);
917
918 auto r = simd{};
919 for (auto i = 0; i != 16; ++i) {
920 if ((i + rhs) < 16) {
921 r[i] = narrow_cast<int8_t>(i + rhs);
922 } else {
923 // Indices set to -1 result in a zero after a byte shuffle.
924 r[i] = -1;
925 }
926 }
927 return r;
928 }
929
930 [[nodiscard]] static constexpr simd byte_sll_shuffle_indices(unsigned int rhs)
931 requires(std::is_same_v<value_type, int8_t> and size == 16)
932 {
933 static_assert(std::endian::native == std::endian::little);
934
935 auto r = simd{};
936 for (auto i = 0; i != 16; ++i) {
937 if ((i - rhs) >= 0) {
938 r[i] = narrow_cast<int8_t>(i - rhs);
939 } else {
940 // Indices set to -1 result in a zero after a byte shuffle.
941 r[i] = -1;
942 }
943 }
944 return r;
945 }
946
949 [[nodiscard]] friend constexpr simd permute(simd const& lhs, simd const& rhs) noexcept
950 requires(std::is_integral_v<value_type>)
951 {
952 HI_X_runtime_evaluate_if_valid(simd{permute(lhs.reg(), rhs.reg())});
953
954 auto r = simd{};
955 for (std::size_t i = 0; i != N; ++i) {
956 if (rhs[i] >= 0) {
957 r[i] = lhs[rhs[i] & 0xf];
958 } else {
959 r[i] = 0;
960 }
961 }
962
963 return r;
964 }
965
968 [[nodiscard]] friend constexpr simd midpoint(simd const& p1, simd const& p2) noexcept
969 {
970 return (p1 + p2) * 0.5f;
971 }
972
975 [[nodiscard]] friend constexpr simd reflect_point(simd const& p, simd const anchor) noexcept
976 {
977 return anchor - (p - anchor);
978 }
979
980 hi_warning_push();
981 // C26494 Variable '...' is uninitialized. Always initialize an object (type.5).
982 // Internal to _MM_TRANSPOSE4_PS
983 hi_warning_ignore_msvc(26494);
984 template<typename... Columns>
985 [[nodiscard]] friend constexpr std::array<simd, size> transpose(Columns const&...columns) noexcept
986 {
987 static_assert(sizeof...(Columns) == size, "Can only transpose square matrices");
988
989 if (not std::is_constant_evaluated()) {
990 if constexpr (requires { transpose(columns.reg()...); }) {
991 hilet tmp = transpose(columns.reg()...);
992 auto r = std::array<simd, size>{};
993 for (auto i = 0_uz; i != size; ++i) {
994 r[i] = simd{tmp[i]};
995 }
996 return r;
997 }
998 }
999
1000 auto r = std::array<simd, N>{};
1001 auto f = [&r, &columns... ]<std::size_t... Ints>(std::index_sequence<Ints...>)
1002 {
1003 auto tf = [&r](auto i, auto v) {
1004 for (std::size_t j = 0; j != N; ++j) {
1005 r[j][i] = v[j];
1006 }
1007 return 0;
1008 };
1009 static_cast<void>((tf(Ints, columns) + ...));
1010 };
1011 f(std::make_index_sequence<sizeof...(columns)>{});
1012 return r;
1013 }
1014 hi_warning_pop();
1015
1016 [[nodiscard]] constexpr friend simd composit(simd const& under, simd const& over) noexcept
1017 requires(N == 4 && std::is_floating_point_v<T>)
1018 {
1019 if (get<3>(over) <= value_type{0}) {
1020 // fully transparent.
1021 return under;
1022 }
1023 if (get<3>(over) >= value_type{1}) {
1024 // fully opaque;
1025 return over;
1026 }
1027
1028 hilet over_alpha = over.wwww();
1029 hilet under_alpha = under.wwww();
1030
1031 hilet over_color = over.xyz1();
1032 hilet under_color = under.xyz1();
1033
1034 hilet output_color = over_color * over_alpha + under_color * under_alpha * (T{1} - over_alpha);
1035
1036 return output_color / output_color.www1();
1037 }
1038
1039 [[nodiscard]] constexpr friend simd composit(simd const& under, simd const& over) noexcept
1040 requires(std::is_same_v<value_type, float16> and size == 4)
1041 {
1042 return simd{composit(static_cast<simd<float, 4>>(under), static_cast<simd<float, 4>>(over))};
1043 }
1044
/** Format a simd as text.
 *
 * @param rhs The simd to format.
 * @return The elements formatted with "{}", separated by "; " and enclosed in
 *         parentheses.
 */
[[nodiscard]] friend std::string to_string(simd const& rhs) noexcept
{
    std::string text = "(";

    // The separator is empty before the first element and "; " afterwards.
    auto separator = "";
    for (auto const& element : rhs) {
        text += separator;
        text += std::format("{}", element);
        separator = "; ";
    }

    text += ')';
    return text;
}
1059
1060 friend std::ostream& operator<<(std::ostream& lhs, simd const& rhs)
1061 {
1062 return lhs << to_string(rhs);
1063 }
1064
1069 template<std::size_t FromElement, std::size_t ToElement>
1070 [[nodiscard]] constexpr friend simd insert(simd const& lhs, simd const& rhs)
1071 {
1072 HI_X_runtime_evaluate_if_valid(simd{insert<FromElement, ToElement>(lhs.reg(), rhs.reg())});
1073
1074 auto r = simd{};
1075 for (std::size_t i = 0; i != N; ++i) {
1076 r[i] = (i == ToElement) ? rhs[FromElement] : lhs[i];
1077 }
1078
1079 return r;
1080 }
1081
1089 template<fixed_string Order>
1090 [[nodiscard]] constexpr simd swizzle() const
1091 {
1092 static_assert(Order.size() <= N);
1093
1094 HI_X_runtime_evaluate_if_valid(simd{reg().swizzle<Order>()});
1095
1096 auto r = simd{};
1097 swizzle_detail<0, Order>(r);
1098 return r;
1099 }
1100
1101#define SWIZZLE(name, str) \
1102 [[nodiscard]] constexpr simd name() const noexcept \
1103 requires(sizeof(str) - 1 <= N) \
1104 { \
1105 return swizzle<str>(); \
1106 }
1107
1108#define SWIZZLE_4D(name, str) \
1109 SWIZZLE(name##0, str "0") \
1110 SWIZZLE(name##1, str "1") \
1111 SWIZZLE(name##x, str "a") \
1112 SWIZZLE(name##y, str "b") \
1113 SWIZZLE(name##z, str "c") \
1114 SWIZZLE(name##w, str "d")
1115
1116#define SWIZZLE_3D(name, str) \
1117 SWIZZLE_4D(name##0, str "0") \
1118 SWIZZLE_4D(name##1, str "1") \
1119 SWIZZLE_4D(name##x, str "a") \
1120 SWIZZLE_4D(name##y, str "b") \
1121 SWIZZLE_4D(name##z, str "c") \
1122 SWIZZLE_4D(name##w, str "d") \
1123 SWIZZLE(name##0, str "0") \
1124 SWIZZLE(name##1, str "1") \
1125 SWIZZLE(name##x, str "a") \
1126 SWIZZLE(name##y, str "b") \
1127 SWIZZLE(name##z, str "c") \
1128 SWIZZLE(name##w, str "d")
1129
1130#define SWIZZLE_2D(name, str) \
1131 SWIZZLE_3D(name##0, str "0") \
1132 SWIZZLE_3D(name##1, str "1") \
1133 SWIZZLE_3D(name##x, str "a") \
1134 SWIZZLE_3D(name##y, str "b") \
1135 SWIZZLE_3D(name##z, str "c") \
1136 SWIZZLE_3D(name##w, str "d") \
1137 SWIZZLE(name##0, str "0") \
1138 SWIZZLE(name##1, str "1") \
1139 SWIZZLE(name##x, str "a") \
1140 SWIZZLE(name##y, str "b") \
1141 SWIZZLE(name##z, str "c") \
1142 SWIZZLE(name##w, str "d")
1143
1144 SWIZZLE_2D(_0, "0")
1145 SWIZZLE_2D(_1, "1")
1146 SWIZZLE_2D(x, "a")
1147 SWIZZLE_2D(y, "b")
1148 SWIZZLE_2D(z, "c")
1149 SWIZZLE_2D(w, "d")
1150
1151#undef SWIZZLE
1152#undef SWIZZLE_2D
1153#undef SWIZZLE_3D
1154#undef SWIZZLE_4D
1155
1156 template<size_t I, fixed_string Order>
1157 constexpr void swizzle_detail(simd& r) const noexcept
1158 {
1159 static_assert(I < size);
1160
1161 // Get the source element, or '0'.
1162 constexpr char c = I < Order.size() ? get<I>(Order) : '0';
1163
1164 if constexpr (c == '1') {
1165 r = insert<I>(r, value_type{1});
1166
1167 } else if constexpr (c == '0') {
1168 r = insert<I>(r, value_type{0});
1169
1170 } else if constexpr (c >= 'a' and c <= 'v') {
1171 constexpr size_t src_index = c - 'a';
1172 static_assert(src_index < size);
1173
1174 r = insert<I>(r, get<src_index>(*this));
1175
1176 } else if constexpr (c >= 'w' and c <= 'z') {
1177 constexpr size_t src_index = c == 'x' ? 0 : c == 'y' ? 1 : c == 'z' ? 2 : 3;
1178 static_assert(src_index < size);
1179
1180 r = insert<I>(r, get<src_index>(*this));
1181
1182 } else {
1184 }
1185
1186 if constexpr (I + 1 < size) {
1187 swizzle_detail<I + 1, Order>(r);
1188 }
1189 }
1190};
1191
1192using i8x1 = simd<int8_t, 1>;
1193using i8x2 = simd<int8_t, 2>;
1194using i8x4 = simd<int8_t, 4>;
1195using i8x8 = simd<int8_t, 8>;
1196using i8x16 = simd<int8_t, 16>;
1197using i8x32 = simd<int8_t, 32>;
1198using i8x64 = simd<int8_t, 64>;
1199
1200using u8x1 = simd<uint8_t, 1>;
1201using u8x2 = simd<uint8_t, 2>;
1202using u8x4 = simd<uint8_t, 4>;
1203using u8x8 = simd<uint8_t, 8>;
1204using u8x16 = simd<uint8_t, 16>;
1205using u8x32 = simd<uint8_t, 32>;
1206using u8x64 = simd<uint8_t, 64>;
1207
1208using i16x1 = simd<int16_t, 1>;
1209using i16x2 = simd<int16_t, 2>;
1210using i16x4 = simd<int16_t, 4>;
1211using i16x8 = simd<int16_t, 8>;
1212using i16x16 = simd<int16_t, 16>;
1213using i16x32 = simd<int16_t, 32>;
1214
1215using u16x1 = simd<uint16_t, 1>;
1216using u16x2 = simd<uint16_t, 2>;
1217using u16x4 = simd<uint16_t, 4>;
1218using u16x8 = simd<uint16_t, 8>;
1219using u16x16 = simd<uint16_t, 16>;
1220using u16x32 = simd<uint16_t, 32>;
1221
1222using f16x4 = simd<float16, 4>;
1223
1224using i32x1 = simd<int32_t, 1>;
1225using i32x2 = simd<int32_t, 2>;
1226using i32x4 = simd<int32_t, 4>;
1227using i32x8 = simd<int32_t, 8>;
1228using i32x16 = simd<int32_t, 16>;
1229
1230using u32x1 = simd<uint32_t, 1>;
1231using u32x2 = simd<uint32_t, 2>;
1232using u32x4 = simd<uint32_t, 4>;
1233using u32x8 = simd<uint32_t, 8>;
1234using u32x16 = simd<uint32_t, 16>;
1235
1236using f32x1 = simd<float, 1>;
1237using f32x2 = simd<float, 2>;
1238using f32x4 = simd<float, 4>;
1239using f32x8 = simd<float, 8>;
1240using f32x16 = simd<float, 16>;
1241
1242using i64x1 = simd<int64_t, 1>;
1243using i64x2 = simd<int64_t, 2>;
1244using i64x4 = simd<int64_t, 4>;
1245using i64x8 = simd<int64_t, 8>;
1246
1247using u64x1 = simd<uint64_t, 1>;
1248using u64x2 = simd<uint64_t, 2>;
1249using u64x4 = simd<uint64_t, 4>;
1250using u64x8 = simd<uint64_t, 8>;
1251
1252using f64x1 = simd<double, 1>;
1253using f64x2 = simd<double, 2>;
1254using f64x4 = simd<double, 4>;
1255using f64x8 = simd<double, 8>;
1256
1257} // namespace hi::inline v1
1258
1259template<class T, std::size_t N>
1260struct std::tuple_size<hi::simd<T, N>> : std::integral_constant<std::size_t, N> {};
1261
1262template<std::size_t I, class T, std::size_t N>
1263struct std::tuple_element<I, hi::simd<T, N>> {
1264 using type = T;
1265};
1266
1267template<typename T, size_t N>
1268struct std::equal_to<::hi::simd<T, N>> {
1269 constexpr bool operator()(::hi::simd<T, N> const& lhs, ::hi::simd<T, N> const& rhs) const noexcept
1270 {
1271 return equal(lhs, rhs);
1272 }
1273};
1274
1275// Add equality operator to Google-test internal namespace so that ASSERT_EQ() work.
1276template<typename T, size_t N>
1277inline bool operator==(::hi::simd<T, N> lhs, ::hi::simd<T, N> rhs) noexcept
1278{
1279 return std::equal_to{}(lhs, rhs);
1280}
1281
1282// Add equality operator to Google-test internal namespace so that ASSERT_NE() work.
1283template<typename T, size_t N>
1284inline bool operator!=(::hi::simd<T, N> lhs, ::hi::simd<T, N> rhs) noexcept
1285{
1286 return not std::equal_to<::hi::simd<T, N>>{}(lhs, rhs);
1287}
1288
1289#undef HI_X_accessor
1290#undef HI_X_binary_cmp_op
1291#undef HI_X_binary_math_op
1292#undef HI_X_binary_bit_op
1293#undef HI_X_binary_shift_op
1294#undef HI_X_binary_op_broadcast
1295#undef HI_X_inplace_op
1296#undef HI_X_runtime_evaluate_if_valid
1297
1298hi_warning_pop();
#define hi_static_no_default(...)
This part of the code should not be reachable, unless a programming bug.
Definition assert.hpp:308
#define hi_axiom(expression,...)
Specify an axiom; an expression that is true.
Definition assert.hpp:238
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
@ other
The gui_event does not have associated data.
STL namespace.
DOXYGEN BUG.
Definition algorithm.hpp:13
void composit(pixmap_span< sfloat_rgba16 > dst, hi::color color, graphic_path const &mask) noexcept
Composit color onto the destination image where the mask is solid.
geometry/margins.hpp
Definition cache.hpp:11
T begin(T... args)
T ceil(T... args)
T end(T... args)
T equal(T... args)
T floor(T... args)
T hypot(T... args)
T left(T... args)
T max(T... args)
T memcpy(T... args)
T min(T... args)
T operator!=(T... args)
T remainder(T... args)
T round(T... args)
T sqrt(T... args)
T to_string(T... args)