HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
native_f64x4_avx.hpp
1// Copyright Take Vos 2022, 2023.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "native_simd_utility.hpp"
8#include "../utility/utility.hpp"
9#include "../macros.hpp"
10#include <span>
11#include <array>
12#include <ostream>
13
14
15
16namespace hi { inline namespace v1 {
17
18#ifdef HI_HAS_AVX
19
36template<>
37struct native_simd<double,4> {
38 using value_type = double;
39 constexpr static size_t size = 4;
40 using array_type = std::array<value_type, size>;
41 using register_type = __m256d;
42
44
45 native_simd(native_simd const&) noexcept = default;
46 native_simd(native_simd&&) noexcept = default;
47 native_simd& operator=(native_simd const&) noexcept = default;
48 native_simd& operator=(native_simd&&) noexcept = default;
49
52 native_simd() noexcept : v(_mm256_setzero_pd()) {}
53
54 [[nodiscard]] explicit native_simd(register_type other) noexcept : v(other) {}
55
56 [[nodiscard]] explicit operator register_type() const noexcept
57 {
58 return v;
59 }
60
68 [[nodiscard]] native_simd(
69 value_type a,
70 value_type b = value_type{0},
71 value_type c = value_type{0},
72 value_type d = value_type{0}) noexcept :
73 v(_mm256_set_pd(d, c, b, a))
74 {
75 }
76
77 [[nodiscard]] explicit native_simd(value_type const *other) noexcept : v(_mm256_loadu_pd(other)) {}
78
79 void store(value_type *out) const noexcept
80 {
81 hi_axiom_not_null(out);
82 _mm256_storeu_pd(out, v);
83 }
84
85 [[nodiscard]] explicit native_simd(void const *other) noexcept : v(_mm256_loadu_pd(static_cast<value_type const *>(other))) {}
86
87 void store(void *out) const noexcept
88 {
89 hi_axiom_not_null(out);
90 _mm256_storeu_pd(static_cast<value_type *>(out), v);
91 }
92
93 [[nodiscard]] explicit native_simd(std::span<value_type const> other) noexcept
94 {
95 hi_axiom(other.size() >= size);
96 v = _mm256_loadu_pd(other.data());
97 }
98
99 void store(std::span<value_type> out) const noexcept
100 {
101 hi_axiom(out.size() >= size);
102 _mm256_storeu_pd(out.data(), v);
103 }
104
105 [[nodiscard]] explicit native_simd(array_type other) noexcept : v(_mm256_loadu_pd(other.data())) {}
106
107 [[nodiscard]] explicit operator array_type() const noexcept
108 {
109 auto r = array_type{};
110 _mm256_storeu_pd(r.data(), v);
111 return r;
112 }
113
114 [[nodiscard]] explicit native_simd(native_simd<float, 4> const& a) noexcept;
115 [[nodiscard]] explicit native_simd(native_simd<int32_t, 4> const& a) noexcept;
116 //[[nodiscard]] explicit native_simd(native_f64x2 const &a, native_f64x2 const &b) noexcept;
117
127 [[nodiscard]] static native_simd broadcast(value_type a) noexcept
128 {
129 return native_simd{_mm256_set1_pd(a)};
130 }
131
141 [[nodiscard]] static native_simd broadcast(native_simd a) noexcept
142 {
143#ifdef HI_HAS_AVX2
144 return native_simd{_mm256_permute4x64_pd(a.v, 0b00'00'00'00)};
145#else
146 hilet tmp = _mm256_permute_pd(a.v, 0b0000);
147 return native_simd{_mm256_permute2f128_pd(tmp, tmp, 0b0000'0000)};
148#endif
149 }
150
153 [[nodiscard]] static native_simd ones() noexcept
154 {
155#ifdef HI_HAS_AVX2
158 return native_simd{_mm256_castsi256_pd(ones)};
159#else
160 auto ones = _mm256_setzero_pd();
162 return native_simd{ones};
163#endif
164 }
165
166 [[nodiscard]] static native_simd from_mask(size_t a) noexcept
167 {
168 hi_axiom(a <= 0b1111);
169
170 uint64_t a_ = a;
171
172 a_ <<= 31;
174 a_ >>= 1;
176 a_ >>= 1;
178 a_ >>= 1;
180
181 tmp = _mm_srai_epi32(tmp, 31);
182 return native_simd{_mm256_castsi256_pd(_mm256_cvtepi32_epi64(tmp))};
183 }
184
187 [[nodiscard]] size_t mask() const noexcept
188 {
190 }
191
198 [[nodiscard]] friend bool equal(native_simd a, native_simd b) noexcept
199 {
200 return _mm256_movemask_pd(_mm256_cmp_pd(a.v, b.v, _CMP_EQ_UQ)) == 0b1111;
201 }
202
203 [[nodiscard]] friend native_simd
204 almost_eq(native_simd a, native_simd b, value_type epsilon = std::numeric_limits<value_type>::epsilon()) noexcept
205 {
206 auto abs_diff = abs(a - b);
207 return abs_diff < broadcast(epsilon);
208 }
209
210 [[nodiscard]] friend bool
211 almost_equal(native_simd a, native_simd b, value_type epsilon = std::numeric_limits<value_type>::epsilon())
212 {
213 return almost_eq(a, b, epsilon).mask() == 0b1111;
214 }
215
216 [[nodiscard]] friend native_simd operator==(native_simd a, native_simd b) noexcept
217 {
218 return native_simd{_mm256_cmp_pd(a.v, b.v, _CMP_EQ_OQ)};
219 }
220
221 [[nodiscard]] friend native_simd operator!=(native_simd a, native_simd b) noexcept
222 {
223 return native_simd{_mm256_cmp_pd(a.v, b.v, _CMP_NEQ_UQ)};
224 }
225
226 [[nodiscard]] friend native_simd operator<(native_simd a, native_simd b) noexcept
227 {
228 return native_simd{_mm256_cmp_pd(a.v, b.v, _CMP_LT_OQ)};
229 }
230
231 [[nodiscard]] friend native_simd operator>(native_simd a, native_simd b) noexcept
232 {
233 return native_simd{_mm256_cmp_pd(a.v, b.v, _CMP_GT_OQ)};
234 }
235
236 [[nodiscard]] friend native_simd operator<=(native_simd a, native_simd b) noexcept
237 {
238 return native_simd{_mm256_cmp_pd(a.v, b.v, _CMP_LE_OQ)};
239 }
240
241 [[nodiscard]] friend native_simd operator>=(native_simd a, native_simd b) noexcept
242 {
243 return native_simd{_mm256_cmp_pd(a.v, b.v, _CMP_GE_OQ)};
244 }
245
246 [[nodiscard]] friend native_simd operator+(native_simd a) noexcept
247 {
248 return a;
249 }
250
251 [[nodiscard]] friend native_simd operator+(native_simd a, native_simd b) noexcept
252 {
253 return native_simd{_mm256_add_pd(a.v, b.v)};
254 }
255
256 [[nodiscard]] friend native_simd operator-(native_simd a, native_simd b) noexcept
257 {
258 return native_simd{_mm256_sub_pd(a.v, b.v)};
259 }
260
261 [[nodiscard]] friend native_simd operator-(native_simd a) noexcept
262 {
263 return native_simd{} - a;
264 }
265
266 [[nodiscard]] friend native_simd operator*(native_simd a, native_simd b) noexcept
267 {
268 return native_simd{_mm256_mul_pd(a.v, b.v)};
269 }
270
271 [[nodiscard]] friend native_simd operator/(native_simd a, native_simd b) noexcept
272 {
273 return native_simd{_mm256_div_pd(a.v, b.v)};
274 }
275
276 [[nodiscard]] friend native_simd operator&(native_simd a, native_simd b) noexcept
277 {
278 return native_simd{_mm256_and_pd(a.v, b.v)};
279 }
280
281 [[nodiscard]] friend native_simd operator|(native_simd a, native_simd b) noexcept
282 {
283 return native_simd{_mm256_or_pd(a.v, b.v)};
284 }
285
286 [[nodiscard]] friend native_simd operator^(native_simd a, native_simd b) noexcept
287 {
288 return native_simd{_mm256_xor_pd(a.v, b.v)};
289 }
290
291 [[nodiscard]] friend native_simd operator~(native_simd a) noexcept
292 {
293 return not_and(a, ones());
294 }
295
296 [[nodiscard]] friend native_simd min(native_simd a, native_simd b) noexcept
297 {
298 return native_simd{_mm256_min_pd(a.v, b.v)};
299 }
300
301 [[nodiscard]] friend native_simd max(native_simd a, native_simd b) noexcept
302 {
303 return native_simd{_mm256_max_pd(a.v, b.v)};
304 }
305
306 [[nodiscard]] friend native_simd abs(native_simd a) noexcept
307 {
308 return not_and(broadcast(-0.0f), a);
309 }
310
311 [[nodiscard]] friend native_simd floor(native_simd a) noexcept
312 {
313 return native_simd{_mm256_floor_pd(a.v)};
314 }
315
316 [[nodiscard]] friend native_simd ceil(native_simd a) noexcept
317 {
318 return native_simd{_mm256_ceil_pd(a.v)};
319 }
320
321 template<native_rounding_mode Rounding = native_rounding_mode::current>
322 [[nodiscard]] friend native_simd round(native_simd a) noexcept
323 {
324 return native_simd{_mm256_round_pd(a.v, std::to_underlying(Rounding))};
325 }
326
329 [[nodiscard]] friend native_simd rcp(native_simd a) noexcept
330 {
331 return native_simd{_mm256_div_pd(_mm256_set_pd(1.0, 1.0, 1.0, 1.0), a.v)};
332 }
333
336 [[nodiscard]] friend native_simd sqrt(native_simd a) noexcept
337 {
338 return native_simd{_mm256_sqrt_pd(a.v)};
339 }
340
347 [[nodiscard]] friend native_simd rsqrt(native_simd a) noexcept
348 {
349 return rcp(sqrt(a));
350 }
351
358 template<size_t Mask>
359 [[nodiscard]] friend native_simd set_zero(native_simd a) noexcept
360 {
361 static_assert(Mask <= 0b1111);
362 return blend<Mask>(a, native_simd{});
363 }
364
372 template<size_t Index>
373 [[nodiscard]] friend native_simd insert(native_simd a, value_type b) noexcept
374 {
375 static_assert(Index < 4);
376 return blend<1_uz << Index>(a, broadcast(b));
377 }
378
385 template<size_t Index>
386 [[nodiscard]] friend value_type get(native_simd a) noexcept
387 {
388 static_assert(Index < size);
389
390#ifdef HI_HAS_AVX2
392
393#else
394 constexpr auto hi_index = Index / (size / 2);
395 constexpr auto lo_index = Index % (size / 2);
396
397 hilet hi = _mm256_extractf128_pd(a.v, hi_index);
398 hilet lo = _mm_permute_pd(hi, lo_index);
399 return _mm_cvtsd_f64(lo);
400#endif
401 }
402
411 template<size_t Mask>
412 [[nodiscard]] friend native_simd blend(native_simd a, native_simd b) noexcept
413 {
414 static_assert(Mask <= 0b1111);
415
416 if constexpr (Mask == 0b0000) {
417 return a;
418 } else if constexpr (Mask == 0b1111) {
419 return b;
420 } else {
421 return native_simd{_mm256_blend_pd(a.v, b.v, Mask)};
422 }
423 }
424
437 template<fixed_string SourceElements>
438 [[nodiscard]] friend native_simd permute(native_simd a) noexcept
439 {
440 static_assert(SourceElements.size() == size);
441 constexpr auto order = detail::native_swizzle_to_packed_indices<SourceElements, size>();
442
443#if HI_HAS_AVX2
444 if constexpr (order == 0b11'10'01'00) {
445 return a;
446 } else {
447 return native_simd{_mm256_permute4x64_pd(a.v, order)};
448 }
449
450#else
451 // clang-format off
452 constexpr auto hi_order =
453 ((order & 0b00'00'00'10) >> 1) |
454 ((order & 0b00'00'10'00) >> 2) |
455 ((order & 0b00'10'00'00) >> 3) |
456 ((order & 0b10'00'00'00) >> 4);
457 constexpr auto lo_order =
458 (order & 0b00'00'00'01) |
459 ((order & 0b00'00'01'00) >> 1) |
460 ((order & 0b00'01'00'00) >> 2) |
461 ((order & 0b01'00'00'00) >> 3);
462 // clang-format on
463
464 if constexpr (order == 0b11'10'01'00) {
465 return a;
466 } else if constexpr (order == 0b00'00'00'00) {
467 return broadcast(a);
468 } else if constexpr (hi_order == 0b1100) {
469 return native_simd{_mm256_permute_pd(a.v, lo_order)};
470 } else if constexpr (hi_order == 0b0011) {
471 hilet tmp = _mm256_permute2f128_pd(a.v, a.v, 0b0000'0001);
472 return native_simd{_mm256_permute_pd(tmp, lo_order)};
473 } else if constexpr (hi_order == 0b1111) {
474 hilet tmp = _mm256_permute2f128_pd(a.v, a.v, 0b0001'0001);
475 return native_simd{_mm256_permute_pd(tmp, lo_order)};
476 } else if constexpr (hi_order == 0b0000) {
477 hilet tmp = _mm256_permute2f128_pd(a.v, a.v, 0b0000'0000);
478 return native_simd{_mm256_permute_pd(tmp, lo_order)};
479 } else {
480 hilet hi_0 = _mm256_permute2f128_pd(a.v, a.v, 0b0000'0000);
481 hilet hi_1 = _mm256_permute2f128_pd(a.v, a.v, 0b0001'0001);
484 return native_simd{_mm256_blend_pd(lo_0, lo_1, hi_order)};
485 }
486#endif
487 }
488
505 template<fixed_string SourceElements>
506 [[nodiscard]] friend native_simd swizzle(native_simd a) noexcept
507 {
508 static_assert(SourceElements.size() == size);
509 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
510 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
511 constexpr auto number_mask = one_mask | zero_mask;
512
513 if constexpr (number_mask == 0b1111) {
514 // Swizzle was /[01][01][01][01]/.
516
517 } else if constexpr (number_mask == 0b0000) {
518 // Swizzle was /[^01][^01][^01][^01]/.
519 return permute<SourceElements>(a);
520
521#ifdef HI_HAS_SSE4_1
522 } else if constexpr (number_mask == zero_mask) {
523 // Swizzle was /[^1][^1][^1][^1]/.
526#endif
527
528 } else {
532 }
533 }
534
545 [[nodiscard]] friend native_simd horizontal_add(native_simd a, native_simd b) noexcept
546 {
547 return permute<"acbd">(native_simd{_mm256_hadd_pd(a.v, b.v)});
548 }
549
560 [[nodiscard]] friend native_simd horizontal_sub(native_simd a, native_simd b) noexcept
561 {
562 return permute<"acbd">(native_simd{_mm256_hsub_pd(a.v, b.v)});
563 }
564
571 [[nodiscard]] friend native_simd horizontal_sum(native_simd a) noexcept
572 {
573 hilet tmp = horizontal_add(a, a);
574 return native_simd{_mm256_hadd_pd(tmp.v, tmp.v)};
575 }
576
588 [[nodiscard]] friend native_simd interleaved_sub_add(native_simd a, native_simd b) noexcept
589 {
590 return native_simd{_mm256_addsub_pd(a.v, b.v)};
591 }
592
598 [[nodiscard]] friend native_simd not_and(native_simd a, native_simd b) noexcept
599 {
600 return native_simd{_mm256_andnot_pd(a.v, b.v)};
601 }
602
603 friend std::ostream& operator<<(std::ostream& a, native_simd b) noexcept
604 {
605 return a << "(" << get<0>(b) << ", " << get<1>(b) << ", " << get<2>(b) << ", " << get<3>(b) << ")";
606 }
607
608 template<fixed_string SourceElements>
609 [[nodiscard]] static native_simd swizzle_numbers() noexcept
610 {
611 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
612 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
613 constexpr auto number_mask = one_mask | zero_mask;
614 constexpr auto alpha_mask = ~number_mask & 0b1111;
615
616 if constexpr ((zero_mask | alpha_mask) == 0b1111) {
617 return {};
618
619 } else if constexpr ((one_mask | alpha_mask) == 0b1111) {
620 return broadcast(1.0f);
621
622 } else {
623 return native_simd{
624 to_bool(one_mask & 0b0001) ? 1.0f : 0.0f,
625 to_bool(one_mask & 0b0010) ? 1.0f : 0.0f,
626 to_bool(one_mask & 0b0100) ? 1.0f : 0.0f,
627 to_bool(one_mask & 0b1000) ? 1.0f : 0.0f};
628 }
629 }
630};
631
632#endif
633
634}} // namespace hi::v1
@ round
The end cap of the line is round.
@ other
The gui_event does not have associated data.
DOXYGEN BUG.
Definition algorithm.hpp:16
geometry/margins.hpp
Definition lookahead_iterator.hpp:5
constexpr Out narrow_cast(In const &rhs) noexcept
Cast numeric values without loss of precision.
Definition cast.hpp:377
T ceil(T... args)
T equal(T... args)
T floor(T... args)
T max(T... args)
T min(T... args)
T operator!=(T... args)
T sqrt(T... args)