HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
native_f32x4_sse.hpp
1// Copyright Take Vos 2022, 2023.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "native_simd_utility.hpp"
8#include "../utility/utility.hpp"
9#include "../macros.hpp"
10#include <span>
11#include <array>
12#include <ostream>
13
14
15
16namespace hi { inline namespace v1 {
17
18#ifdef HI_HAS_SSE
19
36template<>
37struct native_simd<float, 4> {
/** Scalar type stored in each lane. */
using value_type = float;

/** Number of lanes held by the register. */
constexpr static size_t size = 4;

using array_type = std::array<value_type, size>;
using register_type = __m128;

/** The wrapped SSE register.
 *
 * Every constructor and operation of this type reads or writes this member,
 * so it has to be declared for the struct to compile.
 */
register_type v;

46 native_simd(native_simd const&) noexcept = default;
47 native_simd(native_simd&&) noexcept = default;
48 native_simd& operator=(native_simd const&) noexcept = default;
49 native_simd& operator=(native_simd&&) noexcept = default;
50
53 native_simd() noexcept : v(_mm_setzero_ps()) {}
54
55 [[nodiscard]] explicit native_simd(register_type other) noexcept : v(other) {}
56
57 [[nodiscard]] explicit operator register_type() const noexcept
58 {
59 return v;
60 }
61
66 [[nodiscard]] explicit native_simd(value_type a) noexcept : v(_mm_set_ss(a)) {}
67
75 [[nodiscard]] native_simd(value_type a, value_type b, value_type c = value_type{0}, value_type d = value_type{0}) noexcept :
76 v(_mm_set_ps(d, c, b, a))
77 {
78 }
79
80 [[nodiscard]] explicit native_simd(value_type const *other) noexcept : v(_mm_loadu_ps(other)) {}
81
82 void store(value_type *out) const noexcept
83 {
84 hi_axiom_not_null(out);
85 _mm_storeu_ps(out, v);
86 }
87
88 [[nodiscard]] explicit native_simd(void const *other) noexcept : v(_mm_loadu_ps(static_cast<value_type const *>(other))) {}
89
90 void store(void *out) const noexcept
91 {
92 hi_axiom_not_null(out);
93 _mm_storeu_ps(static_cast<value_type *>(out), v);
94 }
95
96 [[nodiscard]] explicit native_simd(std::span<value_type const> other) noexcept
97 {
98 hi_axiom(other.size() >= size);
99 v = _mm_loadu_ps(other.data());
100 }
101
102 void store(std::span<value_type> out) const noexcept
103 {
104 hi_axiom(out.size() >= size);
105 _mm_storeu_ps(out.data(), v);
106 }
107
108 [[nodiscard]] explicit native_simd(array_type other) noexcept : v(_mm_loadu_ps(other.data())) {}
109
110 [[nodiscard]] explicit operator array_type() const noexcept
111 {
112 auto r = array_type{};
113 _mm_storeu_ps(r.data(), v);
114 return r;
115 }
116
117#ifdef HI_HAS_SSE2
118 [[nodiscard]] explicit native_simd(native_simd<int32_t, 4> const& a) noexcept;
119#endif
120#ifdef HI_HAS_AVX
121 [[nodiscard]] explicit native_simd(native_simd<double, 4> const& a) noexcept;
122#endif
123
133 [[nodiscard]] static native_simd broadcast(value_type a) noexcept
134 {
135 return native_simd{_mm_set1_ps(a)};
136 }
137
147 [[nodiscard]] static native_simd broadcast(native_simd a) noexcept
148 {
149#ifdef HI_HAS_AVX2
150 return native_simd{_mm_broadcastss_ps(a.v)};
151#else
152 return native_simd{_mm_shuffle_ps(a.v, a.v, 0b00'00'00'00)};
153#endif
154 }
155
158 [[nodiscard]] static native_simd from_mask(size_t a) noexcept
159 {
160 hi_axiom(a <= 0b1111);
161
162 uint64_t a_ = a;
163
164 a_ <<= 31;
166 a_ >>= 1;
168 a_ >>= 1;
170 a_ >>= 1;
172
173 tmp = _mm_srai_epi32(tmp, 31);
174 return native_simd{_mm_castsi128_ps(tmp)};
175 }
176
179 [[nodiscard]] static native_simd ones() noexcept
180 {
181#ifdef HI_HAS_SSE2
182 auto ones = _mm_undefined_si128();
184 return native_simd{_mm_castsi128_ps(ones)};
185#else
186 auto ones = _mm_setzero_ps();
188 return native_simd{ones};
189#endif
190 }
191
194 [[nodiscard]] size_t mask() const noexcept
195 {
197 }
198
205 [[nodiscard]] friend bool equal(native_simd a, native_simd b) noexcept
206 {
207#ifdef HI_HAS_SSE2
208 return _mm_movemask_epi8(_mm_cmpeq_epi32(_mm_castps_si128(a.v), _mm_castps_si128(b.v))) == 0b1111'1111'1111'1111;
209#else
210 return static_cast<array_type>(a) == static_cast<array_type>(b);
211#endif
212 }
213
214 [[nodiscard]] friend native_simd
215 almost_eq(native_simd a, native_simd b, value_type epsilon = std::numeric_limits<value_type>::epsilon()) noexcept
216 {
217 hilet abs_diff = abs(a - b);
218 return abs_diff < broadcast(epsilon);
219 }
220
221 [[nodiscard]] friend bool
222 almost_equal(native_simd a, native_simd b, value_type epsilon = std::numeric_limits<value_type>::epsilon())
223 {
224 return almost_eq(a, b, epsilon).mask() == 0b1111;
225 }
226
227 [[nodiscard]] friend native_simd operator==(native_simd a, native_simd b) noexcept
228 {
229 return native_simd{_mm_cmpeq_ps(a.v, b.v)};
230 }
231
232 [[nodiscard]] friend native_simd operator!=(native_simd a, native_simd b) noexcept
233 {
234 return native_simd{_mm_cmpneq_ps(a.v, b.v)};
235 }
236
237 [[nodiscard]] friend native_simd operator<(native_simd a, native_simd b) noexcept
238 {
239 return native_simd{_mm_cmplt_ps(a.v, b.v)};
240 }
241
242 [[nodiscard]] friend native_simd operator>(native_simd a, native_simd b) noexcept
243 {
244 return native_simd{_mm_cmpgt_ps(a.v, b.v)};
245 }
246
247 [[nodiscard]] friend native_simd operator<=(native_simd a, native_simd b) noexcept
248 {
249 return native_simd{_mm_cmple_ps(a.v, b.v)};
250 }
251
252 [[nodiscard]] friend native_simd operator>=(native_simd a, native_simd b) noexcept
253 {
254 return native_simd{_mm_cmpge_ps(a.v, b.v)};
255 }
256
257 [[nodiscard]] friend native_simd operator+(native_simd a) noexcept
258 {
259 return a;
260 }
261
262 [[nodiscard]] friend native_simd operator+(native_simd a, native_simd b) noexcept
263 {
264 return native_simd{_mm_add_ps(a.v, b.v)};
265 }
266
267 [[nodiscard]] friend native_simd operator-(native_simd a, native_simd b) noexcept
268 {
269 return native_simd{_mm_sub_ps(a.v, b.v)};
270 }
271
272 [[nodiscard]] friend native_simd operator-(native_simd a) noexcept
273 {
274 return native_simd{} - a;
275 }
276
277 [[nodiscard]] friend native_simd operator*(native_simd a, native_simd b) noexcept
278 {
279 return native_simd{_mm_mul_ps(a.v, b.v)};
280 }
281
282 [[nodiscard]] friend native_simd operator/(native_simd a, native_simd b) noexcept
283 {
284 return native_simd{_mm_div_ps(a.v, b.v)};
285 }
286
287 [[nodiscard]] friend native_simd operator&(native_simd a, native_simd b) noexcept
288 {
289 return native_simd{_mm_and_ps(a.v, b.v)};
290 }
291
292 [[nodiscard]] friend native_simd operator|(native_simd a, native_simd b) noexcept
293 {
294 return native_simd{_mm_or_ps(a.v, b.v)};
295 }
296
297 [[nodiscard]] friend native_simd operator^(native_simd a, native_simd b) noexcept
298 {
299 return native_simd{_mm_xor_ps(a.v, b.v)};
300 }
301
302 [[nodiscard]] friend native_simd operator~(native_simd a) noexcept
303 {
304 return not_and(a, ones());
305 }
306
307 [[nodiscard]] friend native_simd min(native_simd a, native_simd b) noexcept
308 {
309 return native_simd{_mm_min_ps(a.v, b.v)};
310 }
311
312 [[nodiscard]] friend native_simd max(native_simd a, native_simd b) noexcept
313 {
314 return native_simd{_mm_max_ps(a.v, b.v)};
315 }
316
317 [[nodiscard]] friend native_simd abs(native_simd a) noexcept
318 {
319 return not_and(broadcast(-0.0f), a);
320 }
321
322#ifdef HI_HAS_SSE4_1
323 [[nodiscard]] friend native_simd floor(native_simd a) noexcept
324 {
325 return native_simd{_mm_floor_ps(a.v)};
326 }
327#endif
328
329#ifdef HI_HAS_SSE4_1
330 [[nodiscard]] friend native_simd ceil(native_simd a) noexcept
331 {
332 return native_simd{_mm_ceil_ps(a.v)};
333 }
334#endif
335
336#ifdef HI_HAS_SSE4_1
337 template<native_rounding_mode Rounding = native_rounding_mode::current>
338 [[nodiscard]] friend native_simd round(native_simd a) noexcept
339 {
340 return native_simd{_mm_round_ps(a.v, std::to_underlying(Rounding))};
341 }
342#endif
343
346 [[nodiscard]] friend native_simd rcp(native_simd a) noexcept
347 {
348 return native_simd{_mm_rcp_ps(a.v)};
349 }
350
353 [[nodiscard]] friend native_simd sqrt(native_simd a) noexcept
354 {
355 return native_simd{_mm_sqrt_ps(a.v)};
356 }
357
364 [[nodiscard]] friend native_simd rsqrt(native_simd a) noexcept
365 {
366 return native_simd{_mm_rsqrt_ps(a.v)};
367 }
368
375 template<size_t Mask>
376 [[nodiscard]] friend native_simd set_zero(native_simd a) noexcept
377 {
378 static_assert(Mask <= 0b1111);
379 if constexpr (Mask == 0b0000) {
380 return a;
381 } else if constexpr (Mask == 0b1111) {
382 return {};
383 } else {
384#ifdef HI_HAS_SSE4_1
385 return native_simd{_mm_insert_ps(a.v, a.v, Mask)};
386#else
387 hilet mask = from_mask(Mask);
388 return not_and(mask, a);
389#endif
390 }
391 }
392
400 template<size_t Index>
401 [[nodiscard]] friend native_simd insert(native_simd a, value_type b) noexcept
402 {
403 static_assert(Index < 4);
404
405#ifdef HI_HAS_SSE4_1
406 return native_simd{_mm_insert_ps(a.v, _mm_set1_ps(b), narrow_cast<int>(Index << 4))};
407#else
408 hilet mask = from_mask(1_uz << Index);
409 return not_and(mask, a) | (mask & broadcast(b));
410#endif
411 }
412
413 template<size_t SrcIndex, size_t DstIndex>
414 [[nodiscard]] friend native_simd insert(native_simd a, native_simd b) noexcept
415 {
416 static_assert(SrcIndex < size);
417 static_assert(DstIndex < size);
418#ifdef HI_HAS_SSE4_1
419 return native_simd{_mm_insert_ps(a.v, b.v, (SrcIndex << 6) | (DstIndex << 4))};
420#else
421 return insert<DstIndex>(a, get<SrcIndex>(b));
422#endif
423 }
424
431 template<size_t Index>
432 [[nodiscard]] friend value_type get(native_simd a) noexcept
433 {
434 static_assert(Index < size);
435
436 hilet tmp = _mm_shuffle_ps(a.v, a.v, Index);
437 return _mm_cvtss_f32(tmp);
438 }
439
448 template<size_t Mask>
449 [[nodiscard]] friend native_simd blend(native_simd a, native_simd b) noexcept
450 {
451 static_assert(Mask <= 0b1111);
452
453 if constexpr (Mask == 0b0000) {
454 return a;
455 } else if constexpr (Mask == 0b1111) {
456 return b;
457 } else {
458#ifdef HI_HAS_SSE4_1
459 return native_simd{_mm_blend_ps(a.v, b.v, Mask)};
460#else
461 hilet mask = from_mask(Mask);
462 return not_and(mask, a) | (mask & b);
463#endif
464 }
465 }
466
479 template<fixed_string SourceElements>
480 [[nodiscard]] friend native_simd permute(native_simd a) noexcept
481 {
482 static_assert(SourceElements.size() == size);
483 constexpr auto order = detail::native_swizzle_to_packed_indices<SourceElements, size>();
484
485 if constexpr (order == 0b11'10'01'00) {
486 return a;
487 } else if constexpr (order == 0b00'00'00'00) {
488 return broadcast(a);
489 } else {
490#ifdef HI_HAS_AVX
491 return native_simd{_mm_permute_ps(a.v, order)};
492#else
493 return native_simd{_mm_shuffle_ps(a.v, a.v, order)};
494#endif
495 }
496 }
497
498 [[nodiscard]] friend native_simd permute(native_simd a, native_simd<int32_t, 4> const& source_elements) noexcept;
499
516 template<fixed_string SourceElements>
517 [[nodiscard]] friend native_simd swizzle(native_simd a) noexcept
518 {
519 static_assert(SourceElements.size() == size);
520 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
521 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
522 constexpr auto number_mask = one_mask | zero_mask;
523
524 if constexpr (number_mask == 0b1111) {
525 // Swizzle was /[01][01][01][01]/.
527
528 } else if constexpr (number_mask == 0b0000) {
529 // Swizzle was /[^01][^01][^01][^01]/.
530 return permute<SourceElements>(a);
531
532#ifdef HI_HAS_SSE4_1
533 } else if constexpr (number_mask == zero_mask) {
534 // Swizzle was /[^1][^1][^1][^1]/.
537#endif
538
539 } else {
543 }
544 }
545
546#ifdef HI_HAS_SSE3
557 [[nodiscard]] friend native_simd horizontal_add(native_simd a, native_simd b) noexcept
558 {
559 return native_simd{_mm_hadd_ps(a.v, b.v)};
560 }
561#endif
562
563#ifdef HI_HAS_SSE3
574 [[nodiscard]] friend native_simd horizontal_sub(native_simd a, native_simd b) noexcept
575 {
576 return native_simd{_mm_hsub_ps(a.v, b.v)};
577 }
578#endif
579
586 [[nodiscard]] friend native_simd horizontal_sum(native_simd a) noexcept
587 {
588 hilet tmp = a + permute<"cdab">(a);
589 return tmp + permute<"badc">(tmp);
590 }
591
602 template<size_t SourceMask>
603 [[nodiscard]] friend native_simd dot_product(native_simd a, native_simd b) noexcept
604 {
605 static_assert(SourceMask <= 0b1111);
606#ifdef HI_HAS_SSE4_1
607 return native_simd{_mm_dp_ps(a.v, b.v, (SourceMask << 4) | 0b1111)};
608#else
610#endif
611 }
612
613#ifdef HI_HAS_SSE3
625 [[nodiscard]] friend native_simd interleaved_sub_add(native_simd a, native_simd b) noexcept
626 {
627 return native_simd{_mm_addsub_ps(a.v, b.v)};
628 }
629#endif
630
636 [[nodiscard]] friend native_simd not_and(native_simd a, native_simd b) noexcept
637 {
638 return native_simd{_mm_andnot_ps(a.v, b.v)};
639 }
640
641 [[nodiscard]] friend std::array<native_simd, 4> transpose(native_simd a, native_simd b, native_simd c, native_simd d) noexcept
642 {
643 _MM_TRANSPOSE4_PS(a.v, b.v, c.v, d.v);
644 return {a, b, c, d};
645 }
646
647 friend std::ostream& operator<<(std::ostream& a, native_simd b) noexcept
648 {
649 return a << "(" << get<0>(b) << ", " << get<1>(b) << ", " << get<2>(b) << ", " << get<3>(b) << ")";
650 }
651
652 template<fixed_string SourceElements>
653 [[nodiscard]] static native_simd swizzle_numbers() noexcept
654 {
655 constexpr auto one_mask = detail::native_swizzle_to_mask<SourceElements, size, '1'>();
656 constexpr auto zero_mask = detail::native_swizzle_to_mask<SourceElements, size, '0'>();
657 constexpr auto number_mask = one_mask | zero_mask;
658 constexpr auto alpha_mask = ~number_mask & 0b1111;
659
660 if constexpr ((zero_mask | alpha_mask) == 0b1111) {
661 return {};
662
663 } else if constexpr ((one_mask | alpha_mask) == 0b1111) {
664 return broadcast(1.0f);
665
666 } else {
667 return native_simd{
668 to_bool(one_mask & 0b0001) ? 1.0f : 0.0f,
669 to_bool(one_mask & 0b0010) ? 1.0f : 0.0f,
670 to_bool(one_mask & 0b0100) ? 1.0f : 0.0f,
671 to_bool(one_mask & 0b1000) ? 1.0f : 0.0f};
672 }
673 }
674};
675
676#endif
677}} // namespace hi::v1
@ round
The end cap of the line is round.
@ other
The gui_event does not have associated data.
DOXYGEN BUG.
Definition algorithm.hpp:16
geometry/margins.hpp
Definition lookahead_iterator.hpp:5
constexpr Out narrow_cast(In const &rhs) noexcept
Cast numeric values without loss of precision.
Definition cast.hpp:377
T ceil(T... args)
T equal(T... args)
T floor(T... args)
T max(T... args)
T min(T... args)
T operator!=(T... args)
T sqrt(T... args)