HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
swizzle_avx.hpp
1// Copyright Take Vos 2021-2022.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "../utility.hpp"
8#include "../architecture.hpp"
9#if defined(HI_HAS_AVX)
10
11#include <emmintrin.h>
12#include <smmintrin.h>
13#include <xmmintrin.h>
14#include <pmmintrin.h>
15#include <immintrin.h>
16
17hi_warning_push();
18// C26818: Switch statement does not cover all cases. Consider adding a 'default' label (es.79).
19// False positive.
20hi_warning_ignore_msvc(26818)
21
22namespace hi::inline v1 {
23
/** Compute the immediate operand for `_mm_permute_ps()` from four swizzle indices.
 *
 * Every result lane gets a 2-bit source-lane selector: bits [1:0] for A,
 * bits [3:2] for B, bits [5:4] for C and bits [7:6] for D.
 *
 * A negative index does not read from the source vector; `_mm_swizzle_ps()`
 * overwrites such a lane with a constant afterwards. For those lanes the
 * selector is set to the lane's own position, so that a swizzle which does
 * not move any lane yields the identity mask `0b11'10'01'00` and the permute
 * can be skipped altogether.
 *
 * @tparam A Index for result lane 0, in the range [-3, 3].
 * @tparam B Index for result lane 1, in the range [-3, 3].
 * @tparam C Index for result lane 2, in the range [-3, 3].
 * @tparam D Index for result lane 3, in the range [-3, 3].
 * @return The 8-bit permute mask.
 */
template<ssize_t A, ssize_t B, ssize_t C, ssize_t D>
[[nodiscard]] constexpr static int _mm_swizzle_ps_permute_mask() noexcept
{
    static_assert(A >= -3 and A < 4);
    static_assert(B >= -3 and B < 4);
    static_assert(C >= -3 and C < 4);
    static_assert(D >= -3 and D < 4);

    // Every negative index (not only -1 and -2) keeps the lane in place,
    // so that all values accepted by the static_asserts are handled alike.
    constexpr int select_a = A >= 0 ? static_cast<int>(A) : 0;
    constexpr int select_b = B >= 0 ? static_cast<int>(B) : 1;
    constexpr int select_c = C >= 0 ? static_cast<int>(C) : 2;
    constexpr int select_d = D >= 0 ? static_cast<int>(D) : 3;

    return select_a | (select_b << 2) | (select_c << 4) | (select_d << 6);
}
67
/** Compute a 4-bit mask of the lanes that are NOT the literal one.
 *
 * Bit N of the result is cleared when the index for lane N equals -2 and
 * is set for every other index value.
 *
 * @tparam A Index for result lane 0, in the range [-3, 3].
 * @tparam B Index for result lane 1, in the range [-3, 3].
 * @tparam C Index for result lane 2, in the range [-3, 3].
 * @tparam D Index for result lane 3, in the range [-3, 3].
 * @return A mask with bit 0 for A, bit 1 for B, bit 2 for C and bit 3 for D.
 */
template<ssize_t A, ssize_t B, ssize_t C, ssize_t D>
[[nodiscard]] constexpr static int _mm_swizzle_ps_not_one_mask() noexcept
{
    static_assert(-3 <= A && A <= 3);
    static_assert(-3 <= B && B <= 3);
    static_assert(-3 <= C && C <= 3);
    static_assert(-3 <= D && D <= 3);

    constexpr ssize_t literal_one = -2;

    // Start with all lanes marked, then clear the lanes holding the literal one.
    int mask = 0b1111;
    if (A == literal_one) {
        mask &= ~0b0001;
    }
    if (B == literal_one) {
        mask &= ~0b0010;
    }
    if (C == literal_one) {
        mask &= ~0b0100;
    }
    if (D == literal_one) {
        mask &= ~0b1000;
    }
    return mask;
}
83
/** Compute a 4-bit mask of the lanes that hold a constant instead of a source lane.
 *
 * Bit N of the result is set when the index for lane N is negative.
 *
 * @tparam A Index for result lane 0, in the range [-3, 3].
 * @tparam B Index for result lane 1, in the range [-3, 3].
 * @tparam C Index for result lane 2, in the range [-3, 3].
 * @tparam D Index for result lane 3, in the range [-3, 3].
 * @return A mask with bit 0 for A, bit 1 for B, bit 2 for C and bit 3 for D.
 */
template<ssize_t A, ssize_t B, ssize_t C, ssize_t D>
[[nodiscard]] constexpr static int _mm_swizzle_ps_number_mask() noexcept
{
    static_assert(-3 <= A && A <= 3);
    static_assert(-3 <= B && B <= 3);
    static_assert(-3 <= C && C <= 3);
    static_assert(-3 <= D && D <= 3);

    constexpr int lane0 = A < 0 ? 1 : 0;
    constexpr int lane1 = B < 0 ? 1 : 0;
    constexpr int lane2 = C < 0 ? 1 : 0;
    constexpr int lane3 = D < 0 ? 1 : 0;

    return lane0 | (lane1 << 1) | (lane2 << 2) | (lane3 << 3);
}
99
100template<ssize_t A, ssize_t B, ssize_t C, ssize_t D>
101[[nodiscard]] __m128 _mm_swizzle_ps(__m128 const &value) noexcept
102{
103 static_assert(A >= -3 && A < 4);
104 static_assert(B >= -3 && B < 4);
105 static_assert(C >= -3 && C < 4);
106 static_assert(D >= -3 && D < 4);
107
108 constexpr int permute_mask = _mm_swizzle_ps_permute_mask<A, B, C, D>();
109 constexpr int not_one_mask = _mm_swizzle_ps_not_one_mask<A, B, C, D>();
110 constexpr int number_mask = _mm_swizzle_ps_number_mask<A, B, C, D>();
111
112 __m128 swizzled;
113 // Clang is able to optimize these intrinsics, MSVC is not.
114 if constexpr (permute_mask != 0b11'10'01'00) {
115 swizzled = _mm_permute_ps(value, permute_mask);
116 } else {
117 swizzled = value;
118 }
119
120 __m128 numbers = _mm_undefined_ps();
121 if constexpr (not_one_mask == 0b0000) {
122 numbers = _mm_set_ps1(1.0f);
123 } else if constexpr (not_one_mask == 0b1111) {
124 numbers = _mm_setzero_ps();
125 } else if constexpr (not_one_mask == 0b1110) {
126 numbers = _mm_set_ss(1.0f);
127 } else {
128 hilet _1111 = _mm_set_ps1(1.0f);
129 numbers = _mm_insert_ps(_1111, _1111, not_one_mask);
130 }
131
132 __m128 result = _mm_undefined_ps();
133 if constexpr (number_mask == 0b0000) {
134 result = swizzled;
135 } else if constexpr (number_mask == 0b1111) {
136 result = numbers;
137 } else if constexpr (((not_one_mask | ~number_mask) & 0b1111) == 0b1111) {
138 result = _mm_insert_ps(swizzled, swizzled, number_mask);
139 } else {
140 result = _mm_blend_ps(swizzled, numbers, number_mask);
141 }
142 return result;
143}
144
145template<ssize_t A, ssize_t B, ssize_t C, ssize_t D>
146[[nodiscard]] __m128i _mm_swizzle_epi32(__m128i const &value) noexcept
147{
148 return _mm_castps_si128(_mm_swizzle_ps<A, B, C, D>(_mm_castsi128_ps(value)));
149}
150
151template<ssize_t A = -1, ssize_t B = -1>
152[[nodiscard]] __m128d _mm_swizzle_pd(__m128d const &value) noexcept
153{
154 constexpr auto A1 = A >= 0 ? A * 2 : A;
155 constexpr auto A2 = A >= 0 ? A1 + 1 : A1;
156 constexpr auto B1 = B >= 0 ? B * 2 : B;
157 constexpr auto B2 = B >= 0 ? B1 + 1 : B1;
158
159 return _mm_castps_pd(_mm_swizzle_ps<A1, A2, B1, B2>(_mm_castpd_ps(value)));
160}
161
162template<ssize_t A = -1, ssize_t B = -1>
163[[nodiscard]] __m128i _mm_swizzle_epi64(__m128i const &value) noexcept
164{
165 return _mm_castpd_si128(_mm_swizzle_pd<A, B>(_mm_castsi128_pd(value)));
166}
167
168} // namespace hi::inline v1
169
170hi_warning_pop();
171
172#endif
Utilities used by the HikoGUI library itself.
#define hilet
Invariant should be the default for variables.
Definition utility.hpp:23
Functions and macros for handling architectural differences between compilers, CPUs and operating systems.
DOXYGEN BUG.
Definition algorithm.hpp:15
The HikoGUI namespace.
Definition ascii.hpp:19
std::ptrdiff_t ssize_t
Signed size/index into an array.
Definition utility.hpp:173