HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
dsp_mul.hpp
1
2
#pragma once
4
5namespace hi { inline namespace v1 {
6
14constexpr void dsp_mul(float const *a, float const *b, float *o, size_t n) noexcept
15{
16 if (not std::is_constant_evaluated()) {
17#if defined(HI_HAS_AVX)
18 for (auto const a_end = a + floor(n, 8); a != a_end; a += 8, b += 8, o += 8) {
19 auto const a_ = _mm256_loadu_ps(a);
20 auto const b_ = _mm256_loadu_ps(b);
21 auto const o_ = _mm256_mul_ps(a_, b_);
22 _mm256_storeu_ps(o, o_);
23 }
24
25#elif defined(HI_HAS_SSE)
26 for (auto const a_end = a + floor(n, 4); a != a_end; a += 4, b += 4, o += 4) {
27 auto const a_ = _mm_loadu_ps(a);
28 auto const b_ = _mm_loadu_ps(b);
29 auto const o_ = _mm_mul_ps(a_, b_);
30 _mm_storeu_ps(o, o_);
31 }
32#endif
33 }
34
35 for (auto const a_end = a + n; a != a_end; ++a, ++b, ++o) {
36 *o = *a * *b;
37 }
38}
39
47constexpr void dsp_mul(float const *a, float b, float *o, size_t n) noexcept
48{
49 if (not std::is_constant_evaluated()) {
50#if defined(HI_HAS_AVX)
51 auto const b_ = _mm256_set1_ps(b);
52 for (auto const a_end = a + floor(n, 8); a != a_end; a += 8, o += 8) {
53 auto const a_ = _mm256_loadu_ps(a);
54 auto const o_ = _mm256_mul_ps(a_, b_);
55 _mm256_storeu_ps(o, o_);
56 }
57
58#elif defined(HI_HAS_SSE)
59 auto const b_ = _mm_set1_ps(b);
60 for (auto const a_end = a + floor(n, 4); a != a_end; a += 4, o += 4) {
61 auto const a_ = _mm_loadu_ps(a);
62 auto const o_ = _mm_mul_ps(a_, b_);
63 _mm_storeu_ps(o, o_);
64 }
65#endif
66 }
67
68 for (auto const a_end = a + n; a != a_end; ++a, ++o) {
69 *o = *a * b;
70 }
71}
72
73
81constexpr void dsp_mul_acc(float const *a, float const *b, float *o, size_t n) noexcept
82{
83 if (not std::is_constant_evaluated()) {
84#if defined(HI_HAS_AVX)
85 for (auto const a_end = a + floor(n, 8); a != a_end; a += 8, b += 8, o += 8) {
86 auto const a_ = _mm256_loadu_ps(a);
87 auto const b_ = _mm256_loadu_ps(b);
88 auto const o_ = _mm256_mul_ps(a_, b_);
89 _mm256_storeu_ps(o, _mm256_add_ps(_mm256_loadu_ps(o), o_));
90 }
91
92#elif defined(HI_HAS_SSE)
93 for (auto const a_end = a + floor(n, 4); a != a_end; a += 4, b += 4, o += 4) {
94 auto const a_ = _mm_loadu_ps(a);
95 auto const b_ = _mm_loadu_ps(b);
96 auto const o_ = _mm_mul_ps(a_, b_);
97 _mm_storeu_ps(o, _mm_add_ps(_mm_loadu_ps(o), o_));
98 }
99#endif
100 }
101
102 for (auto const a_end = a + n; a != a_end; ++a, ++b, ++o) {
103 *o = *o + *a * *b;
104 }
105}
106
114constexpr void dsp_mul_acc(float const *a, float b, float *o, size_t n) noexcept
115{
116 if (not std::is_constant_evaluated()) {
117#if defined(HI_HAS_AVX)
118 auto const b_ = _mm256_set1_ps(b);
119 for (auto const a_end = a + floor(n, 8); a != a_end; a += 8, o += 8) {
120 auto const a_ = _mm256_loadu_ps(a);
121 auto const o_ = _mm256_mul_ps(a_, b_);
122 _mm256_storeu_ps(o, _mm256_add_ps(_mm256_loadu_ps(o), o_));
123 }
124
125#elif defined(HI_HAS_SSE)
126 auto const b_ = _mm_set1_ps(b);
127 for (auto const a_end = a + floor(n, 4); a != a_end; a += 4, o += 4) {
128 auto const a_ = _mm_loadu_ps(a);
129 auto const o_ = _mm_mul_ps(a_, b_);
130 _mm_storeu_ps(o, _mm_add_ps(_mm_loadu_ps(o), o_));
131 }
132#endif
133 }
134
135 for (auto const a_end = a + n; a != a_end; ++a, ++o) {
136 *o = *o + *a * b;
137 }
138}
139
140
141}}
The HikoGUI namespace.
Definition array_generic.hpp:20
constexpr void dsp_mul_acc(float const *a, float const *b, float *o, size_t n) noexcept
Multiply two float arrays and accumulate into another array.
Definition dsp_mul.hpp:81
DOXYGEN BUG.
Definition algorithm_misc.hpp:20