HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
src
hikogui
DSP
dsp_mul.hpp
1
2
3
#pragma once
4
5
namespace
hi
{
inline
namespace
v1
{
6
14
/** Multiply two float arrays element-wise into an output array.
 *
 * Computes `o[i] = a[i] * b[i]` for every `i` in `[0, n)`.
 *
 * At run time, when `HI_HAS_AVX` or `HI_HAS_SSE` is defined, elements are
 * first processed 8 or 4 floats at a time using unaligned loads and stores;
 * whatever is left over is handled by the scalar loop. During constant
 * evaluation, or without either macro, only the scalar loop runs.
 *
 * @param a Pointer to the first input array; `n` floats are read.
 * @param b Pointer to the second input array; `n` floats are read.
 * @param o Pointer to the output array; `n` floats are written.
 * @param n The number of elements to process.
 */
constexpr void dsp_mul(float const *a, float const *b, float *o, size_t n) noexcept
{
    // Capture the end of the input before any pointer is advanced. The SIMD
    // loops below move `a` forward, so computing `a + n` afterwards would make
    // the scalar loop run past the end of all three arrays.
    auto const a_last = a + n;

#if defined(HI_HAS_AVX)
    if (not std::is_constant_evaluated()) {
        // NOTE(review): floor() is defined outside this file; it is assumed to
        // round `n` down to a multiple of 8 - confirm.
        for (auto const a_end = a + floor(n, 8); a != a_end; a += 8, b += 8, o += 8) {
            auto const a_ = _mm256_loadu_ps(a);
            auto const b_ = _mm256_loadu_ps(b);
            auto const o_ = _mm256_mul_ps(a_, b_);
            _mm256_storeu_ps(o, o_);
        }
    }
#elif defined(HI_HAS_SSE)
    if (not std::is_constant_evaluated()) {
        // NOTE(review): floor() is defined outside this file; it is assumed to
        // round `n` down to a multiple of 4 - confirm.
        for (auto const a_end = a + floor(n, 4); a != a_end; a += 4, b += 4, o += 4) {
            auto const a_ = _mm_loadu_ps(a);
            auto const b_ = _mm_loadu_ps(b);
            auto const o_ = _mm_mul_ps(a_, b_);
            _mm_storeu_ps(o, o_);
        }
    }
#endif

    // Scalar loop: the whole range when no SIMD loop ran, otherwise only the
    // elements the SIMD loop left over.
    for (; a != a_last; ++a, ++b, ++o) {
        *o = *a * *b;
    }
}
39
47
/** Multiply a float array by a scalar into an output array.
 *
 * Computes `o[i] = a[i] * b` for every `i` in `[0, n)`.
 *
 * At run time, when `HI_HAS_AVX` or `HI_HAS_SSE` is defined, elements are
 * first processed 8 or 4 floats at a time using unaligned loads and stores,
 * with `b` broadcast to every lane; whatever is left over is handled by the
 * scalar loop. During constant evaluation, or without either macro, only the
 * scalar loop runs.
 *
 * @param a Pointer to the input array; `n` floats are read.
 * @param b The scalar every element is multiplied by.
 * @param o Pointer to the output array; `n` floats are written.
 * @param n The number of elements to process.
 */
constexpr void dsp_mul(float const *a, float b, float *o, size_t n) noexcept
{
    // Capture the end of the input before any pointer is advanced. The SIMD
    // loops below move `a` forward, so computing `a + n` afterwards would make
    // the scalar loop run past the end of both arrays.
    auto const a_last = a + n;

#if defined(HI_HAS_AVX)
    if (not std::is_constant_evaluated()) {
        auto const b_ = _mm256_set1_ps(b);
        // NOTE(review): floor() is defined outside this file; it is assumed to
        // round `n` down to a multiple of 8 - confirm.
        for (auto const a_end = a + floor(n, 8); a != a_end; a += 8, o += 8) {
            auto const a_ = _mm256_loadu_ps(a);
            auto const o_ = _mm256_mul_ps(a_, b_);
            _mm256_storeu_ps(o, o_);
        }
    }
#elif defined(HI_HAS_SSE)
    if (not std::is_constant_evaluated()) {
        auto const b_ = _mm_set1_ps(b);
        // NOTE(review): floor() is defined outside this file; it is assumed to
        // round `n` down to a multiple of 4 - confirm.
        for (auto const a_end = a + floor(n, 4); a != a_end; a += 4, o += 4) {
            auto const a_ = _mm_loadu_ps(a);
            auto const o_ = _mm_mul_ps(a_, b_);
            _mm_storeu_ps(o, o_);
        }
    }
#endif

    // Scalar loop: the whole range when no SIMD loop ran, otherwise only the
    // elements the SIMD loop left over.
    for (; a != a_last; ++a, ++o) {
        *o = *a * b;
    }
}
72
73
81
/** Multiply two float arrays element-wise and accumulate into another array.
 *
 * Computes `o[i] = o[i] + a[i] * b[i]` for every `i` in `[0, n)`.
 *
 * At run time, when `HI_HAS_AVX` or `HI_HAS_SSE` is defined, elements are
 * first processed 8 or 4 floats at a time using unaligned loads and stores;
 * whatever is left over is handled by the scalar loop. During constant
 * evaluation, or without either macro, only the scalar loop runs.
 *
 * @param a Pointer to the first input array; `n` floats are read.
 * @param b Pointer to the second input array; `n` floats are read.
 * @param o Pointer to the accumulator array; `n` floats are read and written.
 * @param n The number of elements to process.
 */
constexpr void dsp_mul_acc(float const *a, float const *b, float *o, size_t n) noexcept
{
    // Capture the end of the input before any pointer is advanced. The SIMD
    // loops below move `a` forward, so computing `a + n` afterwards would make
    // the scalar loop accumulate past the end of all three arrays.
    auto const a_last = a + n;

#if defined(HI_HAS_AVX)
    if (not std::is_constant_evaluated()) {
        // NOTE(review): floor() is defined outside this file; it is assumed to
        // round `n` down to a multiple of 8 - confirm.
        for (auto const a_end = a + floor(n, 8); a != a_end; a += 8, b += 8, o += 8) {
            auto const a_ = _mm256_loadu_ps(a);
            auto const b_ = _mm256_loadu_ps(b);
            auto const o_ = _mm256_mul_ps(a_, b_);
            _mm256_storeu_ps(o, _mm256_add_ps(_mm256_loadu_ps(o), o_));
        }
    }
#elif defined(HI_HAS_SSE)
    if (not std::is_constant_evaluated()) {
        // NOTE(review): floor() is defined outside this file; it is assumed to
        // round `n` down to a multiple of 4 - confirm.
        for (auto const a_end = a + floor(n, 4); a != a_end; a += 4, b += 4, o += 4) {
            auto const a_ = _mm_loadu_ps(a);
            auto const b_ = _mm_loadu_ps(b);
            auto const o_ = _mm_mul_ps(a_, b_);
            _mm_storeu_ps(o, _mm_add_ps(_mm_loadu_ps(o), o_));
        }
    }
#endif

    // Scalar loop: the whole range when no SIMD loop ran, otherwise only the
    // elements the SIMD loop left over.
    for (; a != a_last; ++a, ++b, ++o) {
        *o = *o + *a * *b;
    }
}
106
114
/** Multiply a float array by a scalar and accumulate into another array.
 *
 * Computes `o[i] = o[i] + a[i] * b` for every `i` in `[0, n)`.
 *
 * At run time, when `HI_HAS_AVX` or `HI_HAS_SSE` is defined, elements are
 * first processed 8 or 4 floats at a time using unaligned loads and stores,
 * with `b` broadcast to every lane; whatever is left over is handled by the
 * scalar loop. During constant evaluation, or without either macro, only the
 * scalar loop runs.
 *
 * @param a Pointer to the input array; `n` floats are read.
 * @param b The scalar every element of `a` is multiplied by.
 * @param o Pointer to the accumulator array; `n` floats are read and written.
 * @param n The number of elements to process.
 */
constexpr void dsp_mul_acc(float const *a, float b, float *o, size_t n) noexcept
{
    // Capture the end of the input before any pointer is advanced. The SIMD
    // loops below move `a` forward, so computing `a + n` afterwards would make
    // the scalar loop accumulate past the end of both arrays.
    auto const a_last = a + n;

#if defined(HI_HAS_AVX)
    if (not std::is_constant_evaluated()) {
        auto const b_ = _mm256_set1_ps(b);
        // NOTE(review): floor() is defined outside this file; it is assumed to
        // round `n` down to a multiple of 8 - confirm.
        for (auto const a_end = a + floor(n, 8); a != a_end; a += 8, o += 8) {
            auto const a_ = _mm256_loadu_ps(a);
            auto const o_ = _mm256_mul_ps(a_, b_);
            _mm256_storeu_ps(o, _mm256_add_ps(_mm256_loadu_ps(o), o_));
        }
    }
#elif defined(HI_HAS_SSE)
    if (not std::is_constant_evaluated()) {
        auto const b_ = _mm_set1_ps(b);
        // NOTE(review): floor() is defined outside this file; it is assumed to
        // round `n` down to a multiple of 4 - confirm.
        for (auto const a_end = a + floor(n, 4); a != a_end; a += 4, o += 4) {
            auto const a_ = _mm_loadu_ps(a);
            auto const o_ = _mm_mul_ps(a_, b_);
            _mm_storeu_ps(o, _mm_add_ps(_mm_loadu_ps(o), o_));
        }
    }
#endif

    // Scalar loop: the whole range when no SIMD loop ran, otherwise only the
    // elements the SIMD loop left over.
    for (; a != a_last; ++a, ++o) {
        *o = *o + *a * b;
    }
}
139
140
141
}}
v1
DOXYGEN BUG.
Definition
algorithm_misc.hpp:20
hi
The HikoGUI namespace.
Definition
recursive_iterator.hpp:15
hi::v1::dsp_mul_acc
constexpr void dsp_mul_acc(float const *a, float const *b, float *o, size_t n) noexcept
Multiply two float arrays and accumulate into another array.
Definition
dsp_mul.hpp:81
hi::v1::narrow_cast
constexpr Out narrow_cast(In const &rhs) noexcept
Cast numeric values without loss of precision.
Definition
cast.hpp:378
Generated on Mon Apr 22 2024 12:51:58 for HikoGUI by
1.10.0