14constexpr void dsp_mul(
float const *a,
float const *b,
float *o,
size_t n)
noexcept
16 if (not std::is_constant_evaluated()) {
17#if defined(HI_HAS_AVX)
18 for (
auto const a_end = a + floor(n, 8); a != a_end; a += 8, b += 8, o += 8) {
19 auto const a_ = _mm256_loadu_ps(a);
20 auto const b_ = _mm256_loadu_ps(b);
21 auto const o_ = _mm256_mul_ps(a_, b_);
22 _mm256_storeu_ps(o, o_);
25#elif defined(HI_HAS_SSE)
26 for (
auto const a_end = a + floor(n, 4); a != a_end; a += 4, b += 4, o += 4) {
27 auto const a_ = _mm_loadu_ps(a);
28 auto const b_ = _mm_loadu_ps(b);
29 auto const o_ = _mm_mul_ps(a_, b_);
35 for (
auto const a_end = a + n; a != a_end; ++a, ++b, ++o) {
47constexpr void dsp_mul(
float const *a,
float b,
float *o,
size_t n)
noexcept
49 if (not std::is_constant_evaluated()) {
50#if defined(HI_HAS_AVX)
51 auto const b_ = _mm256_set1_ps(b);
52 for (
auto const a_end = a + floor(n, 8); a != a_end; a += 8, o += 8) {
53 auto const a_ = _mm256_loadu_ps(a);
54 auto const o_ = _mm256_mul_ps(a_, b_);
55 _mm256_storeu_ps(o, o_);
58#elif defined(HI_HAS_SSE)
59 auto const b_ = _mm_set1_ps(b);
60 for (
auto const a_end = a + floor(n, 4); a != a_end; a += 4, o += 4) {
61 auto const a_ = _mm_loadu_ps(a);
62 auto const o_ = _mm_mul_ps(a_, b_);
68 for (
auto const a_end = a + n; a != a_end; ++a, ++o) {
81constexpr void dsp_mul_acc(
float const *a,
float const *b,
float *o,
size_t n)
noexcept
83 if (not std::is_constant_evaluated()) {
84#if defined(HI_HAS_AVX)
85 for (
auto const a_end = a + floor(n, 8); a != a_end; a += 8, b += 8, o += 8) {
86 auto const a_ = _mm256_loadu_ps(a);
87 auto const b_ = _mm256_loadu_ps(b);
88 auto const o_ = _mm256_mul_ps(a_, b_);
89 _mm256_storeu_ps(o, _mm256_add_ps(_mm256_loadu_ps(o), o_));
92#elif defined(HI_HAS_SSE)
93 for (
auto const a_end = a + floor(n, 4); a != a_end; a += 4, b += 4, o += 4) {
94 auto const a_ = _mm_loadu_ps(a);
95 auto const b_ = _mm_loadu_ps(b);
96 auto const o_ = _mm_mul_ps(a_, b_);
97 _mm_storeu_ps(o, _mm_add_ps(_mm_loadu_ps(o), o_));
102 for (
auto const a_end = a + n; a != a_end; ++a, ++b, ++o) {
114constexpr void dsp_mul_acc(
float const *a,
float b,
float *o,
size_t n)
noexcept
116 if (not std::is_constant_evaluated()) {
117#if defined(HI_HAS_AVX)
118 auto const b_ = _mm256_set1_ps(b);
119 for (
auto const a_end = a + floor(n, 8); a != a_end; a += 8, o += 8) {
120 auto const a_ = _mm256_loadu_ps(a);
121 auto const o_ = _mm256_mul_ps(a_, b_);
122 _mm256_storeu_ps(o, _mm256_add_ps(_mm256_loadu_ps(o), o_));
125#elif defined(HI_HAS_SSE)
126 auto const b_ = _mm_set1_ps(b);
127 for (
auto const a_end = a + floor(n, 4); a != a_end; a += 4, o += 4) {
128 auto const a_ = _mm_loadu_ps(a);
129 auto const o_ = _mm_mul_ps(a_, b_);
130 _mm_storeu_ps(o, _mm_add_ps(_mm_loadu_ps(o), o_));
135 for (
auto const a_end = a + n; a != a_end; ++a, ++o) {
constexpr void dsp_mul_acc(float const *a, float const *b, float *o, size_t n) noexcept
Multiply two float arrays and accumulate into another array.
Definition dsp_mul.hpp:81