HikoGUI
A low latency retained GUI
Loading...
Searching...
No Matches
audio_sample_packer.hpp
1// Copyright Take Vos 2021.
2// Distributed under the Boost Software License, Version 1.0.
3// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
4
5#pragma once
6
7#include "audio_sample_format.hpp"
8#include "../utility/utility.hpp"
9#include "../random/random.hpp"
10#include "../macros.hpp"
11#include <hikocpu/hikocpu.hpp>
12#include <cstddef>
13#include <bit>
14
15hi_export_module(hikogui.audio.audio_sample_packer);
16
17hi_export namespace hi { inline namespace v1 {
18
19hi_export class audio_sample_packer {
20public:
30 _dither(format.num_bits), _format(format), _stride(stride)
31 {
32 _store_shuffle_indices = format.store_shuffle_indices(stride);
33 _concat_shuffle_indices = format.concat_shuffle_indices(stride);
34
35 _multiplier = f32x4::broadcast(format.pack_multiplier());
36
37 _num_chunks_per_quad = format.num_chunks_per_quad(stride);
38 _chunk_stride = format.chunk_stride(stride);
39
40 _direction = format.endian == std::endian::little ? 1 : -1;
41 _start_byte = format.endian == std::endian::little ? 0 : format.num_bytes - 1;
42 _align_shift = 32 - format.num_bytes * 8;
43 }
44
51 void operator()(float const *hi_restrict src, std::byte *hi_restrict dst, std::size_t num_samples) const noexcept
52 {
53 hi_assert(src != nullptr);
54 hi_assert(dst != nullptr);
55
56 // Calculate a conservative number of samples that can be copied quickly
57 // without overflowing the dst buffer.
58 auto const src_end = src + num_samples;
59 auto const src_fast_end = src + _format.num_fast_quads(_stride, num_samples) * 4;
60
61 auto const store_shuffle_indices = _store_shuffle_indices;
62 auto const concat_shuffle_indices = _concat_shuffle_indices;
63 auto const num_chunks_per_quad = _num_chunks_per_quad;
64 auto const chunk_stride = _chunk_stride;
65
66 if (_format.is_float) {
67 while (src != src_fast_end) {
68 auto const float_samples = load_samples(src);
69 auto const int_samples = i8x16::cast_from(float_samples);
70 store_samples(int_samples, dst, store_shuffle_indices, concat_shuffle_indices, num_chunks_per_quad, chunk_stride);
71 }
72 while (src != src_end) {
73 auto const float_sample = load_sample(src);
74 auto const int_sample = std::bit_cast<int32_t>(float_sample);
75 store_sample(int_sample, dst, _stride, _format.num_bytes, _direction, _start_byte, _align_shift);
76 }
77
78 } else {
79 auto const multiplier = _multiplier;
80 auto const one = f32x4::broadcast(1);
81 auto const min_one = f32x4::broadcast(-1);
82
83 auto dither = _dither;
84
85 while (src != src_fast_end) {
86 auto const dither_value = dither.next();
87
88 auto float_samples = load_samples(src);
89 float_samples += dither_value;
90 float_samples = min(float_samples, one);
91 float_samples = max(float_samples, min_one);
92 float_samples *= multiplier;
93 auto const int_samples = i8x16::cast_from(static_cast<i32x4>(float_samples));
94 store_samples(int_samples, dst, store_shuffle_indices, concat_shuffle_indices, num_chunks_per_quad, chunk_stride);
95 }
96 while (src != src_end) {
97 auto const dither_value = dither.next();
98
99 auto float_sample = f32x4::broadcast(load_sample(src));
100 float_sample += dither_value;
101 float_sample = min(float_sample, one);
102 float_sample = max(float_sample, min_one);
103 float_sample *= multiplier;
104 auto const int_sample = get<0>(static_cast<i32x4>(float_sample));
105 store_sample(int_sample, dst, _stride, _format.num_bytes, _direction, _start_byte, _align_shift);
106 }
107
108 _dither = dither;
109 }
110 }
111
112private:
113 i8x16 _store_shuffle_indices;
114 i8x16 _concat_shuffle_indices;
115 f32x4 _multiplier;
116 mutable dither _dither;
117 audio_sample_format _format;
118 std::size_t _num_chunks_per_quad;
119 std::size_t _stride;
120 std::size_t _chunk_stride;
121 int _direction;
122 int _start_byte;
123 int _align_shift;
124
125 static void store_sample(
126 int32_t int_sample,
127 std::byte * hi_restrict & dst,
128 std::size_t stride,
129 int num_bytes,
130 int direction,
131 int start_byte,
132 int align_shift) noexcept
133 {
134 int_sample >>= align_shift;
135
136 hi_axiom(dst != nullptr);
137 auto p = dst + start_byte;
138 do {
139 *p = static_cast<std::byte>(int_sample);
140 p += direction;
141 int_sample >>= 8;
142 } while (--num_bytes);
143
144 dst += stride;
145 }
146
147 static void
148 store_samples(i8x16 int_samples, std::byte * hi_restrict & dst, i8x16 store_shuffle_indices, std::size_t stride) noexcept
149 {
150 hi_axiom(dst != nullptr);
151 hi_axiom(stride > 0);
152
153 // Read out the samples from the other channels, that where packed before.
154 auto tmp = i8x16::load(dst);
155
156 auto const packed_samples = permute(int_samples, store_shuffle_indices);
157
158 // When the shuffle-index is -1 use the samples from the other channels.
159 tmp = blend(packed_samples, tmp, store_shuffle_indices);
160
161 // Store back the samples from this channel and from the other channel.
162 tmp.store(dst);
163
164 dst += stride;
165 }
166
167 static void store_samples(
168 i8x16 int_samples,
169 std::byte * hi_restrict & dst,
170 i8x16 store_shuffle_indices,
171 i8x16 concat_shuffle_indices,
172 std::size_t num_chunks,
173 std::size_t stride) noexcept
174 {
175 hi_assert(dst != nullptr);
176 hi_assert(num_chunks > 0 and num_chunks <= 4);
177 hi_assert(stride > 0);
178
179 do {
180 store_samples(int_samples, dst, store_shuffle_indices, stride);
181 int_samples = permute(int_samples, concat_shuffle_indices);
182 // The result of the last shuffle is not used, so will be pipelined by the CPU.
183 } while (--num_chunks);
184 }
185
186 [[nodiscard]] static float load_sample(float const * hi_restrict & src) noexcept
187 {
188 hi_axiom(src != nullptr);
189 return *(src++);
190 }
191
192 [[nodiscard]] static f32x4 load_samples(float const * hi_restrict & src) noexcept
193 {
194 auto const r = f32x4::load(src);
195 src += 4;
196 return r;
197 }
198};
199
200}} // namespace hi::inline v1
The HikoGUI namespace.
Definition array_generic.hpp:20
@ one
The number was one, and this means something in the current language.
DOXYGEN BUG.
Definition algorithm_misc.hpp:20
Definition simd_intf.hpp:18
Audio sample format.
Definition audio_sample_format.hpp:30
uint8_t num_bytes
The number of bytes of the container.
Definition audio_sample_format.hpp:34
bool is_float
The numeric type is floating point.
Definition audio_sample_format.hpp:56
std::size_t num_fast_quads(std::size_t stride, std::size_t num_samples) const noexcept
Calculate the number of 4-sample quads that can be handled as chunked loads and stores.
Definition audio_sample_format.hpp:238
Definition audio_sample_packer.hpp:19
audio_sample_packer(audio_sample_format format, std::size_t stride) noexcept
Audio sample packer One instance of this class can be used to pack multiple buffers either from one a...
Definition audio_sample_packer.hpp:29
void operator()(float const *hi_restrict src, std::byte *hi_restrict dst, std::size_t num_samples) const noexcept
Pack samples.
Definition audio_sample_packer.hpp:51