30 _dither(format.num_bits), _format(format), _stride(stride)
32 _store_shuffle_indices = format.store_shuffle_indices(stride);
33 _concat_shuffle_indices = format.concat_shuffle_indices(stride);
35 _multiplier = f32x4::broadcast(format.pack_multiplier());
37 _num_chunks_per_quad = format.num_chunks_per_quad(stride);
38 _chunk_stride = format.chunk_stride(stride);
40 _direction = format.endian == std::endian::little ? 1 : -1;
41 _start_byte = format.endian == std::endian::little ? 0 : format.num_bytes - 1;
42 _align_shift = 32 - format.num_bytes * 8;
// Pack `num_samples` float samples read from `src` into the byte buffer `dst`.
//
// @param src          Source float samples; asserted to be non-null.
// @param dst          Destination byte buffer; asserted to be non-null.
// @param num_samples  Number of float samples to process; `src + num_samples`
//                     is used as the end of the input range.
//
// Two loop pairs are visible below: one that stores the float bits unchanged
// and one that dithers, clamps, scales and converts to integers. Each pair has
// a four-samples-at-a-time loop followed by a one-sample-at-a-time loop.
//
// NOTE(review): this excerpt shows no braces and no branch between the two
// loop pairs; whatever construct selects between them is not visible here --
// confirm against the full file.
// NOTE(review): none of the loops advance `src` or `dst` visibly; presumably
// load_sample(s)/store_sample(s) take them by reference and advance them --
// verify against the helper declarations.
51 void operator()(
float const *hi_restrict src, std::byte *hi_restrict dst,
std::size_t num_samples)
const noexcept
// Both pointers must be valid before any load or store happens.
53 hi_assert(src !=
nullptr);
54 hi_assert(dst !=
nullptr);
// `src_end` is the end of the whole input; `src_fast_end` is the end of the
// part handled four samples at a time (num_fast_quads() quads of 4 floats).
58 auto const src_end = src + num_samples;
59 auto const src_fast_end = src + _format.
num_fast_quads(_stride, num_samples) * 4;
// Local copies of the member values that are passed to store_samples().
61 auto const store_shuffle_indices = _store_shuffle_indices;
62 auto const concat_shuffle_indices = _concat_shuffle_indices;
63 auto const num_chunks_per_quad = _num_chunks_per_quad;
64 auto const chunk_stride = _chunk_stride;
// First loop pair, quad loop: the loaded floats are handed to
// i8x16::cast_from() with no arithmetic applied before store_samples().
// NOTE(review): presumably cast_from() reinterprets the bits -- confirm.
67 while (src != src_fast_end) {
68 auto const float_samples = load_samples(src);
69 auto const int_samples = i8x16::cast_from(float_samples);
70 store_samples(int_samples, dst, store_shuffle_indices, concat_shuffle_indices, num_chunks_per_quad, chunk_stride);
// First loop pair, single-sample loop: runs until `src_end`; std::bit_cast
// keeps the float's bit pattern in an int32_t before store_sample().
72 while (src != src_end) {
73 auto const float_sample = load_sample(src);
74 auto const int_sample = std::bit_cast<int32_t>(float_sample);
75 store_sample(int_sample, dst, _stride, _format.
num_bytes, _direction, _start_byte, _align_shift);
// Second loop pair setup: the scale factor and the +1 / -1 clamp bounds,
// each broadcast into an f32x4.
79 auto const multiplier = _multiplier;
80 auto const one = f32x4::broadcast(1);
81 auto const min_one = f32x4::broadcast(-1);
// Work on a local copy of the dither object; next() is called on the copy,
// not on the member, inside this const member function.
83 auto dither = _dither;
// Second loop pair, quad loop: add dither, clamp to [-1, 1] (min with one,
// then max with min_one), scale by the multiplier, convert to i32x4, then
// pass the result through i8x16::cast_from() to store_samples().
85 while (src != src_fast_end) {
86 auto const dither_value = dither.next();
88 auto float_samples = load_samples(src);
89 float_samples += dither_value;
90 float_samples = min(float_samples,
one);
91 float_samples = max(float_samples, min_one);
92 float_samples *= multiplier;
93 auto const int_samples = i8x16::cast_from(
static_cast<i32x4>(float_samples));
94 store_samples(int_samples, dst, store_shuffle_indices, concat_shuffle_indices, num_chunks_per_quad, chunk_stride);
// Second loop pair, single-sample loop: the same dither/clamp/scale steps
// on one sample broadcast into an f32x4; get<0>() takes element 0 of the
// converted i32x4 and passes it to store_sample().
96 while (src != src_end) {
97 auto const dither_value = dither.next();
99 auto float_sample = f32x4::broadcast(load_sample(src));
100 float_sample += dither_value;
101 float_sample = min(float_sample,
one);
102 float_sample = max(float_sample, min_one);
103 float_sample *= multiplier;
104 auto const int_sample = get<0>(
static_cast<i32x4>(float_sample));
105 store_sample(int_sample, dst, _stride, _format.
num_bytes, _direction, _start_byte, _align_shift);