Skip to content

Commit bdbf36e

Browse files
authored
Add audio normalizer (#642)
1 parent ceb316e commit bdbf36e

File tree

5 files changed

+187
-6
lines changed

5 files changed

+187
-6
lines changed

Cargo.lock

Lines changed: 15 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/audio/Cargo.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ tokio-stream = { workspace = true }
2020
cpal = { workspace = true }
2121
rodio = { workspace = true, features = ["vorbis"] }
2222

23+
ebur128 = "0.1.10"
2324
kalosm-sound = { workspace = true, default-features = false }
2425
ringbuf = "0.4.8"
2526

@@ -33,5 +34,7 @@ wasapi = { git = "https://github.com/HEnquist/wasapi-rs", rev = "24ae99c0134f7e1
3334
alsa = "0.9.1"
3435

3536
[dev-dependencies]
36-
rodio = "*"
37+
hound = { workspace = true }
38+
hypr-data = { workspace = true }
39+
rodio = { workspace = true }
3740
serial_test = { workspace = true }

crates/audio/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
mod errors;
22
mod mic;
3+
mod norm;
34
mod speaker;
45
mod stream;
56

67
pub use errors::*;
78
pub use mic::*;
9+
pub use norm::*;
810
pub use speaker::*;
911
pub use stream::*;
1012

crates/audio/src/norm.rs

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
use std::pin::Pin;
2+
use std::task::{Context, Poll};
3+
4+
use ebur128::{EbuR128, Mode};
5+
use futures_util::Stream;
6+
7+
/// Channel count handed to the loudness analyzer (the stream is treated as mono).
const CHANNELS: u32 = 1;

/// Integrated loudness, in LUFS, that the running gain is steered toward.
const TARGET_LUFS: f64 = -23.0;

/// Peak ceiling in decibels; converted to a linear amplitude with `10^(dB / 20)`.
const TRUE_PEAK_LIMIT: f64 = -1.0;

/// Length of the limiter's delay line, in milliseconds.
const LIMITER_LOOKAHEAD_MS: usize = 10;

/// Number of samples accumulated before they are fed to the loudness analyzer.
const ANALYZE_CHUNK_SIZE: usize = 512;
12+
13+
pub struct NormalizedSource<S: kalosm_sound::AsyncSource> {
14+
source: S,
15+
gain_linear: f32,
16+
ebur128: EbuR128,
17+
loudness_buffer: Vec<f32>,
18+
limiter: TruePeakLimiter,
19+
true_peak_limit: f32,
20+
}
21+
22+
struct TruePeakLimiter {
23+
lookahead_samples: usize,
24+
buffer: Vec<f32>,
25+
gain_reduction: Vec<f32>,
26+
current_position: usize,
27+
}
28+
29+
impl TruePeakLimiter {
30+
fn new(sample_rate: u32) -> Self {
31+
let lookahead_samples = ((sample_rate as usize * LIMITER_LOOKAHEAD_MS) / 1000).max(1);
32+
33+
Self {
34+
lookahead_samples,
35+
buffer: vec![0.0; lookahead_samples],
36+
gain_reduction: vec![1.0; lookahead_samples],
37+
current_position: 0,
38+
}
39+
}
40+
41+
fn process(&mut self, sample: f32, true_peak_limit: f32) -> f32 {
42+
self.buffer[self.current_position] = sample;
43+
44+
let sample_abs = sample.abs();
45+
if sample_abs > true_peak_limit {
46+
let reduction = true_peak_limit / sample_abs;
47+
self.gain_reduction[self.current_position] = reduction;
48+
} else {
49+
self.gain_reduction[self.current_position] = 1.0;
50+
}
51+
52+
let output_position = (self.current_position + 1) % self.lookahead_samples;
53+
let output_sample = self.buffer[output_position] * self.gain_reduction[output_position];
54+
55+
self.current_position = output_position;
56+
output_sample
57+
}
58+
}
59+
60+
pub trait NormalizeExt<S: kalosm_sound::AsyncSource> {
61+
fn normalize(self) -> NormalizedSource<S>;
62+
}
63+
64+
impl<S: kalosm_sound::AsyncSource> NormalizeExt<S> for S {
65+
fn normalize(self) -> NormalizedSource<S> {
66+
let sample_rate = self.sample_rate();
67+
let ebur128 = EbuR128::new(CHANNELS, sample_rate, Mode::I | Mode::TRUE_PEAK)
68+
.expect("Failed to create EBU R128 analyzer");
69+
70+
let true_peak_limit = 10_f32.powf(TRUE_PEAK_LIMIT as f32 / 20.0);
71+
72+
NormalizedSource {
73+
source: self,
74+
gain_linear: 1.0,
75+
ebur128,
76+
loudness_buffer: Vec::with_capacity(ANALYZE_CHUNK_SIZE),
77+
limiter: TruePeakLimiter::new(sample_rate),
78+
true_peak_limit,
79+
}
80+
}
81+
}
82+
83+
impl<S: kalosm_sound::AsyncSource + Unpin> Stream for NormalizedSource<S> {
84+
type Item = f32;
85+
86+
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
87+
let this = self.get_mut();
88+
let mut inner = std::pin::pin!(this.source.as_stream());
89+
90+
match inner.as_mut().poll_next(cx) {
91+
Poll::Ready(Some(sample)) => {
92+
this.loudness_buffer.push(sample);
93+
94+
if this.loudness_buffer.len() >= ANALYZE_CHUNK_SIZE {
95+
let _ = this.ebur128.add_frames_f32(&this.loudness_buffer);
96+
this.loudness_buffer.clear();
97+
98+
if let Ok(current_lufs) = this.ebur128.loudness_global() {
99+
if current_lufs.is_finite() && current_lufs < 0.0 {
100+
let gain_db = TARGET_LUFS - current_lufs;
101+
this.gain_linear = 10_f32.powf(gain_db as f32 / 20.0);
102+
}
103+
}
104+
}
105+
106+
let amplified = sample * this.gain_linear;
107+
let limited = this.limiter.process(amplified, this.true_peak_limit);
108+
109+
Poll::Ready(Some(limited))
110+
}
111+
Poll::Pending => Poll::Pending,
112+
Poll::Ready(None) => Poll::Ready(None),
113+
}
114+
}
115+
}
116+
117+
impl<S: kalosm_sound::AsyncSource + Unpin> kalosm_sound::AsyncSource for NormalizedSource<S> {
118+
fn sample_rate(&self) -> u32 {
119+
self.source.sample_rate()
120+
}
121+
122+
fn as_stream(&mut self) -> impl Stream<Item = f32> + '_ {
123+
Box::pin(self)
124+
}
125+
}
126+
127+
#[cfg(test)]
mod tests {
    use super::*;
    use futures_util::StreamExt;
    use kalosm_sound::AsyncSource;

    /// Normalizes a sample recording end to end and writes the result as a 32-bit float
    /// WAV for manual listening.
    ///
    /// The file goes to the OS temp directory so a test run leaves no artifact inside
    /// the crate. Every emitted sample must be finite and within full scale, which the
    /// limiter guarantees for finite input.
    #[tokio::test]
    async fn test_normalize() {
        let file = std::fs::File::open(hypr_data::english_1::AUDIO_PATH).unwrap();
        let audio = rodio::Decoder::new_wav(std::io::BufReader::new(file)).unwrap();

        let sample_rate = audio.sample_rate();
        let mut normalized = audio.normalize();

        let spec = hound::WavSpec {
            channels: 1,
            sample_rate,
            bits_per_sample: 32,
            sample_format: hound::SampleFormat::Float,
        };
        let output_path = std::env::temp_dir().join("normalized_output.wav");
        let mut writer = hound::WavWriter::create(&output_path, spec).unwrap();

        let mut stream = normalized.as_stream();
        while let Some(sample) = stream.next().await {
            assert!(
                sample.is_finite() && sample.abs() <= 1.0,
                "normalized sample out of range: {sample}"
            );
            writer.write_sample(sample).unwrap();
        }
        writer.finalize().unwrap();
    }
}

crates/audio/src/speaker/macos.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1+
use std::sync::{Arc, Mutex};
2+
use std::task::{Poll, Waker};
3+
14
use anyhow::Result;
25
use futures_util::Stream;
36
use ringbuf::{
47
traits::{Consumer, Producer, Split},
5-
HeapRb,
8+
HeapCons, HeapProd, HeapRb,
69
};
7-
use std::sync::{Arc, Mutex};
8-
use std::task::{Poll, Waker};
910

1011
use ca::aggregate_device_keys as agg_keys;
1112
use cidre::{arc, av, cat, cf, core_audio as ca, ns, os};
@@ -24,7 +25,7 @@ struct WakerState {
2425
}
2526

2627
pub struct SpeakerStream {
27-
consumer: ringbuf::HeapCons<f32>,
28+
consumer: HeapCons<f32>,
2829
stream_desc: cat::AudioBasicStreamDesc,
2930
sample_rate_override: Option<u32>,
3031
_device: ca::hardware::StartedDevice<ca::AggregateDevice>,
@@ -48,7 +49,7 @@ impl SpeakerStream {
4849

4950
struct Ctx {
5051
format: arc::R<av::AudioFormat>,
51-
producer: ringbuf::HeapProd<f32>,
52+
producer: HeapProd<f32>,
5253
waker_state: Arc<Mutex<WakerState>>,
5354
}
5455

0 commit comments

Comments
 (0)