-
Notifications
You must be signed in to change notification settings - Fork 27
Add audio processing module #99
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
ladvoc
wants to merge
16
commits into
main
Choose a base branch
from
ladvoc/add-apm
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
16 commits
Select commit
Hold shift + click to select a range
32acf70
Implement apm
ladvoc cb23c17
Expose internals for testing
ladvoc aad73bc
Test APM
ladvoc 0892526
Add set stream delay method
ladvoc 7c6f099
Fix memory leak
ladvoc 6294861
Create audio buffer
ladvoc 624935f
Merge branch 'ladvoc/fix-audio-leak' into ladvoc/add-apm
ladvoc 5569acc
Integrate audio buffer
ladvoc 51c596b
Integrate apm
ladvoc fc0e83f
Call superclass stop method
ladvoc e09c6cc
Merge branch 'ladvoc/audio-source-stop' into ladvoc/add-apm
ladvoc bbad26a
Set stream delay
ladvoc 87f723a
Merge remote-tracking branch 'origin/main' into ladvoc/add-apm
ladvoc 5f71c75
Process & capture on audio thread
ladvoc af3d8ec
Optimize clear
ladvoc 89437aa
Remove locking from AudioBuffer
ladvoc File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
using LiveKit.Internal; | ||
using UnityEngine; | ||
|
||
namespace LiveKit | ||
{ | ||
/// <summary> | ||
/// Captures and processes the reverse audio stream using an <see cref="AudioProcessingModule"/>. | ||
/// </summary> | ||
/// <remarks> | ||
/// The reverse stream is captured from the scene's audio listener. | ||
/// </remarks> | ||
internal class ApmReverseStream | ||
{ | ||
private readonly AudioBuffer _captureBuffer = new AudioBuffer(); | ||
private readonly AudioProcessingModule _apm; // APM is thread safe | ||
private AudioFilter _audioFilter; | ||
|
||
internal ApmReverseStream(AudioProcessingModule apm) | ||
{ | ||
_apm = apm; | ||
} | ||
|
||
internal void Start() | ||
{ | ||
var audioListener = GameObject.FindObjectOfType<AudioListener>(); | ||
if (audioListener == null) | ||
{ | ||
Utils.Error("AudioListener not found in scene"); | ||
return; | ||
} | ||
_audioFilter = audioListener.gameObject.AddComponent<AudioFilter>(); | ||
_audioFilter.AudioRead += OnAudioRead; | ||
} | ||
|
||
internal void Stop() | ||
{ | ||
if (_audioFilter != null) | ||
Object.Destroy(_audioFilter); | ||
} | ||
|
||
private void OnAudioRead(float[] data, int channels, int sampleRate) | ||
{ | ||
_captureBuffer.Write(data, (uint)channels, (uint)sampleRate); | ||
while (true) | ||
{ | ||
using var frame = _captureBuffer.ReadDuration(AudioProcessingModule.FRAME_DURATION_MS); | ||
if (frame == null) break; | ||
|
||
_apm.ProcessReverseStream(frame); | ||
} | ||
} | ||
} | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
using LiveKit.Proto; | ||
using LiveKit.Internal.FFIClients.Requests; | ||
using LiveKit.Internal; | ||
using System; | ||
using System.Runtime.CompilerServices; | ||
[assembly: InternalsVisibleTo("Tests")] | ||
|
||
namespace LiveKit | ||
{ | ||
/// <summary> | ||
/// Provides WebRTC audio processing capabilities including echo cancellation, noise suppression, | ||
/// high-pass filtering, and gain control. | ||
/// </summary> | ||
public sealed class AudioProcessingModule | ||
{ | ||
internal readonly FfiHandle Handle; | ||
|
||
/// <summary> | ||
/// Initializes an <see cref="AudioProcessingModule" /> instance with the specified audio processing features. | ||
/// </summary> | ||
/// <param name="echoCancellationEnabled">Whether to enable echo cancellation.</param> | ||
/// <param name="noiseSuppressionEnabled">Whether to enable noise suppression.</param> | ||
/// <param name="highPassFilterEnabled">Whether to enable high-pass filtering.</param> | ||
/// <param name="gainControllerEnabled">Whether to enable gain control.</param> | ||
public AudioProcessingModule( | ||
bool echoCancellationEnabled, | ||
bool noiseSuppressionEnabled, | ||
bool highPassFilterEnabled, | ||
bool gainControllerEnabled) | ||
{ | ||
using var request = FFIBridge.Instance.NewRequest<NewApmRequest>(); | ||
var newApm = request.request; | ||
newApm.EchoCancellerEnabled = echoCancellationEnabled; | ||
newApm.NoiseSuppressionEnabled = noiseSuppressionEnabled; | ||
newApm.HighPassFilterEnabled = highPassFilterEnabled; | ||
newApm.GainControllerEnabled = gainControllerEnabled; | ||
|
||
using var response = request.Send(); | ||
FfiResponse res = response; | ||
Handle = FfiHandle.FromOwnedHandle(res.NewApm.Apm.Handle); | ||
} | ||
|
||
/// <summary> | ||
/// Process the provided audio frame using the configured audio processing features. | ||
/// </summary> | ||
/// <param name="data">The audio frame to process.</param> | ||
/// <remarks> | ||
/// Important: Audio frames must be exactly 10 ms in duration. | ||
/// | ||
/// The input audio frame is modified in-place (if applicable) by the underlying audio | ||
/// processing module (e.g., echo cancellation, noise suppression, etc.). | ||
/// </remarks> | ||
public void ProcessStream(AudioFrame data) | ||
{ | ||
using var request = FFIBridge.Instance.NewRequest<ApmProcessStreamRequest>(); | ||
var processStream = request.request; | ||
processStream.ApmHandle = (ulong)Handle.DangerousGetHandle(); | ||
processStream.DataPtr = (ulong)data.Data; | ||
processStream.Size = (uint)data.Length; | ||
processStream.SampleRate = data.SampleRate; | ||
processStream.NumChannels = data.NumChannels; | ||
|
||
using var response = request.Send(); | ||
FfiResponse res = response; | ||
if (res.ApmProcessStream.HasError) | ||
{ | ||
throw new Exception(res.ApmProcessStream.Error); | ||
} | ||
} | ||
|
||
/// <summary> | ||
/// Process the reverse audio frame (typically used for echo cancellation in a full-duplex setup). | ||
/// </summary> | ||
/// <param name="data">The audio frame to process.</param> | ||
/// <remarks> | ||
/// Important: Audio frames must be exactly 10 ms in duration. | ||
/// | ||
/// In an echo cancellation scenario, this method is used to process the "far-end" audio | ||
/// prior to mixing or feeding it into the echo canceller. Like <see cref="ProcessStream"/>, the | ||
/// input audio frame is modified in-place by the underlying processing module. | ||
/// </remarks> | ||
public void ProcessReverseStream(AudioFrame data) | ||
{ | ||
using var request = FFIBridge.Instance.NewRequest<ApmProcessReverseStreamRequest>(); | ||
var processReverseStream = request.request; | ||
processReverseStream.ApmHandle = (ulong)Handle.DangerousGetHandle(); | ||
processReverseStream.DataPtr = (ulong)data.Data; | ||
processReverseStream.Size = (uint)data.Length; | ||
processReverseStream.SampleRate = data.SampleRate; | ||
processReverseStream.NumChannels = data.NumChannels; | ||
|
||
using var response = request.Send(); | ||
FfiResponse res = response; | ||
if (res.ApmProcessReverseStream.HasError) | ||
{ | ||
throw new Exception(res.ApmProcessReverseStream.Error); | ||
} | ||
} | ||
|
||
/// <summary> | ||
/// This must be called if and only if echo processing is enabled. | ||
/// </summary> | ||
/// <remarks> | ||
/// Sets the `delay` in milliseconds between receiving a far-end frame in <see cref="ProcessReverseStream"/> | ||
/// and receiving the corresponding echo in a near-end frame in <see cref="ProcessStream"/>. | ||
/// | ||
/// The delay can be calculated as: delay = (t_render - t_analyze) + (t_process - t_capture) | ||
/// | ||
/// Where: | ||
/// - t_analyze: Time when frame is passed to <see cref="ProcessReverseStream"/> | ||
/// - t_render: Time when first sample of frame is rendered by audio hardware | ||
/// - t_capture: Time when first sample of frame is captured by audio hardware | ||
/// - t_process: Time when frame is passed to <see cref="ProcessStream"/> | ||
/// </remarks> | ||
public void SetStreamDelayMs(int delayMs) | ||
{ | ||
using var request = FFIBridge.Instance.NewRequest<ApmSetStreamDelayRequest>(); | ||
var setStreamDelay = request.request; | ||
setStreamDelay.ApmHandle = (ulong)Handle.DangerousGetHandle(); | ||
setStreamDelay.DelayMs = delayMs; | ||
|
||
using var response = request.Send(); | ||
FfiResponse res = response; | ||
if (res.ApmSetStreamDelay.HasError) | ||
{ | ||
throw new Exception(res.ApmSetStreamDelay.Error); | ||
} | ||
} | ||
|
||
/// <summary> | ||
/// The required duration for audio frames being processed. | ||
/// </summary> | ||
public const uint FRAME_DURATION_MS = 10; | ||
} | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
using System; | ||
using LiveKit.Internal; | ||
|
||
namespace LiveKit | ||
{ | ||
/// <summary> | ||
/// A ring buffer for audio samples. | ||
/// </summary> | ||
internal class AudioBuffer | ||
{ | ||
private readonly uint _bufferDurationMs; | ||
private RingBuffer _buffer; | ||
private uint _channels; | ||
private uint _sampleRate; | ||
|
||
/// <summary> | ||
/// Initializes a new audio sample buffer for holding samples for a given duration. | ||
/// </summary> | ||
internal AudioBuffer(uint bufferDurationMs = 200) | ||
{ | ||
_bufferDurationMs = bufferDurationMs; | ||
} | ||
|
||
/// <summary> | ||
/// Write audio samples. | ||
/// </summary> | ||
/// <remarks> | ||
/// The float data will be converted to short format before being written to the buffer. | ||
/// If the number of channels or sample rate changes, the buffer will be recreated. | ||
/// </remarks> | ||
/// <param name="data">The audio samples to write.</param> | ||
/// <param name="channels">The number of channels in the audio data.</param> | ||
/// <param name="sampleRate">The sample rate of the audio data in Hz.</param> | ||
internal void Write(float[] data, uint channels, uint sampleRate) | ||
{ | ||
static short FloatToS16(float v) | ||
{ | ||
v *= 32768f; | ||
v = Math.Min(v, 32767f); | ||
v = Math.Max(v, -32768f); | ||
return (short)(v + Math.Sign(v) * 0.5f); | ||
} | ||
|
||
var s16Data = new short[data.Length]; | ||
for (int i = 0; i < data.Length; i++) | ||
{ | ||
s16Data[i] = FloatToS16(data[i]); | ||
} | ||
Capture(s16Data, channels, sampleRate); | ||
} | ||
|
||
private void Capture(short[] data, uint channels, uint sampleRate) | ||
{ | ||
if (_buffer == null || channels != _channels || sampleRate != _sampleRate) | ||
{ | ||
var size = (int)(channels * sampleRate * (_bufferDurationMs / 1000f)); | ||
_buffer?.Dispose(); | ||
_buffer = new RingBuffer(size * sizeof(short)); | ||
_channels = channels; | ||
_sampleRate = sampleRate; | ||
} | ||
unsafe | ||
{ | ||
fixed (short* pData = data) | ||
{ | ||
var byteData = new ReadOnlySpan<byte>(pData, data.Length * sizeof(short)); | ||
_buffer.Write(byteData); | ||
} | ||
} | ||
} | ||
|
||
/// <summary> | ||
/// Reads a frame that is the length of the given duration. | ||
/// </summary> | ||
/// <param name="durationMs">The duration of the audio samples to read in milliseconds.</param> | ||
/// <returns>An AudioFrame containing the read audio samples or if there is not enough samples, null.</returns> | ||
internal AudioFrame ReadDuration(uint durationMs) | ||
{ | ||
if (_buffer == null) return null; | ||
|
||
var samplesForDuration = (uint)(_sampleRate * (durationMs / 1000f)); | ||
var requiredLength = samplesForDuration * _channels * sizeof(short); | ||
if (_buffer.AvailableRead() < requiredLength) return null; | ||
|
||
var frame = new AudioFrame(_sampleRate, _channels, samplesForDuration); | ||
unsafe | ||
{ | ||
var frameData = new Span<byte>(frame.Data.ToPointer(), frame.Length); | ||
_buffer.Read(frameData); | ||
} | ||
return frame; | ||
} | ||
} | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe this one too?