Skip to content
Open
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions Runtime/Scripts/ApmReverseStream.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
using System.Threading;
using LiveKit.Internal;
using UnityEngine;

namespace LiveKit
{
/// <summary>
/// Captures and processes the reverse audio stream using an <see cref="AudioProcessingModule"/>.
/// </summary>
/// <remarks>
/// The reverse stream is captured from the scene's audio listener via an
/// <see cref="AudioFilter"/> attached to it, buffered, and fed to the APM in
/// 10 ms frames from a background thread.
/// </remarks>
internal class ApmReverseStream
{
    private readonly AudioBuffer _captureBuffer = new AudioBuffer();
    private readonly AudioProcessingModule _apm; // APM is thread safe
    private Thread _thread;
    private AudioFilter _audioFilter;
    // Cooperative shutdown flag for the processing thread; volatile so the
    // write in Stop() is visible to the loop without additional locking.
    private volatile bool _running;

    internal ApmReverseStream(AudioProcessingModule apm)
    {
        _apm = apm;
    }

    /// <summary>
    /// Attaches to the scene's <see cref="AudioListener"/> and starts the
    /// background processing thread. Logs an error and does nothing if no
    /// listener is present.
    /// </summary>
    internal void Start()
    {
        var audioListener = GameObject.FindObjectOfType<AudioListener>();
        if (audioListener == null)
        {
            Utils.Error("AudioListener not found in scene");
            return;
        }
        _audioFilter = audioListener.gameObject.AddComponent<AudioFilter>();
        _audioFilter.AudioRead += OnAudioRead;

        _running = true;
        _thread = new Thread(ProcessReverseStream);
        _thread.Start();
    }

    /// <summary>
    /// Stops the processing thread and removes the capture component.
    /// </summary>
    internal void Stop()
    {
        // Thread.Abort is obsolete and throws PlatformNotSupportedException on
        // modern .NET runtimes; signal the loop to exit and wait for it instead.
        _running = false;
        _thread?.Join();
        _thread = null;
        if (_audioFilter != null)
            Object.Destroy(_audioFilter);
    }

    private void ProcessReverseStream()
    {
        // NOTE(review): polling on a fixed delay can drift (skew) relative to
        // the audio capture clock over long sessions, which degrades AEC
        // quality. Consider processing frames directly as they are received
        // if the APM frame-size requirement can be met — TODO confirm.
        while (_running)
        {
            Thread.Sleep(Constants.TASK_DELAY);
            using var frame = _captureBuffer.ReadDuration(AudioProcessingModule.FRAME_DURATION_MS);
            if (frame == null) continue;
            _apm.ProcessReverseStream(frame);
        }
    }

    // Invoked on Unity's audio thread with interleaved float samples.
    private void OnAudioRead(float[] data, int channels, int sampleRate)
    {
        _captureBuffer.Write(data, (uint)channels, (uint)sampleRate);
    }
}
}
11 changes: 11 additions & 0 deletions Runtime/Scripts/ApmReverseStream.cs.meta

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 9 additions & 2 deletions Runtime/Scripts/AudioFrame.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
using LiveKit.Internal;
using Unity.Collections;
using Unity.Collections.LowLevel.Unsafe;
using System.Runtime.CompilerServices;
[assembly: InternalsVisibleTo("Tests")]

namespace LiveKit
{
Expand All @@ -23,6 +25,7 @@ public class AudioFrame : IDisposable

public AudioFrameBufferInfo Info => _info;

private NativeArray<byte> _allocatedData; // Only used if the frame's data is allocated by Unity
private IntPtr _dataPtr;
public IntPtr Data => _dataPtr;

Expand All @@ -44,8 +47,8 @@ internal AudioFrame(uint sampleRate, uint numChannels, uint samplesPerChannel) {
_samplesPerChannel = samplesPerChannel;
unsafe
{
var data = new NativeArray<byte>(Length, Allocator.Persistent);
_dataPtr = (IntPtr)NativeArrayUnsafeUtility.GetUnsafePtr(data);
_allocatedData = new NativeArray<byte>(Length, Allocator.Persistent);
_dataPtr = (IntPtr)NativeArrayUnsafeUtility.GetUnsafePtr(_allocatedData);
}
}
~AudioFrame()
Expand All @@ -63,6 +66,10 @@ protected virtual void Dispose(bool disposing)
{
if (!_disposed)
{
if (_allocatedData.IsCreated)
{
_allocatedData.Dispose();
}
_disposed = true;
}
}
Expand Down
135 changes: 135 additions & 0 deletions Runtime/Scripts/AudioProcessingModule.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
using LiveKit.Proto;
using LiveKit.Internal.FFIClients.Requests;
using LiveKit.Internal;
using System;
using System.Runtime.CompilerServices;
[assembly: InternalsVisibleTo("Tests")]

namespace LiveKit
{
/// <summary>
/// Provides WebRTC audio processing capabilities including echo cancellation, noise suppression,
/// high-pass filtering, and gain control.
/// </summary>
public sealed class AudioProcessingModule
{
    // Owned handle to the native APM instance; released by FfiHandle.
    internal readonly FfiHandle Handle;

    /// <summary>
    /// Initializes an <see cref="AudioProcessingModule" /> instance with the specified audio processing features.
    /// </summary>
    /// <param name="echoCancellationEnabled">Whether to enable echo cancellation.</param>
    /// <param name="noiseSuppressionEnabled">Whether to enable noise suppression.</param>
    /// <param name="highPassFilterEnabled">Whether to enable high-pass filtering.</param>
    /// <param name="gainControllerEnabled">Whether to enable gain control.</param>
    public AudioProcessingModule(
        bool echoCancellationEnabled,
        bool noiseSuppressionEnabled,
        bool highPassFilterEnabled,
        bool gainControllerEnabled)
    {
        using var request = FFIBridge.Instance.NewRequest<NewApmRequest>();
        var newApm = request.request;
        newApm.EchoCancellerEnabled = echoCancellationEnabled;
        newApm.NoiseSuppressionEnabled = noiseSuppressionEnabled;
        newApm.HighPassFilterEnabled = highPassFilterEnabled;
        newApm.GainControllerEnabled = gainControllerEnabled;

        using var response = request.Send();
        FfiResponse res = response;
        Handle = FfiHandle.FromOwnedHandle(res.NewApm.Apm.Handle);
    }

    /// <summary>
    /// Process the provided audio frame using the configured audio processing features.
    /// </summary>
    /// <param name="data">The audio frame to process.</param>
    /// <exception cref="InvalidOperationException">Thrown when the native module reports an error.</exception>
    /// <remarks>
    /// Important: Audio frames must be exactly 10 ms in duration.
    ///
    /// The input audio frame is modified in-place (if applicable) by the underlying audio
    /// processing module (e.g., echo cancellation, noise suppression, etc.).
    /// </remarks>
    public void ProcessStream(AudioFrame data)
    {
        using var request = FFIBridge.Instance.NewRequest<ApmProcessStreamRequest>();
        var processStream = request.request;
        processStream.ApmHandle = (ulong)Handle.DangerousGetHandle();
        processStream.DataPtr = (ulong)data.Data;
        processStream.Size = (uint)data.Length;
        processStream.SampleRate = data.SampleRate;
        processStream.NumChannels = data.NumChannels;

        using var response = request.Send();
        FfiResponse res = response;
        if (res.ApmProcessStream.HasError)
        {
            // Prefer a specific exception type over bare System.Exception so
            // callers can distinguish APM failures from unrelated errors.
            throw new InvalidOperationException(res.ApmProcessStream.Error);
        }
    }

    /// <summary>
    /// Process the reverse audio frame (typically used for echo cancellation in a full-duplex setup).
    /// </summary>
    /// <param name="data">The audio frame to process.</param>
    /// <exception cref="InvalidOperationException">Thrown when the native module reports an error.</exception>
    /// <remarks>
    /// Important: Audio frames must be exactly 10 ms in duration.
    ///
    /// In an echo cancellation scenario, this method is used to process the "far-end" audio
    /// prior to mixing or feeding it into the echo canceller. Like <see cref="ProcessStream"/>, the
    /// input audio frame is modified in-place by the underlying processing module.
    /// </remarks>
    public void ProcessReverseStream(AudioFrame data)
    {
        using var request = FFIBridge.Instance.NewRequest<ApmProcessReverseStreamRequest>();
        var processReverseStream = request.request;
        processReverseStream.ApmHandle = (ulong)Handle.DangerousGetHandle();
        processReverseStream.DataPtr = (ulong)data.Data;
        processReverseStream.Size = (uint)data.Length;
        processReverseStream.SampleRate = data.SampleRate;
        processReverseStream.NumChannels = data.NumChannels;

        using var response = request.Send();
        FfiResponse res = response;
        if (res.ApmProcessReverseStream.HasError)
        {
            throw new InvalidOperationException(res.ApmProcessReverseStream.Error);
        }
    }

    /// <summary>
    /// This must be called if and only if echo processing is enabled.
    /// </summary>
    /// <exception cref="InvalidOperationException">Thrown when the native module reports an error.</exception>
    /// <remarks>
    /// Sets the `delay` in milliseconds between receiving a far-end frame in <see cref="ProcessReverseStream"/>
    /// and receiving the corresponding echo in a near-end frame in <see cref="ProcessStream"/>.
    ///
    /// The delay can be calculated as: delay = (t_render - t_analyze) + (t_process - t_capture)
    ///
    /// Where:
    /// - t_analyze: Time when frame is passed to <see cref="ProcessReverseStream"/>
    /// - t_render: Time when first sample of frame is rendered by audio hardware
    /// - t_capture: Time when first sample of frame is captured by audio hardware
    /// - t_process: Time when frame is passed to <see cref="ProcessStream"/>
    /// </remarks>
    public void SetStreamDelayMs(int delayMs)
    {
        using var request = FFIBridge.Instance.NewRequest<ApmSetStreamDelayRequest>();
        var setStreamDelay = request.request;
        setStreamDelay.ApmHandle = (ulong)Handle.DangerousGetHandle();
        setStreamDelay.DelayMs = delayMs;

        using var response = request.Send();
        FfiResponse res = response;
        if (res.ApmSetStreamDelay.HasError)
        {
            throw new InvalidOperationException(res.ApmSetStreamDelay.Error);
        }
    }

    /// <summary>
    /// The required duration for audio frames being processed.
    /// </summary>
    public const uint FRAME_DURATION_MS = 10;
}
}
11 changes: 11 additions & 0 deletions Runtime/Scripts/AudioProcessingModule.cs.meta

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Runtime/Scripts/BasicAudioSource.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ public override void Play()

// Stops playback/capture: runs the base-class teardown, detaches the audio
// callback, and stops the underlying Unity AudioSource.
public override void Stop()
{
base.Stop();
// Unsubscribe so the filter no longer feeds samples after stopping,
// and the handler does not keep this object alive.
_audioFilter.AudioRead -= OnAudioRead;
Source.Stop();
}
Expand Down
101 changes: 101 additions & 0 deletions Runtime/Scripts/Internal/AudioBuffer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
using System;
using LiveKit.Internal;

namespace LiveKit
{
/// <summary>
/// A thread-safe buffer for buffering audio samples.
/// </summary>
internal class AudioBuffer
{
    private readonly uint _bufferDurationMs;
    private RingBuffer _buffer;
    private uint _channels;
    private uint _sampleRate;
    // readonly: the lock object must never be reassigned while held.
    private readonly object _lock = new object();

    /// <summary>
    /// Initializes a new audio sample buffer for holding samples for a given duration.
    /// </summary>
    /// <param name="bufferDurationMs">Capacity of the buffer in milliseconds of audio.</param>
    internal AudioBuffer(uint bufferDurationMs = 200)
    {
        _bufferDurationMs = bufferDurationMs;
    }

    /// <summary>
    /// Write audio samples.
    /// </summary>
    /// <remarks>
    /// The float data will be converted to short format before being written to the buffer.
    /// If the number of channels or sample rate changes, the buffer will be recreated.
    /// </remarks>
    /// <param name="data">The audio samples to write.</param>
    /// <param name="channels">The number of channels in the audio data.</param>
    /// <param name="sampleRate">The sample rate of the audio data in Hz.</param>
    internal void Write(float[] data, uint channels, uint sampleRate)
    {
        // Convert [-1, 1] float to S16 with clamping and
        // round-half-away-from-zero (the +/- 0.5 before truncation).
        static short FloatToS16(float v)
        {
            v *= 32768f;
            v = Math.Min(v, 32767f);
            v = Math.Max(v, -32768f);
            return (short)(v + Math.Sign(v) * 0.5f);
        }

        var s16Data = new short[data.Length];
        for (int i = 0; i < data.Length; i++)
        {
            s16Data[i] = FloatToS16(data[i]);
        }
        Capture(s16Data, channels, sampleRate);
    }

    // Appends converted samples to the ring buffer, (re)allocating it when the
    // format changes. Any previously buffered audio is discarded on reallocation.
    private void Capture(short[] data, uint channels, uint sampleRate)
    {
        lock (_lock)
        {
            if (_buffer == null || channels != _channels || sampleRate != _sampleRate)
            {
                var size = (int)(channels * sampleRate * (_bufferDurationMs / 1000f));
                _buffer?.Dispose();
                _buffer = new RingBuffer(size * sizeof(short));
                _channels = channels;
                _sampleRate = sampleRate;
            }
            unsafe
            {
                fixed (short* pData = data)
                {
                    var byteData = new ReadOnlySpan<byte>(pData, data.Length * sizeof(short));
                    _buffer.Write(byteData);
                }
            }
        }
    }

    /// <summary>
    /// Reads a frame that is the length of the given duration.
    /// </summary>
    /// <param name="durationMs">The duration of the audio samples to read in milliseconds.</param>
    /// <returns>An AudioFrame containing the read audio samples or if there is not enough samples, null.</returns>
    internal AudioFrame ReadDuration(uint durationMs)
    {
        lock (_lock)
        {
            if (_buffer == null) return null;

            var samplesForDuration = (uint)(_sampleRate * (durationMs / 1000f));
            var requiredLength = samplesForDuration * _channels * sizeof(short);
            if (_buffer.AvailableRead() < requiredLength) return null;

            var frame = new AudioFrame(_sampleRate, _channels, samplesForDuration);
            unsafe
            {
                var frameData = new Span<byte>(frame.Data.ToPointer(), frame.Length);
                _buffer.Read(frameData);
            }
            return frame;
        }
    }
}
}
11 changes: 11 additions & 0 deletions Runtime/Scripts/Internal/AudioBuffer.cs.meta

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading