Skip to content

Add audio processing module #99

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions Runtime/Scripts/ApmReverseStream.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
using LiveKit.Internal;
using UnityEngine;

namespace LiveKit
{
/// <summary>
/// Captures and processes the reverse audio stream using an <see cref="AudioProcessingModule"/>.
/// </summary>
/// <remarks>
/// The reverse stream is captured from the scene's audio listener.
/// </remarks>
internal class ApmReverseStream
{
    // Accumulates incoming listener samples until a full 10 ms frame can be read.
    private readonly AudioBuffer _captureBuffer = new AudioBuffer();
    private readonly AudioProcessingModule _apm; // APM is thread safe
    private AudioFilter _audioFilter;

    internal ApmReverseStream(AudioProcessingModule apm)
    {
        _apm = apm;
    }

    /// <summary>
    /// Begins capturing the reverse stream from the scene's <see cref="AudioListener"/>.
    /// </summary>
    /// <remarks>
    /// Logs an error and does nothing if no <see cref="AudioListener"/> is present.
    /// Calling <see cref="Start"/> again while already started is a no-op.
    /// </remarks>
    internal void Start()
    {
        // Guard against double-start: a second call would attach another
        // AudioFilter component and process every frame twice.
        if (_audioFilter != null) return;

        var audioListener = GameObject.FindObjectOfType<AudioListener>();
        if (audioListener == null)
        {
            Utils.Error("AudioListener not found in scene");
            return;
        }
        _audioFilter = audioListener.gameObject.AddComponent<AudioFilter>();
        _audioFilter.AudioRead += OnAudioRead;
    }

    /// <summary>
    /// Stops capturing the reverse stream and removes the attached filter component.
    /// </summary>
    internal void Stop()
    {
        if (_audioFilter != null)
        {
            // Unsubscribe before destroying: Object.Destroy is deferred until
            // the end of the current frame, so without this the filter could
            // keep invoking OnAudioRead after Stop() has returned.
            _audioFilter.AudioRead -= OnAudioRead;
            Object.Destroy(_audioFilter);
            _audioFilter = null;
        }
    }

    // Audio callback: buffers the incoming samples and feeds every complete
    // 10 ms frame (the duration the APM requires) into ProcessReverseStream.
    private void OnAudioRead(float[] data, int channels, int sampleRate)
    {
        _captureBuffer.Write(data, (uint)channels, (uint)sampleRate);
        while (true)
        {
            using var frame = _captureBuffer.ReadDuration(AudioProcessingModule.FRAME_DURATION_MS);
            if (frame == null) break;

            _apm.ProcessReverseStream(frame);
        }
    }
}
}
11 changes: 11 additions & 0 deletions Runtime/Scripts/ApmReverseStream.cs.meta

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Runtime/Scripts/AudioFrame.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
using LiveKit.Internal;
using Unity.Collections;
using Unity.Collections.LowLevel.Unsafe;
using System.Runtime.CompilerServices;
[assembly: InternalsVisibleTo("Tests")]

namespace LiveKit
{
Expand Down
135 changes: 135 additions & 0 deletions Runtime/Scripts/AudioProcessingModule.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
using LiveKit.Proto;
using LiveKit.Internal.FFIClients.Requests;
using LiveKit.Internal;
using System;
using System.Runtime.CompilerServices;
[assembly: InternalsVisibleTo("Tests")]

namespace LiveKit
{
/// <summary>
/// Provides WebRTC audio processing capabilities including echo cancellation, noise suppression,
/// high-pass filtering, and gain control.
/// </summary>
public sealed class AudioProcessingModule
{
    /// <summary>
    /// The required duration for audio frames being processed.
    /// </summary>
    public const uint FRAME_DURATION_MS = 10;

    // Owned handle to the native APM instance; passed with every FFI call.
    internal readonly FfiHandle Handle;

    /// <summary>
    /// Initializes an <see cref="AudioProcessingModule" /> instance with the specified audio processing features.
    /// </summary>
    /// <param name="echoCancellationEnabled">Whether to enable echo cancellation.</param>
    /// <param name="noiseSuppressionEnabled">Whether to enable noise suppression.</param>
    /// <param name="highPassFilterEnabled">Whether to enable high-pass filtering.</param>
    /// <param name="gainControllerEnabled">Whether to enable gain control.</param>
    public AudioProcessingModule(
        bool echoCancellationEnabled,
        bool noiseSuppressionEnabled,
        bool highPassFilterEnabled,
        bool gainControllerEnabled)
    {
        using var request = FFIBridge.Instance.NewRequest<NewApmRequest>();
        var options = request.request;
        options.GainControllerEnabled = gainControllerEnabled;
        options.HighPassFilterEnabled = highPassFilterEnabled;
        options.NoiseSuppressionEnabled = noiseSuppressionEnabled;
        options.EchoCancellerEnabled = echoCancellationEnabled;

        using var response = request.Send();
        FfiResponse res = response;
        Handle = FfiHandle.FromOwnedHandle(res.NewApm.Apm.Handle);
    }

    /// <summary>
    /// Process the provided audio frame using the configured audio processing features.
    /// </summary>
    /// <param name="data">The audio frame to process.</param>
    /// <remarks>
    /// Important: Audio frames must be exactly 10 ms in duration.
    ///
    /// The input audio frame is modified in-place (if applicable) by the underlying audio
    /// processing module (e.g., echo cancellation, noise suppression, etc.).
    /// </remarks>
    public void ProcessStream(AudioFrame data)
    {
        using var request = FFIBridge.Instance.NewRequest<ApmProcessStreamRequest>();
        var msg = request.request;
        msg.ApmHandle = (ulong)Handle.DangerousGetHandle();
        msg.NumChannels = data.NumChannels;
        msg.SampleRate = data.SampleRate;
        msg.Size = (uint)data.Length;
        msg.DataPtr = (ulong)data.Data;

        using var response = request.Send();
        FfiResponse res = response;
        if (res.ApmProcessStream.HasError) throw new Exception(res.ApmProcessStream.Error);
    }

    /// <summary>
    /// Process the reverse audio frame (typically used for echo cancellation in a full-duplex setup).
    /// </summary>
    /// <param name="data">The audio frame to process.</param>
    /// <remarks>
    /// Important: Audio frames must be exactly 10 ms in duration.
    ///
    /// In an echo cancellation scenario, this method is used to process the "far-end" audio
    /// prior to mixing or feeding it into the echo canceller. Like <see cref="ProcessStream"/>, the
    /// input audio frame is modified in-place by the underlying processing module.
    /// </remarks>
    public void ProcessReverseStream(AudioFrame data)
    {
        using var request = FFIBridge.Instance.NewRequest<ApmProcessReverseStreamRequest>();
        var msg = request.request;
        msg.ApmHandle = (ulong)Handle.DangerousGetHandle();
        msg.NumChannels = data.NumChannels;
        msg.SampleRate = data.SampleRate;
        msg.Size = (uint)data.Length;
        msg.DataPtr = (ulong)data.Data;

        using var response = request.Send();
        FfiResponse res = response;
        if (res.ApmProcessReverseStream.HasError) throw new Exception(res.ApmProcessReverseStream.Error);
    }

    /// <summary>
    /// This must be called if and only if echo processing is enabled.
    /// </summary>
    /// <remarks>
    /// Sets the `delay` in milliseconds between receiving a far-end frame in <see cref="ProcessReverseStream"/>
    /// and receiving the corresponding echo in a near-end frame in <see cref="ProcessStream"/>.
    ///
    /// The delay can be calculated as: delay = (t_render - t_analyze) + (t_process - t_capture)
    ///
    /// Where:
    /// - t_analyze: Time when frame is passed to <see cref="ProcessReverseStream"/>
    /// - t_render: Time when first sample of frame is rendered by audio hardware
    /// - t_capture: Time when first sample of frame is captured by audio hardware
    /// - t_process: Time when frame is passed to <see cref="ProcessStream"/>
    /// </remarks>
    public void SetStreamDelayMs(int delayMs)
    {
        using var request = FFIBridge.Instance.NewRequest<ApmSetStreamDelayRequest>();
        var msg = request.request;
        msg.ApmHandle = (ulong)Handle.DangerousGetHandle();
        msg.DelayMs = delayMs;

        using var response = request.Send();
        FfiResponse res = response;
        if (res.ApmSetStreamDelay.HasError) throw new Exception(res.ApmSetStreamDelay.Error);
    }
}
}
11 changes: 11 additions & 0 deletions Runtime/Scripts/AudioProcessingModule.cs.meta

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

94 changes: 94 additions & 0 deletions Runtime/Scripts/Internal/AudioBuffer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
using System;
using LiveKit.Internal;

namespace LiveKit
{
/// <summary>
/// A ring buffer for audio samples.
/// </summary>
internal class AudioBuffer : IDisposable
{
    private readonly uint _bufferDurationMs;
    // Created lazily on first write; recreated (discarding buffered data)
    // whenever the channel count or sample rate changes.
    private RingBuffer _buffer;
    private uint _channels;
    private uint _sampleRate;

    /// <summary>
    /// Initializes a new audio sample buffer for holding samples for a given duration.
    /// </summary>
    /// <param name="bufferDurationMs">Capacity of the buffer, expressed in milliseconds of audio.</param>
    internal AudioBuffer(uint bufferDurationMs = 200)
    {
        _bufferDurationMs = bufferDurationMs;
    }

    /// <summary>
    /// Write audio samples.
    /// </summary>
    /// <remarks>
    /// The float data will be converted to short format before being written to the buffer.
    /// If the number of channels or sample rate changes, the buffer will be recreated.
    /// </remarks>
    /// <param name="data">The audio samples to write.</param>
    /// <param name="channels">The number of channels in the audio data.</param>
    /// <param name="sampleRate">The sample rate of the audio data in Hz.</param>
    internal void Write(float[] data, uint channels, uint sampleRate)
    {
        // Convert a [-1, 1] float sample to S16 with clamping and
        // round-half-away-from-zero (the +/- 0.5 before truncation).
        static short FloatToS16(float v)
        {
            v *= 32768f;
            v = Math.Min(v, 32767f);
            v = Math.Max(v, -32768f);
            return (short)(v + Math.Sign(v) * 0.5f);
        }

        var s16Data = new short[data.Length];
        for (int i = 0; i < data.Length; i++)
        {
            s16Data[i] = FloatToS16(data[i]);
        }
        Capture(s16Data, channels, sampleRate);
    }

    // Writes S16 samples into the ring buffer, (re)allocating it when the
    // format changes or on first use.
    private void Capture(short[] data, uint channels, uint sampleRate)
    {
        if (_buffer == null || channels != _channels || sampleRate != _sampleRate)
        {
            // Capacity sized for _bufferDurationMs of interleaved S16 audio.
            var size = (int)(channels * sampleRate * (_bufferDurationMs / 1000f));
            _buffer?.Dispose();
            _buffer = new RingBuffer(size * sizeof(short));
            _channels = channels;
            _sampleRate = sampleRate;
        }
        unsafe
        {
            fixed (short* pData = data)
            {
                var byteData = new ReadOnlySpan<byte>(pData, data.Length * sizeof(short));
                _buffer.Write(byteData);
            }
        }
    }

    /// <summary>
    /// Reads a frame that is the length of the given duration.
    /// </summary>
    /// <param name="durationMs">The duration of the audio samples to read in milliseconds.</param>
    /// <returns>An AudioFrame containing the read audio samples or if there is not enough samples, null.</returns>
    internal AudioFrame ReadDuration(uint durationMs)
    {
        if (_buffer == null) return null;

        var samplesForDuration = (uint)(_sampleRate * (durationMs / 1000f));
        var requiredLength = samplesForDuration * _channels * sizeof(short);
        if (_buffer.AvailableRead() < requiredLength) return null;

        var frame = new AudioFrame(_sampleRate, _channels, samplesForDuration);
        unsafe
        {
            var frameData = new Span<byte>(frame.Data.ToPointer(), frame.Length);
            _buffer.Read(frameData);
        }
        return frame;
    }

    /// <summary>
    /// Releases the underlying ring buffer.
    /// </summary>
    /// <remarks>
    /// Previously the ring buffer was only disposed when the audio format
    /// changed, leaking the final instance; disposing the owner releases it.
    /// Writing after disposal will transparently allocate a fresh buffer.
    /// </remarks>
    public void Dispose()
    {
        _buffer?.Dispose();
        _buffer = null;
    }
}
}
11 changes: 11 additions & 0 deletions Runtime/Scripts/Internal/AudioBuffer.cs.meta

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Runtime/Scripts/Internal/FFIClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ internal sealed class FfiClient : IFFIClient
// participant events are not allowed in the fii protocol public event ParticipantEventReceivedDelegate ParticipantEventReceived;
public event VideoStreamEventReceivedDelegate? VideoStreamEventReceived;
public event AudioStreamEventReceivedDelegate? AudioStreamEventReceived;
public event CaptureAudioFrameReceivedDelegate? CaptureAudioFrameReceived;

public event PerformRpcReceivedDelegate? PerformRpcReceived;

Expand Down Expand Up @@ -287,6 +288,7 @@ static unsafe void FFICallback(UIntPtr data, UIntPtr size)
Instance.AudioStreamEventReceived?.Invoke(r.AudioStreamEvent!);
break;
case FfiEvent.MessageOneofCase.CaptureAudioFrame:
Instance.CaptureAudioFrameReceived?.Invoke(r.CaptureAudioFrame!);
break;
case FfiEvent.MessageOneofCase.PerformRpc:
Instance.PerformRpcReceived?.Invoke(r.PerformRpc!);
Expand Down
2 changes: 2 additions & 0 deletions Runtime/Scripts/Internal/FFIClients/FFIEvents.cs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ namespace LiveKit.Internal

internal delegate void SendTextReceivedDelegate(StreamSendTextCallback e);

internal delegate void CaptureAudioFrameReceivedDelegate(CaptureAudioFrameCallback e);

// Events
internal delegate void RoomEventReceivedDelegate(RoomEvent e);

Expand Down
13 changes: 13 additions & 0 deletions Runtime/Scripts/Internal/FFIClients/FfiRequestExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,19 @@ public static void Inject<T>(this FfiRequest ffiRequest, T request)
case E2eeRequest e2EeRequest:
ffiRequest.E2Ee = e2EeRequest;
break;
// Apm
case NewApmRequest newApmRequest:
ffiRequest.NewApm = newApmRequest;
break;
case ApmProcessStreamRequest apmProcessStreamRequest:
ffiRequest.ApmProcessStream = apmProcessStreamRequest;
break;
case ApmProcessReverseStreamRequest apmProcessReverseStreamRequest:
ffiRequest.ApmProcessReverseStream = apmProcessReverseStreamRequest;
break;
case ApmSetStreamDelayRequest apmSetStreamDelayRequest:
ffiRequest.ApmSetStreamDelay = apmSetStreamDelayRequest;
break;
// Rpc
case RegisterRpcMethodRequest registerRpcMethodRequest:
ffiRequest.RegisterRpcMethod = registerRpcMethodRequest;
Expand Down
Loading