livekit · ladvoc · Apr 2, 2025 · Apr 3, 2025 · Apr 3, 2025 · Apr 5, 2025
diff --git a/Runtime/Scripts/ApmReverseStream.cs b/Runtime/Scripts/ApmReverseStream.cs
@@ -0,0 +1,53 @@
+using LiveKit.Internal;
+using UnityEngine;
+
+namespace LiveKit
+{
+    /// <summary>
+    /// Captures and processes the reverse audio stream using an <see cref="AudioProcessingModule"/>.
+    /// </summary>
+    /// <remarks>
+    /// The reverse stream is captured from the scene's audio listener.
+    /// </remarks>
+    internal class ApmReverseStream
+    {
+        private readonly AudioBuffer _captureBuffer = new AudioBuffer();
+        private readonly AudioProcessingModule _apm; // APM is thread safe
+        private AudioFilter _audioFilter;
+
+        internal ApmReverseStream(AudioProcessingModule apm)
+        {
+            _apm = apm;
+        }
+
+        internal void Start()
+        {
+            var audioListener = GameObject.FindObjectOfType<AudioListener>();
+            if (audioListener == null)
+            {
+                Utils.Error("AudioListener not found in scene");
+                return;
+            }
+            _audioFilter = audioListener.gameObject.AddComponent<AudioFilter>();
+            _audioFilter.AudioRead += OnAudioRead;
+        }
+
+        internal void Stop()
+        {
+            if (_audioFilter != null)
+                Object.Destroy(_audioFilter);
+        }
+
+        private void OnAudioRead(float[] data, int channels, int sampleRate)
+        {
+            _captureBuffer.Write(data, (uint)channels, (uint)sampleRate);
+            while (true)
+            {
+                using var frame = _captureBuffer.ReadDuration(AudioProcessingModule.FRAME_DURATION_MS);
+                if (frame == null) break;
+
+                _apm.ProcessReverseStream(frame);
+            }
+        }
+    }
+}
diff --git a/Runtime/Scripts/ApmReverseStream.cs.meta b/Runtime/Scripts/ApmReverseStream.cs.meta
diff --git a/Runtime/Scripts/AudioFrame.cs b/Runtime/Scripts/AudioFrame.cs
@@ -3,6 +3,8 @@
 using LiveKit.Internal;
 using Unity.Collections;
 using Unity.Collections.LowLevel.Unsafe;
+using System.Runtime.CompilerServices;
+[assembly: InternalsVisibleTo("Tests")]
 
 namespace LiveKit
 {

diff --git a/Runtime/Scripts/AudioProcessingModule.cs b/Runtime/Scripts/AudioProcessingModule.cs
@@ -0,0 +1,135 @@
+using LiveKit.Proto;
+using LiveKit.Internal.FFIClients.Requests;
+using LiveKit.Internal;
+using System;
+using System.Runtime.CompilerServices;
+[assembly: InternalsVisibleTo("Tests")]
+
+namespace LiveKit
+{
+    /// <summary>
+    /// Provides WebRTC audio processing capabilities including echo cancellation, noise suppression,
+    /// high-pass filtering, and gain control.
+    /// </summary>
+    public sealed class AudioProcessingModule
+    {
+        internal readonly FfiHandle Handle;
+
+        /// <summary>
+        /// Initializes an <see cref="AudioProcessingModule" /> instance with the specified audio processing features.
+        /// </summary>
+        /// <param name="echoCancellationEnabled">Whether to enable echo cancellation.</param>
+        /// <param name="noiseSuppressionEnabled">Whether to enable noise suppression.</param>
+        /// <param name="highPassFilterEnabled">Whether to enable high-pass filtering.</param>
+        /// <param name="gainControllerEnabled">Whether to enable gain control.</param>
+        public AudioProcessingModule(
+            bool echoCancellationEnabled,
+            bool noiseSuppressionEnabled,
+            bool highPassFilterEnabled,
+            bool gainControllerEnabled)
+        {
+            using var request = FFIBridge.Instance.NewRequest<NewApmRequest>();
+            var newApm = request.request;
+            newApm.EchoCancellerEnabled = echoCancellationEnabled;
+            newApm.NoiseSuppressionEnabled = noiseSuppressionEnabled;
+            newApm.HighPassFilterEnabled = highPassFilterEnabled;
+            newApm.GainControllerEnabled = gainControllerEnabled;
+
+            using var response = request.Send();
+            FfiResponse res = response;
+            Handle = FfiHandle.FromOwnedHandle(res.NewApm.Apm.Handle);
+        }
+
+        /// <summary>
+        /// Process the provided audio frame using the configured audio processing features.
+        /// </summary>
+        /// <param name="data">The audio frame to process.</param>
+        /// <remarks>
+        /// Important: Audio frames must be exactly 10 ms in duration.
+        ///
+        /// The input audio frame is modified in-place (if applicable) by the underlying audio
+        /// processing module (e.g., echo cancellation, noise suppression, etc.).
+        /// </remarks>
+        public void ProcessStream(AudioFrame data)
+        {
+            using var request = FFIBridge.Instance.NewRequest<ApmProcessStreamRequest>();
+            var processStream = request.request;
+            processStream.ApmHandle = (ulong)Handle.DangerousGetHandle();
+            processStream.DataPtr = (ulong)data.Data;
+            processStream.Size = (uint)data.Length;
+            processStream.SampleRate = data.SampleRate;
+            processStream.NumChannels = data.NumChannels;
+
+            using var response = request.Send();
+            FfiResponse res = response;
+            if (res.ApmProcessStream.HasError)
+            {
+                throw new Exception(res.ApmProcessStream.Error);
+            }
+        }
+
+        /// <summary>
+        /// Process the reverse audio frame (typically used for echo cancellation in a full-duplex setup).
+        /// </summary>
+        /// <param name="data">The audio frame to process.</param>
+        /// <remarks>
+        /// Important: Audio frames must be exactly 10 ms in duration.
+        ///
+        /// In an echo cancellation scenario, this method is used to process the "far-end" audio
+        /// prior to mixing or feeding it into the echo canceller. Like <see cref="ProcessStream"/>, the
+        /// input audio frame is modified in-place by the underlying processing module.
+        /// </remarks>
+        public void ProcessReverseStream(AudioFrame data)
+        {
+            using var request = FFIBridge.Instance.NewRequest<ApmProcessReverseStreamRequest>();
+            var processReverseStream = request.request;
+            processReverseStream.ApmHandle = (ulong)Handle.DangerousGetHandle();
+            processReverseStream.DataPtr = (ulong)data.Data;
+            processReverseStream.Size = (uint)data.Length;
+            processReverseStream.SampleRate = data.SampleRate;
+            processReverseStream.NumChannels = data.NumChannels;
+
+            using var response = request.Send();
+            FfiResponse res = response;
+            if (res.ApmProcessReverseStream.HasError)
+            {
+                throw new Exception(res.ApmProcessReverseStream.Error);
+            }
+        }
+
+        /// <summary>
+        /// This must be called if and only if echo processing is enabled.
+        /// </summary>
+        /// <remarks>
+        /// Sets the `delay` in milliseconds between receiving a far-end frame in <see cref="ProcessReverseStream"/>
+        /// and receiving the corresponding echo in a near-end frame in <see cref="ProcessStream"/>.
+        ///
+        /// The delay can be calculated as: delay = (t_render - t_analyze) + (t_process - t_capture)
+        ///
+        /// Where:
+        /// - t_analyze: Time when frame is passed to <see cref="ProcessReverseStream"/>
+        /// - t_render: Time when first sample of frame is rendered by audio hardware
+        /// - t_capture: Time when first sample of frame is captured by audio hardware
+        /// - t_process: Time when frame is passed to <see cref="ProcessStream"/>
+        /// </remarks>
+        public void SetStreamDelayMs(int delayMs)
+        {
+            using var request = FFIBridge.Instance.NewRequest<ApmSetStreamDelayRequest>();
+            var setStreamDelay = request.request;
+            setStreamDelay.ApmHandle = (ulong)Handle.DangerousGetHandle();
+            setStreamDelay.DelayMs = delayMs;
+
+            using var response = request.Send();
+            FfiResponse res = response;
+            if (res.ApmSetStreamDelay.HasError)
+            {
+                throw new Exception(res.ApmSetStreamDelay.Error);
+            }
+        }
+
+        /// <summary>
+        /// The required duration for audio frames being processed.
+        /// </summary>
+        public const uint FRAME_DURATION_MS = 10;
+    }
+}
diff --git a/Runtime/Scripts/AudioProcessingModule.cs.meta b/Runtime/Scripts/AudioProcessingModule.cs.meta
diff --git a/Runtime/Scripts/Internal/AudioBuffer.cs b/Runtime/Scripts/Internal/AudioBuffer.cs
@@ -0,0 +1,94 @@
+using System;
+using LiveKit.Internal;
+
+namespace LiveKit
+{
+    /// <summary>
+    /// A ring buffer for audio samples.
+    /// </summary>
+    internal class AudioBuffer
+    {
+        private readonly uint _bufferDurationMs;
+        private RingBuffer _buffer;
+        private uint _channels;
+        private uint _sampleRate;
+
+        /// <summary>
+        /// Initializes a new audio sample buffer for holding samples for a given duration.
+        /// </summary>
+        internal AudioBuffer(uint bufferDurationMs = 200)
+        {
+            _bufferDurationMs = bufferDurationMs;
+        }
+
+        /// <summary>
+        /// Write audio samples.
+        /// </summary>
+        /// <remarks>
+        /// The float data will be converted to short format before being written to the buffer.
+        /// If the number of channels or sample rate changes, the buffer will be recreated.
+        /// </remarks>
+        /// <param name="data">The audio samples to write.</param>
+        /// <param name="channels">The number of channels in the audio data.</param>
+        /// <param name="sampleRate">The sample rate of the audio data in Hz.</param>
+        internal void Write(float[] data, uint channels, uint sampleRate)
+        {
+            static short FloatToS16(float v)
+            {
+                v *= 32768f;
+                v = Math.Min(v, 32767f);
+                v = Math.Max(v, -32768f);
+                return (short)(v + Math.Sign(v) * 0.5f);
+            }
+
+            var s16Data = new short[data.Length];
+            for (int i = 0; i < data.Length; i++)
+            {
+                s16Data[i] = FloatToS16(data[i]);
+            }
+            Capture(s16Data, channels, sampleRate);
+        }
+
+        private void Capture(short[] data, uint channels, uint sampleRate)
+        {
+            if (_buffer == null || channels != _channels || sampleRate != _sampleRate)
+            {
+                var size = (int)(channels * sampleRate * (_bufferDurationMs / 1000f));
+                _buffer?.Dispose();
+                _buffer = new RingBuffer(size * sizeof(short));
+                _channels = channels;
+                _sampleRate = sampleRate;
+            }
+            unsafe
+            {
+                fixed (short* pData = data)
+                {
+                    var byteData = new ReadOnlySpan<byte>(pData, data.Length * sizeof(short));
+                    _buffer.Write(byteData);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Reads a frame that is the length of the given duration.
+        /// </summary>
+        /// <param name="durationMs">The duration of the audio samples to read in milliseconds.</param>
+        /// <returns>An AudioFrame containing the read audio samples or if there is not enough samples, null.</returns>
+        internal AudioFrame ReadDuration(uint durationMs)
+        {
+            if (_buffer == null) return null;
+
+            var samplesForDuration = (uint)(_sampleRate * (durationMs / 1000f));
+            var requiredLength = samplesForDuration * _channels * sizeof(short);
+            if (_buffer.AvailableRead() < requiredLength) return null;
+
+            var frame = new AudioFrame(_sampleRate, _channels, samplesForDuration);
+            unsafe
+            {
+                var frameData = new Span<byte>(frame.Data.ToPointer(), frame.Length);
+                _buffer.Read(frameData);
+            }
+            return frame;
+        }
+    }
+}
diff --git a/Runtime/Scripts/Internal/AudioBuffer.cs.meta b/Runtime/Scripts/Internal/AudioBuffer.cs.meta
diff --git a/Runtime/Scripts/Internal/FFIClient.cs b/Runtime/Scripts/Internal/FFIClient.cs
@@ -46,6 +46,7 @@ internal sealed class FfiClient : IFFIClient
         // participant events are not allowed in the fii protocol public event ParticipantEventReceivedDelegate ParticipantEventReceived;
         public event VideoStreamEventReceivedDelegate? VideoStreamEventReceived;
         public event AudioStreamEventReceivedDelegate? AudioStreamEventReceived;
+        public event CaptureAudioFrameReceivedDelegate? CaptureAudioFrameReceived;
 
         public event PerformRpcReceivedDelegate? PerformRpcReceived;
 
@@ -287,6 +288,7 @@ static unsafe void FFICallback(UIntPtr data, UIntPtr size)
                         Instance.AudioStreamEventReceived?.Invoke(r.AudioStreamEvent!);
                         break;
                     case FfiEvent.MessageOneofCase.CaptureAudioFrame:
+                         Instance.CaptureAudioFrameReceived?.Invoke(r.CaptureAudioFrame!);
                         break;
                     case FfiEvent.MessageOneofCase.PerformRpc:
                         Instance.PerformRpcReceived?.Invoke(r.PerformRpc!);

diff --git a/Runtime/Scripts/Internal/FFIClients/FFIEvents.cs b/Runtime/Scripts/Internal/FFIClients/FFIEvents.cs
@@ -48,6 +48,8 @@ namespace LiveKit.Internal
 
     internal delegate void SendTextReceivedDelegate(StreamSendTextCallback e);
 
+    internal delegate void CaptureAudioFrameReceivedDelegate(CaptureAudioFrameCallback e);
+
     // Events
     internal delegate void RoomEventReceivedDelegate(RoomEvent e);
 

diff --git a/Runtime/Scripts/Internal/FFIClients/FfiRequestExtensions.cs b/Runtime/Scripts/Internal/FFIClients/FfiRequestExtensions.cs
@@ -90,6 +90,19 @@ public static void Inject<T>(this FfiRequest ffiRequest, T request)
                 case E2eeRequest e2EeRequest:
                     ffiRequest.E2Ee = e2EeRequest;
                     break;
+                // Apm
+                case NewApmRequest newApmRequest:
+                    ffiRequest.NewApm = newApmRequest;
+                    break;
+                case ApmProcessStreamRequest apmProcessStreamRequest:
+                    ffiRequest.ApmProcessStream = apmProcessStreamRequest;
+                    break;
+                case ApmProcessReverseStreamRequest apmProcessReverseStreamRequest:
+                    ffiRequest.ApmProcessReverseStream = apmProcessReverseStreamRequest;
+                    break;
+                case ApmSetStreamDelayRequest apmSetStreamDelayRequest:
+                    ffiRequest.ApmSetStreamDelay = apmSetStreamDelayRequest;
+                    break;
                 // Rpc
                 case RegisterRpcMethodRequest registerRpcMethodRequest:
                     ffiRequest.RegisterRpcMethod = registerRpcMethodRequest;