Add audio processing module #99
@@ -0,0 +1,63 @@
using System.Threading;
using LiveKit.Internal;
using UnityEngine;

namespace LiveKit
{
    /// <summary>
    /// Captures and processes the reverse audio stream using an <see cref="AudioProcessingModule"/>.
    /// </summary>
    /// <remarks>
    /// The reverse stream is captured from the scene's audio listener.
    /// </remarks>
    internal class ApmReverseStream
    {
        private readonly AudioBuffer _captureBuffer = new AudioBuffer();
        private readonly AudioProcessingModule _apm; // APM is thread safe
        private Thread _thread;
        private AudioFilter _audioFilter;

        internal ApmReverseStream(AudioProcessingModule apm)
        {
            _apm = apm;
        }

        internal void Start()
        {
            var audioListener = GameObject.FindObjectOfType<AudioListener>();
            if (audioListener == null)
            {
                Utils.Error("AudioListener not found in scene");
                return;
            }
            // Tap the listener's output so the far-end (playback) audio
            // can be fed to the APM as the reverse stream.
            _audioFilter = audioListener.gameObject.AddComponent<AudioFilter>();
            _audioFilter.AudioRead += OnAudioRead;

            _thread = new Thread(ProcessReverseStream);
            _thread.Start();
        }

        internal void Stop()
        {
            _thread?.Abort();
            if (_audioFilter != null)
                Object.Destroy(_audioFilter);
        }

        private void ProcessReverseStream()
        {
            while (true)
            {
                Thread.Sleep(Constants.TASK_DELAY);
                // Drain the capture buffer in fixed 10 ms frames, as required by the APM.
                using var frame = _captureBuffer.ReadDuration(AudioProcessingModule.FRAME_DURATION_MS);
                if (frame == null) continue;
                _apm.ProcessReverseStream(frame);
            }
        }

        private void OnAudioRead(float[] data, int channels, int sampleRate)
        {
            _captureBuffer.Write(data, (uint)channels, (uint)sampleRate);
        }
    }
}
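For orientation, here is a minimal sketch of how this class might pair with the forward (near-end) capture path. The loop condition and the helpers `GetNextMicFrame` and `Publish` are hypothetical placeholders for illustration, not APIs in this diff:

    // Illustrative sketch only: reverse stream beside a hypothetical mic loop.
    var apm = new AudioProcessingModule(
        echoCancellationEnabled: true,
        noiseSuppressionEnabled: true,
        highPassFilterEnabled: true,
        gainControllerEnabled: true);

    var reverseStream = new ApmReverseStream(apm);
    reverseStream.Start(); // taps the scene's AudioListener for far-end audio

    // Meanwhile, the capture path feeds near-end (microphone) frames:
    while (capturing) // hypothetical loop condition
    {
        using var frame = GetNextMicFrame(); // hypothetical 10 ms AudioFrame source
        apm.ProcessStream(frame);            // echo cancelled against the reverse stream
        Publish(frame);                      // hypothetical send to the room
    }

    reverseStream.Stop();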
@@ -0,0 +1,135 @@
using LiveKit.Proto;
using LiveKit.Internal.FFIClients.Requests;
using LiveKit.Internal;
using System;
using System.Runtime.CompilerServices;
[assembly: InternalsVisibleTo("Tests")]

namespace LiveKit
{
    /// <summary>
    /// Provides WebRTC audio processing capabilities including echo cancellation, noise suppression,
    /// high-pass filtering, and gain control.
    /// </summary>
    public sealed class AudioProcessingModule
    {
        internal readonly FfiHandle Handle;

        /// <summary>
        /// Initializes an <see cref="AudioProcessingModule" /> instance with the specified audio processing features.
        /// </summary>
        /// <param name="echoCancellationEnabled">Whether to enable echo cancellation.</param>
        /// <param name="noiseSuppressionEnabled">Whether to enable noise suppression.</param>
        /// <param name="highPassFilterEnabled">Whether to enable high-pass filtering.</param>
        /// <param name="gainControllerEnabled">Whether to enable gain control.</param>
        public AudioProcessingModule(
            bool echoCancellationEnabled,
            bool noiseSuppressionEnabled,
            bool highPassFilterEnabled,
            bool gainControllerEnabled)
        {
            using var request = FFIBridge.Instance.NewRequest<NewApmRequest>();
            var newApm = request.request;
            newApm.EchoCancellerEnabled = echoCancellationEnabled;
            newApm.NoiseSuppressionEnabled = noiseSuppressionEnabled;
            newApm.HighPassFilterEnabled = highPassFilterEnabled;
            newApm.GainControllerEnabled = gainControllerEnabled;

            using var response = request.Send();
            FfiResponse res = response;
            Handle = FfiHandle.FromOwnedHandle(res.NewApm.Apm.Handle);
        }

        /// <summary>
        /// Processes the provided audio frame using the configured audio processing features.
        /// </summary>
        /// <param name="data">The audio frame to process.</param>
        /// <remarks>
        /// Important: audio frames must be exactly 10 ms in duration.
        ///
        /// The input audio frame is modified in-place (if applicable) by the underlying audio
        /// processing module (e.g., echo cancellation, noise suppression, etc.).
        /// </remarks>
        public void ProcessStream(AudioFrame data)
        {
            using var request = FFIBridge.Instance.NewRequest<ApmProcessStreamRequest>();
            var processStream = request.request;
            processStream.ApmHandle = (ulong)Handle.DangerousGetHandle();
            processStream.DataPtr = (ulong)data.Data;
            processStream.Size = (uint)data.Length;
            processStream.SampleRate = data.SampleRate;
            processStream.NumChannels = data.NumChannels;

            using var response = request.Send();
            FfiResponse res = response;
            if (res.ApmProcessStream.HasError)
            {
                throw new Exception(res.ApmProcessStream.Error);
            }
        }

        /// <summary>
        /// Processes the reverse audio frame (typically used for echo cancellation in a full-duplex setup).
        /// </summary>
        /// <param name="data">The audio frame to process.</param>
        /// <remarks>
        /// Important: audio frames must be exactly 10 ms in duration.
        ///
        /// In an echo cancellation scenario, this method is used to process the "far-end" audio
        /// prior to mixing or feeding it into the echo canceller. Like <see cref="ProcessStream"/>, the
        /// input audio frame is modified in-place by the underlying processing module.
        /// </remarks>
        public void ProcessReverseStream(AudioFrame data)
        {
            using var request = FFIBridge.Instance.NewRequest<ApmProcessReverseStreamRequest>();
            var processReverseStream = request.request;
            processReverseStream.ApmHandle = (ulong)Handle.DangerousGetHandle();
            processReverseStream.DataPtr = (ulong)data.Data;
            processReverseStream.Size = (uint)data.Length;
            processReverseStream.SampleRate = data.SampleRate;
            processReverseStream.NumChannels = data.NumChannels;

            using var response = request.Send();
            FfiResponse res = response;
            if (res.ApmProcessReverseStream.HasError)
            {
                throw new Exception(res.ApmProcessReverseStream.Error);
            }
        }

        /// <summary>
        /// Sets the delay in milliseconds between receiving a far-end frame in
        /// <see cref="ProcessReverseStream"/> and receiving the corresponding echo in a near-end
        /// frame in <see cref="ProcessStream"/>. Must be called if and only if echo cancellation
        /// is enabled.
        /// </summary>
        /// <param name="delayMs">The stream delay in milliseconds.</param>
        /// <remarks>
        /// The delay can be calculated as: delay = (t_render - t_analyze) + (t_process - t_capture)
        ///
        /// Where:
        /// - t_analyze: time when a frame is passed to <see cref="ProcessReverseStream"/>
        /// - t_render: time when the first sample of that frame is rendered by the audio hardware
        /// - t_capture: time when the first sample of a frame is captured by the audio hardware
        /// - t_process: time when that frame is passed to <see cref="ProcessStream"/>
        /// </remarks>
        public void SetStreamDelayMs(int delayMs)
        {
            using var request = FFIBridge.Instance.NewRequest<ApmSetStreamDelayRequest>();
            var setStreamDelay = request.request;
            setStreamDelay.ApmHandle = (ulong)Handle.DangerousGetHandle();
            setStreamDelay.DelayMs = delayMs;

            using var response = request.Send();
            FfiResponse res = response;
            if (res.ApmSetStreamDelay.HasError)
            {
                throw new Exception(res.ApmSetStreamDelay.Error);
            }
        }
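        // Worked example with illustrative numbers: if a far-end frame is handed to
        // ProcessReverseStream 30 ms before the hardware renders it
        // (t_render - t_analyze = 30 ms), and a near-end frame reaches ProcessStream
        // 20 ms after it was captured (t_process - t_capture = 20 ms), then the
        // delay to report is 30 + 20 = 50 ms, i.e. SetStreamDelayMs(50).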

        /// <summary>
        /// The required duration for audio frames being processed.
        /// </summary>
        public const uint FRAME_DURATION_MS = 10;
    }
}
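As a usage sketch (not part of this diff; the sample rate, delay value, and frame contents are illustrative assumptions, and the AudioFrame constructor shape is taken from its use in AudioBuffer.ReadDuration below), constructing the module and pushing one 10 ms near-end frame might look like:

    // Illustrative only: sizing and processing a single 10 ms frame.
    var apm = new AudioProcessingModule(
        echoCancellationEnabled: true,
        noiseSuppressionEnabled: true,
        highPassFilterEnabled: true,
        gainControllerEnabled: true);

    const uint sampleRate = 48000;
    const uint channels = 1;
    // 10 ms at 48 kHz = 48000 * 10 / 1000 = 480 samples per channel.
    uint samplesPerFrame = sampleRate * AudioProcessingModule.FRAME_DURATION_MS / 1000;

    using var frame = new AudioFrame(sampleRate, channels, samplesPerFrame);
    // ... fill frame.Data with captured PCM here ...
    apm.SetStreamDelayMs(50); // measured render + capture latency (illustrative)
    apm.ProcessStream(frame); // the frame is modified in-place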
@@ -0,0 +1,101 @@
using System;
using LiveKit.Internal;

namespace LiveKit
{
    /// <summary>
    /// A thread-safe buffer for audio samples.
    /// </summary>
    internal class AudioBuffer
    {
        private readonly uint _bufferDurationMs;
        private RingBuffer _buffer;
        private uint _channels;
        private uint _sampleRate;
        private readonly object _lock = new object();

        /// <summary>
        /// Initializes a new audio sample buffer that holds samples for the given duration.
        /// </summary>
        internal AudioBuffer(uint bufferDurationMs = 200)
        {
            _bufferDurationMs = bufferDurationMs;
        }

        /// <summary>
        /// Writes audio samples.
        /// </summary>
        /// <remarks>
        /// The float data is converted to short format before being written to the buffer.
        /// If the number of channels or the sample rate changes, the buffer is recreated.
        /// </remarks>
        /// <param name="data">The audio samples to write.</param>
        /// <param name="channels">The number of channels in the audio data.</param>
        /// <param name="sampleRate">The sample rate of the audio data in Hz.</param>
        internal void Write(float[] data, uint channels, uint sampleRate)
        {
            // Scale [-1, 1] floats to the 16-bit range, clamp, and round away from zero.
            static short FloatToS16(float v)
            {
                v *= 32768f;
                v = Math.Min(v, 32767f);
                v = Math.Max(v, -32768f);
                return (short)(v + Math.Sign(v) * 0.5f);
            }

            var s16Data = new short[data.Length];
            for (int i = 0; i < data.Length; i++)
            {
                s16Data[i] = FloatToS16(data[i]);
            }
            Capture(s16Data, channels, sampleRate);
        }

        private void Capture(short[] data, uint channels, uint sampleRate)
        {
            lock (_lock)
            {
                if (_buffer == null || channels != _channels || sampleRate != _sampleRate)
                {
                    // (Re)create the ring buffer sized for _bufferDurationMs of audio.
                    var size = (int)(channels * sampleRate * (_bufferDurationMs / 1000f));
                    _buffer?.Dispose();
                    _buffer = new RingBuffer(size * sizeof(short));
                    _channels = channels;
                    _sampleRate = sampleRate;
                }
                unsafe
                {
                    fixed (short* pData = data)
                    {
                        var byteData = new ReadOnlySpan<byte>(pData, data.Length * sizeof(short));
                        _buffer.Write(byteData);
                    }
                }
            }
        }

        /// <summary>
        /// Reads a frame with the length of the given duration.
        /// </summary>
        /// <param name="durationMs">The duration of the audio samples to read in milliseconds.</param>
        /// <returns>An AudioFrame containing the read audio samples, or null if there are not enough samples.</returns>
        internal AudioFrame ReadDuration(uint durationMs)
        {
            lock (_lock)
            {
                if (_buffer == null) return null;

                var samplesForDuration = (uint)(_sampleRate * (durationMs / 1000f));
                var requiredLength = samplesForDuration * _channels * sizeof(short);
                if (_buffer.AvailableRead() < requiredLength) return null;

                var frame = new AudioFrame(_sampleRate, _channels, samplesForDuration);
                unsafe
                {
                    var frameData = new Span<byte>(frame.Data.ToPointer(), frame.Length);
                    _buffer.Read(frameData);
                }
                return frame;
            }
        }
    }
}
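A brief usage sketch (illustrative buffer sizes and audio parameters; the callback shape mirrors AudioFilter.AudioRead used above):

    // Illustrative only: accumulate Unity float samples, then drain 10 ms frames.
    var buffer = new AudioBuffer(bufferDurationMs: 200);

    // e.g. a Unity audio callback delivering interleaved stereo floats at 48 kHz:
    float[] data = new float[2048];
    buffer.Write(data, channels: 2, sampleRate: 48000);

    // A 10 ms read at 48 kHz needs 48000 * 0.010 = 480 samples per channel,
    // i.e. 480 * 2 channels * 2 bytes = 1920 bytes of buffered audio.
    using var frame = buffer.ReadDuration(AudioProcessingModule.FRAME_DURATION_MS);
    if (frame != null)
    {
        // hand the frame to the APM, etc.
    }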
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we're likely going to have a skew here (this will significantly impact the AEC over long room durations).
Is there a way to process the frames directly as we receive them?
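To illustrate one reading of that suggestion (a hedged sketch, not the author's design): drain the buffer synchronously inside the audio callback rather than from a polling thread, which removes the Thread.Sleep pacing as a source of skew. It still relies on AudioBuffer to align arbitrary callback sizes to exact 10 ms frames:

    // Hedged sketch of "process as we receive"; not code from this PR.
    private void OnAudioRead(float[] data, int channels, int sampleRate)
    {
        _captureBuffer.Write(data, (uint)channels, (uint)sampleRate);
        while (true)
        {
            using var frame = _captureBuffer.ReadDuration(AudioProcessingModule.FRAME_DURATION_MS);
            if (frame == null) break;
            _apm.ProcessReverseStream(frame); // APM is thread safe per the field comment
        }
    }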