Skip to content

Commit c6c049f

Browse files
com.rest.elevenlabs 3.5.1 (#121)
- Fixed generated clip playback for non-streaming clips
- Replaced usages of Task.Delay with WebGL-friendly Awaiters.DelayAsync
- Updated TextToSpeechDemo
- Updated dependencies
- Updated unit tests
1 parent 7226398 commit c6c049f

File tree

6 files changed

+66
-43
lines changed

6 files changed

+66
-43
lines changed

Runtime/Common/GeneratedClip.cs

Lines changed: 33 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -57,62 +57,61 @@ internal GeneratedClip(string id, string text, ReadOnlyMemory<byte> clipData, in
5757
public Guid TextHash { get; private set; }
5858

5959
[SerializeField]
60-
private AudioClip audioClip;
60+
private string cachedPath;
6161

6262
[Preserve]
63-
public AudioClip AudioClip
63+
public string CachedPath => cachedPath;
64+
65+
public ReadOnlyMemory<byte> ClipData { get; }
66+
67+
public float[] ClipSamples
6468
{
6569
get
6670
{
67-
if (audioClip == null && !ClipData.IsEmpty)
71+
if (clipSamples != null)
6872
{
69-
var samples = ClipSamples;
70-
71-
if (samples is { Length: > 0 })
72-
{
73-
audioClip = AudioClip.Create(Id, samples.Length, 1, SampleRate, false);
74-
audioClip.SetData(samples, 0);
75-
}
73+
return clipSamples;
7674
}
7775

78-
if (audioClip == null)
76+
if (ClipData.IsEmpty)
7977
{
80-
Debug.LogError($"{nameof(audioClip)} is null, try loading it with LoadCachedAudioClipAsync");
78+
return Array.Empty<float>();
8179
}
8280

83-
return audioClip;
81+
clipSamples ??= PCMEncoder.Decode(ClipData.ToArray(), inputSampleRate: SampleRate, outputSampleRate: AudioSettings.outputSampleRate);
82+
return clipSamples;
83+
8484
}
8585
}
86+
private float[] clipSamples;
87+
88+
public int SampleRate { get; }
8689

8790
[SerializeField]
88-
private string cachedPath;
91+
private AudioClip audioClip;
8992

9093
[Preserve]
91-
public string CachedPath => cachedPath;
92-
93-
public ReadOnlyMemory<byte> ClipData { get; }
94-
95-
private float[] clipSamples;
96-
97-
public float[] ClipSamples
94+
public AudioClip AudioClip
9895
{
9996
get
10097
{
101-
if (!ClipData.IsEmpty)
98+
if (audioClip == null &&
99+
ClipSamples is { Length: > 0 })
102100
{
103-
clipSamples ??= PCMEncoder.Decode(ClipData.ToArray(), PCMFormatSize.SixteenBit, SampleRate, AudioSettings.outputSampleRate);
101+
audioClip = AudioClip.Create(Id, ClipSamples.Length, 1, AudioSettings.outputSampleRate, false);
102+
audioClip.SetData(ClipSamples, 0);
104103
}
105-
else if (audioClip != null)
104+
105+
if (audioClip == null)
106106
{
107-
clipSamples = new float[audioClip.samples];
108-
audioClip.GetData(clipSamples, 0);
107+
Debug.LogError($"{nameof(audioClip)} is null, try loading it with LoadCachedAudioClipAsync");
109108
}
110109

111-
return clipSamples;
110+
return audioClip;
112111
}
113112
}
114113

115-
public int SampleRate { get; }
114+
public float Length => ClipSamples.Length / (float)AudioSettings.outputSampleRate;
116115

117116
public void OnBeforeSerialize() => textHash = TextHash.ToString();
118117

@@ -130,6 +129,12 @@ var path when path.EndsWith(".mp3") => AudioType.MPEG,
130129
_ => AudioType.UNKNOWN
131130
};
132131

132+
if (audioType == AudioType.UNKNOWN)
133+
{
134+
Debug.LogWarning($"Unable to load cached audio clip at {cachedPath}");
135+
return null;
136+
}
137+
133138
return await Rest.DownloadAudioClipAsync($"file://{cachedPath}", audioType, cancellationToken: cancellationToken);
134139
}
135140
}

Runtime/Dubbing/DubbingEndpoint.cs

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
using System.Threading;
1111
using System.Threading.Tasks;
1212
using UnityEngine;
13+
using Utilities.Async;
1314
using Utilities.WebRequestRest;
1415
using Debug = UnityEngine.Debug;
1516

@@ -148,7 +149,7 @@ private async Task<DubbingProjectMetadata> WaitForDubbingCompletionAsync(Dubbing
148149
Debug.Log($"Dubbing for {dubbingResponse.DubbingId} in progress... Will check status again in {pollingInterval.TotalSeconds} seconds.");
149150
}
150151

151-
await Task.Delay(pollingInterval, cancellationToken).ConfigureAwait(false);
152+
await Awaiters.DelayAsync(pollingInterval, cancellationToken).ConfigureAwait(true);
152153
}
153154
else
154155
{

Runtime/TextToSpeech/TextToSpeechEndpoint.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -180,7 +180,7 @@ public async Task<VoiceClip> TextToSpeechAsync(TextToSpeechRequest request, Func
180180
var audioData = request.WithTimestamps ? accumulatedPCMData!.ToArray() : response.Data;
181181
var cachedPath = await SaveAudioToCache(audioData, clipId, request.Voice, request.OutputFormat, request.CacheFormat, cancellationToken).ConfigureAwait(true);
182182

183-
return new VoiceClip(clipId, request.Text, request.Voice, new ReadOnlyMemory<byte>(audioData), request.OutputFormat.GetSampleRate(), cachedPath)
183+
return new VoiceClip(clipId, request.Text, request.Voice, new ReadOnlyMemory<byte>(audioData), frequency, cachedPath)
184184
{
185185
TimestampedTranscriptCharacters = accumulatedTranscriptData?.ToArray() ?? Array.Empty<TimestampedTranscriptCharacter>()
186186
};

Samples~/TextToSpeech/TextToSpeechDemo.cs

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,12 +4,14 @@
44
using ElevenLabs.TextToSpeech;
55
using ElevenLabs.Voices;
66
using System;
7+
using System.Diagnostics;
78
using System.Linq;
89
using System.Threading;
910
using System.Threading.Tasks;
1011
using UnityEngine;
1112
using Utilities.Async;
1213
using Utilities.Audio;
14+
using Debug = UnityEngine.Debug;
1315

1416
namespace ElevenLabs.Demo
1517
{
@@ -63,18 +65,29 @@ private async void Start()
6365
}
6466

6567
var request = new TextToSpeechRequest(voice, message, model: Model.FlashV2_5, outputFormat: OutputFormat.PCM_24000);
68+
var stopwatch = Stopwatch.StartNew();
6669
var voiceClip = await api.TextToSpeechEndpoint.TextToSpeechAsync(request, async partialClip =>
6770
{
6871
await streamAudioSource.BufferCallbackAsync(partialClip.ClipSamples);
6972
}, cancellationToken: destroyCancellationToken);
70-
await new WaitUntil(() => streamAudioSource.IsEmpty || destroyCancellationToken.IsCancellationRequested);
71-
destroyCancellationToken.ThrowIfCancellationRequested();
72-
((AudioSource)streamAudioSource).clip = voiceClip.AudioClip;
73+
var elapsedTime = (float)stopwatch.Elapsed.TotalSeconds;
74+
var playbackTime = voiceClip.Length - elapsedTime;
75+
76+
if (debug)
77+
{
78+
Debug.Log($"Elapsed time: {elapsedTime:F} seconds");
79+
Debug.Log($"voice clip length: {voiceClip.Length:F} seconds");
80+
Debug.Log($"playback time: {playbackTime:F} seconds");
81+
}
82+
83+
await Awaiters.DelayAsync(TimeSpan.FromSeconds(playbackTime + 1f), destroyCancellationToken);
7384

7485
if (debug)
7586
{
7687
Debug.Log($"Full clip: {voiceClip.Id}");
7788
}
89+
90+
((AudioSource)streamAudioSource).PlayOneShot(voiceClip);
7891
}
7992
catch (Exception e)
8093
{

Tests/Test_Fixture_04_TextToSpeechEndpoint.cs

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ public async Task Test_01_TextToSpeech()
2121
var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(request);
2222
Assert.NotNull(voiceClip);
2323
Assert.NotNull(voiceClip.AudioClip);
24+
Assert.AreEqual(voiceClip.AudioClip.length, voiceClip.Length, 0.01);
2425
Debug.Log(voiceClip.Id);
2526
}
2627

@@ -36,9 +37,10 @@ public async Task Test_02_StreamTextToSpeech()
3637
Assert.NotNull(partialClips);
3738
Assert.IsNotEmpty(partialClips);
3839
Assert.NotNull(voiceClip);
39-
Assert.IsNotNull(voiceClip.AudioClip);
4040
Debug.Log(voiceClip.Id);
4141
Debug.Log(voiceClip.CachedPath);
42+
Assert.IsNotNull(voiceClip.AudioClip);
43+
Assert.AreEqual(voiceClip.AudioClip.length, voiceClip.Length, 0.01);
4244
}
4345

4446
[Test]
@@ -50,9 +52,10 @@ public async Task Test_03_TextToSpeech_Transcription()
5052
var request = new TextToSpeechRequest(voice, "The quick brown fox jumps over the lazy dog.", withTimestamps: true);
5153
var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(request);
5254
Assert.NotNull(voiceClip);
53-
Assert.NotNull(voiceClip.AudioClip);
5455
Debug.Log(voiceClip.Id);
5556
Debug.Log(voiceClip.CachedPath);
57+
Assert.NotNull(voiceClip.AudioClip);
58+
Assert.AreEqual(voiceClip.AudioClip.length, voiceClip.Length, 0.01);
5659
Assert.NotNull(voiceClip.TimestampedTranscriptCharacters);
5760
Assert.IsNotEmpty(voiceClip.TimestampedTranscriptCharacters);
5861
Debug.Log("| Character | Start Time | End Time |");
@@ -88,9 +91,10 @@ public async Task Test_04_StreamTextToSpeech_Transcription()
8891
Assert.NotNull(partialClips);
8992
Assert.IsNotEmpty(partialClips);
9093
Assert.NotNull(voiceClip);
91-
Assert.IsNotNull(voiceClip.AudioClip);
9294
Debug.Log(voiceClip.Id);
9395
Debug.Log(voiceClip.CachedPath);
96+
Assert.IsNotNull(voiceClip.AudioClip);
97+
Assert.AreEqual(voiceClip.AudioClip.length, voiceClip.Length, 0.01);
9498
Assert.AreEqual(characters.ToArray(), voiceClip.TimestampedTranscriptCharacters);
9599
}
96100

@@ -111,10 +115,11 @@ public async Task Test_05_LanguageEnforced_TextToSpeech()
111115
languageCode: "cs");
112116
var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(request);
113117
Assert.NotNull(voiceClip);
114-
Assert.NotNull(voiceClip.AudioClip);
115-
Assert.IsTrue(string.IsNullOrWhiteSpace(voiceClip.CachedPath));
116118
Debug.Log(voiceClip.Id);
117119
Debug.Log(voiceClip.CachedPath);
120+
Assert.NotNull(voiceClip.AudioClip);
121+
Assert.AreEqual(voiceClip.AudioClip.length, voiceClip.Length, 0.01);
122+
Assert.IsTrue(string.IsNullOrWhiteSpace(voiceClip.CachedPath));
118123
}
119124
}
120125
}

package.json

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
"displayName": "ElevenLabs",
44
"description": "A non-official Eleven Labs voice synthesis RESTful client.",
55
"keywords": [],
6-
"version": "3.5.0",
6+
"version": "3.5.1",
77
"unity": "2021.3",
88
"documentationUrl": "https://github.com/RageAgainstThePixel/com.rest.elevenlabs#documentation",
99
"changelogUrl": "https://github.com/RageAgainstThePixel/com.rest.elevenlabs/releases",
@@ -17,10 +17,9 @@
1717
"url": "https://github.com/StephenHodgson"
1818
},
1919
"dependencies": {
20-
"com.utilities.rest": "3.3.1",
21-
"com.utilities.audio": "2.2.1",
22-
"com.utilities.encoder.ogg": "4.2.0",
23-
"com.utilities.encoder.wav": "2.2.0"
20+
"com.utilities.rest": "3.3.2",
21+
"com.utilities.encoder.ogg": "4.2.1",
22+
"com.utilities.encoder.wav": "2.2.1"
2423
},
2524
"samples": [
2625
{

0 commit comments

Comments
 (0)