Skip to content

Commit b480299

Browse files
com.openai.unity 8.4.4 (#313)
- refactored AudioEndpoint speech requests - added AudioEndpoint.GetSpeechAsync - deprecated AudioEndpoint.CreateSpeechAsync - deprecated AudioEndpoint.CreateSpeechStreamAsync - added SpeechClip response - added Realtime.ResponseAudioResponse.AudioSamples - updated all sample scenes with new OnAudioFilterRead examples for better playback - updated unit tests
1 parent 2e54704 commit b480299

File tree

15 files changed

+755
-669
lines changed

15 files changed

+755
-669
lines changed

OpenAI/Packages/com.openai.unity/Documentation~/README.md

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -61,12 +61,12 @@ The recommended installation method is though the unity package manager and [Ope
6161
- [List Models](#list-models)
6262
- [Retrieve Models](#retrieve-model)
6363
- [Delete Fine Tuned Model](#delete-fine-tuned-model)
64-
- [Realtime](#realtime) :new:
65-
- [Create Realtime Session](#create-realtime-session) :new:
66-
- [Client Events](#client-events) :new:
67-
- [Sending Client Events](#sending-client-events) :new:
68-
- [Server Events](#server-events) :new:
69-
- [Receiving Server Events](#receiving-server-events) :new:
64+
- [Realtime](#realtime)
65+
- [Create Realtime Session](#create-realtime-session)
66+
- [Client Events](#client-events)
67+
- [Sending Client Events](#sending-client-events)
68+
- [Server Events](#server-events)
69+
- [Receiving Server Events](#receiving-server-events)
7070
- [Assistants](#assistants)
7171
- [List Assistants](#list-assistants)
7272
- [Create Assistant](#create-assistant)
@@ -118,7 +118,7 @@ The recommended installation method is though the unity package manager and [Ope
118118
- [Streaming](#chat-streaming)
119119
- [Tools](#chat-tools)
120120
- [Vision](#chat-vision)
121-
- [Audio](#chat-audio) :new:
121+
- [Audio](#chat-audio)
122122
- [Structured Outputs](#chat-structured-outputs)
123123
- [Json Mode](#chat-json-mode)
124124
- [Audio](#audio)
@@ -1555,6 +1555,7 @@ Debug.Log($"{result.FirstChoice.Message.Role}: {result.FirstChoice} | Finish Rea
15551555
#### [Chat Audio](https://platform.openai.com/docs/guides/audio)
15561556

15571557
```csharp
1558+
var api = new OpenAIClient();
15581559
var messages = new List<Message>
15591560
{
15601561
new Message(Role.System, "You are a helpful assistant."),
@@ -1662,9 +1663,9 @@ Generates audio from the input text.
16621663
```csharp
16631664
var api = new OpenAIClient();
16641665
var request = new SpeechRequest("Hello world!");
1665-
var (path, clip) = await api.AudioEndpoint.CreateSpeechAsync(request);
1666-
audioSource.PlayOneShot(clip);
1667-
Debug.Log(path);
1666+
var speechClip = await api.AudioEndpoint.GetSpeechAsync(request);
1667+
audioSource.PlayOneShot(speechClip);
1668+
Debug.Log(speechClip);
16681669
```
16691670

16701671
##### [Stream Speech]
@@ -1673,11 +1674,17 @@ Generate streamed audio from the input text.
16731674

16741675
```csharp
16751676
var api = new OpenAIClient();
1676-
var request = new SpeechRequest("Hello world!");
1677-
var (path, clip) = await api.AudioEndpoint.CreateSpeechStreamAsync(request, partialClip => audioSource.PlayOneShot(partialClip));
1678-
Debug.Log(path);
1677+
var request = new SpeechRequest("Hello world!", responseFormat: SpeechResponseFormat.PCM);
1678+
var speechClip = await api.AudioEndpoint.GetSpeechAsync(request, partialClip =>
1679+
{
1680+
audioSource.PlayOneShot(partialClip);
1681+
});
1682+
Debug.Log(speechClip);
16791683
```
16801684

1685+
> [!NOTE]
1686+
> Check out any of the demo scenes for best practices on how to handle playback with `OnAudioFilterRead`.
1687+
16811688
#### [Create Transcription](https://platform.openai.com/docs/api-reference/audio/createTranscription)
16821689

16831690
Transcribes audio into the input language.

OpenAI/Packages/com.openai.unity/Runtime/Audio/AudioEndpoint.cs

Lines changed: 24 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
using System.Threading;
88
using System.Threading.Tasks;
99
using UnityEngine;
10-
using UnityEngine.Networking;
1110
using Utilities.WebRequestRest;
1211

1312
namespace OpenAI.Audio
@@ -27,25 +26,29 @@ internal AudioEndpoint(OpenAIClient client) : base(client) { }
2726

2827
private static readonly object mutex = new();
2928

30-
/// <summary>
31-
/// Generates audio from the input text.
32-
/// </summary>
33-
/// <param name="request"><see cref="SpeechRequest"/>.</param>
34-
/// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
35-
/// <returns><see cref="AudioClip"/> and the cached path.</returns>
36-
[Function("Generates audio from the input text.")]
29+
/// <summary>
/// Generates audio from the input text.
/// </summary>
/// <remarks>
/// Deprecated and kept for backward compatibility; use <c>GetSpeechAsync</c> instead.
/// </remarks>
/// <param name="request"><see cref="SpeechRequest"/>.</param>
/// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
/// <returns>The cached path and the <see cref="AudioClip"/> built from the response.</returns>
[Obsolete("use GetSpeechAsync")]
public async Task<Tuple<string, AudioClip>> CreateSpeechAsync(SpeechRequest request, CancellationToken cancellationToken = default)
{
    // No partial-clip callback is needed here, so call GetSpeechAsync directly
    // with a null callback instead of going through the streaming wrapper.
    var result = await GetSpeechAsync(request, null, cancellationToken);
    return Tuple.Create(result.CachePath, result.AudioClip);
}
3932

33+
/// <summary>
/// Generates audio from the input text, optionally reporting partial clips as they arrive.
/// </summary>
/// <remarks>
/// Deprecated and kept for backward compatibility; use <c>GetSpeechAsync</c> instead.
/// </remarks>
/// <param name="request"><see cref="SpeechRequest"/>.</param>
/// <param name="partialClipCallback">Optional, partial <see cref="AudioClip"/> callback. May be null.</param>
/// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
/// <returns>The cached path and the <see cref="AudioClip"/> built from the full response.</returns>
[Obsolete("use GetSpeechAsync")]
public async Task<Tuple<string, AudioClip>> CreateSpeechStreamAsync(SpeechRequest request, Action<AudioClip> partialClipCallback, CancellationToken cancellationToken = default)
{
    // Only forward a callback when the caller supplied one. Wrapping a null
    // delegate in a lambda would throw a NullReferenceException on the first
    // partial chunk, and would make GetSpeechAsync treat every call as streaming.
    Action<SpeechClip> speechClipCallback = null;

    if (partialClipCallback != null)
    {
        speechClipCallback = speechClip => partialClipCallback.Invoke(speechClip.AudioClip);
    }

    var result = await GetSpeechAsync(request, speechClipCallback, cancellationToken);
    return Tuple.Create(result.CachePath, result.AudioClip);
}
42+
4043
/// <summary>
41-
/// Generates streaming audio from the input text.
44+
/// Generates audio from the input text.
4245
/// </summary>
4346
/// <param name="request"><see cref="SpeechRequest"/>.</param>
44-
/// <param name="partialClipCallback">Optional, partial <see cref="AudioClip"/> callback used to stream audio.</param>
47+
/// <param name="partialClipCallback">Optional, partial <see cref="SpeechClip"/> callback used to stream audio.</param>
4548
/// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
46-
/// <returns><see cref="AudioClip"/> and the cached path.</returns>
47-
[Function("Generates streaming audio from the input text.")]
48-
public async Task<Tuple<string, AudioClip>> CreateSpeechStreamAsync(SpeechRequest request, Action<AudioClip> partialClipCallback, CancellationToken cancellationToken = default)
49+
/// <returns><see cref="SpeechClip"/></returns>
50+
[Function("Generates audio from the input text.")]
51+
public async Task<SpeechClip> GetSpeechAsync(SpeechRequest request, Action<SpeechClip> partialClipCallback = null, CancellationToken cancellationToken = default)
4952
{
5053
if (partialClipCallback != null && request.ResponseFormat != SpeechResponseFormat.PCM)
5154
{
@@ -70,52 +73,16 @@ public async Task<Tuple<string, AudioClip>> CreateSpeechStreamAsync(SpeechReques
7073

7174
Rest.TryGetDownloadCacheItem(clipName, out var cachedPath);
7275

73-
if (request.ResponseFormat == SpeechResponseFormat.PCM)
74-
{
75-
var part = 0;
76-
var response = await Rest.PostAsync(
77-
GetUrl("/speech"),
78-
payload,
79-
StreamCallback,
80-
eventChunkSize: 8192,
81-
new RestParameters(client.DefaultRequestHeaders),
82-
cancellationToken);
83-
response.Validate(EnableDebug);
84-
var samples = Utilities.Audio.PCMEncoder.Decode(response.Data);
85-
await File.WriteAllBytesAsync(cachedPath, response.Data, cancellationToken).ConfigureAwait(true);
86-
return new Tuple<string, AudioClip>(cachedPath, AudioClip.Create(clipName, samples.Length, 1, 24000, false));
87-
88-
void StreamCallback(Response partialResponse)
89-
{
90-
var chunk = Utilities.Audio.PCMEncoder.Decode(partialResponse.Data);
91-
var partialClip = AudioClip.Create($"{clipName}_{++part}", chunk.Length, 1, 24000, false);
92-
93-
if (!partialClip.SetData(chunk, 0))
94-
{
95-
Debug.LogError("Failed to set pcm data to partial clip.");
96-
return;
97-
}
98-
99-
partialClipCallback?.Invoke(partialClip);
100-
}
101-
}
76+
var part = 0;
77+
var pcmResponse = await Rest.PostAsync(GetUrl("/speech"), payload, StreamCallback, 8192, new RestParameters(client.DefaultRequestHeaders), cancellationToken);
78+
pcmResponse.Validate(EnableDebug);
79+
await File.WriteAllBytesAsync(cachedPath, pcmResponse.Data, cancellationToken).ConfigureAwait(true);
80+
return new SpeechClip(clipName, cachedPath, new ReadOnlyMemory<byte>(pcmResponse.Data));
10281

103-
var audioFormat = request.ResponseFormat switch
82+
void StreamCallback(Response partialResponse)
10483
{
105-
SpeechResponseFormat.MP3 => AudioType.MPEG,
106-
SpeechResponseFormat.WAV => AudioType.WAV,
107-
_ => throw new NotSupportedException(request.ResponseFormat.ToString())
108-
};
109-
110-
var clip = await Rest.DownloadAudioClipAsync(
111-
GetUrl("/speech"),
112-
audioFormat,
113-
UnityWebRequest.kHttpVerbPOST,
114-
clipName,
115-
payload,
116-
parameters: new RestParameters(client.DefaultRequestHeaders, debug: EnableDebug),
117-
cancellationToken: cancellationToken);
118-
return new Tuple<string, AudioClip>(cachedPath, clip);
84+
partialClipCallback?.Invoke(new SpeechClip($"{clipName}_{++part}", null, partialResponse.Data));
85+
}
11986
}
12087

12188
/// <summary>
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Licensed under the MIT License. See LICENSE in the project root for license information.
2+
3+
using System;
4+
using UnityEngine;
5+
using UnityEngine.Scripting;
6+
using Utilities.Audio;
7+
8+
namespace OpenAI.Audio
9+
{
10+
/// <summary>
/// Holds the audio bytes of a speech response together with the clip name,
/// the cache path and the sample rate of the data.
/// </summary>
[Preserve]
public sealed class SpeechClip
{
    // Sample rate the decoded samples are resampled to, and the frequency
    // used when creating the AudioClip.
    private const int OutputSampleRate = 44100;

    /// <summary>
    /// Creates a new <see cref="SpeechClip"/>.
    /// </summary>
    /// <param name="name">Name given to the generated <see cref="UnityEngine.AudioClip"/>.</param>
    /// <param name="cachePath">Path of the cached audio file; may be null.</param>
    /// <param name="audioData">Audio bytes of the response.</param>
    /// <param name="sampleRate">Sample rate of <paramref name="audioData"/>, 24000 by default.</param>
    [Preserve]
    internal SpeechClip(string name, string cachePath, ReadOnlyMemory<byte> audioData, int sampleRate = 24000)
    {
        Name = name;
        CachePath = cachePath;
        AudioData = audioData;
        SampleRate = sampleRate;
    }

    /// <summary>
    /// Name used for the generated <see cref="UnityEngine.AudioClip"/>.
    /// </summary>
    [Preserve]
    public string Name { get; }

    /// <summary>
    /// Path of the cached audio file; may be null.
    /// </summary>
    [Preserve]
    public string CachePath { get; }

    /// <summary>
    /// Audio bytes as received.
    /// </summary>
    [Preserve]
    public ReadOnlyMemory<byte> AudioData { get; }

    /// <summary>
    /// <see cref="AudioData"/> passed through <c>PCMEncoder.Decode</c> and then
    /// <c>PCMEncoder.Resample</c> from <see cref="SampleRate"/> to 44100.
    /// Recomputed on every access.
    /// </summary>
    [Preserve]
    public float[] AudioSamples
    {
        get
        {
            var decoded = PCMEncoder.Decode(AudioData.ToArray());
            return PCMEncoder.Resample(decoded, SampleRate, OutputSampleRate);
        }
    }

    /// <summary>
    /// Sample rate of <see cref="AudioData"/>.
    /// </summary>
    [Preserve]
    public int SampleRate { get; }

    /// <summary>
    /// Creates a new single-channel 44100 Hz <see cref="UnityEngine.AudioClip"/>
    /// filled with <see cref="AudioSamples"/>. A new clip is created on every access.
    /// </summary>
    [Preserve]
    public AudioClip AudioClip
    {
        get
        {
            var pcm = AudioSamples;
            var result = AudioClip.Create(Name, pcm.Length, 1, OutputSampleRate, false);
            result.SetData(pcm, 0);
            return result;
        }
    }

    /// <summary>
    /// Converts to the generated <see cref="UnityEngine.AudioClip"/>; null when <paramref name="clip"/> is null.
    /// </summary>
    [Preserve]
    public static implicit operator AudioClip(SpeechClip clip)
    {
        return clip == null ? null : clip.AudioClip;
    }

    /// <summary>
    /// Converts to <see cref="CachePath"/>; null when <paramref name="clip"/> is null.
    /// </summary>
    [Preserve]
    public static implicit operator string(SpeechClip clip)
    {
        return clip == null ? null : clip.CachePath;
    }
}
56+
}

OpenAI/Packages/com.openai.unity/Runtime/Audio/SpeechClip.cs.meta

Lines changed: 11 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

OpenAI/Packages/com.openai.unity/Runtime/Realtime/ResponseAudioResponse.cs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// Licensed under the MIT License. See LICENSE in the project root for license information.
22

33
using Newtonsoft.Json;
4+
using System;
45
using UnityEngine;
56
using UnityEngine.Scripting;
67
using Utilities.Audio;
@@ -72,6 +73,11 @@ internal ResponseAudioResponse(
7273
[JsonProperty("delta")]
7374
public string Delta { get; }
7475

76+
/// <summary>
/// The base64 <see cref="Delta"/> payload passed through <c>PCMEncoder.Decode</c> and then
/// <c>PCMEncoder.Resample</c> from 24000 to 44100.
/// Returns an empty array when <see cref="Delta"/> is null or empty, instead of throwing.
/// </summary>
[Preserve]
[JsonIgnore]
public float[] AudioSamples
    => string.IsNullOrEmpty(Delta)
        ? Array.Empty<float>()
        : PCMEncoder.Resample(PCMEncoder.Decode(Convert.FromBase64String(Delta)), 24000, 44100);
80+
7581
/// <summary>
/// True when the event <c>Type</c> ends with "delta".
/// </summary>
[Preserve]
[JsonIgnore]
// Ordinal comparison: the event type is a protocol identifier, not user-facing text,
// so the check must not depend on the current culture.
public bool IsDelta => Type.EndsWith("delta", StringComparison.Ordinal);
@@ -83,8 +89,8 @@ internal ResponseAudioResponse(
8389
/// <summary>
/// Converts the response into a new single-channel 44100 Hz <see cref="AudioClip"/>
/// filled with the response's <c>AudioSamples</c>.
/// </summary>
/// <param name="response">The response to convert; may be null.</param>
/// <returns>The created <see cref="AudioClip"/>, or null when <paramref name="response"/> is null.</returns>
[Preserve]
public static implicit operator AudioClip(ResponseAudioResponse response)
{
    // Null-safe, so an implicit conversion of a null reference yields null
    // rather than a NullReferenceException.
    if (response == null)
    {
        return null;
    }

    var audioSamples = response.AudioSamples;
    var audioClip = AudioClip.Create($"{response.ItemId}_{response.OutputIndex}_delta", audioSamples.Length, 1, 44100, false);
    audioClip.SetData(audioSamples, 0);
    return audioClip;
}

0 commit comments

Comments
 (0)