Skip to content

Commit b2d68e5

Browse files
authored
Whisper Endpoint ByteArray Request (#31)
* feat: file data type
* feat: extension method for adding a file directly from bytes
* feat: if the file path does not exist, use file data
* feat: SaveWav file updated to use a memory stream
* feat: Whisper sample uses file bytes; no file is saved locally
1 parent 79343f2 commit b2d68e5

File tree

6 files changed

+68
-39
lines changed

6 files changed

+68
-39
lines changed

Runtime/DataTypes.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,9 +120,16 @@ public struct ChatMessage
120120

121121
#region Audio Transcriptions Data Types
122122

123+
public struct FileData
124+
{
125+
public byte[] Data;
126+
public string Name;
127+
}
128+
123129
public class CreateAudioRequestBase
124130
{
125131
public string File { get; set; }
132+
public FileData FileData { get; set; }
126133
public string Model { get; set; }
127134
public string Prompt { get; set; }
128135
public string ResponseFormat { get; set; } = AudioResponseFormat.Json;

Runtime/OpenAIApi.cs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,14 @@ public async Task<CreateAudioResponse> CreateAudioTranscription(CreateAudioTrans
264264
var path = $"{BASE_PATH}/audio/transcriptions";
265265

266266
var form = new List<IMultipartFormSection>();
267-
form.AddFile(request.File, "file", $"audio/{Path.GetExtension(request.File)}");
267+
if (string.IsNullOrEmpty(request.File))
268+
{
269+
form.AddData(request.FileData, "file", $"audio/{Path.GetExtension(request.File)}");
270+
}
271+
else
272+
{
273+
form.AddFile(request.File, "file", $"audio/{Path.GetExtension(request.File)}");
274+
}
268275
form.AddValue(request.Model, "model");
269276
form.AddValue(request.Prompt, "prompt");
270277
form.AddValue(request.ResponseFormat, "response_format");

Runtime/Utils/ExtensionMethods.cs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,22 @@ public static void AddFile(this List<IMultipartFormSection> form, string path, s
2323
}
2424
}
2525

26+
/// <summary>
27+
/// Add in-memory file data to this form as a file section; nothing is added when the data bytes are null.
28+
/// </summary>
29+
/// <param name="form">List of multipart form sections.</param>
30+
/// <param name="data">File data to attach: the raw bytes and the file name to report for them.</param>
31+
/// <param name="name">Name of the form field.</param>
32+
/// <param name="contentType">Content type of the file.</param>
33+
public static void AddData(this List<IMultipartFormSection> form, FileData data, string name, string contentType)
34+
{
35+
if (data.Data != null)
36+
{
37+
var fileName = Path.GetFileName(data.Name);
38+
form.Add(new MultipartFormFileSection(name, data.Data, fileName, contentType));
39+
}
40+
}
41+
2642
/// <summary>
2743
/// Add a primitive value to the form.
2844
/// </summary>

Samples~/Whisper/SavWav.cs renamed to Samples~/Whisper/SaveWav.cs

Lines changed: 34 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,11 @@
2929
using UnityEngine;
3030
using System.Collections.Generic;
3131

32-
public static class SavWav {
32+
public static class SaveWav {
3333

3434
const int HEADER_SIZE = 44;
3535

36-
public static bool Save(string filename, AudioClip clip) {
36+
public static byte[] Save(string filename, AudioClip clip) {
3737
if (!filename.ToLower().EndsWith(".wav")) {
3838
filename += ".wav";
3939
}
@@ -43,14 +43,12 @@ public static bool Save(string filename, AudioClip clip) {
4343
// Make sure directory exists if user is saving to sub dir.
4444
Directory.CreateDirectory(Path.GetDirectoryName(filepath));
4545

46-
using (var fileStream = CreateEmpty(filepath)) {
47-
48-
ConvertAndWrite(fileStream, clip);
49-
50-
WriteHeader(fileStream, clip);
46+
using (var memoryStream = CreateEmpty(filepath))
47+
{
48+
ConvertAndWrite(memoryStream, clip);
49+
WriteHeader(memoryStream, clip);
50+
return memoryStream.GetBuffer();
5151
}
52-
53-
return true; // TODO: return false if there's a failure saving the file
5452
}
5553

5654
public static AudioClip TrimSilence(AudioClip clip, float min) {
@@ -91,19 +89,19 @@ public static AudioClip TrimSilence(List<float> samples, float min, int channels
9189
return clip;
9290
}
9391

94-
static FileStream CreateEmpty(string filepath) {
95-
var fileStream = new FileStream(filepath, FileMode.Create);
96-
byte emptyByte = new byte();
92+
static MemoryStream CreateEmpty(string filepath) {
93+
var memoryStream = new MemoryStream();
94+
byte emptyByte = new byte();
9795

98-
for(int i = 0; i < HEADER_SIZE; i++) //preparing the header
99-
{
100-
fileStream.WriteByte(emptyByte);
101-
}
96+
for(int i = 0; i < HEADER_SIZE; i++) //preparing the header
97+
{
98+
memoryStream.WriteByte(emptyByte);
99+
}
102100

103-
return fileStream;
101+
return memoryStream;
104102
}
105103

106-
static void ConvertAndWrite(FileStream fileStream, AudioClip clip) {
104+
static void ConvertAndWrite(MemoryStream memoryStream, AudioClip clip) {
107105

108106
var samples = new float[clip.samples];
109107

@@ -125,60 +123,60 @@ static void ConvertAndWrite(FileStream fileStream, AudioClip clip) {
125123
byteArr.CopyTo(bytesData, i * 2);
126124
}
127125

128-
fileStream.Write(bytesData, 0, bytesData.Length);
126+
memoryStream.Write(bytesData, 0, bytesData.Length);
129127
}
130128

131-
static void WriteHeader(FileStream fileStream, AudioClip clip) {
129+
static void WriteHeader(MemoryStream memoryStream, AudioClip clip) {
132130

133131
var hz = clip.frequency;
134132
var channels = clip.channels;
135133
var samples = clip.samples;
136134

137-
fileStream.Seek(0, SeekOrigin.Begin);
135+
memoryStream.Seek(0, SeekOrigin.Begin);
138136

139137
Byte[] riff = System.Text.Encoding.UTF8.GetBytes("RIFF");
140-
fileStream.Write(riff, 0, 4);
138+
memoryStream.Write(riff, 0, 4);
141139

142-
Byte[] chunkSize = BitConverter.GetBytes(fileStream.Length - 8);
143-
fileStream.Write(chunkSize, 0, 4);
140+
Byte[] chunkSize = BitConverter.GetBytes(memoryStream.Length - 8);
141+
memoryStream.Write(chunkSize, 0, 4);
144142

145143
Byte[] wave = System.Text.Encoding.UTF8.GetBytes("WAVE");
146-
fileStream.Write(wave, 0, 4);
144+
memoryStream.Write(wave, 0, 4);
147145

148146
Byte[] fmt = System.Text.Encoding.UTF8.GetBytes("fmt ");
149-
fileStream.Write(fmt, 0, 4);
147+
memoryStream.Write(fmt, 0, 4);
150148

151149
Byte[] subChunk1 = BitConverter.GetBytes(16);
152-
fileStream.Write(subChunk1, 0, 4);
150+
memoryStream.Write(subChunk1, 0, 4);
153151

154152
UInt16 two = 2;
155153
UInt16 one = 1;
156154

157155
Byte[] audioFormat = BitConverter.GetBytes(one);
158-
fileStream.Write(audioFormat, 0, 2);
156+
memoryStream.Write(audioFormat, 0, 2);
159157

160158
Byte[] numChannels = BitConverter.GetBytes(channels);
161-
fileStream.Write(numChannels, 0, 2);
159+
memoryStream.Write(numChannels, 0, 2);
162160

163161
Byte[] sampleRate = BitConverter.GetBytes(hz);
164-
fileStream.Write(sampleRate, 0, 4);
162+
memoryStream.Write(sampleRate, 0, 4);
165163

166164
Byte[] byteRate = BitConverter.GetBytes(hz * channels * 2); // sampleRate * bytesPerSample*number of channels, here 44100*2*2
167-
fileStream.Write(byteRate, 0, 4);
165+
memoryStream.Write(byteRate, 0, 4);
168166

169167
UInt16 blockAlign = (ushort) (channels * 2);
170-
fileStream.Write(BitConverter.GetBytes(blockAlign), 0, 2);
168+
memoryStream.Write(BitConverter.GetBytes(blockAlign), 0, 2);
171169

172170
UInt16 bps = 16;
173171
Byte[] bitsPerSample = BitConverter.GetBytes(bps);
174-
fileStream.Write(bitsPerSample, 0, 2);
172+
memoryStream.Write(bitsPerSample, 0, 2);
175173

176174
Byte[] datastring = System.Text.Encoding.UTF8.GetBytes("data");
177-
fileStream.Write(datastring, 0, 4);
175+
memoryStream.Write(datastring, 0, 4);
178176

179177
Byte[] subChunk2 = BitConverter.GetBytes(samples * channels * 2);
180-
fileStream.Write(subChunk2, 0, 4);
178+
memoryStream.Write(subChunk2, 0, 4);
181179

182180
// fileStream.Close();
183181
}
184-
}
182+
}
File renamed without changes.

Samples~/Whisper/Whisper.cs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,12 @@ private async void EndRecording()
4242

4343
isRecording = false;
4444
Microphone.End(null);
45-
SavWav.Save(fileName, clip);
45+
byte[] data = SaveWav.Save(fileName, clip);
4646

4747
var req = new CreateAudioTranscriptionsRequest
4848
{
49-
File = Application.persistentDataPath + "/" + fileName,
49+
FileData = new FileData() {Data = data, Name = "audio.wav"},
50+
// File = Application.persistentDataPath + "/" + fileName,
5051
Model = "whisper-1",
5152
Language = "en"
5253
};

0 commit comments

Comments
 (0)