Skip to content

Commit 68eb4bb

Browse files
committed
Added start of audio streaming
1 parent c130862 commit 68eb4bb

File tree

3 files changed

+137
-11
lines changed

3 files changed

+137
-11
lines changed

pkg/whisper/api/logging.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package api
2+
3+
import (
4+
"log"
5+
"net/http"
6+
"sync/atomic"
7+
"time"
8+
)
9+
10+
var (
	// req is the running count of requests that have passed through
	// wrapLogging. It is only modified via nextReq, which updates it
	// atomically.
	req int32
)

// wrapLogging returns a handler that logs each request around a call to fn.
//
// Before fn runs, it logs the request number, the HTTP method and the URL.
// After fn finishes, it logs the same request number together with the
// elapsed time truncated to whole milliseconds, so the two log lines can be
// paired up.
func wrapLogging(fn http.HandlerFunc) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		id := nextReq()
		start := time.Now()
		log.Printf("R%d %s %s", id, r.Method, r.URL)

		// Log completion from a defer so that the duration line is still
		// written when fn panics; the panic itself continues to propagate
		// to the caller unchanged.
		defer func() {
			log.Printf("R%d Took %v", id, time.Since(start).Truncate(time.Millisecond))
		}()

		fn(w, r)
	}
}

// nextReq atomically increments the request counter and returns the new
// value, so the first request is numbered 1.
func nextReq() int32 {
	return atomic.AddInt32(&req, 1)
}

pkg/whisper/api/register.go

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ import (
1515
func RegisterEndpoints(base string, mux *http.ServeMux, whisper *whisper.Whisper) {
1616
// Health: GET /v1/health
1717
// returns an empty OK response
18-
mux.HandleFunc(joinPath(base, "health"), func(w http.ResponseWriter, r *http.Request) {
18+
mux.HandleFunc(joinPath(base, "health"), wrapLogging(func(w http.ResponseWriter, r *http.Request) {
1919
defer r.Body.Close()
2020

2121
switch r.Method {
@@ -24,14 +24,14 @@ func RegisterEndpoints(base string, mux *http.ServeMux, whisper *whisper.Whisper
2424
default:
2525
httpresponse.Error(w, http.StatusMethodNotAllowed)
2626
}
27-
})
27+
}))
2828

2929
// List Models: GET /v1/models
3030
// returns available models
3131
// Download Model: POST /v1/models?stream={bool}
3232
// downloads a model from the server
3333
// if stream is true then progress is streamed back to the client
34-
mux.HandleFunc(joinPath(base, "models"), func(w http.ResponseWriter, r *http.Request) {
34+
mux.HandleFunc(joinPath(base, "models"), wrapLogging(func(w http.ResponseWriter, r *http.Request) {
3535
defer r.Body.Close()
3636

3737
switch r.Method {
@@ -42,13 +42,13 @@ func RegisterEndpoints(base string, mux *http.ServeMux, whisper *whisper.Whisper
4242
default:
4343
httpresponse.Error(w, http.StatusMethodNotAllowed)
4444
}
45-
})
45+
}))
4646

4747
// Get: GET /v1/models/{id}
4848
// returns an existing model
4949
// Delete: DELETE /v1/models/{id}
5050
// deletes an existing model
51-
mux.HandleFunc(joinPath(base, "models/{id}"), func(w http.ResponseWriter, r *http.Request) {
51+
mux.HandleFunc(joinPath(base, "models/{id}"), wrapLogging(func(w http.ResponseWriter, r *http.Request) {
5252
defer r.Body.Close()
5353

5454
id := r.PathValue("id")
@@ -60,12 +60,12 @@ func RegisterEndpoints(base string, mux *http.ServeMux, whisper *whisper.Whisper
6060
default:
6161
httpresponse.Error(w, http.StatusMethodNotAllowed)
6262
}
63-
})
63+
}))
6464

6565
// Translate: POST /v1/audio/translations
6666
// Translates audio into english or another language - language parameter should be set to the
6767
// destination language of the audio. Will default to english if not set.
68-
mux.HandleFunc(joinPath(base, "audio/translations"), func(w http.ResponseWriter, r *http.Request) {
68+
mux.HandleFunc(joinPath(base, "audio/translations"), wrapLogging(func(w http.ResponseWriter, r *http.Request) {
6969
defer r.Body.Close()
7070

7171
switch r.Method {
@@ -74,12 +74,12 @@ func RegisterEndpoints(base string, mux *http.ServeMux, whisper *whisper.Whisper
7474
default:
7575
httpresponse.Error(w, http.StatusMethodNotAllowed)
7676
}
77-
})
77+
}))
7878

7979
// Transcribe: POST /v1/audio/transcriptions
8080
// Transcribes audio into the input language - language parameter should be set to the source
8181
// language of the audio
82-
mux.HandleFunc(joinPath(base, "audio/transcriptions"), func(w http.ResponseWriter, r *http.Request) {
82+
mux.HandleFunc(joinPath(base, "audio/transcriptions"), wrapLogging(func(w http.ResponseWriter, r *http.Request) {
8383
defer r.Body.Close()
8484

8585
switch r.Method {
@@ -88,7 +88,21 @@ func RegisterEndpoints(base string, mux *http.ServeMux, whisper *whisper.Whisper
8888
default:
8989
httpresponse.Error(w, http.StatusMethodNotAllowed)
9090
}
91-
})
91+
}))
92+
93+
// Transcribe: POST /v1/audio/transcriptions/{model}
94+
// Transcribes streamed media into the input language
95+
mux.HandleFunc(joinPath(base, "audio/transcriptions/{model}"), wrapLogging(func(w http.ResponseWriter, r *http.Request) {
96+
defer r.Body.Close()
97+
98+
model := r.PathValue("model")
99+
switch r.Method {
100+
case http.MethodPost:
101+
TranscribeStream(r.Context(), whisper, w, r, model)
102+
default:
103+
httpresponse.Error(w, http.StatusMethodNotAllowed)
104+
}
105+
}))
92106
}
93107

94108
/////////////////////////////////////////////////////////////////////////////

pkg/whisper/api/transcribe.go

Lines changed: 87 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,6 @@ func TranscribeFile(ctx context.Context, service *whisper.Whisper, w http.Respon
122122
if translate {
123123
result.Task = "translate"
124124
}
125-
result.Duration = schema.Timestamp(segmenter.Duration())
126125
result.Language = task.Language()
127126

128127
// Output the header
@@ -142,6 +141,93 @@ func TranscribeFile(ctx context.Context, service *whisper.Whisper, w http.Respon
142141
return err
143142
}
144143

144+
// Set the language and duration
145+
result.Language = task.Language()
146+
result.Duration = schema.Timestamp(segmenter.Duration())
147+
148+
// Return success
149+
return nil
150+
}); err != nil {
151+
if stream != nil {
152+
stream.Write("error", err.Error())
153+
} else {
154+
httpresponse.Error(w, http.StatusInternalServerError, err.Error())
155+
}
156+
return
157+
}
158+
159+
// Return transcription if not streaming
160+
if stream == nil {
161+
httpresponse.JSON(w, result, http.StatusOK, 2)
162+
} else {
163+
stream.Write("ok")
164+
}
165+
}
166+
167+
func TranscribeStream(ctx context.Context, service *whisper.Whisper, w http.ResponseWriter, r *http.Request, modelId string) {
168+
var query queryTranscribe
169+
if err := httprequest.Query(&query, r.URL.Query()); err != nil {
170+
httpresponse.Error(w, http.StatusBadRequest, err.Error())
171+
return
172+
}
173+
174+
// Get the model
175+
model := service.GetModelById(modelId)
176+
if model == nil {
177+
httpresponse.Error(w, http.StatusNotFound, "model not found")
178+
return
179+
}
180+
181+
// Create a segmenter - read segments based on 10 second segment size
182+
segmenter, err := segmenter.New(r.Body, 10*time.Second, whisper.SampleRate)
183+
if err != nil {
184+
httpresponse.Error(w, http.StatusBadRequest, err.Error())
185+
return
186+
}
187+
188+
// Create a text stream
189+
var stream *httpresponse.TextStream
190+
if query.Stream {
191+
if stream = httpresponse.NewTextStream(w); stream == nil {
192+
httpresponse.Error(w, http.StatusInternalServerError, "Cannot create text stream")
193+
return
194+
}
195+
defer stream.Close()
196+
}
197+
198+
// Get context for the model, perform transcription
199+
var result *schema.Transcription
200+
if err := service.WithModel(model, func(task *task.Context) error {
201+
// Set parameters for translation, default to auto
202+
task.SetTranslate(false)
203+
if err := task.SetLanguage("auto"); err != nil {
204+
return err
205+
}
206+
207+
// TODO: Set temperature, etc
208+
209+
// Create response
210+
result = task.Result()
211+
result.Task = "transcribe"
212+
result.Language = task.Language()
213+
214+
// Output the header
215+
if stream != nil {
216+
stream.Write("task", result)
217+
}
218+
219+
// Read samples and transcribe them
220+
if err := segmenter.Decode(ctx, func(ts time.Duration, buf []float32) error {
221+
// Perform the transcription, output segments in realtime, return any errors
222+
return task.Transcribe(ctx, ts, buf, stream != nil, func(segment *schema.Segment) {
223+
if stream != nil {
224+
stream.Write("segment", segment)
225+
}
226+
})
227+
}); err != nil {
228+
return err
229+
}
230+
145231
// Set the language
146232
result.Language = task.Language()
147233

0 commit comments

Comments
 (0)