Skip to content

Commit cce8218

Browse files
committed
Added transcribe and delete commands
1 parent f5c0abf commit cce8218

File tree

14 files changed

+182
-31
lines changed

14 files changed

+182
-31
lines changed

Makefile

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@ BUILD_DIR := build
1616

1717
# Build flags
1818
BUILD_MODULE := $(shell cat go.mod | head -1 | cut -d ' ' -f 2)
19-
BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/whisper/version.GitSource=${BUILD_MODULE}
20-
BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/whisper/version.GitTag=$(shell git describe --tags --always)
21-
BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/whisper/version.GitBranch=$(shell git name-rev HEAD --name-only --always)
22-
BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/whisper/version.GitHash=$(shell git rev-parse HEAD)
23-
BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/whisper/version.GoBuildTime=$(shell date -u '+%Y-%m-%dT%H:%M:%SZ')
19+
BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/version.GitSource=${BUILD_MODULE}
20+
BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/version.GitTag=$(shell git describe --tags --always)
21+
BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/version.GitBranch=$(shell git name-rev HEAD --name-only --always)
22+
BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/version.GitHash=$(shell git rev-parse HEAD)
23+
BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/version.GoBuildTime=$(shell date -u '+%Y-%m-%dT%H:%M:%SZ')
2424
BUILD_FLAGS = -ldflags "-s -w $(BUILD_LD_FLAGS)"
2525

2626
# If GGML_CUDA is set, then add a cuda tag for the go ${BUILD FLAGS}

README.md

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
Speech-to-Text in golang. This is an early development version.
44

5-
* `cmd` contains an OpenAI-API compatible server
5+
* `cmd` contains an OpenAI-API compatible service
66
* `pkg` contains the `whisper` service and client
77
* `sys` contains the `whisper` bindings to the `whisper.cpp` library
88
* `third_party` is a submodule for the whisper.cpp source
@@ -11,6 +11,7 @@ Speech-to-Text in golang. This is an early development version.
1111

1212
(Note: Docker images are not created yet - this is some forward planning!)
1313

14+
You can either run the whisper service as a CLI command or in a docker container.
1415
There are docker images for arm64 and amd64 (Intel). The arm64 image is built for
1516
Jetson GPU support specifically, but it will also run on Raspberry Pis.
1617

@@ -19,14 +20,15 @@ In order to utilize a NVIDIA GPU, you'll need to install the
1920

2021
A docker volume called "whisper" should be created; it can be used for storing the Whisper language
2122
models. You can see which models are available to download locally [here](https://huggingface.co/ggerganov/whisper.cpp).
22-
The following command will run the server on port 8080:
23+
24+
The following command will run the server on port 8080 for an NVIDIA GPU:
2325

2426
```bash
2527
docker run \
2628
--name whisper-server --rm \
2729
--runtime nvidia --gpus all \ # When using a NVIDIA GPU
2830
-v whisper:/models -p 8080:8080 -e WHISPER_DATA=/models \
29-
ghcr.io/mutablelogic/go-whisper:latest
31+
ghcr.io/mutablelogic/go-whisper
3032
```
3133

3234
If you include a `-debug` flag at the end, you'll get more verbose output. The API is then
@@ -92,6 +94,11 @@ The following `Makefile` targets can be used:
9294

9395
See all the other targets in the `Makefile` for more information.
9496

97+
## Developing
98+
99+
The `cmd/examples` directory contains a simple example of how to use the `whisper` package
100+
in your own code.
101+
95102
## Status
96103

97104
Still in development. See this [issue](https://github.com/mutablelogic/go-whisper/issues/1) for

cmd/server/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ import (
1414
context "github.com/mutablelogic/go-server/pkg/context"
1515
httpserver "github.com/mutablelogic/go-server/pkg/httpserver"
1616
whisper "github.com/mutablelogic/go-whisper"
17+
version "github.com/mutablelogic/go-whisper/pkg/version"
1718
api "github.com/mutablelogic/go-whisper/pkg/whisper/api"
18-
version "github.com/mutablelogic/go-whisper/pkg/whisper/version"
1919
)
2020

2121
func main() {

cmd/whisper/delete.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
package main
2+
3+
// Packages
4+
5+
type DeleteCmd struct {
6+
Model string `arg:"" help:"Model id to delete"`
7+
}
8+
9+
func (cmd *DeleteCmd) Run(ctx *Globals) error {
10+
if err := ctx.service.DeleteModelById(cmd.Model); err != nil {
11+
return err
12+
}
13+
return ModelsCmd{}.Run(ctx)
14+
}

cmd/whisper/download.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package main
22

33
import (
44
"log"
5+
"time"
56

67
// Packages
78
"github.com/djthorpe/go-tablewriter"
@@ -12,8 +13,13 @@ type DownloadCmd struct {
1213
}
1314

1415
func (cmd *DownloadCmd) Run(ctx *Globals) error {
16+
t := time.Now()
1517
model, err := ctx.service.DownloadModel(ctx.ctx, cmd.Model, func(curBytes, totalBytes uint64) {
16-
log.Printf("Downloaded %d of %d bytes", curBytes, totalBytes)
18+
if time.Since(t) > time.Second {
19+
pct := float64(curBytes) / float64(totalBytes) * 100
20+
log.Printf("Downloaded %.0f%%", pct)
21+
t = time.Now()
22+
}
1723
})
1824
if err != nil {
1925
return err

cmd/whisper/main.go

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package main
22

33
import (
44
"context"
5+
"log"
56
"os"
67
"path/filepath"
78
"syscall"
@@ -26,9 +27,12 @@ type Globals struct {
2627

2728
type CLI struct {
2829
Globals
29-
Models ModelsCmd `cmd:"models" help:"List models"`
30-
Download DownloadCmd `cmd:"download" help:"Download a model"`
31-
Server ServerCmd `cmd:"server" help:"Run the whisper server"`
30+
Transcribe TranscribeCmd `cmd:"transcribe" help:"Transcribe from file"`
31+
Models ModelsCmd `cmd:"models" help:"List models"`
32+
Download DownloadCmd `cmd:"download" help:"Download a model"`
33+
Delete DeleteCmd `cmd:"delete" help:"Delete a model"`
34+
Server ServerCmd `cmd:"server" help:"Run the whisper service"`
35+
Version VersionCmd `cmd:"version" help:"Print version information"`
3236
}
3337

3438
func main() {
@@ -53,7 +57,11 @@ func main() {
5357
)
5458

5559
// Create a whisper server - set options
56-
opts := []whisper.Opt{}
60+
opts := []whisper.Opt{
61+
whisper.OptLog(func(line string) {
62+
log.Println(line)
63+
}),
64+
}
5765
if cli.Globals.Debug {
5866
opts = append(opts, whisper.OptDebug())
5967
}

cmd/whisper/models.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,19 @@
11
package main
22

33
import (
4+
"errors"
5+
46
// Packages
57
"github.com/djthorpe/go-tablewriter"
68
)
79

810
type ModelsCmd struct{}
911

10-
func (*ModelsCmd) Run(ctx *Globals) error {
11-
return ctx.writer.Write(ctx.service.ListModels(), tablewriter.OptHeader())
12+
func (ModelsCmd) Run(ctx *Globals) error {
13+
models := ctx.service.ListModels()
14+
if len(models) == 0 {
15+
return errors.New("no models found")
16+
} else {
17+
return ctx.writer.Write(ctx.service.ListModels(), tablewriter.OptHeader())
18+
}
1219
}

cmd/whisper/server.go

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package main
22

33
import (
44
"log"
5-
"net/http"
65

76
// Packages
87
"github.com/mutablelogic/go-server/pkg/httpserver"
@@ -15,17 +14,11 @@ type ServerCmd struct {
1514
}
1615

1716
func (cmd *ServerCmd) Run(ctx *Globals) error {
18-
// Create a mux for serving requests, then register the endpoints with the mux
19-
mux := http.NewServeMux()
20-
21-
// Register the endpoints
22-
api.RegisterEndpoints(cmd.Endpoint, mux, ctx.service)
23-
2417
// Create a new HTTP server
25-
log.Println("List address", cmd.Listen)
18+
log.Println("Listen address", cmd.Listen)
2619
server, err := httpserver.Config{
2720
Listen: cmd.Listen,
28-
Router: mux,
21+
Router: api.RegisterEndpoints(cmd.Endpoint, ctx.service, nil),
2922
}.New()
3023
if err != nil {
3124
return err

cmd/whisper/transcribe.go

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
package main
2+
3+
import (
4+
"os"
5+
"time"
6+
7+
// Packages
8+
9+
whisper "github.com/mutablelogic/go-whisper"
10+
"github.com/mutablelogic/go-whisper/pkg/whisper/schema"
11+
segmenter "github.com/mutablelogic/go-whisper/pkg/whisper/segmenter"
12+
task "github.com/mutablelogic/go-whisper/pkg/whisper/task"
13+
14+
// Namespace imports
15+
. "github.com/djthorpe/go-errors"
16+
)
17+
18+
type TranscribeCmd struct {
19+
Model string `arg:"" help:"Model to use"`
20+
Path string `arg:"" help:"Path to audio file"`
21+
Language string `flag:"language" help:"Language to transcribe"`
22+
Format string `flag:"format" help:"Output format" default:"text" enum:"text,srt,vtt,json"`
23+
}
24+
25+
func (cmd *TranscribeCmd) Run(ctx *Globals) error {
26+
// Get the model
27+
model := ctx.service.GetModelById(cmd.Model)
28+
if model == nil {
29+
return ErrNotFound.With(cmd.Model)
30+
}
31+
32+
// Open the audio file
33+
f, err := os.Open(cmd.Path)
34+
if err != nil {
35+
return err
36+
}
37+
defer f.Close()
38+
39+
// Create a segmenter - read segments based on requested segment size
40+
segmenter, err := segmenter.New(f, 0, whisper.SampleRate)
41+
if err != nil {
42+
return err
43+
}
44+
defer segmenter.Close()
45+
46+
// Perform the transcription
47+
return ctx.service.WithModel(model, func(taskctx *task.Context) error {
48+
// Transcribe
49+
taskctx.SetTranslate(false)
50+
taskctx.SetDiarize(false)
51+
52+
// Set language
53+
if cmd.Language != "" {
54+
if err := taskctx.SetLanguage(cmd.Language); err != nil {
55+
return err
56+
}
57+
}
58+
59+
// Read samples and transcribe them
60+
if err := segmenter.Decode(ctx.ctx, func(ts time.Duration, buf []float32) error {
61+
// Perform the transcription, return any errors
62+
return taskctx.Transcribe(ctx.ctx, ts, buf, func(segment *schema.Segment) {
63+
ctx.writer.Write(segment)
64+
})
65+
}); err != nil {
66+
return err
67+
}
68+
69+
return nil
70+
})
71+
}

cmd/whisper/version.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
package main
2+
3+
import (
4+
"runtime"
5+
6+
// Packages
7+
"github.com/mutablelogic/go-whisper/pkg/version"
8+
)
9+
10+
type VersionCmd struct{}
11+
12+
func (cmd *VersionCmd) Run(ctx *Globals) error {
13+
type kv struct {
14+
Key string `json:"name"`
15+
Value string `json:"value" writer:",width:60"`
16+
}
17+
var metadata = []kv{}
18+
if version.GitSource != "" {
19+
metadata = append(metadata, kv{"source", version.GitSource})
20+
}
21+
if version.GitBranch != "" {
22+
metadata = append(metadata, kv{"branch", version.GitBranch})
23+
}
24+
if version.GitTag != "" {
25+
metadata = append(metadata, kv{"tag", version.GitTag})
26+
}
27+
if version.GitHash != "" {
28+
metadata = append(metadata, kv{"hash", version.GitHash})
29+
}
30+
if version.GoBuildTime != "" {
31+
metadata = append(metadata, kv{"build time", version.GoBuildTime})
32+
}
33+
metadata = append(metadata, kv{"go version", runtime.Version()})
34+
metadata = append(metadata, kv{"os", runtime.GOOS + "/" + runtime.GOARCH})
35+
36+
return ctx.writer.Write(metadata)
37+
}

0 commit comments

Comments
 (0)