Skip to content

Commit c774eec

Browse files
authored
go : improve model download (#2756)
* Updated models download URL
* Updated list of models available
  All of the high efficiency quantized models are rejected when trying to download. They exist on the server. Let's allow them.
* added path prefix for whisper-cli in message to user.
  The message is misleading if this script is called from another script in a different folder. So the message has to be fixed.
* undid download URL change I made earlier. Fixed filepath.Join(urlPath, model) bug.
* Undid download URL change I made earlier.
  Seems that the old URL works but only when provided a model to download. Still doesn't explain why there's a different download URL that also works. Please elucidate in docs.
* Fixed URLForModel Function's bug
  filepath.Join is designed for filesystem paths, and it uses backslashes (\) on Windows. URLs, however, require forward slashes (/), so the use of filepath.Join is inappropriate for constructing URLs. The fmt.Sprintf function ensures that forward slashes are used.
* Fixed URL trailing / double slash bug
  Ensure no double slash by trimming trailing '/' from srcUrl if present
* Fixed bad download URL, missing ggml prefix
  Not sure if that was a bug I introduced but it was trying to download without the prefix.
* Added question before downloading all models. Added download size estimate
  HEAD Requests: Efficiently fetches file sizes without downloading the content. Interactive Workflow: Allows the user to make informed decisions about downloading all models. Safe Defaults: Aborts if the user does not explicitly confirm.
* Fixed Unbuffered channel warning.
  warning in context.go : misuse of unbuffered os.Signal channel as argument to signal. The warning indicates that the unbuffered channel used in signal.Notify in context.go may be misused. In Go, unbuffered channels can cause potential deadlocks if signals are sent faster than they are received.
* Fixed download size calculation, download URL prefix bug, added link to models URL for user.
  The URL formatter was prepending the model name to the formatted model name in the URL
* Added logs and exes to gitignore
* Delete bindings/go/examples/go-model-download/go-model-download.exe
* Delete whisper_build.log
1 parent 5b481a2 commit c774eec

File tree

4 files changed

+136
-26
lines changed

4 files changed

+136
-26
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,5 @@ cmake-build-debug/
5858
.cxx/
5959
.gradle/
6060
local.properties
61+
.log
62+
.exe

bindings/go/examples/go-model-download/context.go

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,23 @@ import (
99
// ContextForSignal returns a context object which is cancelled when a signal
1010
// is received. It returns nil if no signal parameter is provided
1111
func ContextForSignal(signals ...os.Signal) context.Context {
12-
if len(signals) == 0 {
13-
return nil
14-
}
12+
if len(signals) == 0 {
13+
return nil
14+
}
1515

16-
ch := make(chan os.Signal)
17-
ctx, cancel := context.WithCancel(context.Background())
16+
ch := make(chan os.Signal, 1) // Buffered channel with space for 1 signal
17+
ctx, cancel := context.WithCancel(context.Background())
1818

19-
// Send message on channel when signal received
20-
signal.Notify(ch, signals...)
19+
// Send message on channel when signal received
20+
signal.Notify(ch, signals...)
2121

22-
// When any signal received, call cancel
23-
go func() {
24-
<-ch
25-
cancel()
26-
}()
22+
// When any signal is received, call cancel
23+
go func() {
24+
<-ch
25+
cancel()
26+
}()
2727

28-
// Return success
29-
return ctx
28+
// Return success
29+
return ctx
3030
}
31+

bindings/go/examples/go-model-download/main.go

Lines changed: 106 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"net/url"
1010
"os"
1111
"path/filepath"
12+
"strings"
1213
"syscall"
1314
"time"
1415
)
@@ -17,14 +18,27 @@ import (
1718
// CONSTANTS
1819

1920
const (
20-
srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main" // The location of the models
21-
srcExt = ".bin" // Filename extension
22-
bufSize = 1024 * 64 // Size of the buffer used for downloading the model
21+
srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/" // The location of the models
22+
srcExt = ".bin" // Filename extension
23+
bufSize = 1024 * 64 // Size of the buffer used for downloading the model
2324
)
2425

2526
var (
2627
// The models which will be downloaded, if no model is specified as an argument
27-
modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3", "large-v3-turbo"}
28+
modelNames = []string{
29+
"tiny", "tiny-q5_1", "tiny-q8_0",
30+
"tiny.en", "tiny.en-q5_1", "tiny.en-q8_0",
31+
"base", "base-q5_1", "base-q8_0",
32+
"base.en", "base.en-q5_1", "base.en-q8_0",
33+
"small", "small-q5_1", "small-q8_0",
34+
"small.en", "small.en-q5_1", "small.en-q8_0",
35+
"medium", "medium-q5_0", "medium-q8_0",
36+
"medium.en", "medium.en-q5_0", "medium.en-q8_0",
37+
"large-v1",
38+
"large-v2", "large-v2-q5_0", "large-v2-q8_0",
39+
"large-v3", "large-v3-q5_0",
40+
"large-v3-turbo", "large-v3-turbo-q5_0", "large-v3-turbo-q8_0",
41+
}
2842
)
2943

3044
var (
@@ -44,7 +58,25 @@ var (
4458
func main() {
4559
flag.Usage = func() {
4660
name := filepath.Base(flag.CommandLine.Name())
47-
fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [options] <model>\n\n", name)
61+
fmt.Fprintf(flag.CommandLine.Output(), `
62+
Usage: %s [options] [<model>...]
63+
64+
Options:
65+
-out string Specify the output folder where models will be saved.
66+
Default: Current working directory.
67+
-timeout duration Set the maximum duration for downloading a model.
68+
Example: 10m, 1h (default: 30m0s).
69+
-quiet Suppress all output except errors.
70+
71+
Examples:
72+
1. Download a specific model:
73+
%s -out ./models tiny-q8_0
74+
75+
2. Download all models:
76+
%s -out ./models
77+
78+
`, name, name, name)
79+
4880
flag.PrintDefaults()
4981
}
5082
flag.Parse()
@@ -114,23 +146,87 @@ func GetOut() (string, error) {
114146
// GetModels returns the list of models to download
115147
func GetModels() []string {
116148
if flag.NArg() == 0 {
117-
return modelNames
118-
} else {
119-
return flag.Args()
149+
fmt.Println("No model specified.")
150+
fmt.Println("Preparing to download all models...")
151+
152+
// Calculate total download size
153+
fmt.Println("Calculating total download size...")
154+
totalSize, err := CalculateTotalDownloadSize(modelNames)
155+
if err != nil {
156+
fmt.Println("Error calculating download sizes:", err)
157+
os.Exit(1)
158+
}
159+
160+
fmt.Println("View available models: https://huggingface.co/ggerganov/whisper.cpp/tree/main")
161+
fmt.Printf("Total download size: %.2f GB\n", float64(totalSize)/(1024*1024*1024))
162+
fmt.Println("Would you like to download all models? (y/N)")
163+
164+
// Prompt for user input
165+
var response string
166+
fmt.Scanln(&response)
167+
if response != "y" && response != "Y" {
168+
fmt.Println("Aborting. Specify a model to download.")
169+
os.Exit(0)
170+
}
171+
172+
return modelNames // Return all models if confirmed
120173
}
174+
return flag.Args() // Return specific models if arguments are provided
175+
}
176+
177+
func CalculateTotalDownloadSize(models []string) (int64, error) {
178+
var totalSize int64
179+
client := http.Client{}
180+
181+
for _, model := range models {
182+
modelURL, err := URLForModel(model)
183+
if err != nil {
184+
return 0, err
185+
}
186+
187+
// Issue a HEAD request to get the file size
188+
req, err := http.NewRequest("HEAD", modelURL, nil)
189+
if err != nil {
190+
return 0, err
191+
}
192+
193+
resp, err := client.Do(req)
194+
if err != nil {
195+
return 0, err
196+
}
197+
resp.Body.Close()
198+
199+
if resp.StatusCode != http.StatusOK {
200+
fmt.Printf("Warning: Unable to fetch size for %s (HTTP %d)\n", model, resp.StatusCode)
201+
continue
202+
}
203+
204+
size := resp.ContentLength
205+
totalSize += size
206+
}
207+
return totalSize, nil
121208
}
122209

123210
// URLForModel returns the URL for the given model on huggingface.co
124211
func URLForModel(model string) (string, error) {
212+
// Ensure "ggml-" prefix is added only once
213+
if !strings.HasPrefix(model, "ggml-") {
214+
model = "ggml-" + model
215+
}
216+
217+
// Ensure ".bin" extension is added only once
125218
if filepath.Ext(model) != srcExt {
126219
model += srcExt
127220
}
221+
222+
// Parse the base URL
128223
url, err := url.Parse(srcUrl)
129224
if err != nil {
130225
return "", err
131-
} else {
132-
url.Path = filepath.Join(url.Path, model)
133226
}
227+
228+
// Ensure no trailing slash in the base URL
229+
url.Path = fmt.Sprintf("%s/%s", strings.TrimSuffix(url.Path, "/"), model)
134230
return url.String(), nil
135231
}
136232

models/download-ggml-model.cmd

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,18 @@ popd
88
set argc=0
99
for %%x in (%*) do set /A argc+=1
1010

11-
set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo
11+
set models=tiny tiny-q5_1 tiny-q8_0 ^
12+
tiny.en tiny.en-q5_1 tiny.en-q8_0 ^
13+
base base-q5_1 base-q8_0 ^
14+
base.en base.en-q5_1 base.en-q8_0 ^
15+
small small-q5_1 small-q8_0 ^
16+
small.en small.en-q5_1 small.en-q8_0 ^
17+
medium medium-q5_0 medium-q8_0 ^
18+
medium.en medium.en-q5_0 medium.en-q8_0 ^
19+
large-v1 ^
20+
large-v2 large-v2-q5_0 large-v2-q8_0 ^
21+
large-v3 large-v3-q5_0 ^
22+
large-v3-turbo large-v3-turbo-q5_0 large-v3-turbo-q8_0
1223

1324
if %argc% neq 1 (
1425
echo.
@@ -50,7 +61,7 @@ if %ERRORLEVEL% neq 0 (
5061

5162
echo Done! Model %model% saved in %root_path%\models\ggml-%model%.bin
5263
echo You can now use it like this:
53-
echo build\bin\Release\whisper-cli.exe -m %root_path%\models\ggml-%model%.bin -f %root_path%\samples\jfk.wav
64+
echo %~dp0build\bin\Release\whisper-cli.exe -m %root_path%\models\ggml-%model%.bin -f %root_path%\samples\jfk.wav
5465

5566
goto :eof
5667

0 commit comments

Comments
 (0)