Skip to content

Commit 3591ac2

Browse files
committed
Merge remote-tracking branch 'ggerganov/master' into fix_decoding
* ggerganov/master: (73 commits) ci : disable failing CUDA and Java builds readme : fix references to download-ggml-model.sh (ggml-org#2427) make : remove "talk" target until updated ggml : add ggml-cpu-impl.h (skip) (#0) sync : ggml talk-llama : sync llama.cpp ggml : add AVX512DQ requirement for AVX512 builds (llama/9622) log : add CONT level for continuing previous log entry (llama/9610) threads: fix msvc build without openmp (llama/9615) cuda: add q8_0->f32 cpy operation (llama/9571) threads: improve ggml_barrier scaling with large number of threads (llama/9598) ggml : AVX512 gemm for Q4_0_8_8 (llama/9532) metal : use F32 prec for K*Q in vec FA (llama/9595) Revert "[SYCL] fallback mmvq (ggml/9088)" (llama/9579) musa: enable building fat binaries, enable unified memory, and disable Flash Attention on QY1 (MTT S80) (llama/9526) Fix merge error in #9454 (llama/9589) CUDA: enable Gemma FA for HIP/Pascal (llama/9581) RWKV v6: RWKV_WKV op CUDA implementation (llama/9454) ggml-alloc : fix list of allocated tensors with GGML_ALLOCATOR_DEBUG (llama/9573) Update CUDA graph on scale change plus clear nodes/params (llama/9550) ...
2 parents 6c089cd + 69339af commit 3591ac2

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

88 files changed

+10700
-2616
lines changed

.devops/cublas.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ FROM ${BASE_CUDA_DEV_CONTAINER} as build
1212
ARG CUDA_DOCKER_ARCH=all
1313

1414
RUN apt-get update && \
15-
apt-get install -y build-essential git cmake
15+
apt-get install -y build-essential git cmake libsdl2-dev
1616

1717
WORKDIR /app
1818

.devops/main-cuda.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
1717
ENV GGML_CUDA=1
1818

1919
RUN apt-get update && \
20-
apt-get install -y build-essential \
20+
apt-get install -y build-essential libsdl2-dev \
2121
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
2222

2323
# Ref: https://stackoverflow.com/a/53464012

.devops/main.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ FROM ubuntu:22.04 AS runtime
1212
WORKDIR /app
1313

1414
RUN apt-get update && \
15-
apt-get install -y curl ffmpeg \
15+
apt-get install -y curl ffmpeg libsdl2-dev \
1616
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
1717

1818
COPY --from=build /app /app

.github/workflows/bindings-go.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@ jobs:
1313
ubuntu-latest:
1414
runs-on: ubuntu-latest
1515
steps:
16-
- uses: actions/setup-go@v3
16+
- uses: actions/setup-go@v5
1717
with:
18-
go-version: '^1.19'
19-
- uses: actions/checkout@v1
18+
go-version: '^1.23'
19+
- uses: actions/checkout@v4
2020
- run: |
2121
cd bindings/go
2222
make test

.github/workflows/build.yml

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -586,30 +586,31 @@ jobs:
586586
cd whisper/examples/whisper.android
587587
./gradlew assembleRelease --no-daemon -PGGML_HOME=$PATH_TO_GGML
588588
589-
android_java:
590-
runs-on: ubuntu-latest
591-
592-
steps:
593-
- name: Clone
594-
uses: actions/checkout@v4
595-
596-
- name: set up JDK 11
597-
uses: actions/setup-java@v4
598-
with:
599-
java-version: '11'
600-
distribution: 'temurin'
601-
cache: gradle
602-
603-
- name: Setup Android SDK
604-
uses: android-actions/setup-android@v3
605-
with:
606-
cmdline-tools-version: 9.0
607-
608-
- name: Build
609-
run: |
610-
cd examples/whisper.android.java
611-
chmod +x ./gradlew
612-
./gradlew assembleRelease
589+
# TODO: disable because of following fail: https://github.com/ggerganov/whisper.cpp/actions/runs/11019444420/job/30627193602
590+
# android_java:
591+
# runs-on: ubuntu-latest
592+
#
593+
# steps:
594+
# - name: Clone
595+
# uses: actions/checkout@v4
596+
#
597+
# - name: set up JDK 11
598+
# uses: actions/setup-java@v4
599+
# with:
600+
# java-version: '11'
601+
# distribution: 'temurin'
602+
# cache: gradle
603+
#
604+
# - name: Setup Android SDK
605+
# uses: android-actions/setup-android@v3
606+
# with:
607+
# cmdline-tools-version: 9.0
608+
#
609+
# - name: Build
610+
# run: |
611+
# cd examples/whisper.android.java
612+
# chmod +x ./gradlew
613+
# ./gradlew assembleRelease
613614

614615
# TODO: disabled because of following fail: https://github.com/ggerganov/whisper.cpp/actions/runs/9686220096/job/26735899598
615616
# java:

.github/workflows/docker.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@ jobs:
1818
matrix:
1919
config:
2020
- { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64,linux/arm64" }
21-
- { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
21+
#TODO: the cuda image keeps failing - disable for now
22+
# https://github.com/ggerganov/whisper.cpp/actions/runs/11019444428/job/30602020339
23+
#- { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
2224

2325
steps:
2426
- name: Check out the repo

Makefile

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,8 @@ else
141141
command \
142142
stream \
143143
lsp \
144-
talk \
145144
talk-llama
145+
# talk (TODO: disabled)
146146
endif
147147

148148
default: $(BUILD_TARGETS)
@@ -1080,10 +1080,12 @@ lsp: examples/lsp/lsp.cpp \
10801080
$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE, $<)
10811081
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
10821082

1083-
talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp \
1084-
$(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
1085-
$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE, $<)
1086-
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
1083+
# TODO: disabled until update
1084+
# https://github.com/ggerganov/whisper.cpp/issues/1818
1085+
#talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp \
1086+
# $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
1087+
# $(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE, $<)
1088+
# $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
10871089

10881090
talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp examples/talk-llama/llama-vocab.cpp examples/talk-llama/llama-grammar.cpp examples/talk-llama/llama-sampling.cpp examples/talk-llama/unicode.cpp examples/talk-llama/unicode-data.cpp \
10891091
$(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)

README.md

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisp
2121
- Support for CPU-only inference
2222
- [Efficient GPU support for NVIDIA](https://github.com/ggerganov/whisper.cpp#nvidia-gpu-support-via-cublas)
2323
- [OpenVINO Support](https://github.com/ggerganov/whisper.cpp#openvino-support)
24+
- [Ascend NPU Support](https://github.com/ggerganov/whisper.cpp#ascend-npu-support)
2425
- [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/include/whisper.h)
2526

2627
Supported platforms:
@@ -74,7 +75,7 @@ git clone https://github.com/ggerganov/whisper.cpp.git
7475
Then, download one of the Whisper [models](models/README.md) converted in [`ggml` format](#ggml-format). For example:
7576

7677
```bash
77-
bash ./models/download-ggml-model.sh base.en
78+
sh ./models/download-ggml-model.sh base.en
7879
```
7980

8081
Now build the [main](examples/main) example and transcribe an audio file like this:
@@ -145,7 +146,7 @@ options:
145146
-ng, --no-gpu [false ] disable GPU
146147
147148
148-
bash ./models/download-ggml-model.sh base.en
149+
sh ./models/download-ggml-model.sh base.en
149150
Downloading ggml model base.en ...
150151
ggml-base.en.bin 100%[========================>] 141.11M 6.34MB/s in 24s
151152
Done! Model 'base.en' saved in 'models/ggml-base.en.bin'
@@ -448,6 +449,39 @@ cmake -DWHISPER_MKL=ON ..
448449
WHISPER_MKL=1 make -j
449450
```
450451

452+
## Ascend NPU support
453+
454+
Ascend NPU provides inference acceleration via [`CANN`](https://www.hiascend.com/en/software/cann) and AI cores.
455+
456+
First, check if your Ascend NPU device is supported:
457+
458+
**Verified devices**
459+
| Ascend NPU | Status |
460+
|:-----------------------------:|:-------:|
461+
| Atlas 300T A2 | Support |
462+
463+
Then, make sure you have installed [`CANN toolkit`](https://www.hiascend.com/en/software/cann/community). The latest version of CANN is recommended.
464+
465+
Now build `whisper.cpp` with CANN support:
466+
467+
```
468+
mkdir build
469+
cd build
470+
cmake .. -D GGML_CANN=on
471+
make -j
472+
```
473+
474+
Run the inference examples as usual, for example:
475+
476+
```
477+
./build/bin/main -f samples/jfk.wav -m models/ggml-base.en.bin -t 8
478+
```
479+
480+
*Notes:*
481+
482+
- If you have trouble with your Ascend NPU device, please create an issue with the **[CANN]** prefix/tag.
483+
- If you run successfully with your Ascend NPU device, please help update the table `Verified devices`.
484+
451485
## Docker
452486

453487
### Prerequisites

bindings/go/Makefile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@ EXAMPLES_DIR := $(wildcard examples/*)
1717
INCLUDE_PATH := $(abspath ../../include):$(abspath ../../ggml/include)
1818
LIBRARY_PATH := $(abspath ../..)
1919

20+
ifeq ($(GGML_CUDA),1)
21+
LIBRARY_PATH := $(LIBRARY_PATH):$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib/
22+
BUILD_FLAGS := -ldflags "-extldflags '-lcudart -lcuda -lcublas'"
23+
endif
24+
2025
ifeq ($(UNAME_S),Darwin)
2126
EXT_LDFLAGS := -framework Foundation -framework Metal -framework MetalKit
2227
endif

bindings/go/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,12 @@ This will compile a static `libwhisper.a` in a `build` folder, download a model
6262
make examples
6363
```
6464

65+
To build using cuda support add `GGML_CUDA=1`:
66+
67+
```bash
68+
GGML_CUDA=1 make examples
69+
```
70+
6571
The examples are placed in the `build` directory. Once built, you can download all the models with the following command:
6672

6773
```bash

0 commit comments

Comments
 (0)