Skip to content

Commit 21c0217

Browse files
authored
ggml: Add initial WebGPU backend (#14521)
* Minimal setup of webgpu backend with dawn. Just prints out the adapter and segfaults * Initialize webgpu device * Making progress on setting up the backend * Finish more boilerplate/utility functions * Organize file and work on alloc buffer * Add webgpu_context to prepare for actually running some shaders * Work on memset and add shader loading * Work on memset polyfill * Implement set_tensor as webgpu WriteBuffer, remove host_buffer stubs since webgpu doesn't support it * Implement get_tensor and buffer_clear * Finish rest of setup * Start work on compute graph * Basic mat mul working * Work on emscripten build * Basic WebGPU backend instructions * Use EMSCRIPTEN flag * Work on passing ci, implement 4d tensor multiplication * Pass thread safety test * Implement permuting for mul_mat and cpy * minor cleanups * Address feedback * Remove division by type size in cpy op * Fix formatting and add github action workflows for vulkan and metal (m-series) webgpu backends * Fix name * Fix macos dawn prefix path
1 parent b0f0ecc commit 21c0217

File tree

14 files changed

+1337
-0
lines changed

14 files changed

+1337
-0
lines changed

.github/workflows/build.yml

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,69 @@ jobs:
135135
cd build
136136
ctest -L main --verbose --timeout 900
137137
138+
macOS-latest-cmake-arm64-webgpu:
  # Builds ggml with the WebGPU backend (GGML_WEBGPU=ON, Metal and BLAS
  # backends disabled) against a prebuilt Dawn release artifact, then runs the
  # `main` ctest label on the macos-14 runner.
  runs-on: macos-14

  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v4

    - name: ccache
      uses: hendrikmuhs/ccache-action@v1.2.16
      with:
        key: macOS-latest-cmake-arm64-webgpu
        evict-old-files: 1d

    - name: Dependencies
      id: depends
      continue-on-error: true
      run: |
        brew update
        brew install curl

    - name: Dawn Dependency
      id: dawn-depends
      env:
        # Hand the token to the script through the environment rather than
        # expanding the secret into the script text itself.
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        # --fail makes HTTP errors abort the step instead of feeding an error
        # body to jq. per_page=100 widens the search beyond the default page.
        ARTIFACTS_JSON=$(curl -s -L --fail \
          -H "Accept: application/vnd.github+json" \
          -H "Authorization: Bearer ${GH_TOKEN}" \
          -H "X-GitHub-Api-Version: 2022-11-28" \
          "https://api.github.com/repos/google/dawn/actions/artifacts?per_page=100")
        echo "Finding latest macos-latest-Release artifact..."
        # Newest first; skip expired artifacts, whose archives can no longer
        # be downloaded.
        DOWNLOAD_URL=$(echo "$ARTIFACTS_JSON" | jq -r '.artifacts
          | sort_by(.created_at)
          | reverse
          | map(select((.expired | not) and (.name | test("macos-latest-Release$"))))
          | .[0].archive_download_url')
        if [ "$DOWNLOAD_URL" = "null" ] || [ -z "$DOWNLOAD_URL" ]; then
          echo "No suitable Dawn artifact found!"
          exit 1
        fi
        echo "Downloading from: $DOWNLOAD_URL"
        curl -L --fail \
          -H "Accept: application/vnd.github+json" \
          -H "Authorization: Bearer ${GH_TOKEN}" \
          -o artifact.zip "$DOWNLOAD_URL"
        unzip artifact.zip
        mkdir dawn
        tar_file=$(find . -name '*.tar.gz' | head -n 1)
        if [ -z "$tar_file" ]; then
          echo "Dawn artifact does not contain a .tar.gz archive!"
          exit 1
        fi
        echo "Extracting: $tar_file"
        tar -xvf "$tar_file" -C dawn --strip-components=1

    - name: Build
      id: cmake_build
      run: |
        # Absolute prefix so the Dawn lookup does not depend on the directory
        # CMake happens to resolve relative paths against.
        export CMAKE_PREFIX_PATH="$GITHUB_WORKSPACE/dawn"
        cmake -B build -DGGML_WEBGPU=ON -DGGML_METAL=OFF -DGGML_BLAS=OFF
        cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

    - name: Test
      id: cmake_test
      run: |
        cd build
        ctest -L main --verbose --timeout 900
200+
138201
ubuntu-cpu-cmake:
139202
strategy:
140203
matrix:
@@ -344,6 +407,72 @@ jobs:
344407
# This is using llvmpipe and runs slower than other backends
345408
ctest -L main --verbose --timeout 4200
346409
410+
ubuntu-22-cmake-webgpu:
  # Builds ggml with the WebGPU backend (GGML_WEBGPU=ON) against a prebuilt
  # Dawn release artifact, then runs the `main` ctest label on ubuntu-22.04.
  runs-on: ubuntu-22.04

  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v4

    - name: ccache
      uses: hendrikmuhs/ccache-action@v1.2.16
      with:
        key: ubuntu-22-cmake-webgpu
        evict-old-files: 1d

    - name: Vulkan SDK Dependencies
      id: vulkan-depends
      run: |
        wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
        sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
        sudo apt-get update -y
        sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libcurl4-openssl-dev

    - name: Dawn Dependency
      id: dawn-depends
      env:
        # Hand the token to the script through the environment rather than
        # expanding the secret into the script text itself.
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        sudo apt-get install -y libxrandr-dev libxinerama-dev libxcursor-dev mesa-common-dev libx11-xcb-dev libxi-dev
        # --fail makes HTTP errors abort the step instead of feeding an error
        # body to jq. per_page=100 widens the search beyond the default page.
        ARTIFACTS_JSON=$(curl -s -L --fail \
          -H "Accept: application/vnd.github+json" \
          -H "Authorization: Bearer ${GH_TOKEN}" \
          -H "X-GitHub-Api-Version: 2022-11-28" \
          "https://api.github.com/repos/google/dawn/actions/artifacts?per_page=100")
        echo "Finding latest ubuntu-latest-Release artifact..."
        # Newest first; skip expired artifacts, whose archives can no longer
        # be downloaded.
        DOWNLOAD_URL=$(echo "$ARTIFACTS_JSON" | jq -r '.artifacts
          | sort_by(.created_at)
          | reverse
          | map(select((.expired | not) and (.name | test("ubuntu-latest-Release$"))))
          | .[0].archive_download_url')
        if [ "$DOWNLOAD_URL" = "null" ] || [ -z "$DOWNLOAD_URL" ]; then
          echo "No suitable Dawn artifact found!"
          exit 1
        fi
        echo "Downloading from: $DOWNLOAD_URL"
        curl -L --fail \
          -H "Accept: application/vnd.github+json" \
          -H "Authorization: Bearer ${GH_TOKEN}" \
          -o artifact.zip "$DOWNLOAD_URL"
        unzip artifact.zip
        mkdir dawn
        tar_file=$(find . -name '*.tar.gz' | head -n 1)
        if [ -z "$tar_file" ]; then
          echo "Dawn artifact does not contain a .tar.gz archive!"
          exit 1
        fi
        echo "Extracting: $tar_file"
        tar -xvf "$tar_file" -C dawn --strip-components=1

    - name: Build
      id: cmake_build
      run: |
        # Absolute path so the Dawn package lookup does not depend on the
        # directory CMake happens to resolve relative paths against.
        export Dawn_DIR="$GITHUB_WORKSPACE/dawn/lib64/cmake/Dawn"
        cmake -B build -DGGML_WEBGPU=ON
        cmake --build build --config Release -j $(nproc)

    - name: Test
      id: cmake_test
      run: |
        cd build
        # This is using llvmpipe and runs slower than other backends
        ctest -L main --verbose --timeout 3600
475+
347476
ubuntu-22-cmake-hip:
348477
runs-on: ubuntu-22.04
349478
container: rocm/dev-ubuntu-22.04:6.0.2

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,8 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
269269
| [Vulkan](docs/build.md#vulkan) | GPU |
270270
| [CANN](docs/build.md#cann) | Ascend NPU |
271271
| [OpenCL](docs/backend/OPENCL.md) | Adreno GPU |
272+
| [WebGPU [In Progress]](docs/build.md#webgpu) | All |
273+
272274
| [RPC](https://github.com/ggml-org/llama.cpp/tree/master/tools/rpc) | All |
273275

274276
## Obtaining and quantizing models

ci/run.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
# # with VULKAN support
1717
# GG_BUILD_VULKAN=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
1818
#
19+
# # with WebGPU support
20+
# GG_BUILD_WEBGPU=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
21+
#
1922
# # with MUSA support
2023
# GG_BUILD_MUSA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
2124
#
@@ -81,6 +84,10 @@ if [ ! -z ${GG_BUILD_VULKAN} ]; then
8184
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=1"
8285
fi
8386

87+
if [ ! -z ${GG_BUILD_WEBGPU} ]; then
88+
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_WEBGPU=1"
89+
fi
90+
8491
if [ ! -z ${GG_BUILD_MUSA} ]; then
8592
# Use qy1 by default (MTT S80)
8693
MUSA_ARCH=${MUSA_ARCH:-21}

docs/build.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,23 @@ ninja
557557

558558
To read documentation for how to build on Android, [click here](./android.md)
559559

560+
## WebGPU [In Progress]
561+
562+
The WebGPU backend relies on [Dawn](https://dawn.googlesource.com/dawn). Follow the instructions [here](https://dawn.googlesource.com/dawn/+/refs/heads/main/docs/quickstart-cmake.md) to install Dawn locally so that llama.cpp can find it using CMake. The current implementation is up-to-date with Dawn commit `bed1a61`.
563+
564+
In the llama.cpp directory, build with CMake:
565+
566+
```
567+
cmake -B build -DGGML_WEBGPU=ON
568+
cmake --build build --config Release
569+
```
570+
571+
### Browser Support
572+
573+
WebGPU allows cross-platform access to the GPU from supported browsers. We utilize [Emscripten](https://emscripten.org/) to compile ggml's WebGPU backend to WebAssembly. Emscripten does not officially support WebGPU bindings yet, but Dawn currently maintains its own WebGPU bindings called emdawnwebgpu.
574+
575+
Follow the instructions [here](https://dawn.googlesource.com/dawn/+/refs/heads/main/src/emdawnwebgpu/) to download or build the emdawnwebgpu package (note that it might be safer to build the emdawnwebgpu package locally, so that it stays in sync with the version of Dawn you have installed above). When building using CMake, the path to the emdawnwebgpu port file needs to be set with the flag `EMDAWNWEBGPU_DIR`.
576+
560577
## IBM Z & LinuxONE
561578

562579
To read documentation for how to build on IBM Z & LinuxONE, [click here](./build-s390x.md)

ggml/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,8 @@ option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug ou
181181
option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF)
182182
option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF)
183183
option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
184+
option(GGML_WEBGPU "ggml: use WebGPU" OFF)
185+
option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF)
184186
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
185187
option(GGML_METAL_USE_BF16 "ggml: use bfloat if available" OFF)
186188
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
@@ -270,6 +272,7 @@ set(GGML_PUBLIC_HEADERS
270272
include/ggml-rpc.h
271273
include/ggml-sycl.h
272274
include/ggml-vulkan.h
275+
include/ggml-webgpu.h
273276
include/gguf.h)
274277

275278
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")

ggml/include/ggml-webgpu.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// Public C header for the ggml WebGPU backend: declares the backend name
// macro and the two entry points below, wrapped in extern "C" for C++ callers.
#pragma once
2+
3+
#include "ggml.h"
4+
#include "ggml-backend.h"
5+
6+
#ifdef __cplusplus
7+
extern "C" {
8+
#endif
9+
10+
// String constant "WebGPU"; presumably the backend's display name — confirm
// against its use in ggml-webgpu.cpp.
#define GGML_WEBGPU_NAME "WebGPU"
11+
12+
// Needed for examples in ggml
// Takes no arguments and returns a ggml_backend_t handle.
13+
GGML_BACKEND_API ggml_backend_t ggml_backend_webgpu_init(void);
14+
15+
// Takes no arguments and returns a ggml_backend_reg_t. ggml-backend-reg.cpp
// passes the result to register_backend() when GGML_USE_WEBGPU is defined.
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_webgpu_reg(void);
16+
17+
#ifdef __cplusplus
18+
}
19+
#endif

ggml/src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,7 @@ ggml_add_backend(MUSA)
370370
ggml_add_backend(RPC)
371371
ggml_add_backend(SYCL)
372372
ggml_add_backend(Vulkan)
373+
ggml_add_backend(WebGPU)
373374
ggml_add_backend(OpenCL)
374375

375376
foreach (target ggml-base ggml)

ggml/src/ggml-backend-reg.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@
4545
#include "ggml-vulkan.h"
4646
#endif
4747

48+
#ifdef GGML_USE_WEBGPU
49+
#include "ggml-webgpu.h"
50+
#endif
51+
4852
#ifdef GGML_USE_OPENCL
4953
#include "ggml-opencl.h"
5054
#endif
@@ -173,6 +177,9 @@ struct ggml_backend_registry {
173177
#ifdef GGML_USE_VULKAN
174178
register_backend(ggml_backend_vk_reg());
175179
#endif
180+
#ifdef GGML_USE_WEBGPU
181+
register_backend(ggml_backend_webgpu_reg());
182+
#endif
176183
#ifdef GGML_USE_OPENCL
177184
register_backend(ggml_backend_opencl_reg());
178185
#endif

ggml/src/ggml-webgpu/CMakeLists.txt

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Build rules for the ggml WebGPU backend library (ggml-webgpu).
#
# The WGSL shaders in wgsl-shaders/ are embedded into a generated C++ header by
# embed_wgsl.py. The backend is then built either against Dawn (native builds)
# or against the emdawnwebgpu Emscripten port (EMSCRIPTEN builds).
cmake_minimum_required(VERSION 3.13)

# The shader-embedding step runs a Python script, so only the interpreter
# component is needed.
find_package(Python3 REQUIRED COMPONENTS Interpreter)

# Shader locations
set(SHADER_DIR "${CMAKE_CURRENT_SOURCE_DIR}/wgsl-shaders")
set(SHADER_EMBED_SCRIPT "${SHADER_DIR}/embed_wgsl.py")
set(SHADER_OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated")
set(SHADER_HEADER "${SHADER_OUTPUT_DIR}/ggml-wgsl-shaders.hpp")
file(MAKE_DIRECTORY "${SHADER_OUTPUT_DIR}")

message(STATUS "Shader output dir: ${SHADER_OUTPUT_DIR}")

# Find all WGSL files. CONFIGURE_DEPENDS re-checks the glob at build time so a
# newly added or removed shader triggers a re-configure instead of being
# silently missed by the DEPENDS list below.
file(GLOB WGSL_SHADER_FILES CONFIGURE_DEPENDS "${SHADER_DIR}/*.wgsl")

# Generate the header using a Python script
add_custom_command(
    OUTPUT "${SHADER_HEADER}"
    COMMAND ${CMAKE_COMMAND} -E echo "Embedding WGSL shaders to ggml-wgsl-shaders.hpp"
    COMMAND ${CMAKE_COMMAND} -E make_directory "${SHADER_OUTPUT_DIR}"
    COMMAND ${CMAKE_COMMAND} -E env PYTHONIOENCODING=utf-8
        "${Python3_EXECUTABLE}" "${SHADER_EMBED_SCRIPT}"
            --input "${SHADER_DIR}"
            --output "${SHADER_HEADER}"
    DEPENDS ${WGSL_SHADER_FILES} "${SHADER_EMBED_SCRIPT}"
    VERBATIM
)

add_custom_target(generate_shaders DEPENDS "${SHADER_HEADER}")

ggml_add_backend_library(ggml-webgpu
    ggml-webgpu.cpp
    "${SHADER_HEADER}"
    ../../include/ggml-webgpu.h
)

# Make sure the generated header exists before the backend sources compile.
add_dependencies(ggml-webgpu generate_shaders)

if(EMSCRIPTEN)
    set(EMDAWNWEBGPU_DIR "" CACHE PATH "Path to emdawnwebgpu_pkg")

    # The default is empty, which would otherwise yield the bogus port path
    # "/emdawnwebgpu.port.py" and an obscure compiler error. Fail early instead.
    if("${EMDAWNWEBGPU_DIR}" STREQUAL "")
        message(FATAL_ERROR
            "GGML_WEBGPU with Emscripten requires EMDAWNWEBGPU_DIR to point to the emdawnwebgpu package "
            "(pass -DEMDAWNWEBGPU_DIR=<path>)")
    endif()

    set(emdawnwebgpu_port_file "${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
    if(NOT EXISTS "${emdawnwebgpu_port_file}")
        message(FATAL_ERROR "emdawnwebgpu port file not found: ${emdawnwebgpu_port_file}")
    endif()

    # The port supplies the WebGPU headers at compile time and the bindings at
    # link time, so no separate link library is added on this path.
    target_compile_options(ggml-webgpu PRIVATE "--use-port=${emdawnwebgpu_port_file}")
    target_link_options(ggml-webgpu PRIVATE "--use-port=${emdawnwebgpu_port_file}")
else()
    find_package(Dawn REQUIRED)
    set(DawnWebGPU_TARGET dawn::webgpu_dawn)
    # Link here rather than after the branch so the variable is never expanded
    # while unset (as it is on the Emscripten path).
    target_link_libraries(ggml-webgpu PRIVATE ${DawnWebGPU_TARGET})
endif()

if(GGML_WEBGPU_DEBUG)
    target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_DEBUG=1)
endif()

# Lets ggml-webgpu.cpp include the generated ggml-wgsl-shaders.hpp.
target_include_directories(ggml-webgpu PRIVATE "${SHADER_OUTPUT_DIR}")

0 commit comments

Comments
 (0)