Skip to content

ggml: Add initial WebGPU backend #14521

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 29 commits into from
Jul 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
63eb646
Minimal setup of webgpu backend with dawn. Just prints out the adapte…
reeselevine May 20, 2025
c0a810e
Initialize webgpu device
reeselevine May 20, 2025
e50335c
Making progress on setting up the backend
reeselevine May 21, 2025
b17b164
Finish more boilerplate/utility functions
reeselevine May 21, 2025
e7071d1
Organize file and work on alloc buffer
reeselevine May 21, 2025
c9a53d2
Add webgpu_context to prepare for actually running some shaders
reeselevine May 22, 2025
8b860a2
Merge remote-tracking branch 'upstream/master' into webgpu
reeselevine Jun 4, 2025
9e0c611
Work on memset and add shader loading
reeselevine Jun 5, 2025
520f595
Work on memset polyfill
reeselevine Jun 5, 2025
2d24a8a
Implement set_tensor as webgpu WriteBuffer, remove host_buffer stubs …
reeselevine Jun 9, 2025
d0480ca
Implement get_tensor and buffer_clear
reeselevine Jun 9, 2025
f8a53ee
Finish rest of setup
reeselevine Jun 11, 2025
39d956d
Start work on compute graph
reeselevine Jun 11, 2025
3d92436
Merge remote-tracking branch 'upstream/master' into webgpu
reeselevine Jun 11, 2025
d036f10
Basic mat mul working
reeselevine Jun 12, 2025
b8a2207
Work on emscripten build
reeselevine Jun 17, 2025
c09bfc5
Basic WebGPU backend instructions
reeselevine Jun 17, 2025
aec3483
Merge remote-tracking branch 'upstream/master' into webgpu
reeselevine Jun 17, 2025
daa58e2
Use EMSCRIPTEN flag
reeselevine Jun 17, 2025
1c396a2
Work on passing ci, implement 4d tensor multiplication
reeselevine Jun 23, 2025
ecb945e
Pass thread safety test
reeselevine Jun 25, 2025
0f0543b
Implement permuting for mul_mat and cpy
reeselevine Jul 3, 2025
2eb7626
minor cleanups
reeselevine Jul 3, 2025
949b851
Merge remote-tracking branch 'upstream/master' into webgpu
reeselevine Jul 3, 2025
cbf4b96
Address feedback
reeselevine Jul 6, 2025
e0d8a71
Remove division by type size in cpy op
reeselevine Jul 6, 2025
40dd1f0
Fix formatting and add github action workflows for vulkan and metal (…
reeselevine Jul 15, 2025
8b31513
Fix name
reeselevine Jul 15, 2025
22f8dd4
Fix macos dawn prefix path
reeselevine Jul 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 129 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,69 @@ jobs:
cd build
ctest -L main --verbose --timeout 900

# CI job: build and test the WebGPU backend on Apple Silicon using a
# prebuilt Dawn release artifact fetched from google/dawn's CI.
macOS-latest-cmake-arm64-webgpu:
  runs-on: macos-14

  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v4

    - name: ccache
      uses: hendrikmuhs/ccache-action@v1.2.16
      with:
        key: macOS-latest-cmake-arm64-webgpu
        evict-old-files: 1d

    - name: Dependencies
      id: depends
      continue-on-error: true
      run: |
        brew update
        brew install curl

    # Download the most recent macos Release artifact published by Dawn's CI.
    # NOTE(review): artifacts expire on GitHub; this step fails hard (exit 1)
    # when no matching artifact is found.
    - name: Dawn Dependency
      id: dawn-depends
      run: |
        ARTIFACTS_JSON=$(curl -s -L \
          -H "Accept: application/vnd.github+json" \
          -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
          -H "X-GitHub-Api-Version: 2022-11-28" \
          "https://api.github.com/repos/google/dawn/actions/artifacts")
        echo "Finding latest macos-latest-Release artifact..."
        DOWNLOAD_URL=$(echo "$ARTIFACTS_JSON" | jq -r '.artifacts
          | sort_by(.created_at)
          | reverse
          | map(select(.name | test("macos-latest-Release$")))
          | .[0].archive_download_url')
        if [ "$DOWNLOAD_URL" = "null" ] || [ -z "$DOWNLOAD_URL" ]; then
          echo "No suitable Dawn artifact found!"
          exit 1
        fi
        echo "Downloading from: $DOWNLOAD_URL"
        curl -L \
          -H "Accept: application/vnd.github+json" \
          -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
          -o artifact.zip "$DOWNLOAD_URL"
        unzip artifact.zip
        mkdir dawn
        tar_file=$(find . -name '*.tar.gz' | head -n 1)
        echo "Extracting: $tar_file"
        tar -xvf "$tar_file" -C dawn --strip-components=1

    - name: Build
      id: cmake_build
      run: |
        # Let CMake's find_package(Dawn) locate the extracted package.
        export CMAKE_PREFIX_PATH=dawn
        cmake -B build -DGGML_WEBGPU=ON -DGGML_METAL=OFF -DGGML_BLAS=OFF
        cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

    - name: Test
      id: cmake_test
      run: |
        cd build
        ctest -L main --verbose --timeout 900

ubuntu-cpu-cmake:
strategy:
matrix:
Expand Down Expand Up @@ -344,6 +407,72 @@ jobs:
# This is using llvmpipe and runs slower than other backends
ctest -L main --verbose --timeout 3600

# CI job: build and test the WebGPU backend on Ubuntu 22.04. Dawn runs on
# top of Vulkan here (llvmpipe software rasterizer in CI, hence the longer
# test timeout).
ubuntu-22-cmake-webgpu:
  runs-on: ubuntu-22.04

  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v4

    - name: ccache
      uses: hendrikmuhs/ccache-action@v1.2.16
      with:
        key: ubuntu-22-cmake-webgpu
        evict-old-files: 1d

    - name: Vulkan SDK Dependencies
      id: vulkan-depends
      run: |
        wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
        sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
        sudo apt-get update -y
        sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libcurl4-openssl-dev

    # Download the most recent ubuntu Release artifact published by Dawn's CI.
    # NOTE(review): artifacts expire on GitHub; this step fails hard (exit 1)
    # when no matching artifact is found.
    - name: Dawn Dependency
      id: dawn-depends
      run: |
        sudo apt-get install -y libxrandr-dev libxinerama-dev libxcursor-dev mesa-common-dev libx11-xcb-dev libxi-dev
        ARTIFACTS_JSON=$(curl -s -L \
          -H "Accept: application/vnd.github+json" \
          -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
          -H "X-GitHub-Api-Version: 2022-11-28" \
          "https://api.github.com/repos/google/dawn/actions/artifacts")
        echo "Finding latest ubuntu-latest-Release artifact..."
        DOWNLOAD_URL=$(echo "$ARTIFACTS_JSON" | jq -r '.artifacts
          | sort_by(.created_at)
          | reverse
          | map(select(.name | test("ubuntu-latest-Release$")))
          | .[0].archive_download_url')
        if [ "$DOWNLOAD_URL" = "null" ] || [ -z "$DOWNLOAD_URL" ]; then
          echo "No suitable Dawn artifact found!"
          exit 1
        fi
        echo "Downloading from: $DOWNLOAD_URL"
        curl -L \
          -H "Accept: application/vnd.github+json" \
          -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
          -o artifact.zip "$DOWNLOAD_URL"
        unzip artifact.zip
        mkdir dawn
        tar_file=$(find . -name '*.tar.gz' | head -n 1)
        echo "Extracting: $tar_file"
        tar -xvf "$tar_file" -C dawn --strip-components=1

    - name: Build
      id: cmake_build
      run: |
        # Point find_package(Dawn) directly at the extracted CMake package.
        export Dawn_DIR=dawn/lib64/cmake/Dawn
        cmake -B build -DGGML_WEBGPU=ON
        cmake --build build --config Release -j $(nproc)

    - name: Test
      id: cmake_test
      run: |
        cd build
        # This is using llvmpipe and runs slower than other backends
        ctest -L main --verbose --timeout 3600

ubuntu-22-cmake-hip:
runs-on: ubuntu-22.04
container: rocm/dev-ubuntu-22.04:6.0.2
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,8 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
| [Vulkan](docs/build.md#vulkan) | GPU |
| [CANN](docs/build.md#cann) | Ascend NPU |
| [OpenCL](docs/backend/OPENCL.md) | Adreno GPU |
| [WebGPU [In Progress]](docs/build.md#webgpu) | All |

| [RPC](https://github.com/ggml-org/llama.cpp/tree/master/tools/rpc) | All |

## Obtaining and quantizing models
Expand Down
7 changes: 7 additions & 0 deletions ci/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
# # with VULKAN support
# GG_BUILD_VULKAN=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
#
# # with WebGPU support
# GG_BUILD_WEBGPU=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
#
# # with MUSA support
# GG_BUILD_MUSA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
#
Expand Down Expand Up @@ -81,6 +84,10 @@ if [ ! -z ${GG_BUILD_VULKAN} ]; then
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=1"
fi

if [ ! -z ${GG_BUILD_WEBGPU} ]; then
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_WEBGPU=1"
fi

if [ ! -z ${GG_BUILD_MUSA} ]; then
# Use qy1 by default (MTT S80)
MUSA_ARCH=${MUSA_ARCH:-21}
Expand Down
17 changes: 17 additions & 0 deletions docs/build.md
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,23 @@ ninja

To read documentation for how to build on Android, [click here](./android.md)

## WebGPU [In Progress]

The WebGPU backend relies on [Dawn](https://dawn.googlesource.com/dawn). Follow the instructions [here](https://dawn.googlesource.com/dawn/+/refs/heads/main/docs/quickstart-cmake.md) to install Dawn locally so that llama.cpp can find it using CMake. The current implementation is up-to-date with Dawn commit `bed1a61`.

In the llama.cpp directory, build with CMake:

```
cmake -B build -DGGML_WEBGPU=ON
cmake --build build --config Release
```

### Browser Support

WebGPU allows cross-platform access to the GPU from supported browsers. We utilize [Emscripten](https://emscripten.org/) to compile ggml's WebGPU backend to WebAssembly. Emscripten does not officially support WebGPU bindings yet, but Dawn currently maintains its own WebGPU bindings called emdawnwebgpu.

Follow the instructions [here](https://dawn.googlesource.com/dawn/+/refs/heads/main/src/emdawnwebgpu/) to download or build the emdawnwebgpu package (Note that it might be safer to build the emdawnwebgpu package locally, so that it stays in sync with the version of Dawn you have installed above). When building using CMake, the path to the emdawnwebgpu port file needs to be set with the flag `EMDAWNWEBGPU_DIR`.

## IBM Z & LinuxONE

To read documentation for how to build on IBM Z & LinuxONE, [click here](./build-s390x.md)
Expand Down
3 changes: 3 additions & 0 deletions ggml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,8 @@ option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug ou
option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF)
option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF)
option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
option(GGML_WEBGPU "ggml: use WebGPU" OFF)
option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF)
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
option(GGML_METAL_USE_BF16 "ggml: use bfloat if available" OFF)
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
Expand Down Expand Up @@ -270,6 +272,7 @@ set(GGML_PUBLIC_HEADERS
include/ggml-rpc.h
include/ggml-sycl.h
include/ggml-vulkan.h
include/ggml-webgpu.h
include/gguf.h)

set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
Expand Down
19 changes: 19 additions & 0 deletions ggml/include/ggml-webgpu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#pragma once

// Public C API for the ggml WebGPU backend (implemented on top of Dawn).
// Exposes only the entry points needed to register the backend with ggml's
// backend registry and to create a backend instance directly.

#include "ggml.h"
#include "ggml-backend.h"

#ifdef __cplusplus
extern "C" {
#endif

// Human-readable backend name, as reported to the ggml backend registry.
#define GGML_WEBGPU_NAME "WebGPU"

// Needed for examples in ggml
GGML_BACKEND_API ggml_backend_t ggml_backend_webgpu_init(void);

// Returns the backend registration entry used by ggml-backend-reg.cpp.
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_webgpu_reg(void);

#ifdef __cplusplus
}
#endif
1 change: 1 addition & 0 deletions ggml/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ ggml_add_backend(MUSA)
ggml_add_backend(RPC)
ggml_add_backend(SYCL)
ggml_add_backend(Vulkan)
ggml_add_backend(WebGPU)
ggml_add_backend(OpenCL)

foreach (target ggml-base ggml)
Expand Down
7 changes: 7 additions & 0 deletions ggml/src/ggml-backend-reg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@
#include "ggml-vulkan.h"
#endif

#ifdef GGML_USE_WEBGPU
#include "ggml-webgpu.h"
#endif

#ifdef GGML_USE_OPENCL
#include "ggml-opencl.h"
#endif
Expand Down Expand Up @@ -173,6 +177,9 @@ struct ggml_backend_registry {
#ifdef GGML_USE_VULKAN
register_backend(ggml_backend_vk_reg());
#endif
#ifdef GGML_USE_WEBGPU
register_backend(ggml_backend_webgpu_reg());
#endif
#ifdef GGML_USE_OPENCL
register_backend(ggml_backend_opencl_reg());
#endif
Expand Down
54 changes: 54 additions & 0 deletions ggml/src/ggml-webgpu/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
cmake_minimum_required(VERSION 3.13)

# Python is required to run the shader-embedding script below.
find_package(Python3 REQUIRED)

# Shader locations: WGSL sources live in the source tree, the generated
# header goes into the binary tree only (never written back to the source dir).
set(SHADER_DIR        "${CMAKE_CURRENT_SOURCE_DIR}/wgsl-shaders")
set(SHADER_OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated")
set(SHADER_HEADER     "${SHADER_OUTPUT_DIR}/ggml-wgsl-shaders.hpp")
file(MAKE_DIRECTORY ${SHADER_OUTPUT_DIR})

message(STATUS "Shader output dir: ${SHADER_OUTPUT_DIR}")

# Find all WGSL files. CONFIGURE_DEPENDS re-checks the glob at build time so
# newly added shaders are picked up without a manual re-configure.
file(GLOB WGSL_SHADER_FILES CONFIGURE_DEPENDS "${SHADER_DIR}/*.wgsl")

# Generate the header using a Python script; re-runs whenever a shader or the
# embedding script itself changes.
add_custom_command(
    OUTPUT ${SHADER_HEADER}
    COMMAND ${CMAKE_COMMAND} -E echo "Embedding WGSL shaders to ggml-wgsl-shaders.hpp"
    COMMAND ${CMAKE_COMMAND} -E make_directory ${SHADER_OUTPUT_DIR}
    COMMAND ${CMAKE_COMMAND} -E env PYTHONIOENCODING=utf-8
        ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/wgsl-shaders/embed_wgsl.py
            --input "${SHADER_DIR}"
            --output "${SHADER_HEADER}"
    DEPENDS ${WGSL_SHADER_FILES} ${CMAKE_CURRENT_SOURCE_DIR}/wgsl-shaders/embed_wgsl.py
    VERBATIM
)

# Named target so the backend library can depend on header generation.
add_custom_target(generate_shaders DEPENDS ${SHADER_HEADER})

ggml_add_backend_library(ggml-webgpu
                         ggml-webgpu.cpp
                         ${SHADER_HEADER}
                         ../../include/ggml-webgpu.h
                         )

add_dependencies(ggml-webgpu generate_shaders)

if (EMSCRIPTEN)
    # Under Emscripten, WebGPU bindings come from Dawn's emdawnwebgpu port
    # rather than a linked Dawn library; DawnWebGPU_TARGET stays empty so the
    # target_link_libraries call below is a no-op.
    set(EMDAWNWEBGPU_DIR "" CACHE PATH "Path to emdawnwebgpu_pkg")

    target_compile_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
    target_link_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
else()
    # Native builds link against an installed Dawn package (see docs/build.md).
    find_package(Dawn REQUIRED)
    set(DawnWebGPU_TARGET dawn::webgpu_dawn)
endif()

if (GGML_WEBGPU_DEBUG)
    target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_DEBUG=1)
endif()

target_include_directories(ggml-webgpu PRIVATE "${SHADER_OUTPUT_DIR}")
target_link_libraries(ggml-webgpu PRIVATE ${DawnWebGPU_TARGET})
Loading
Loading