Address feedback

reeselevine · reeselevine · commit cbf4b96924bc · 2025-07-06T08:40:28.000-07:00
diff --git a/docs/build.md b/docs/build.md
@@ -559,7 +559,7 @@ To read documentation for how to build on Android, [click here](./android.md)
 
 ## WebGPU [In Progress]
 
-The WebGPU backend relies on [Dawn](https://dawn.googlesource.com/dawn). Follow the instructions [here](https://dawn.googlesource.com/dawn/+/refs/heads/main/docs/quickstart-cmake.md) to install Dawn locally so that llama.cpp can find it using CMake.
+The WebGPU backend relies on [Dawn](https://dawn.googlesource.com/dawn). Follow the instructions [here](https://dawn.googlesource.com/dawn/+/refs/heads/main/docs/quickstart-cmake.md) to install Dawn locally so that llama.cpp can find it using CMake. The current implementation is up-to-date with Dawn commit `bed1a61`.
 
 In the llama.cpp directory, build with CMake:
 
diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp
@@ -264,8 +264,8 @@ static bool ggml_webgpu_encode_node(webgpu_context ctx, ggml_tensor * node){
             uint32_t * params = (uint32_t *) ctx->cpy_params_host_buf.GetMappedRange();
             uint32_t ne = (uint32_t)ggml_nelements(node);
             params[0] = ne;
-            params[1] = src_misalignment;
-            params[2] = dst_misalignment;
+            params[1] = src_misalignment/ggml_type_size(src->type);
+            params[2] = dst_misalignment/ggml_type_size(node->type);
 
             // Convert byte-strides to element-strides
             params[3] = (uint32_t)src->nb[0]/ggml_type_size(src->type);
@@ -881,10 +881,11 @@ ggml_backend_reg_t ggml_backend_webgpu_reg() {
     ctx.name = GGML_WEBGPU_NAME;
     ctx.device_count = 1;
 
-
-    wgpu::InstanceDescriptor instanceDescriptor{};
-    instanceDescriptor.capabilities.timedWaitAnyEnable = true;
-    webgpu_ctx->instance = wgpu::CreateInstance(&instanceDescriptor);
+    wgpu::InstanceDescriptor instance_descriptor{};
+    std::vector<wgpu::InstanceFeatureName> instance_features = {wgpu::InstanceFeatureName::TimedWaitAny};
+    instance_descriptor.requiredFeatures = instance_features.data();
+    instance_descriptor.requiredFeatureCount = instance_features.size();
+    webgpu_ctx->instance = wgpu::CreateInstance(&instance_descriptor);
     GGML_ASSERT(webgpu_ctx->instance != nullptr);
 
     static ggml_backend_reg reg = {
diff --git a/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl b/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl
@@ -8,8 +8,8 @@ var<storage, read_write> dst: array<f16>;
 
 struct Params {
     ne: u32,            // total number of elements
-    offset_src: u32,    // in bytes
-    offset_dst: u32,    // in bytes
+    offset_src: u32,    // in elements
+    offset_dst: u32,    // in elements
 
     // Strides (in elements) — may be permuted
     stride_src0: u32,
diff --git a/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl b/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl
@@ -2,6 +2,7 @@ struct MulMatParams {
     m: u32,
     n: u32,
     k: u32,
+    // all strides are in elements
     stride_01: u32,
     stride_11: u32,
     stride_02: u32,
@@ -16,7 +17,7 @@ struct MulMatParams {
 };
 
 @group(0) @binding(0) var<storage, read_write> src0: array<f32>; // N rows, K columns
-@group(0) @binding(1) var<storage, read_write> src1: array<f32>; // M rows, K columns
+@group(0) @binding(1) var<storage, read_write> src1: array<f32>; // M rows, K columns (transposed)
 @group(0) @binding(2) var<storage, read_write> dst: array<f32>; // M rows, N columns
 
 @group(0) @binding(3) var<uniform> params: MulMatParams;