sync : ggml #3329

Merged on Jul 19, 2025 (20 commits)

Commits (changes shown from all commits)
03e85da  CUDA: add set rows for f32 and f16 (llama/14551)  [am17an, Jul 12, 2025]
a6b85bc  metal : Add missing unary ops Metal support (llama/14660)  [YavorGIvanov, Jul 13, 2025]
8610c4c  ggml : add build-time message to remind about ggml_set_rows (llama/14…)  [ggerganov, Jul 13, 2025]
e6509f7  cuda : add ELU support (llama/14657)  [YavorGIvanov, Jul 13, 2025]
6055fb4  cuda : add set rows for bf16 (llama/14664)  [CISC, Jul 13, 2025]
24643c0  sycl: Batched mulmat rework for oneDNN dispatch (llama/14617)  [ShanoToni, Jul 14, 2025]
e5d8efc  SYCL: use 1D kernel for set_rows (llama/14618)  [qnixsynapse, Jul 14, 2025]
07b4522  ggml : refactor llamafile_sgemm PPC code (llama/14673)  [shalinib-ibm, Jul 14, 2025]
72dae6b  sycl: Hotfix for non dnnl codepath (llama/14677)  [ShanoToni, Jul 14, 2025]
591bc24  cuda: fix build warnings in set-rows.cu (unused variable) (llama/14687)  [yeahdongcn, Jul 15, 2025]
b2653a9  vulkan: add RTE variants for glu/add/sub/mul/div (llama/14653)  [jeffbolznv, Jul 15, 2025]
bf13b82  vulkan: fix noncontig check for mat_mul_id splitting (llama/14683)  [jeffbolznv, Jul 15, 2025]
26f3de1  ggml : add asserts (llama/14720)  [ggerganov, Jul 16, 2025]
9b27887  llama : add high-throughput mode (llama/14363)  [ggerganov, Jul 16, 2025]
c3f7c10  ggml: Add initial WebGPU backend (llama/14521)  [reeselevine, Jul 16, 2025]
9664629  use max work group size for device to replace the magic number (llama…)  [NeoZhangJianyu, Jul 18, 2025]
7df67c2  CUDA: set_rows + cpy.cu refactor (llama/14712)  [am17an, Jul 18, 2025]
c0f6c31  cuda : Fix Gemma3n not executed as CUDA_GRAPH on NVGPUs (llama/14741)  [ORippler, Jul 18, 2025]
f476d9b  metal : fuse add, mul + add tests (llama/14596)  [ggerganov, Jul 18, 2025]
74b5c27  sync : ggml  [ggerganov, Jul 19, 2025]
Files changed
3 changes: 3 additions & 0 deletions ggml/CMakeLists.txt
@@ -181,6 +181,8 @@
 option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF)
 option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF)
 option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
+option(GGML_WEBGPU "ggml: use WebGPU" OFF)
+option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF)
 option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
 option(GGML_METAL_USE_BF16 "ggml: use bfloat if available" OFF)
 option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
@@ -270,6 +272,7 @@ set(GGML_PUBLIC_HEADERS
 include/ggml-rpc.h
 include/ggml-sycl.h
 include/ggml-vulkan.h
+include/ggml-webgpu.h
 include/gguf.h)

 set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
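With these options in place, the new backend can presumably be enabled at configure time like any other ggml backend, e.g. `cmake -B build -DGGML_WEBGPU=ON` (with `-DGGML_WEBGPU_DEBUG=ON` for the debug output option added above).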
19 changes: 19 additions & 0 deletions ggml/include/ggml-webgpu.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include "ggml.h"
+#include "ggml-backend.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define GGML_WEBGPU_NAME "WebGPU"
+
+// Needed for examples in ggml
+GGML_BACKEND_API ggml_backend_t ggml_backend_webgpu_init(void);
+
+GGML_BACKEND_API ggml_backend_reg_t ggml_backend_webgpu_reg(void);
+
+#ifdef __cplusplus
+}
+#endif
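The new header exposes only two entry points. A minimal usage sketch (not part of this PR; it assumes the standard ggml-backend lifecycle functions) of direct initialization:

```c
// Hypothetical sketch: initialize the WebGPU backend through the entry point
// declared in ggml-webgpu.h, query its name, then release it.
#include "ggml-webgpu.h"
#include "ggml-backend.h"
#include <stdio.h>

int main(void) {
    ggml_backend_t backend = ggml_backend_webgpu_init();
    if (backend == NULL) {
        fprintf(stderr, "failed to initialize %s backend\n", GGML_WEBGPU_NAME);
        return 1;
    }
    printf("initialized backend: %s\n", ggml_backend_name(backend));
    ggml_backend_free(backend);
    return 0;
}
```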
1 change: 1 addition & 0 deletions ggml/src/CMakeLists.txt
@@ -370,6 +370,7 @@ ggml_add_backend(MUSA)
 ggml_add_backend(RPC)
 ggml_add_backend(SYCL)
 ggml_add_backend(Vulkan)
+ggml_add_backend(WebGPU)
 ggml_add_backend(OpenCL)

 foreach (target ggml-base ggml)
15 changes: 0 additions & 15 deletions ggml/src/ggml-alloc.c
@@ -22,21 +22,6 @@ static bool ggml_is_view(const struct ggml_tensor * t) {
     return t->view_src != NULL;
 }

-static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) {
-    if (a->type != b->type) {
-        return false;
-    }
-    for (int i = 0; i < GGML_MAX_DIMS; i++) {
-        if (a->ne[i] != b->ne[i]) {
-            return false;
-        }
-        if (a->nb[i] != b->nb[i]) {
-            return false;
-        }
-    }
-    return true;
-}
-
 // ops that return true for this function must not use restrict pointers for their backend implementations
 static bool ggml_op_can_inplace(enum ggml_op op) {
     switch (op) {
7 changes: 7 additions & 0 deletions ggml/src/ggml-backend-reg.cpp
@@ -45,6 +45,10 @@
 #include "ggml-vulkan.h"
 #endif

+#ifdef GGML_USE_WEBGPU
+#include "ggml-webgpu.h"
+#endif
+
 #ifdef GGML_USE_OPENCL
 #include "ggml-opencl.h"
 #endif
@@ -173,6 +177,9 @@ struct ggml_backend_registry {
 #ifdef GGML_USE_VULKAN
         register_backend(ggml_backend_vk_reg());
 #endif
+#ifdef GGML_USE_WEBGPU
+        register_backend(ggml_backend_webgpu_reg());
+#endif
 #ifdef GGML_USE_OPENCL
         register_backend(ggml_backend_opencl_reg());
 #endif
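Once registered here, the backend becomes visible through the generic registry API. A hedged sketch (standard ggml-backend registry calls, not code from this PR):

```c
// Sketch: enumerate all registered backends; in a build with GGML_USE_WEBGPU
// defined, "WebGPU" is expected to appear in this list.
#include "ggml-backend.h"
#include <stdio.h>

int main(void) {
    for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
        ggml_backend_reg_t reg = ggml_backend_reg_get(i);
        printf("backend %zu: %s\n", i, ggml_backend_reg_name(reg));
    }
    return 0;
}
```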
15 changes: 0 additions & 15 deletions ggml/src/ggml-backend.cpp
@@ -352,21 +352,6 @@ ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend) {

 // backend copy

-static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) {
-    if (a->type != b->type) {
-        return false;
-    }
-    for (int i = 0; i < GGML_MAX_DIMS; i++) {
-        if (a->ne[i] != b->ne[i]) {
-            return false;
-        }
-        if (a->nb[i] != b->nb[i]) {
-            return false;
-        }
-    }
-    return true;
-}
-
 void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst) {
     GGML_ASSERT(ggml_are_same_layout(src, dst) && "cannot copy tensors with different layouts");
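The identical ggml_are_same_layout helper is deleted from both ggml-alloc.c and ggml-backend.cpp while ggml_backend_tensor_copy still asserts on it, so the sync presumably consolidates the helper into a shared internal header. For intuition, the check compares type, element counts (ne), and byte strides (nb); a small hypothetical sketch using the standard ggml API:

```c
// Sketch: two freshly created tensors of equal type and shape share a layout;
// a transposed view has the same elements but permuted nb strides, so the
// layout check (and ggml_backend_tensor_copy's assertion) would reject it.
#include "ggml.h"
#include <stdio.h>

int main(void) {
    struct ggml_init_params params = { 16*1024*1024, NULL, false };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 2);
    struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 2);
    struct ggml_tensor * t = ggml_transpose(ctx, a); // same data, strides swapped

    printf("a/b strides: %zu %zu vs %zu %zu (same layout)\n",
           a->nb[0], a->nb[1], b->nb[0], b->nb[1]);
    printf("a/t strides: %zu %zu vs %zu %zu (different layout)\n",
           a->nb[0], a->nb[1], t->nb[0], t->nb[1]);

    ggml_free(ctx);
    return 0;
}
```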
1 change: 1 addition & 0 deletions ggml/src/ggml-cann/ggml-cann.cpp
@@ -2090,6 +2090,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
             {
                 // TODO: add support
                 // ref: https://github.com/ggml-org/llama.cpp/pull/14274
+#pragma message("TODO: implement F32, F16, BF16, Q4_0, Q4_1, Q5_0, Q5_1, Q8_0, IQ4_NL support (https://github.com/ggml-org/llama.cpp/pull/14661)")
                 return false;
             } break;
         case GGML_OP_CPY: {
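For readers unfamiliar with the pattern: #pragma message makes the compiler print a note every time the file is built, without failing the build, which is how the ggml_set_rows reminder from commit 8610c4c stays visible. A trivial standalone illustration (not from the diff):

```c
// Toy example: building this file prints the TODO note in the compiler
// output (supported by GCC, Clang, and MSVC), yet compilation still succeeds.
#pragma message("TODO: implement SET_ROWS for this backend")

int main(void) {
    return 0;
}
```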