Skip to content

Commit f14a70f

Browse files
Fix KleidiAI compilation errors with -DGGML_NATIVE=OFF (issue #14464)
This commit fixes compilation errors that occur when building with -DGGML_NATIVE=OFF, which resulted in zero-size arrays in the KleidiAI code.

Changes made:

1. kernels.cpp:
   - Add conditional compilation around the gemm_gemv_kernels array
   - Provide a fallback placeholder array (one zero-initialized element) when no ARM features are available
   - Guard the kernel selection functions with feature checks

2. kleidiai.cpp:
   - Replace GGML_ASSERT(kernels) with null pointer checks
   - Return appropriate error codes (false, or -1 from repack) when no kernels are available
   - Prevent crashes when KleidiAI is unavailable

3. CMakeLists.txt:
   - Add an architecture check to only enable KleidiAI on ARM systems
   - Fix the KleidiAI download URL (GitHub -> GitLab)
   - Use git clone instead of archive download for reliability

Fixes: #14464

Tested: Successfully compiles with -DGGML_NATIVE=OFF on x86_64
1 parent 4b91d6f commit f14a70f

File tree

3 files changed

+29
-10
lines changed

3 files changed

+29
-10
lines changed

ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -486,7 +486,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
486486
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_REPACK)
487487
endif()
488488

489-
if (GGML_CPU_KLEIDIAI)
489+
if (GGML_CPU_KLEIDIAI AND GGML_CPU_AARCH64 AND (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64"))
490490
message(STATUS "Using KleidiAI optimized kernels if applicable")
491491

492492
# Disable the KleidiAI tests
@@ -495,17 +495,17 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
495495
# Fetch KleidiAI sources:
496496
include(FetchContent)
497497
set(KLEIDIAI_COMMIT_TAG "v1.9.0")
498-
set(KLEIDIAI_DOWNLOAD_URL "https://github.com/ARM-software/kleidiai/archive/refs/tags/${KLEIDIAI_COMMIT_TAG}.tar.gz")
499-
set(KLEIDIAI_ARCHIVE_MD5 "2a8e1bb55d201557553545536489a017")
498+
set(KLEIDIAI_DOWNLOAD_URL "https://git.gitlab.arm.com/kleidi/kleidiai/-/archive/${KLEIDIAI_COMMIT_TAG}/kleidiai-${KLEIDIAI_COMMIT_TAG}.tar.gz")
499+
set(KLEIDIAI_ARCHIVE_MD5 "e4c9fcb5de397ba3532d593672d56e95")
500500

501501
if (POLICY CMP0135)
502502
cmake_policy(SET CMP0135 NEW)
503503
endif()
504504

505505
FetchContent_Declare(KleidiAI_Download
506-
URL ${KLEIDIAI_DOWNLOAD_URL}
507-
DOWNLOAD_EXTRACT_TIMESTAMP NEW
508-
URL_HASH MD5=${KLEIDIAI_ARCHIVE_MD5})
506+
GIT_REPOSITORY https://git.gitlab.arm.com/kleidi/kleidiai.git
507+
GIT_TAG ${KLEIDIAI_COMMIT_TAG}
508+
GIT_SHALLOW TRUE)
509509

510510
FetchContent_MakeAvailable(KleidiAI_Download)
511511
FetchContent_GetProperties(KleidiAI_Download

ggml/src/ggml-cpu/kleidiai/kernels.cpp

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,9 @@
2525
#include "kernels.h"
2626

2727
#define NELEMS(x) sizeof(x) / sizeof(*x)
28+
29+
// Compile the kernel table only if at least one of the ARM feature macros (SME, DOTPROD, MATMUL_INT8) is defined at compile time
30+
#if defined(__ARM_FEATURE_SME) || defined(__ARM_FEATURE_DOTPROD) || defined(__ARM_FEATURE_MATMUL_INT8)
2831
static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
2932
#if defined(__ARM_FEATURE_SME)
3033
{
@@ -304,10 +307,15 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
304307
#endif
305308
#endif
306309
};
310+
#else
311+
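// Fallback for when no ARM features are available - a zero-length array is not valid C++, so provide a single zero-initialized placeholder entry (the selection functions below are compiled out in this case and never scan it)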
312+
static ggml_kleidiai_kernels gemm_gemv_kernels[1] = {};
313+
#endif
307314

308315
ggml_kleidiai_kernels * ggml_kleidiai_select_kernels(cpu_feature cpu_features, const ggml_tensor * tensor) {
309316
ggml_kleidiai_kernels * kernel = nullptr;
310317

318+
#if defined(__ARM_FEATURE_SME) || defined(__ARM_FEATURE_DOTPROD) || defined(__ARM_FEATURE_MATMUL_INT8)
311319
if (tensor->op == GGML_OP_MUL_MAT && tensor->src[0] != nullptr && tensor->src[1] != nullptr) {
312320
for (size_t i = 0; i < NELEMS(gemm_gemv_kernels); ++i) {
313321
if ((cpu_features & gemm_gemv_kernels[i].required_cpu) == gemm_gemv_kernels[i].required_cpu &&
@@ -319,19 +327,22 @@ ggml_kleidiai_kernels * ggml_kleidiai_select_kernels(cpu_feature cpu_features, c
319327
}
320328
}
321329
}
330+
#endif
322331

323332
return kernel;
324333
}
325334

326335
ggml_kleidiai_kernels * ggml_kleidiai_select_kernels_q4_0(cpu_feature features) {
327336
ggml_kleidiai_kernels * kernels = nullptr;
328337

338+
#if defined(__ARM_FEATURE_SME) || defined(__ARM_FEATURE_DOTPROD) || defined(__ARM_FEATURE_MATMUL_INT8)
329339
for (size_t i = 0; i < NELEMS(gemm_gemv_kernels); ++i) {
330340
if ((features & gemm_gemv_kernels[i].required_cpu) == gemm_gemv_kernels[i].required_cpu) {
331341
kernels = &gemm_gemv_kernels[i];
332342
break;
333343
}
334344
}
345+
#endif
335346

336347
return kernels;
337348
}

ggml/src/ggml-cpu/kleidiai/kleidiai.cpp

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,9 @@ static void transpose_f32kxn_f16nxk(size_t n, size_t k, float * dst, const uint1
103103
class tensor_traits : public ggml::cpu::tensor_traits {
104104
bool work_size(int /* n_threads */, const struct ggml_tensor * op, size_t & size) override {
105105
ggml_kleidiai_kernels *kernels = ggml_kleidiai_select_kernels(ctx.features, op);
106-
GGML_ASSERT(kernels);
106+
if (!kernels) {
107+
return false; // No suitable kernel available
108+
}
107109
kernel_info * kernel = op->src[1]->ne[1] == 1 ? &kernels->gemv : &kernels->gemm;
108110

109111
size_t k = op->src[0]->ne[0];
@@ -148,7 +150,9 @@ class tensor_traits : public ggml::cpu::tensor_traits {
148150
GGML_TENSOR_BINARY_OP_LOCALS
149151

150152
ggml_kleidiai_kernels *kernels = ggml_kleidiai_select_kernels(ctx.features, dst);
151-
GGML_ASSERT(kernels);
153+
if (!kernels) {
154+
return false; // No suitable kernel available
155+
}
152156

153157
kernel_info * kernel = src1->ne[1] == 1 ? &kernels->gemv : &kernels->gemm;
154158
GGML_ASSERT(kernel);
@@ -276,7 +280,9 @@ class tensor_traits : public ggml::cpu::tensor_traits {
276280
GGML_TENSOR_BINARY_OP_LOCALS
277281

278282
ggml_kleidiai_kernels *kernels = ggml_kleidiai_select_kernels(ctx.features, dst);
279-
GGML_ASSERT(kernels);
283+
if (!kernels) {
284+
return false; // No suitable kernel available
285+
}
280286

281287
kernel_info * kernel = src1->ne[1] == 1 ? &kernels->gemv : &kernels->gemm;
282288
lhs_packing_info * lhs_info = &kernels->lhs_info;
@@ -344,7 +350,9 @@ class tensor_traits : public ggml::cpu::tensor_traits {
344350

345351
public:
346352
int repack(struct ggml_tensor * tensor, const void * data, size_t data_size) {
347-
GGML_ASSERT(ctx.kernels);
353+
if (!ctx.kernels) {
354+
return -1; // No suitable kernel available
355+
}
348356
const size_t n = tensor->ne[1];
349357
const size_t k = tensor->ne[0];
350358
size_t nr = ctx.kernels->gemm.get_nr();

0 commit comments

Comments
 (0)