Skip to content

Commit 2923d31

Browse files
committed
musa: disable MUL_MAT_ID (q2_k × f32) due to precision issues
Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
1 parent 1e44f3b commit 2923d31

File tree

2 files changed: +6 -2 lines changed

2 files changed: +6 -2 lines changed

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3027,6 +3027,10 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
30273027
a->type == GGML_TYPE_F16 && b->type == GGML_TYPE_F16) {
30283028
return false;
30293029
}
3030+
if (GGML_CUDA_CC_IS_QY2(cc) && op->op == GGML_OP_MUL_MAT_ID &&
3031+
a->type == GGML_TYPE_Q2_K && b->type == GGML_TYPE_F32) {
3032+
return false;
3033+
}
30303034
}
30313035
#endif // GGML_USE_MUSA
30323036
switch (a->type) {

ggml/src/ggml-musa/mudnn.cuh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
#pragma once
22

3-
#include "../include/ggml.h"
4-
#include "../ggml-cuda/common.cuh"
3+
#include "ggml-cuda/common.cuh"
4+
#include "ggml.h"
55

66
// Asynchronously copies data from src tensor to dst tensor using the provided context.
77
// Returns a musaError_t indicating success or failure.

0 commit comments

Comments
 (0)