musa: disable MUL_MAT_ID (q2_k × f32) due to precision issues

yeahdongcn · yeahdongcn · commit 2923d31a5ca7 · 2025-06-04T18:36:18.000+08:00
Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -3027,6 +3027,10 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
                             a->type == GGML_TYPE_F16 && b->type == GGML_TYPE_F16) {
                         return false;
                     }
+                    if (GGML_CUDA_CC_IS_QY2(cc) && op->op == GGML_OP_MUL_MAT_ID &&
+                            a->type == GGML_TYPE_Q2_K && b->type == GGML_TYPE_F32) {
+                        return false;
+                    }
                 }
 #endif // GGML_USE_MUSA
                 switch (a->type) {
diff --git a/ggml/src/ggml-musa/mudnn.cuh b/ggml/src/ggml-musa/mudnn.cuh
@@ -1,7 +1,7 @@
 #pragma once
 
-#include "../include/ggml.h"
-#include "../ggml-cuda/common.cuh"
+#include "ggml-cuda/common.cuh"
+#include "ggml.h"
 
 // Asynchronously copies data from src tensor to dst tensor using the provided context.
 // Returns a musaError_t indicating success or failure.

Original file line number	Diff line number	Diff line change
`@@ -3027,6 +3027,10 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g`
`3027`	`3027`	`a->type == GGML_TYPE_F16 && b->type == GGML_TYPE_F16) {`
`3028`	`3028`	`return false;`
`3029`	`3029`	`}`
	`3030`	`+ if (GGML_CUDA_CC_IS_QY2(cc) && op->op == GGML_OP_MUL_MAT_ID &&`
	`3031`	`+ a->type == GGML_TYPE_Q2_K && b->type == GGML_TYPE_F32) {`
	`3032`	`+ return false;`
	`3033`	`+ }`
`3030`	`3034`	`}`
`3031`	`3035`	`#endif // GGML_USE_MUSA`
`3032`	`3036`	`switch (a->type) {`