Skip to content

Commit e96a74a

Browse files
committed
Reapply "CUDA: faster Deepseek FA, add Turing support (ggml-org#13435)"
1 parent 7d7057b commit e96a74a

File tree

4 files changed

+270
-65
lines changed

4 files changed

+270
-65
lines changed

ggml/src/ggml-cuda/fattn-common.cuh

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -826,10 +826,14 @@ void launch_fattn(
826826
) {
827827
constexpr int ncols = ncols1 * ncols2;
828828

829+
const bool is_mla = DV == 512; // TODO better parameterization
830+
829831
const ggml_tensor * Q = dst->src[0];
830832
const ggml_tensor * K = dst->src[1];
831833
const ggml_tensor * V = dst->src[2];
832834

835+
GGML_ASSERT(V || is_mla);
836+
833837
const ggml_tensor * mask = dst->src[3];
834838

835839
ggml_tensor * KQV = dst;

0 commit comments

Comments
 (0)