Skip to content

Commit 8d719d0

Browse files
IMbackK, JohannesGaessler
authored and committed
CUDA/HIP: optimize mmv paths taken for HIP devices (ggml-org#14324)
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
1 parent f01962e commit 8d719d0

File tree

2 files changed

+23
-1
lines changed

2 files changed

+23
-1
lines changed

ggml/src/ggml-cuda/common.cuh

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -271,7 +271,11 @@ static bool fp32_mma_hardware_available(const int cc) {
271271
}
272272

273273
static bool bf16_mma_hardware_available(const int cc) {
274-
return GGML_CUDA_CC_IS_NVIDIA(cc) && cc >= GGML_CUDA_CC_AMPERE;
274+
return (GGML_CUDA_CC_IS_NVIDIA(cc) && cc >= GGML_CUDA_CC_AMPERE) || GGML_CUDA_CC_IS_CDNA(cc) || cc >= GGML_CUDA_CC_RDNA3;
275+
}
276+
277+
static bool fp32_mma_hardware_available(const int cc) {
278+
return GGML_CUDA_CC_IS_CDNA(cc);
275279
}
276280

277281
// Volta technically had FP16 tensor cores but they work very differently compared to Turing and later.

ggml/src/ggml-cuda/mmv.cu

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -472,6 +472,11 @@ bool ggml_cuda_should_use_mmv(enum ggml_type type, int cc, const int64_t * src0_
472472
return ne11 <= 4;
473473
}
474474
return ne11 <= 3;
475+
} else if (GGML_CUDA_CC_IS_AMD(cc)) {
476+
if (fp32_mma_hardware_available(cc)) {
477+
return ne11 <= 3;
478+
}
479+
return ne11 <= 8;
475480
}
476481
return ne11 <= 8;
477482
case GGML_TYPE_F16:
@@ -484,6 +489,14 @@ bool ggml_cuda_should_use_mmv(enum ggml_type type, int cc, const int64_t * src0_
484489
return src0_small && ne11 <= 3;
485490
}
486491
return ne11 <= 8;
492+
} else if (GGML_CUDA_CC_IS_AMD(cc)) {
493+
if (fp16_mma_hardware_available(cc)) {
494+
if (GGML_CUDA_CC_IS_RDNA3(cc) || GGML_CUDA_CC_IS_RDNA4(cc)) {
495+
return ne11 <= 5;
496+
}
497+
return ne11 <= 2;
498+
}
499+
return ne11 <= 8;
487500
}
488501
return ne11 <= 8;
489502
case GGML_TYPE_BF16:
@@ -496,6 +509,11 @@ bool ggml_cuda_should_use_mmv(enum ggml_type type, int cc, const int64_t * src0_
496509
return src0_small && ne11 <= 3;
497510
}
498511
return ne11 <= 8;
512+
} else if (GGML_CUDA_CC_IS_AMD(cc)) {
513+
if (bf16_mma_hardware_available(cc)) {
514+
return ne11 <= 3;
515+
}
516+
return ne11 <= 8;
499517
}
500518
return ne11 <= 8;
501519
default:

0 commit comments

Comments
 (0)