1 parent b526478 commit c791a85
vllm/model_executor/layers/utils.py
@@ -84,7 +84,7 @@ def rocm_unquantized_gemm(x: torch.Tensor,
     m = weight.shape[0]
     cu_count = current_platform.get_cu_count()
 
-    if m > 8 and 0 < n < 4:
+    if m > 8 and 0 < n <= 4:
         out = ops.wvSplitK(weight, x_view, cu_count)
         return out.view(*x.shape[:-1], weight.shape[0])
     elif m % 4 == 0 and n == 1 and k <= 8192:
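The change above widens the skinny-GEMM dispatch boundary so that n == 4 is now also routed to the wvSplitK kernel. A minimal sketch of the predicate, using a hypothetical standalone helper (uses_wv_split_k is not part of vLLM), to show which shapes the new boundary admits:

def uses_wv_split_k(m: int, n: int) -> bool:
    # Simplified stand-in for the condition in rocm_unquantized_gemm:
    # the change from "n < 4" to "n <= 4" admits n == 4.
    return m > 8 and 0 < n <= 4


assert uses_wv_split_k(m=16, n=4)      # newly covered by the <= 4 boundary
assert not uses_wv_split_k(m=16, n=5)  # still falls through to other paths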
vllm/platforms/rocm.py
@@ -104,6 +104,7 @@ def device_id_to_physical_device_id(device_id: int) -> int:
     return device_id
 
 
+@cache
 def on_mi250_mi300() -> bool:
     GPU_ARCH = torch.cuda.get_device_properties("cuda").gcnArchName
     return any(arch in GPU_ARCH for arch in ["gfx90a", "gfx942"])
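The added @cache decorator (presumably functools.cache) memoizes on_mi250_mi300 so the CUDA device-property query runs only once per process instead of on every call. A minimal sketch of the same pattern with a hypothetical probe function, since exercising the real call would require a ROCm device:

from functools import cache


@cache
def probe_gcn_arch() -> str:
    # Hypothetical stand-in for torch.cuda.get_device_properties("cuda").gcnArchName;
    # the body runs only on the first call, later calls reuse the cached value.
    print("querying device properties...")
    return "gfx942"


print(probe_gcn_arch())  # prints the probe message, then "gfx942"
print(probe_gcn_arch())  # cached: returns "gfx942" without re-probing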