[Bugfix] Restrict Machete to only run on Hopper (vllm-project#20830)

mgoin · py-andy-c · commit 728f7a24520b · 2025-07-14T23:19:26.000Z
Signed-off-by: mgoin <mgoin64@gmail.com>
diff --git a/vllm/model_executor/layers/quantization/kernels/mixed_precision/machete.py b/vllm/model_executor/layers/quantization/kernels/mixed_precision/machete.py
@@ -32,6 +32,9 @@ def can_implement(cls,
         if not current_platform.is_cuda():
             return False, "Machete only supported on CUDA"
 
+        if not current_platform.is_device_capability(90):
+            return False, "Machete requires compute capability of 90 (Hopper)"
+
         if c.has_g_idx and\
             c.partition_weight_shape[0] != c.full_weight_shape[0]:
             return False, "Act reordering currently not supported by Machete, "\