[Bugfix] Fix Machete zero point issue for GPTQ models on SM90 (#21066)

mgoin · web-flow · commit 28a6d5423db6 · 2025-07-16T19:54:45.000-07:00
Signed-off-by: mgoin <mgoin64@gmail.com>
diff --git a/vllm/model_executor/layers/quantization/kernels/mixed_precision/machete.py b/vllm/model_executor/layers/quantization/kernels/mixed_precision/machete.py
@@ -126,6 +126,11 @@ def apply_weights(self,
         if c.has_g_idx:
             x_2d = self.act_perm(x_2d)
 
+        if c.zero_points:
+            assert w_zp is not None
+        else:
+            w_zp = None
+
         output = ops.machete_mm(a=x_2d,
                                 b_q=w_q,
                                 b_type=c.weight_type,