vllm-project · cjackal · Jul 3, 2025 · Jul 3, 2025 · Jul 14, 2025 · Jul 14, 2025
diff --git a/setup.py b/setup.py
@@ -119,7 +119,7 @@ def localversion_func(version: ScmVersion) -> str:
         "tqdm>=4.0.0",
         # torch 1.10 and 1.11 do not support quantized onnx export
         "torch>=1.7.0,!=1.10,!=1.11",
-        "transformers>4.0",
+        "transformers>=4.52.0",
         "datasets",
         "accelerate>=0.20.3,!=1.1.0",
         "pynvml",

diff --git a/src/llmcompressor/modifiers/awq/base.py b/src/llmcompressor/modifiers/awq/base.py
@@ -465,11 +465,13 @@ def _apply_smoothing(self, model: Module) -> None:
             # Calculates the relative magnitude of the weights within
             # each of the quantization groups, and rescales each group
             # individually so that each group has weights on a 0-1 scale.
-            w_scale = weight.abs() / (weight.abs().amax(dim=1, keepdim=True) + 1e-6)
+            weight.abs_()
+            weight.div_(weight.amax(dim=1, keepdim=True) + 1e-6)
             # Resizes the rescaled weight matrix back up to its original dimensions
-            w_scale = w_scale.view(org_shape)
+            weight = weight.view(org_shape)
             # Gets the average rescaled magnitude for each output channel
-            w_mean = w_scale.mean(0)
+            w_mean = weight.mean(0)
+            del weight
 
             with calibration_forward_context(model), HooksMixin.disable_hooks():
                 # [STEP 3]: Compute output of module

diff --git a/src/llmcompressor/modifiers/awq/mappings.py b/src/llmcompressor/modifiers/awq/mappings.py
@@ -116,9 +116,26 @@ class AWQMapping:
     ),
 ]
 
+# DeepseekV3: mappings covering the q_a/q_b, kv_a/kv_b and gate/up/down projections
+_deepseek_mappings = [
+    AWQMapping(
+        "re:.*input_layernorm$",
+        # "(q|q_a)" covers both names: some models use q_proj instead of q_a_proj
+        ["re:.*(q|q_a)_proj$", "re:.*kv_a_proj_with_mqa$"],
+    ),
+    AWQMapping("re:.*q_a_layernorm$", ["re:.*q_b_proj$"]),
+    AWQMapping("re:.*kv_a_layernorm$", ["re:.*kv_b_proj$"]),
+    AWQMapping(
+        "re:.*post_attention_layernorm$",
+        ["re:.*gate_proj$", "re:.*up_proj$"],
+    ),
+    AWQMapping("re:.*up_proj$", ["re:.*down_proj$"]),
+]
+
 AWQ_MAPPING_REGISTRY: Dict[str, list[AWQMapping]] = {
     "CohereForCausalLM": _cohere_mappings,
     "Cohere2ForCausalLM": _cohere_mappings,
+    "DeepseekV3ForCausalLM": _deepseek_mappings,
     "Gemma2ForCausalLM": _gemma_mappings,
     "Gemma3ForCausalLM": _gemma_mappings,
     "Gemma3ForConditionalGeneration": _gemma_mappings,