From 77c1d55e1e99e2a0a97cbf313df405bf0d83848a Mon Sep 17 00:00:00 2001 From: cjackal <44624812+cjackal@users.noreply.github.com> Date: Thu, 3 Jul 2025 14:39:49 +0000 Subject: [PATCH 1/9] add deepseek_v3 awq mapping Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com> --- src/llmcompressor/modifiers/awq/mappings.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/llmcompressor/modifiers/awq/mappings.py b/src/llmcompressor/modifiers/awq/mappings.py index 693406ec3..7794e6425 100644 --- a/src/llmcompressor/modifiers/awq/mappings.py +++ b/src/llmcompressor/modifiers/awq/mappings.py @@ -116,6 +116,21 @@ class AWQMapping: ), ] +# DeepseekV3 +_deepseek_mappings = [ + AWQMapping( + "re:.*input_layernorm$", + ["re:.*q_a_proj$", "re:.*kv_a_proj_with_mqa$"], + ), + AWQMapping("re:.*q_a_layernorm$", ["re:.*q_b_proj$"]), + AWQMapping("re:.*kv_a_layernorm$", ["re:.*kv_b_proj$"]), + AWQMapping( + "re:.*post_attention_layernorm$", + ["re:.*gate_proj$", "re:.*up_proj$"], + ), + AWQMapping("re:.*up_proj$", ["re:.*down_proj$"]), +] + AWQ_MAPPING_REGISTRY: Dict[str, list[AWQMapping]] = { "CohereForCausalLM": _cohere_mappings, "Cohere2ForCausalLM": _cohere_mappings, @@ -131,6 +146,7 @@ class AWQMapping: "Qwen2MoeForCausalLM": _moe_default_mappings, "Qwen3ForCausalLM": _default_mappings, "Qwen3MoeForCausalLM": _moe_default_mappings, + "DeepseekV3ForCausalLM": _deepseek_mappings, } From 0434b1f55fad0ec193c35e15c0d605797d69a8db Mon Sep 17 00:00:00 2001 From: cjackal <44624812+cjackal@users.noreply.github.com> Date: Thu, 3 Jul 2025 15:04:38 +0000 Subject: [PATCH 2/9] sort `AWQ_MAPPING_REGISTRY` in alphabetical order Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com> --- src/llmcompressor/modifiers/awq/mappings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llmcompressor/modifiers/awq/mappings.py b/src/llmcompressor/modifiers/awq/mappings.py index 7794e6425..ca6313163 100644 --- a/src/llmcompressor/modifiers/awq/mappings.py +++ b/src/llmcompressor/modifiers/awq/mappings.py @@ -134,6 +134,7 @@ class AWQMapping: AWQ_MAPPING_REGISTRY: Dict[str, list[AWQMapping]] = { "CohereForCausalLM": _cohere_mappings, "Cohere2ForCausalLM": _cohere_mappings, + "DeepseekV3ForCausalLM": _deepseek_mappings, "Gemma2ForCausalLM": _gemma_mappings, "Gemma3ForCausalLM": _gemma_mappings, "Gemma3ForConditionalGeneration": _gemma_mappings, @@ -146,7 +147,6 @@ class AWQMapping: "Qwen2MoeForCausalLM": _moe_default_mappings, "Qwen3ForCausalLM": _default_mappings, "Qwen3MoeForCausalLM": _moe_default_mappings, - "DeepseekV3ForCausalLM": _deepseek_mappings, } From 11d17ec4a9285848ed398dc5ace58b11a69e5c05 Mon Sep 17 00:00:00 2001 From: Brian Dellabetta Date: Mon, 14 Jul 2025 13:08:16 -0600 Subject: [PATCH 3/9] include q_proj in mapping Signed-off-by: Brian Dellabetta --- src/llmcompressor/modifiers/awq/mappings.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/llmcompressor/modifiers/awq/mappings.py b/src/llmcompressor/modifiers/awq/mappings.py index ca6313163..55e9aaf26 100644 --- a/src/llmcompressor/modifiers/awq/mappings.py +++ b/src/llmcompressor/modifiers/awq/mappings.py @@ -120,7 +120,8 @@ class AWQMapping: _deepseek_mappings = [ AWQMapping( "re:.*input_layernorm$", - ["re:.*q_a_proj$", "re:.*kv_a_proj_with_mqa$"], + # Some models use q_proj + ["re:.*q_proj$", "re:.*q_a_proj$", "re:.*kv_a_proj_with_mqa$"], ), AWQMapping("re:.*q_a_layernorm$", ["re:.*q_b_proj$"]), AWQMapping("re:.*kv_a_layernorm$", ["re:.*kv_b_proj$"]), From 2b1e644cd93c34b3958967944ddc1deb0d25ffe1 Mon Sep 17 00:00:00 2001 From: Brian Dellabetta Date: Mon, 14 Jul 2025 13:09:26 -0600 Subject: [PATCH 4/9] comment update Signed-off-by: Brian Dellabetta --- src/llmcompressor/modifiers/awq/mappings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llmcompressor/modifiers/awq/mappings.py b/src/llmcompressor/modifiers/awq/mappings.py index 55e9aaf26..ea8155136 100644 --- a/src/llmcompressor/modifiers/awq/mappings.py +++ b/src/llmcompressor/modifiers/awq/mappings.py @@ -120,7 +120,7 @@ class AWQMapping: _deepseek_mappings = [ AWQMapping( "re:.*input_layernorm$", - # Some models use q_proj + # Some models use q_proj instead of q_a_proj ["re:.*q_proj$", "re:.*q_a_proj$", "re:.*kv_a_proj_with_mqa$"], ), AWQMapping("re:.*q_a_layernorm$", ["re:.*q_b_proj$"]), From c6ca36c6640fc8149aa308c70169e9a7ab9cfba1 Mon Sep 17 00:00:00 2001 From: Brian Dellabetta Date: Mon, 14 Jul 2025 14:20:10 -0600 Subject: [PATCH 5/9] fix OR regex mapping Signed-off-by: Brian Dellabetta --- src/llmcompressor/modifiers/awq/mappings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llmcompressor/modifiers/awq/mappings.py b/src/llmcompressor/modifiers/awq/mappings.py index ea8155136..743f14e85 100644 --- a/src/llmcompressor/modifiers/awq/mappings.py +++ b/src/llmcompressor/modifiers/awq/mappings.py @@ -121,7 +121,7 @@ class AWQMapping: AWQMapping( "re:.*input_layernorm$", # Some models use q_proj instead of q_a_proj - ["re:.*q_proj$", "re:.*q_a_proj$", "re:.*kv_a_proj_with_mqa$"], + ["re:.*(q|q_a)_proj$", "re:.*kv_a_proj_with_mqa$"], ), AWQMapping("re:.*q_a_layernorm$", ["re:.*q_b_proj$"]), AWQMapping("re:.*kv_a_layernorm$", ["re:.*kv_b_proj$"]), From 5288bec95bb13136d8c7a2b2be0ad644d70f6baa Mon Sep 17 00:00:00 2001 From: Brian Dellabetta Date: Wed, 16 Jul 2025 02:19:28 +0900 Subject: [PATCH 6/9] decrease memory when calculating w_mean Signed-off-by: Brian Dellabetta --- src/llmcompressor/modifiers/awq/base.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/llmcompressor/modifiers/awq/base.py b/src/llmcompressor/modifiers/awq/base.py index 7d5c3671f..6e533cc1a 100644 --- a/src/llmcompressor/modifiers/awq/base.py +++ b/src/llmcompressor/modifiers/awq/base.py @@ -465,11 +465,13 @@ def _apply_smoothing(self, model: Module) -> None: # Calculates the relative magnitude of the weights within # each of the quantization groups, and rescales each group # individually so that each group has weights on a 0-1 scale. - w_scale = weight.abs() / (weight.abs().amax(dim=1, keepdim=True) + 1e-6) + weight.abs_() + weight.div_(weight.amax(dim=1, keepdim=True) + 1e-6) # Resizes the rescaled weight matrix back up to its original dimensions - w_scale = w_scale.view(org_shape) + weight = weight.view(org_shape) # Gets the average rescaled magnitude for each output channel - w_mean = w_scale.mean(0) + w_mean = weight.mean(0) + del weight with calibration_forward_context(model), HooksMixin.disable_hooks(): # [STEP 3]: Compute output of module From 2bc623f0dab09ff3e3cde27fb84c9851d4c41e30 Mon Sep 17 00:00:00 2001 From: cjackal <44624812+cjackal@users.noreply.github.com> Date: Wed, 16 Jul 2025 16:00:07 +0000 Subject: [PATCH 7/9] bump `transformers>=4.52` to import `Llama4Config` from `transformers.models` Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com> --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 04de6484d..6610467cf 100644 --- a/setup.py +++ b/setup.py @@ -119,7 +119,7 @@ def localversion_func(version: ScmVersion) -> str: "tqdm>=4.0.0", # torch 1.10 and 1.11 do not support quantized onnx export "torch>=1.7.0,!=1.10,!=1.11", - "transformers>4.0", + "transformers>=4.52.0", "datasets", "accelerate>=0.20.3,!=1.1.0", "pynvml", From 10a3cc2e34d076c8ac538e5b9021a5a57e7147ce Mon Sep 17 00:00:00 2001 From: cjackal <44624812+cjackal@users.noreply.github.com> Date: Thu, 17 Jul 2025 09:33:43 +0900 Subject: [PATCH 8/9] Update setup.py Co-authored-by: Brian Dellabetta Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com> --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6610467cf..04de6484d 100644 --- a/setup.py +++ b/setup.py @@ -119,7 +119,7 @@ def localversion_func(version: ScmVersion) -> str: "tqdm>=4.0.0", # torch 1.10 and 1.11 do not support quantized onnx export "torch>=1.7.0,!=1.10,!=1.11", - "transformers>=4.52.0", + "transformers>4.0", "datasets", "accelerate>=0.20.3,!=1.1.0", "pynvml", From a37bba3cfbfbdd81c6ea7f76204a3f1c9a104c4b Mon Sep 17 00:00:00 2001 From: cjackal <44624812+cjackal@users.noreply.github.com> Date: Thu, 17 Jul 2025 00:38:36 +0000 Subject: [PATCH 9/9] move `Llama4Config` importation path for compatibility with wider transformers versions Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com> --- src/llmcompressor/modeling/llama4.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/llmcompressor/modeling/llama4.py b/src/llmcompressor/modeling/llama4.py index 02e3dc8fc..78f9d8ab2 100644 --- a/src/llmcompressor/modeling/llama4.py +++ b/src/llmcompressor/modeling/llama4.py @@ -1,8 +1,10 @@ from typing import Tuple import torch -from transformers.models import Llama4Config -from transformers.models.llama4.configuration_llama4 import Llama4TextConfig +from transformers.models.llama4.configuration_llama4 import ( + Llama4Config, + Llama4TextConfig, +) from transformers.models.llama4.modeling_llama4 import ( Llama4TextExperts, Llama4TextMLP,