Commit 53f1715

zRzRzRzRzRzRzR authored and Chen-zexi committed
[Misc] Remove _maybe_ignore_quant_config from GLM4.1v (vllm-project#20432)
Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com>
1 parent 96f9321 commit 53f1715

File tree

1 file changed: +9 -18 lines changed

vllm/model_executor/models/glm4_1v.py

Lines changed: 9 additions & 18 deletions
@@ -55,9 +55,6 @@
                                                QKVParallelLinear,
                                                RowParallelLinear)
 from vllm.model_executor.layers.quantization import QuantizationConfig
-from vllm.model_executor.layers.quantization.gptq import GPTQConfig
-from vllm.model_executor.layers.quantization.gptq_marlin import (
-    GPTQMarlinConfig)
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.model_executor.models.module_mapping import MultiModelKeys
 from vllm.multimodal import MULTIMODAL_REGISTRY
@@ -179,20 +176,20 @@ def __init__(
         hidden_features: int,
         bias: bool = False,
         quant_config: Optional[QuantizationConfig] = None,
+        prefix: str = "",
     ):
         super().__init__()
         self.gate_up_proj = MergedColumnParallelLinear(
             input_size=in_features,
             output_sizes=[hidden_features] * 2,
             bias=bias,
             quant_config=quant_config,
-        )
-        self.down_proj = RowParallelLinear(
-            hidden_features,
-            in_features,
-            bias=bias,
-            quant_config=quant_config,
-        )
+            prefix=f"{prefix}.gate_up_proj")
+        self.down_proj = RowParallelLinear(hidden_features,
+                                           in_features,
+                                           bias=bias,
+                                           quant_config=quant_config,
+                                           prefix=f"{prefix}.down_proj")
         self.act_fn = SiluAndMul()
 
     def forward(self, x: torch.Tensor):
@@ -407,6 +404,7 @@ def __init__(
             mlp_hidden_dim,
             bias=False,
             quant_config=quant_config,
+            prefix=f"{prefix}.mlp",
         )
 
     def forward(
@@ -1278,7 +1276,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self.visual = Glm4vVisionTransformer(
             config.vision_config,
             norm_eps=getattr(config, "rms_norm_eps", 1e-5),
-            quant_config=self._maybe_ignore_quant_config(quant_config),
+            quant_config=quant_config,
             prefix=maybe_prefix(prefix, "visual"),
         )
 
@@ -1291,13 +1289,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self.make_empty_intermediate_tensors = (
             self.language_model.make_empty_intermediate_tensors)
 
-    def _maybe_ignore_quant_config(self, quant_config: QuantizationConfig):
-        # GPTQ configs do not have a list of ignored modules, however AutoGPTQ
-        # seems to avoid vision encoder sections for some models.
-        if isinstance(quant_config, (GPTQConfig, GPTQMarlinConfig)):
-            return None
-        return quant_config
-
     def _validate_and_reshape_mm_tensor(self, mm_input: object,
                                         name: str) -> torch.Tensor:
         if not isinstance(mm_input, (torch.Tensor, list)):
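
Why the prefix threading matters: with the GPTQ-specific _maybe_ignore_quant_config helper removed, the model no longer hard-codes which quantization backends skip the vision encoder; instead, each linear layer now receives its full dotted module name (e.g. "visual.blocks.0.mlp.gate_up_proj") via the prefix argument, so the quantization config itself can decide per module. The sketch below is a minimal illustration of that idea, not vLLM code; SimpleQuantConfig, is_ignored, and make_linear are hypothetical names, and the only assumption is that a config can consult a per-prefix ignore list.

# Illustrative sketch only: hypothetical names, not vLLM's API.
class SimpleQuantConfig:
    def __init__(self, ignored_prefixes: list[str]):
        self.ignored_prefixes = ignored_prefixes

    def is_ignored(self, prefix: str) -> bool:
        # A module is skipped when its dotted name starts with any entry
        # in the ignore list (e.g. "visual." for a vision encoder).
        return any(prefix.startswith(p) for p in self.ignored_prefixes)


def make_linear(in_features: int, out_features: int,
                quant_config: SimpleQuantConfig | None, prefix: str):
    # With the prefix available at construction time, the quantize/skip
    # decision is data-driven rather than special-cased per backend.
    if quant_config is None or quant_config.is_ignored(prefix):
        return ("unquantized_linear", prefix, in_features, out_features)
    return ("quantized_linear", prefix, in_features, out_features)


cfg = SimpleQuantConfig(ignored_prefixes=["visual."])
print(make_linear(1024, 4096, cfg, prefix="visual.blocks.0.mlp.gate_up_proj"))
print(make_linear(1024, 4096, cfg, prefix="model.layers.0.mlp.gate_up_proj"))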
