55
55
QKVParallelLinear ,
56
56
RowParallelLinear )
57
57
from vllm .model_executor .layers .quantization import QuantizationConfig
58
- from vllm .model_executor .layers .quantization .gptq import GPTQConfig
59
- from vllm .model_executor .layers .quantization .gptq_marlin import (
60
- GPTQMarlinConfig )
61
58
from vllm .model_executor .model_loader .weight_utils import default_weight_loader
62
59
from vllm .model_executor .models .module_mapping import MultiModelKeys
63
60
from vllm .multimodal import MULTIMODAL_REGISTRY
@@ -179,20 +176,20 @@ def __init__(
179
176
hidden_features : int ,
180
177
bias : bool = False ,
181
178
quant_config : Optional [QuantizationConfig ] = None ,
179
+ prefix : str = "" ,
182
180
):
183
181
super ().__init__ ()
184
182
self .gate_up_proj = MergedColumnParallelLinear (
185
183
input_size = in_features ,
186
184
output_sizes = [hidden_features ] * 2 ,
187
185
bias = bias ,
188
186
quant_config = quant_config ,
189
- )
190
- self .down_proj = RowParallelLinear (
191
- hidden_features ,
192
- in_features ,
193
- bias = bias ,
194
- quant_config = quant_config ,
195
- )
187
+ prefix = f"{ prefix } .gate_up_proj" )
188
+ self .down_proj = RowParallelLinear (hidden_features ,
189
+ in_features ,
190
+ bias = bias ,
191
+ quant_config = quant_config ,
192
+ prefix = f"{ prefix } .down_proj" )
196
193
self .act_fn = SiluAndMul ()
197
194
198
195
def forward (self , x : torch .Tensor ):
@@ -407,6 +404,7 @@ def __init__(
407
404
mlp_hidden_dim ,
408
405
bias = False ,
409
406
quant_config = quant_config ,
407
+ prefix = f"{ prefix } .mlp" ,
410
408
)
411
409
412
410
def forward (
@@ -1278,7 +1276,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
1278
1276
self .visual = Glm4vVisionTransformer (
1279
1277
config .vision_config ,
1280
1278
norm_eps = getattr (config , "rms_norm_eps" , 1e-5 ),
1281
- quant_config = self . _maybe_ignore_quant_config ( quant_config ) ,
1279
+ quant_config = quant_config ,
1282
1280
prefix = maybe_prefix (prefix , "visual" ),
1283
1281
)
1284
1282
@@ -1291,13 +1289,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
1291
1289
self .make_empty_intermediate_tensors = (
1292
1290
self .language_model .make_empty_intermediate_tensors )
1293
1291
1294
- def _maybe_ignore_quant_config (self , quant_config : QuantizationConfig ):
1295
- # GPTQ configs do not have a list of ignored modules, however AutoGPTQ
1296
- # seems to avoid vision encoder sections for some models.
1297
- if isinstance (quant_config , (GPTQConfig , GPTQMarlinConfig )):
1298
- return None
1299
- return quant_config
1300
-
1301
1292
def _validate_and_reshape_mm_tensor (self , mm_input : object ,
1302
1293
name : str ) -> torch .Tensor :
1303
1294
if not isinstance (mm_input , (torch .Tensor , list )):
0 commit comments