Skip to content

Commit 9487035

Browse files
authored
[Quantization] Bump compressed-tensors version; update NVFP4A16 test model (#19224)
Signed-off-by: Dipika Sikka <dipikasikka1@gmail.com>
1 parent 0d49483 commit 9487035

File tree

3 files changed

+5
-6
lines changed

3 files changed

+5
-6
lines changed

requirements/common.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ pyyaml
3737
six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
3838
setuptools>=77.0.3,<80; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
3939
einops # Required for Qwen2-VL.
40-
compressed-tensors == 0.9.4 # required for compressed-tensors
40+
compressed-tensors == 0.10.0 # required for compressed-tensors
4141
depyf==0.18.0 # required for profiling and debugging with compilation config
4242
cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
4343
watchfiles # required for http server to monitor the updates of TLS files

tests/quantization/test_compressed_tensors.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -651,10 +651,9 @@ def check_model(model):
651651
assert output
652652

653653

654-
@pytest.mark.skip(reason="Skip until the model config is updated")
655654
def test_compressed_tensors_nvfp4a16(vllm_runner):
656655
# run weight only example
657-
model = "nm-testing/TinyLlama-1.1B-Chat-v1.0-FP4"
656+
model = "nm-testing/TinyLlama-1.1B-Chat-v1.0-NVFP4A16"
658657
with vllm_runner(model, enforce_eager=True) as llm:
659658

660659
def check_model(model):

vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -222,15 +222,15 @@ def _is_fp4a16_nvfp4(self, weight_quant: BaseModel,
222222
input_quant: BaseModel):
223223

224224
is_weight_only = weight_quant is not None and input_quant is None
225-
is_group_quant = (
226-
weight_quant.strategy == QuantizationStrategy.GROUP.value)
225+
is_tensor_group_quant = (
226+
weight_quant.strategy == QuantizationStrategy.TENSOR_GROUP.value)
227227
is_symmetric = weight_quant.symmetric
228228

229229
is_group_size_16 = weight_quant.group_size == 16
230230
is_float_type = weight_quant.type == QuantizationType.FLOAT
231231
is_4_bits = weight_quant.num_bits == 4
232232

233-
return (is_weight_only and is_group_quant and is_float_type
233+
return (is_weight_only and is_tensor_group_quant and is_float_type
234234
and is_4_bits and is_group_size_16 and is_symmetric)
235235

236236
def _is_static_tensor_w8a8(self, weight_quant: BaseModel,

0 commit comments

Comments (0)