[CI] rename Qwen2.5-0.5B-Instruct-W8A8 model (#1145)

22dimensions · wangxiaoxin (A) · commit e343e13a842a · 2025-06-17T14:55:53.000+08:00
1. Rename vllm-ascend/Qwen2.5-0.5B-Instruct-W8A8-new to
vllm-ascend/Qwen2.5-0.5B-Instruct-W8A8 in the QUANTIZATION_MODELS list of
tests/singlecard/test_offline_inference.py.

Signed-off-by: 22dimensions <waitingwind@foxmail.com>
Signed-off-by: wangxiaoxin (A) <wangxiaoxin7@huawei.com>
diff --git a/tests/singlecard/test_offline_inference.py b/tests/singlecard/test_offline_inference.py
@@ -39,7 +39,7 @@
 MULTIMODALITY_MODELS = ["Qwen/Qwen2.5-VL-3B-Instruct"]
 
 QUANTIZATION_MODELS = [
-    "vllm-ascend/Qwen2.5-0.5B-Instruct-W8A8-new",
+    "vllm-ascend/Qwen2.5-0.5B-Instruct-W8A8",
 ]
 os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256"
 

Original file line number	Diff line number	Diff line change
`@@ -39,7 +39,7 @@`
`39`	`39`	`MULTIMODALITY_MODELS = ["Qwen/Qwen2.5-VL-3B-Instruct"]`
`40`	`40`
`41`	`41`	`QUANTIZATION_MODELS = [`
`42`		`- "vllm-ascend/Qwen2.5-0.5B-Instruct-W8A8-new",`
	`42`	`+ "vllm-ascend/Qwen2.5-0.5B-Instruct-W8A8",`
`43`	`43`	`]`
`44`	`44`	`os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256"`
`45`	`45`