Skip to content

Commit c0acc46

Browse files
committed
Add W8A8_DYNAMIC quantization inference test
The model chosen is vllm-ascend/DeepSeek-V2-Lite-W8A8. Signed-off-by: sdmyzlp <lrwei2@petalmail.com>
1 parent 6dca835 commit c0acc46

File tree

2 files changed

+20
-1
lines changed

2 files changed

+20
-1
lines changed

.github/workflows/vllm_ascend_test.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ jobs:
127127
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
128128
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
129129
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
130+
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
130131
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
131132
fi
132133
@@ -157,5 +158,6 @@ jobs:
157158
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
158159
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
159160
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
161+
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
160162
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
161163
fi

tests/multicard/test_offline_inference_distributed.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import os
2424
from unittest.mock import patch
2525

26-
import vllm # noqa: F401
26+
from modelscope import snapshot_download # type: ignore
2727
from vllm import SamplingParams
2828

2929
from tests.conftest import VllmRunner
@@ -95,3 +95,20 @@ def test_models_distributed_DeepSeek_dbo():
9595
distributed_executor_backend="mp",
9696
) as vllm_model:
9797
vllm_model.generate(example_prompts, sampling_params)
98+
99+
100+
def test_models_distributed_DeepSeek_W8A8():
    """Smoke-test distributed inference of a W8A8-quantized DeepSeek model.

    Downloads vllm-ascend/DeepSeek-V2-Lite-W8A8 via ModelScope and runs a
    short greedy generation under tensor parallelism with the Ascend
    quantization backend enabled.
    """
    prompts = ["Hello, my name is"]
    gen_tokens = 5

    # snapshot_download resolves the ModelScope repo to a local path.
    model_path = snapshot_download("vllm-ascend/DeepSeek-V2-Lite-W8A8")
    runner = VllmRunner(
        model_path,
        max_model_len=8192,
        enforce_eager=True,
        dtype="auto",
        tensor_parallel_size=4,
        quantization="ascend",
    )
    with runner as vllm_model:
        vllm_model.generate_greedy(prompts, gen_tokens)

0 commit comments

Comments
 (0)