File tree Expand file tree Collapse file tree 2 files changed +20
-1
lines changed Expand file tree Collapse file tree 2 files changed +20
-1
lines changed Original file line number Diff line number Diff line change @@ -127,6 +127,7 @@ jobs:
127
127
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
128
128
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
129
129
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
130
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
130
131
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
131
132
fi
132
133
@@ -157,5 +158,6 @@ jobs:
157
158
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
158
159
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
159
160
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
161
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
160
162
VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
161
163
fi
Original file line number Diff line number Diff line change 23
23
import os
24
24
from unittest .mock import patch
25
25
26
- import vllm # noqa: F401
26
+ from modelscope import snapshot_download # type: ignore
27
27
from vllm import SamplingParams
28
28
29
29
from tests .conftest import VllmRunner
@@ -95,3 +95,20 @@ def test_models_distributed_DeepSeek_dbo():
95
95
distributed_executor_backend = "mp" ,
96
96
) as vllm_model :
97
97
vllm_model .generate (example_prompts , sampling_params )
98
+
99
+
100
def test_models_distributed_DeepSeek_W8A8():
    """Smoke-test multicard greedy generation on the W8A8-quantized
    DeepSeek-V2-Lite checkpoint with the Ascend quantization backend.

    Downloads the model via ModelScope, runs it under a 4-way
    tensor-parallel eager-mode VllmRunner, and only checks that
    generation completes without error (no output-content assertions).
    """
    prompts = ["Hello, my name is"]
    max_tokens = 5

    # Keyword configuration gathered up front; values are passed through
    # to VllmRunner unchanged.
    runner_config = dict(
        max_model_len=8192,
        enforce_eager=True,
        dtype="auto",
        tensor_parallel_size=4,
        quantization="ascend",
    )
    model_path = snapshot_download("vllm-ascend/DeepSeek-V2-Lite-W8A8")
    with VllmRunner(model_path, **runner_config) as vllm_model:
        vllm_model.generate_greedy(prompts, max_tokens)
You can’t perform that action at this time.
0 commit comments