# This file is a part of the vllm-ascend project.
#

-name: 'e2e test / basic'
+name: 'test'

on:
  schedule:
@@ -114,6 +114,56 @@ jobs:
          echo "::add-matcher::.github/workflows/matchers/mypy.json"
          tools/mypy.sh 1 ${{ matrix.python-version }}

+  ut:
+    needs: [lint]
+    name: unit test
+    if: ${{ needs.lint.result == 'success' }}
+    runs-on: ubuntu-latest
+    container:
+      image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
+    env:
+      VLLM_LOGGING_LEVEL: ERROR
+      VLLM_USE_MODELSCOPE: True
+    strategy:
+      matrix:
+        vllm_version: [main, v0.9.1]
+    steps:
+      - name: Install packages
+        run: |
+          apt-get update -y
+          apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev
+
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ matrix.vllm_version }}
+          path: ./vllm-empty
+
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty python3 -m pip install . --extra-index https://download.pytorch.org/whl/cpu/
+          python3 -m pip uninstall -y triton
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v4
+
+      - name: Install vllm-project/vllm-ascend
+        run: |
+          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
+          python3 -m pip install -r requirements-dev.txt --extra-index https://download.pytorch.org/whl/cpu/
+          python3 -m pip install -v . --extra-index https://download.pytorch.org/whl/cpu/
+
+      - name: Run unit test for V1 Engine
+        env:
+          VLLM_USE_V1: 1
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+          TORCH_DEVICE_BACKEND_AUTOLOAD: 0
+        run: |
+          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
+          pytest -sv tests/ut
+
  e2e:
    needs: [lint]
    if: ${{ needs.lint.result == 'success' }}
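
Note: the new `ut` job above runs on a plain `ubuntu-latest` runner inside a CANN container, so it needs no NPU hardware: vLLM is installed with `VLLM_TARGET_DEVICE=empty` (no device kernels are built) before `pytest -sv tests/ut` runs. A rough local reproduction, a sketch that assumes the same container image with vllm checked out at ./vllm-empty and vllm-ascend at the current directory, might look like:

    # inside m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
    # install vLLM without device kernels, then vllm-ascend and its dev deps
    VLLM_TARGET_DEVICE=empty python3 -m pip install ./vllm-empty --extra-index https://download.pytorch.org/whl/cpu/
    python3 -m pip uninstall -y triton
    python3 -m pip install -r requirements-dev.txt --extra-index https://download.pytorch.org/whl/cpu/
    python3 -m pip install -v . --extra-index https://download.pytorch.org/whl/cpu/
    # run the unit tests with the same env the job sets
    VLLM_USE_V1=1 VLLM_WORKER_MULTIPROC_METHOD=spawn TORCH_DEVICE_BACKEND_AUTOLOAD=0 pytest -sv tests/ut
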
@@ -122,7 +172,7 @@ jobs:
      matrix:
        os: [linux-arm64-npu-1]
        vllm_version: [main, v0.9.1]
-    name: vLLM Ascend test
+    name: singlecard e2e test
    runs-on: ${{ matrix.os }}
    container:
      # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
@@ -168,53 +218,47 @@ jobs:
          pip install -r requirements-dev.txt
          pip install -v -e .

-      - name: Run vllm-project/vllm-ascend test for V1 Engine
+      - name: Run e2e test for V1 Engine
        env:
          VLLM_USE_V1: 1
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: True
        run: |
-          pytest -sv tests/singlecard/test_offline_inference.py
+          pytest -sv tests/e2e/singlecard/test_offline_inference.py
          # TODO: switch hf to modelscope
          VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
-          pytest -sv tests/singlecard/test_ilama_lora.py
+          pytest -sv tests/e2e/singlecard/test_ilama_lora.py
          # TODO(sss): guided decoding doesn't work, fix it later
-          # pytest -sv tests/singlecard/test_guided_decoding.py
-          # test_ascend_config.py should be ran separately because it will regenerate the global config many times.
-          pytest -sv tests/singlecard/test_ascend_config.py
-          pytest -sv tests/singlecard/test_camem.py
-          pytest -sv tests/singlecard/ \
-          --ignore=tests/singlecard/test_offline_inference.py \
-          --ignore=tests/singlecard/test_ilama_lora.py \
-          --ignore=tests/singlecard/test_guided_decoding.py \
-          --ignore=tests/singlecard/test_ascend_config.py \
-          --ignore=tests/singlecard/test_camem.py
+          # pytest -sv tests/e2e/singlecard/test_guided_decoding.py
+          pytest -sv tests/e2e/singlecard/test_camem.py
+          pytest -sv tests/e2e/singlecard/ \
+          --ignore=tests/e2e/singlecard/test_offline_inference.py \
+          --ignore=tests/e2e/singlecard/test_ilama_lora.py \
+          --ignore=tests/e2e/singlecard/test_guided_decoding.py \
+          --ignore=tests/e2e/singlecard/test_camem.py

-      - name: Run vllm-project/vllm-ascend test on V0 engine
+      - name: Run e2e test on V0 engine
        if: ${{ github.event_name == 'schedule' }}
        env:
          VLLM_USE_V1: 0
          VLLM_USE_MODELSCOPE: True
        run: |
-          pytest -sv tests/singlecard/test_offline_inference.py
+          pytest -sv tests/e2e/singlecard/test_offline_inference.py
          # TODO: switch hf to modelscope
          VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
-          pytest -sv tests/singlecard/test_ilama_lora.py
+          pytest -sv tests/e2e/singlecard/test_ilama_lora.py
          # guided decoding doesn't work, fix it later
-          # pytest -sv tests/singlecard/test_guided_decoding.py
-          pytest -sv tests/singlecard/test_camem.py
-          # test_ascend_config.py should be ran separately because it will regenerate the global config many times.
-          pytest -sv tests/singlecard/test_ascend_config.py
-          pytest -sv tests/singlecard/test_prompt_embedding.py
-          pytest -sv tests/singlecard/ \
-          --ignore=tests/singlecard/test_offline_inference.py \
-          --ignore=tests/singlecard/test_ilama_lora.py \
-          --ignore=tests/singlecard/test_guided_decoding.py \
-          --ignore=tests/singlecard/test_camem.py \
-          --ignore=tests/singlecard/test_ascend_config.py \
-          --ignore=tests/singlecard/test_prompt_embedding.py \
-          --ignore=tests/singlecard/core/test_ascend_scheduler.py \
-          --ignore=tests/singlecard/core/test_ascend_scheduler_e2e.py
+          # pytest -sv tests/e2e/singlecard/test_guided_decoding.py
+          pytest -sv tests/e2e/singlecard/test_camem.py
+          pytest -sv tests/e2e/singlecard/test_prompt_embedding.py
+          pytest -sv tests/e2e/singlecard/ \
+          --ignore=tests/e2e/singlecard/test_offline_inference.py \
+          --ignore=tests/e2e/singlecard/test_ilama_lora.py \
+          --ignore=tests/e2e/singlecard/test_guided_decoding.py \
+          --ignore=tests/e2e/singlecard/test_camem.py \
+          --ignore=tests/e2e/singlecard/test_prompt_embedding.py \
+          --ignore=tests/e2e/singlecard/core/test_ascend_scheduler.py \
+          --ignore=tests/e2e/singlecard/core/test_ascend_scheduler_e2e.py

  e2e-4-cards:
    needs: [e2e]
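
Note: in both run blocks above, the trailing backslash makes `VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com` a per-command environment override: it applies only to the single pytest invocation continued on the next line, while every other command in the block keeps the job-level `VLLM_USE_MODELSCOPE: True`. The same scoping written on one line (a sketch, mirroring the diff's own command):

    # the overrides last only for this one pytest process
    VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com pytest -sv tests/e2e/singlecard/test_ilama_lora.py

The final `pytest -sv tests/e2e/singlecard/` invocation then sweeps the rest of the directory, with the `--ignore` flags excluding exactly the tests already run (or deliberately skipped) above, so nothing runs twice.
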
@@ -224,7 +268,7 @@ jobs:
      matrix:
        os: [linux-arm64-npu-4]
        vllm_version: [main, v0.9.1]
-    name: vLLM Ascend test
+    name: multicard e2e test
    runs-on: ${{ matrix.os }}
    container:
      # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
@@ -279,14 +323,14 @@ jobs:
        run: |
          # TODO: switch hf to modelscope
          VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
-          pytest -sv tests/multicard/test_ilama_lora_tp2.py
-          # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py will raise error.
+          pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
+          # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
          # To avoid oom, we need to run the test in a single process.
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
-          pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
+          pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py --ignore=tests/e2e/multicard/test_offline_inference_distributed.py

      - name: Run vllm-project/vllm-ascend test on V0 engine
        if: ${{ github.event_name == 'schedule' }}
@@ -296,11 +340,11 @@ jobs:
        run: |
          # TODO: switch hf to modelscope
          VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
-          pytest -sv tests/multicard/test_ilama_lora_tp2.py
-          # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py will raise error.
+          pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
+          # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
          # To avoid oom, we need to run the test in a single process.
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
-          pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
+          pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py --ignore=tests/e2e/multicard/test_offline_inference_distributed.py
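
Note: per the in-line comment, each `test_models_distributed_*` case gets its own pytest process so NPU memory is fully released between models; collecting them all in one process can OOM. A sketch of the same pattern as a loop, assuming the test IDs stay as listed in the diff:

    # one pytest process per distributed case, so device memory is freed between runs
    for case in QwQ DeepSeek topk DeepSeek_W8A8; do
        pytest -sv "tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_${case}"
    done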