 name: 'test'

 on:
-  schedule:
-    - cron: '0 23 * * *'
   pull_request:
     branches:
       - 'main'
@@ -44,12 +42,6 @@ defaults:
   run:
     shell: bash -el {0}

-# only cancel in-progress runs of the same workflow
-# and ignore the lint / 1 card / 4 cards test type
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
 jobs:
   lint:
     runs-on: ubuntu-latest
@@ -114,171 +106,32 @@ jobs:
           echo "::add-matcher::.github/workflows/matchers/mypy.json"
           tools/mypy.sh 1 ${{ matrix.python-version }}

-  ut:
-    needs: [lint]
-    name: unit test
-    if: ${{ needs.lint.result == 'success' }}
-    runs-on: ubuntu-latest
-    container:
-      image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
-      env:
-        VLLM_LOGGING_LEVEL: ERROR
-        VLLM_USE_MODELSCOPE: True
-    strategy:
-      matrix:
-        vllm_version: [main, v0.9.1]
-    steps:
-      - name: Install packages
-        run: |
-          apt-get update -y
-          apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev
-
-      - name: Checkout vllm-project/vllm repo
-        uses: actions/checkout@v4
-        with:
-          repository: vllm-project/vllm
-          ref: ${{ matrix.vllm_version }}
-          path: ./vllm-empty
-
-      - name: Install vllm-project/vllm from source
-        working-directory: ./vllm-empty
-        run: |
-          VLLM_TARGET_DEVICE=empty python3 -m pip install . --extra-index https://download.pytorch.org/whl/cpu/
-          python3 -m pip uninstall -y triton
-
-      - name: Checkout vllm-project/vllm-ascend repo
-        uses: actions/checkout@v4
-
-      - name: Install vllm-project/vllm-ascend
-        run: |
-          export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
-          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
-          python3 -m pip install -r requirements-dev.txt --extra-index https://download.pytorch.org/whl/cpu/
-          python3 -m pip install -v . --extra-index https://download.pytorch.org/whl/cpu/
-
-      - name: Run unit test for V1 Engine
-        env:
-          VLLM_USE_V1: 1
-          VLLM_WORKER_MULTIPROC_METHOD: spawn
-          TORCH_DEVICE_BACKEND_AUTOLOAD: 0
-        run: |
-          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
-          pytest -sv tests/ut
-
   e2e:
     needs: [lint]
     if: ${{ needs.lint.result == 'success' }}
     strategy:
       max-parallel: 2
       matrix:
-        os: [linux-arm64-npu-1]
-        vllm_version: [main, v0.9.1]
-    name: singlecard e2e test
-    runs-on: ${{ matrix.os }}
-    container:
-      # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
-      image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
-      env:
-        VLLM_LOGGING_LEVEL: ERROR
-    steps:
-      - name: Check npu and CANN info
-        run: |
-          npu-smi info
-          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
-
-      - name: Config mirrors
-        run: |
-          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
-          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
-          apt-get update -y
-          apt install git -y
-          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
-
-      - name: Checkout vllm-project/vllm-ascend repo
-        uses: actions/checkout@v4
-
-      - name: Install system dependencies
-        run: |
-          apt-get -y install `cat packages.txt`
-          apt-get -y install gcc g++ cmake libnuma-dev
-
-      - name: Checkout vllm-project/vllm repo
-        uses: actions/checkout@v4
-        with:
-          repository: vllm-project/vllm
-          ref: ${{ matrix.vllm_version }}
-          path: ./vllm-empty
-
-      - name: Install vllm-project/vllm from source
-        working-directory: ./vllm-empty
-        run: |
-          VLLM_TARGET_DEVICE=empty pip install -e .
-
-      - name: Install vllm-project/vllm-ascend
-        env:
-          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
-        run: |
-          pip install -r requirements-dev.txt
-          pip install -v -e .
-
-      - name: Run e2e test for V1 Engine
-        env:
-          VLLM_USE_V1: 1
-          VLLM_WORKER_MULTIPROC_METHOD: spawn
-          VLLM_USE_MODELSCOPE: True
-        run: |
-          pytest -sv tests/e2e/singlecard/test_offline_inference.py
-          # TODO: switch hf to modelscope
-          VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
-          pytest -sv tests/e2e/singlecard/test_ilama_lora.py
-          # TODO(sss): guided decoding doesn't work, fix it later
-          # pytest -sv tests/e2e/singlecard/test_guided_decoding.py
-          pytest -sv tests/e2e/singlecard/test_camem.py
-          pytest -sv tests/e2e/singlecard/ \
-            --ignore=tests/e2e/singlecard/test_offline_inference.py \
-            --ignore=tests/e2e/singlecard/test_ilama_lora.py \
-            --ignore=tests/e2e/singlecard/test_guided_decoding.py \
-            --ignore=tests/e2e/singlecard/test_camem.py
-
-      - name: Run e2e test on V0 engine
-        if: ${{ github.event_name == 'schedule' }}
-        env:
-          VLLM_USE_V1: 0
-          VLLM_USE_MODELSCOPE: True
-        run: |
-          pytest -sv tests/e2e/singlecard/test_offline_inference.py
-          # TODO: switch hf to modelscope
-          VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
-          pytest -sv tests/e2e/singlecard/test_ilama_lora.py
-          # guided decoding doesn't work, fix it later
-          # pytest -sv tests/e2e/singlecard/test_guided_decoding.py
-          pytest -sv tests/e2e/singlecard/test_camem.py
-          pytest -sv tests/e2e/singlecard/test_prompt_embedding.py
-          pytest -sv tests/e2e/singlecard/ \
-            --ignore=tests/e2e/singlecard/test_offline_inference.py \
-            --ignore=tests/e2e/singlecard/test_ilama_lora.py \
-            --ignore=tests/e2e/singlecard/test_guided_decoding.py \
-            --ignore=tests/e2e/singlecard/test_camem.py \
-            --ignore=tests/e2e/singlecard/test_prompt_embedding.py \
-            --ignore=tests/e2e/singlecard/core/test_ascend_scheduler.py \
-            --ignore=tests/e2e/singlecard/core/test_ascend_scheduler_e2e.py
-
-  e2e-4-cards:
-    needs: [e2e]
-    if: ${{ needs.e2e.result == 'success' }}
-    strategy:
-      max-parallel: 1
-      matrix:
-        os: [linux-arm64-npu-4]
-        vllm_version: [main, v0.9.1]
-    name: multicard e2e test
+        os: [linux-arm64-npu-1, linux-arm64-npu-4]
+        vllm_version: [v0.9.1]
+    concurrency:
+      group: >
+        ${{
+        matrix.os == 'linux-arm64-npu-4'
+          && github.event.pull_request.number
+          && format('pr-{0}-limit-npu-4', github.event.pull_request.number)
+        || format('job-{0}-{1}-{2}', matrix.os, matrix.vllm_version, github.event.pull_request.number)
+        }}
+      cancel-in-progress: false
+    name: vLLM Ascend test
     runs-on: ${{ matrix.os }}
     container:
       # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
       image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
       env:
+        HF_ENDPOINT: https://hf-mirror.com
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
         VLLM_LOGGING_LEVEL: ERROR
-        VLLM_USE_MODELSCOPE: True
     steps:
       - name: Check npu and CANN info
         run: |
@@ -324,32 +177,64 @@ jobs:
         env:
           VLLM_USE_V1: 1
           VLLM_WORKER_MULTIPROC_METHOD: spawn
-          VLLM_USE_MODELSCOPE: True
         run: |
-          # TODO: switch hf to modelscope
-          VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
-          pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
-          # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
-          # To avoid oom, we need to run the test in a single process.
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
-          pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py --ignore=tests/e2e/multicard/test_offline_inference_distributed.py
+          if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/singlecard/test_offline_inference.py
+            # guided decoding doesn't work, fix it later
+            # pytest -sv tests/singlecard/test_guided_decoding.py
+            # test_ascend_config.py should be run separately because it will regenerate the global config many times.
+            pytest -sv tests/singlecard/test_ascend_config.py
+            pytest -sv tests/singlecard/test_camem.py
+            pytest -sv tests/singlecard/core/test_ascend_scheduler.py
+            pytest -sv tests/singlecard/core/test_ascend_scheduler_e2e.py
+            pytest -sv tests/singlecard/ \
+              --ignore=tests/singlecard/test_offline_inference.py \
+              --ignore=tests/singlecard/test_guided_decoding.py \
+              --ignore=tests/singlecard/test_ascend_config.py \
+              --ignore=tests/singlecard/test_camem.py \
+              --ignore=tests/singlecard/core/test_ascend_scheduler.py \
+              --ignore=tests/singlecard/core/test_ascend_scheduler_e2e.py
+          else
+            pytest -sv tests/multicard/test_ilama_lora_tp2.py
+            # To avoid oom, we need to run the test in a single process.
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_w4a8_deepseek.py::test_deepseek_W4A8
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_dbo
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeekV3_dbo
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py --ignore=tests/multicard/test_w4a8_deepseek.py
+          fi

       - name: Run vllm-project/vllm-ascend test on V0 engine
         if: ${{ github.event_name == 'schedule' }}
         env:
           VLLM_USE_V1: 0
-          VLLM_USE_MODELSCOPE: True
         run: |
-          # TODO: switch hf to modelscope
-          VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
-          pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
-          # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
-          # To avoid oom, we need to run the test in a single process.
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
-          pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py --ignore=tests/e2e/multicard/test_offline_inference_distributed.py
+          if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/singlecard/test_offline_inference.py
+            # guided decoding doesn't work, fix it later
+            # pytest -sv tests/singlecard/test_guided_decoding.py
+            pytest -sv tests/singlecard/test_camem.py
+            # test_ascend_config.py should be run separately because it will regenerate the global config many times.
+            pytest -sv tests/singlecard/test_ascend_config.py
+            pytest -sv tests/singlecard/test_prompt_embedding.py
+            pytest -sv tests/singlecard/ \
+              --ignore=tests/singlecard/test_offline_inference.py \
+              --ignore=tests/singlecard/test_guided_decoding.py \
+              --ignore=tests/singlecard/test_camem.py \
+              --ignore=tests/singlecard/test_ascend_config.py \
+              --ignore=tests/singlecard/test_prompt_embedding.py \
+              --ignore=tests/singlecard/core/test_ascend_scheduler.py \
+              --ignore=tests/singlecard/core/test_ascend_scheduler_e2e.py
+          else
+            pytest -sv tests/multicard/test_ilama_lora_tp2.py
+            # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py will raise error.
+            # To avoid oom, we need to run the test in a single process.
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
+          fi
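
A note on the job-level concurrency block added above: GitHub Actions expressions have no ternary operator, so the workflow emulates one with `a && b || c`, which evaluates to `b` when `a` is truthy and to `c` otherwise. Below is a minimal standalone sketch of the same pattern; the `concurrency-demo` workflow name, the `demo` job on `ubuntu-latest`, the simplified two-field fallback group, and the echo step are illustrative placeholders and not part of this diff.

name: concurrency-demo

on:
  pull_request:

jobs:
  demo:
    runs-on: ubuntu-latest  # demo only; matrix.os here is just a value, not a runner label
    strategy:
      matrix:
        os: [linux-arm64-npu-1, linux-arm64-npu-4]
    # On npu-4 PR runs the condition and the PR number are truthy, so every such
    # job resolves to the shared 'pr-<PR>-limit-npu-4' group; otherwise the
    # expression falls through to a per-matrix-entry group name.
    concurrency:
      group: >
        ${{
        matrix.os == 'linux-arm64-npu-4'
          && github.event.pull_request.number
          && format('pr-{0}-limit-npu-4', github.event.pull_request.number)
        || format('job-{0}-{1}', matrix.os, github.event.pull_request.number)
        }}
      cancel-in-progress: false
    steps:
      - run: echo "running matrix entry ${{ matrix.os }}"

Because `cancel-in-progress` is false, runs that land in the same group are queued one after another instead of cancelling each other, which is what serializes the npu-4 PR jobs while letting every other matrix entry run in parallel.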