 # This file is a part of the vllm-ascend project.
 #

-name: 'test'
+name: 'e2e test / basic'

 on:
   schedule:
@@ -44,6 +44,12 @@ defaults:
   run:
     shell: bash -el {0}

+# only cancel in-progress runs of the same workflow
+# and ignore the lint / 1 card / 4 cards test type
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
   lint:
     runs-on: ubuntu-latest
@@ -114,25 +120,14 @@ jobs:
     strategy:
       max-parallel: 2
       matrix:
-        os: [linux-arm64-npu-1, linux-arm64-npu-4]
+        os: [linux-arm64-npu-1]
         vllm_version: [main, v0.9.1]
-    concurrency:
-      group: >
-        ${{
-        matrix.os == 'linux-arm64-npu-4'
-        && github.event.pull_request.number
-        && format('pr-{0}-limit-npu-4', github.event.pull_request.number)
-        || format('job-{0}-{1}-{2}', matrix.os, matrix.vllm_version, github.event.pull_request.number)
-        }}
-      cancel-in-progress: false
     name: vLLM Ascend test
     runs-on: ${{ matrix.os }}
     container:
       # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
       image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
       env:
-        HF_ENDPOINT: https://hf-mirror.com
-        HF_TOKEN: ${{ secrets.HF_TOKEN }}
         VLLM_LOGGING_LEVEL: ERROR
     steps:
       - name: Check npu and CANN info
@@ -177,61 +172,135 @@ jobs:
         env:
           VLLM_USE_V1: 1
           VLLM_WORKER_MULTIPROC_METHOD: spawn
+          VLLM_USE_MODELSCOPE: True
+        run: |
+          pytest -sv tests/singlecard/test_offline_inference.py
+          # TODO: switch hf to modelscope
+          VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
+          pytest -sv tests/singlecard/test_ilama_lora.py
+          # TODO(sss): guided decoding doesn't work, fix it later
+          # pytest -sv tests/singlecard/test_guided_decoding.py
+          # test_ascend_config.py should be ran separately because it will regenerate the global config many times.
+          pytest -sv tests/singlecard/test_ascend_config.py
+          pytest -sv tests/singlecard/test_camem.py
+          pytest -sv tests/singlecard/ \
+            --ignore=tests/singlecard/test_offline_inference.py \
+            --ignore=tests/singlecard/test_ilama_lora.py \
+            --ignore=tests/singlecard/test_guided_decoding.py \
+            --ignore=tests/singlecard/test_ascend_config.py \
+            --ignore=tests/singlecard/test_camem.py
+
+      - name: Run vllm-project/vllm-ascend test on V0 engine
+        if: ${{ github.event_name == 'schedule' }}
+        env:
+          VLLM_USE_V1: 0
+          VLLM_USE_MODELSCOPE: True
         run: |
-          if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/singlecard/test_offline_inference.py
-            # guided decoding doesn't work, fix it later
-            # pytest -sv tests/singlecard/test_guided_decoding.py.py
-            # test_ascend_config.py should be ran separately because it will regenerate the global config many times.
-            pytest -sv tests/singlecard/test_ascend_config.py
-            pytest -sv tests/singlecard/test_camem.py
-            pytest -sv tests/singlecard/core/test_ascend_scheduler.py
-            pytest -sv tests/singlecard/core/test_ascend_scheduler_e2e.py
-            pytest -sv tests/singlecard/ \
+          pytest -sv tests/singlecard/test_offline_inference.py
+          # TODO: switch hf to modelscope
+          VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
+          pytest -sv tests/singlecard/test_ilama_lora.py
+          # guided decoding doesn't work, fix it later
+          # pytest -sv tests/singlecard/test_guided_decoding.py
+          pytest -sv tests/singlecard/test_camem.py
+          # test_ascend_config.py should be ran separately because it will regenerate the global config many times.
+          pytest -sv tests/singlecard/test_ascend_config.py
+          pytest -sv tests/singlecard/test_prompt_embedding.py
+          pytest -sv tests/singlecard/ \
             --ignore=tests/singlecard/test_offline_inference.py \
+            --ignore=tests/singlecard/test_ilama_lora.py \
             --ignore=tests/singlecard/test_guided_decoding.py \
-            --ignore=tests/singlecard/test_ascend_config.py \
            --ignore=tests/singlecard/test_camem.py \
+            --ignore=tests/singlecard/test_ascend_config.py \
+            --ignore=tests/singlecard/test_prompt_embedding.py \
            --ignore=tests/singlecard/core/test_ascend_scheduler.py \
            --ignore=tests/singlecard/core/test_ascend_scheduler_e2e.py
-          else
+
+  e2e-4-cards:
+    needs: [e2e]
+    if: ${{ needs.e2e.result == 'success' }}
+    strategy:
+      max-parallel: 1
+      matrix:
+        os: [linux-arm64-npu-4]
+        vllm_version: [main, v0.9.1]
+    name: vLLM Ascend test
+    runs-on: ${{ matrix.os }}
+    container:
+      # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
+      image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
+      env:
+        VLLM_LOGGING_LEVEL: ERROR
+        VLLM_USE_MODELSCOPE: True
+    steps:
+      - name: Check npu and CANN info
+        run: |
+          npu-smi info
+          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+
+      - name: Config mirrors
+        run: |
+          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
+          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+          apt-get update -y
+          apt install git -y
+          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v4
+
+      - name: Install system dependencies
+        run: |
+          apt-get -y install `cat packages.txt`
+          apt-get -y install gcc g++ cmake libnuma-dev
+
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ matrix.vllm_version }}
+          path: ./vllm-empty
+
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty pip install -e .
+
+      - name: Install vllm-project/vllm-ascend
+        run: |
+          pip install -r requirements-dev.txt
+          pip install -v -e .
+
+      - name: Run vllm-project/vllm-ascend test for V1 Engine
+        env:
+          VLLM_USE_V1: 1
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+          VLLM_USE_MODELSCOPE: True
+        run: |
+          # TODO: switch hf to modelscope
+          VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
           pytest -sv tests/multicard/test_ilama_lora_tp2.py
-            # To avoid oom, we need to run the test in a single process.
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
-          fi
+          # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py will raise error.
+          # To avoid oom, we need to run the test in a single process.
+          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
+          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
+          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
+          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
+          pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py

       - name: Run vllm-project/vllm-ascend test on V0 engine
         if: ${{ github.event_name == 'schedule' }}
         env:
           VLLM_USE_V1: 0
+          VLLM_USE_MODELSCOPE: True
         run: |
-          if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/singlecard/test_offline_inference.py
-            # guided decoding doesn't work, fix it later
-            # pytest -sv tests/singlecard/test_guided_decoding.py.py
-            pytest -sv tests/singlecard/test_camem.py
-            # test_ascend_config.py should be ran separately because it will regenerate the global config many times.
-            pytest -sv tests/singlecard/test_ascend_config.py
-            pytest -sv tests/singlecard/test_prompt_embedding.py
-            pytest -sv tests/singlecard/ \
-              --ignore=tests/singlecard/test_offline_inference.py \
-              --ignore=tests/singlecard/test_guided_decoding.py \
-              --ignore=tests/singlecard/test_camem.py \
-              --ignore=tests/singlecard/test_ascend_config.py \
-              --ignore=tests/singlecard/test_prompt_embedding.py \
-              --ignore=tests/singlecard/core/test_ascend_scheduler.py \
-              --ignore=tests/singlecard/core/test_ascend_scheduler_e2e.py
-          else
+          # TODO: switch hf to modelscope
+          VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
           pytest -sv tests/multicard/test_ilama_lora_tp2.py
-            # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py will raise error.
-            # To avoid oom, we need to run the test in a single process.
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
-          fi
+          # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py will raise error.
+          # To avoid oom, we need to run the test in a single process.
+          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
+          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
+          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
+          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
+          pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
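
Note on the workflow-level concurrency block added at the top of this diff (an illustrative sketch only, not part of the change itself; the example group value in the comments is an assumption): the group key combines the workflow name with github.ref, so a newer run for the same branch or pull request cancels the older in-progress run of this workflow, while runs for other refs and other workflows are unaffected.

# Minimal sketch of the behavior introduced above (values are illustrative).
concurrency:
  # For a pull request, github.ref is "refs/pull/<PR number>/merge", so the group
  # resolves to something like "e2e test / basic-refs/pull/<PR number>/merge".
  group: ${{ github.workflow }}-${{ github.ref }}
  # A newer run in the same group cancels the older run that is still in progress.
  cancel-in-progress: true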