
Commit 75c10ce

[CI][0.9.0][0.9.1] Update CI job (#1149)
1. Drop main and add a 0.9.1 check for the 0.9.1-dev branch.
2. Cherry-pick b75cb78 to fix an import error so that 0.9.1 works.
3. Fix the quantization test failure.

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
1 parent 706de02 commit 75c10ce

File tree

10 files changed: +37 −24 lines changed


.github/dependabot.yml

Lines changed: 0 additions & 3 deletions
@@ -2,9 +2,6 @@ version: 2
 updates:
   - package-ecosystem: "github-actions"
     directory: "/"
-    schedule:
-      # Check for updates to GitHub Actions every week
-      interval: "weekly"
     open-pull-requests-limit: 2
     reviewers:
       - "Yikun"

.github/workflows/nightly_benchmarks.yaml

Lines changed: 1 addition & 5 deletions
@@ -18,11 +18,7 @@
 name: 'Benchmarks / Performance'
 # This workflow runs nightly benchmarks for vllm-ascend.
 
-on:
-  schedule:
-    # Run at 02:00 everyday
-    - cron: '00 18 * * *'
-
+on:
   workflow_dispatch:
     # Allow manual triggering of the workflow
 

.github/workflows/vllm_ascend_doctest.yaml

Lines changed: 0 additions & 3 deletions
@@ -29,9 +29,6 @@ on:
       - 'tests/e2e/doctests/**'
       - 'tests/e2e/common.sh'
       - 'tests/e2e/run_doctests.sh'
-  schedule:
-    # Runs every 4 hours
-    - cron: '0 */4 * * *'
 
 # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
 # declared as "shell: bash -el {0}" on steps that need to be properly activated.

.github/workflows/vllm_ascend_test.yaml

Lines changed: 1 addition & 3 deletions
@@ -18,8 +18,6 @@
 name: 'e2e test'
 
 on:
-  schedule:
-    - cron: '0 23 * * *'
   pull_request:
     branches:
       - 'main'
@@ -44,7 +42,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-arm64-npu-1, linux-arm64-npu-4]
-        vllm_version: [main, v0.9.0]
+        vllm_version: [v0.9.0, v0.9.1]
     concurrency:
       group: >
         ${{

.github/workflows/vllm_ascend_test_long_term.yaml

Lines changed: 1 addition & 4 deletions
@@ -17,9 +17,6 @@
 name: 'e2e test / long-term-test'
 
 on:
-  schedule:
-    # Runs at 23:00 UTC (7:00 AM Beijing) every day
-    - cron: '0 23 * * *'
   pull_request:
     types: [ labeled ]
 
@@ -43,7 +40,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-arm64-npu-1, linux-arm64-npu-4]
-        vllm_version: [main, v0.9.0]
+        vllm_version: [v0.9.0, v0.9.1]
     name: vLLM Ascend long term test
     runs-on: ${{ matrix.os }}
     container:

.github/workflows/vllm_ascend_test_pd.yaml

Lines changed: 1 addition & 4 deletions
@@ -17,9 +17,6 @@
 name: 'e2e test / pd-disaggregation'
 
 on:
-  schedule:
-    # Runs at 23:00 UTC (7:00 AM Beijing) every day
-    - cron: '0 23 * * *'
   pull_request:
     types: [ labeled ]
 
@@ -41,7 +38,7 @@ jobs:
   if: ${{ contains(github.event.pull_request.labels.*.name, 'pd-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' }}
   strategy:
     matrix:
-      vllm_verison: [main, v0.9.0]
+      vllm_verison: [v0.9.0, v0.9.1]
   name: vLLM Ascend prefilling decoding disaggregation test
   runs-on: linux-arm64-npu-static-8
 

tests/conftest.py

Lines changed: 2 additions & 0 deletions
@@ -78,6 +78,7 @@ def __init__(
         enable_chunked_prefill: bool = False,
         swap_space: int = 4,
         enforce_eager: Optional[bool] = True,
+        quantization: Optional[str] = None,
         **kwargs,
     ) -> None:
         self.model = LLM(
@@ -94,6 +95,7 @@ def __init__(
             max_model_len=max_model_len,
             block_size=block_size,
             enable_chunked_prefill=enable_chunked_prefill,
+            quantization=quantization,
             **kwargs,
         )
 
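
These two hunks thread a new quantization keyword through the VllmRunner test helper into vLLM's LLM entrypoint. As a rough sketch (illustrative only, not part of the commit), passing quantization="ascend" through the helper amounts to constructing the engine directly like this, assuming the vllm-ascend plugin is installed so that the "ascend" quantization method is registered:

from vllm import LLM, SamplingParams

# Model id taken from the quantization test added below; VllmRunner simply
# forwards the new keyword to vllm.LLM.
llm = LLM(
    model="vllm-ascend/Qwen2.5-0.5B-Instruct-W8A8",
    quantization="ascend",  # the value the new test passes through VllmRunner
    enforce_eager=True,
    max_model_len=8192,
)
outputs = llm.generate(["The capital of France is"],
                       SamplingParams(temperature=0.0, max_tokens=5))
print(outputs[0].outputs[0].text)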

tests/singlecard/test_offline_inference.py

Lines changed: 26 additions & 1 deletion
@@ -25,6 +25,7 @@
 
 import pytest
 import vllm  # noqa: F401
+from modelscope import snapshot_download  # type: ignore[import-untyped]
 from vllm import SamplingParams
 from vllm.assets.image import ImageAsset
 
@@ -33,12 +34,15 @@
 
 MODELS = [
     "Qwen/Qwen2.5-0.5B-Instruct",
-    "vllm-ascend/Qwen2.5-0.5B-Instruct-w8a8",
     "Qwen/Qwen3-0.6B-Base",
 ]
 MULTIMODALITY_MODELS = ["Qwen/Qwen2.5-VL-3B-Instruct"]
 os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256"
 
+QUANTIZATION_MODELS = [
+    "vllm-ascend/Qwen2.5-0.5B-Instruct-W8A8",
+]
+
 
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("dtype", ["half", "float16"])
@@ -59,6 +63,27 @@ def test_models(model: str, dtype: str, max_tokens: int) -> None:
         vllm_model.generate_greedy(example_prompts, max_tokens)
 
 
+@pytest.mark.parametrize("model", QUANTIZATION_MODELS)
+@pytest.mark.parametrize("max_tokens", [5])
+def test_quantization_models(model: str, max_tokens: int) -> None:
+    prompt = "The following numbers of the sequence " + ", ".join(
+        str(i) for i in range(1024)) + " are:"
+    example_prompts = [prompt]
+
+    # NOTE: Using quantized model repo id from modelscope encounters an issue,
+    # this pr (https://github.com/vllm-project/vllm/pull/19212) fix the issue,
+    # after it is being merged, there's no need to download model explicitly.
+    model_path = snapshot_download(model)
+
+    with VllmRunner(model_path,
+                    max_model_len=8192,
+                    enforce_eager=True,
+                    dtype="auto",
+                    gpu_memory_utilization=0.7,
+                    quantization="ascend") as vllm_model:
+        vllm_model.generate_greedy(example_prompts, max_tokens)
+
+
 @pytest.mark.parametrize("model", MULTIMODALITY_MODELS)
 def test_multimodal(model, prompt_template, vllm_runner):
     image = ImageAsset("cherry_blossom") \
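
The NOTE in the new test explains the explicit snapshot_download call: handing the quantized ModelScope repo id straight to vLLM currently hits an issue that vllm-project/vllm#19212 is expected to fix. A hypothetical follow-up once that fix lands (an assumption, not part of this commit) would drop the download step and rely on vLLM's VLLM_USE_MODELSCOPE switch:

import os
os.environ["VLLM_USE_MODELSCOPE"] = "True"  # assumption: let vLLM resolve repo ids via ModelScope

# The repo id could then likely be handed to the test helper directly,
# without calling snapshot_download() first.
with VllmRunner("vllm-ascend/Qwen2.5-0.5B-Instruct-W8A8",
                max_model_len=8192,
                enforce_eager=True,
                quantization="ascend") as vllm_model:
    vllm_model.generate_greedy(["The sequence 1, 2, 3 continues as:"], 5)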

vllm_ascend/compilation/__init__.py

Whitespace-only changes.

vllm_ascend/worker/model_runner_v1.py

Lines changed: 5 additions & 1 deletion
@@ -496,7 +496,11 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> None:
             # Update the block IDs.
             if not req_data.resumed_from_preemption:
                 # Append the new blocks to the existing block IDs.
-                req_state.block_ids.extend(req_data.new_block_ids)
+                for block_ids, new_block_ids in zip(  # type: ignore[call-overload]
+                        req_state.block_ids,
+                        req_data.new_block_ids,
+                        strict=True):
+                    block_ids.extend(new_block_ids)
             else:
                 # The request is resumed from preemption.
                 # Replace the existing block IDs with the new ones.
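
This hunk follows a change in how vLLM v0.9.1 reports a request's KV-cache blocks: judging by the new zip loop, req_state.block_ids and req_data.new_block_ids are now one ID list per KV cache group rather than a single flat list, so each group is extended separately. A minimal standalone sketch of that shape change (an assumption inferred from the loop, not text from the commit; strict=True needs Python 3.10+):

# Assumed new layout: one list of block IDs per KV cache group.
existing_block_ids = [[0, 1, 2], [10, 11]]
new_block_ids = [[3], [12, 13]]

# The old flat layout only needed existing_block_ids.extend(new_block_ids).
# With grouped lists, each group gets its own new blocks; strict=True asserts
# both sides have the same number of groups.
for group, new in zip(existing_block_ids, new_block_ids, strict=True):
    group.extend(new)

print(existing_block_ids)  # [[0, 1, 2, 3], [10, 11, 12, 13]]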
