
Commit 0dfe5f5

wangxiyuan authored and wangxiaoxin (A) committed
[CI] Upgrade vllm to 0.9.1 (#1165)
1. Upgrade vLLM to 0.9.1; 0.9.0 is no longer supported on the main branch. Keep the documentation at 0.9.0 until the first 0.9.1 release of vllm-ascend is published.
2. Disable the V0 engine test for pull requests (it now runs only on the scheduled job).
3. Move the actionlint check into the lint job.

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
1 parent bbb5981 · commit 0dfe5f5

File tree

19 files changed: +72 -320 lines


.github/workflows/accuracy_test.yaml

Lines changed: 2 additions & 3 deletions
@@ -34,8 +34,7 @@ on:
         # Current supported vLLM versions
         options:
           - main
-          - v0.9.0.1
-          - v0.9.0
+          - v0.9.1
           - v0.7.3
       vllm-ascend-version:
         description: 'vllm-ascend version:'
@@ -159,7 +158,7 @@ jobs:
           repository: vllm-project/vllm
           path: ./vllm-empty
           # Please also update this when bump matched version
-          ref: ${{ github.event.inputs.vllm-version || 'v0.9.0' }}
+          ref: ${{ github.event.inputs.vllm-version || 'v0.9.1' }}

       - name: Install vllm-project/vllm from source
         working-directory: ./vllm-empty

.github/workflows/actionlint.yml

Lines changed: 0 additions & 53 deletions
This file was deleted.

.github/workflows/nightly_benchmarks.yaml

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@ jobs:
     strategy:
       matrix:
         include:
-          - vllm_branch: v0.9.0
+          - vllm_branch: v0.9.1
            vllm_ascend_branch: main
     container:
       image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10

.github/workflows/vllm_ascend_test.yaml

Lines changed: 12 additions & 1 deletion
@@ -31,6 +31,9 @@ on:
       - '!benchmarks/**'
       - 'tools/mypy.sh'
       - 'mypy.ini'
+      - '.github/workflows/*.ya?ml'
+      - '.github/workflows/actionlint.*'
+      - '.github/workflows/matchers/actionlint.json'

 # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
 # declared as "shell: bash -el {0}" on steps that need to be properly activated.
@@ -85,6 +88,13 @@ jobs:
           repository: vllm-project/vllm
           path: vllm-empty

+      - name: Actionlint Check
+        env:
+          SHELLCHECK_OPTS: --exclude=SC2046,SC2006,SC2086
+        run: |
+          echo "::add-matcher::.github/workflows/matchers/actionlint.json"
+          tools/actionlint.sh -color
+
       - name: Install vllm-project/vllm from source
         working-directory: vllm-empty
         run: |
@@ -103,7 +113,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-arm64-npu-1, linux-arm64-npu-4]
-        vllm_version: [v0.9.0, v0.9.1]
+        vllm_version: [main, v0.9.1]
     concurrency:
       group: >
         ${{
@@ -191,6 +201,7 @@ jobs:
           fi

       - name: Run vllm-project/vllm-ascend test on V0 engine
+        if: ${{ github.event_name == 'schedule' }}
         env:
           VLLM_USE_V1: 0
         run: |
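The V0 engine step above now runs only on scheduled builds, gated by `if: ${{ github.event_name == 'schedule' }}`, and forces the legacy engine through the `VLLM_USE_V1: 0` environment variable. A minimal sketch of what that toggle does on the Python side, assuming vLLM exposes the parsed value through its `vllm.envs` module (illustrative only, not part of this diff):

import os

# Mirror the CI step's environment: "0" selects vLLM's legacy V0 engine.
os.environ["VLLM_USE_V1"] = "0"

import vllm.envs as envs

# vllm parses the variable lazily on attribute access.
assert envs.VLLM_USE_V1 is False  # the V0 engine path will be used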

.github/workflows/vllm_ascend_test_long_term.yaml

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-arm64-npu-1, linux-arm64-npu-4]
-        vllm_version: [v0.9.0, v0.9.1]
+        vllm_version: [main, v0.9.1]
     name: vLLM Ascend long term test
     runs-on: ${{ matrix.os }}
     container:

.github/workflows/vllm_ascend_test_pd.yaml

Lines changed: 1 addition & 1 deletion
@@ -38,7 +38,7 @@ jobs:
     if: ${{ contains(github.event.pull_request.labels.*.name, 'pd-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' }}
     strategy:
       matrix:
-        vllm_verison: [v0.9.0, v0.9.1]
+        vllm_verison: [main, v0.9.1]
     name: vLLM Ascend prefilling decoding disaggregation test
     runs-on: linux-arm64-npu-static-8

Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}

 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.9.0
+ARG VLLM_TAG=v0.9.1
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \

Dockerfile.openEuler

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/

 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.9.0
+ARG VLLM_TAG=v0.9.1

 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.

tests/singlecard/compile/test_simple.py

Lines changed: 8 additions & 24 deletions
@@ -14,8 +14,6 @@
                          set_current_vllm_config)
 from vllm.utils import direct_register_custom_op

-from vllm_ascend.utils import vllm_version_is
-
 global_counter = 0

 # create a library to hold the custom op
@@ -93,28 +91,14 @@ def test_simple_piecewise_compile():
     model = SillyModel(vllm_config=vllm_config, prefix="")

     inputs = torch.randn(100).npu()
-
-    if vllm_version_is("0.9.0"):
-        kwargs = {
-            "num_graphs_seen": 1,  # one graph for the model
-            "num_piecewise_graphs_seen": 5,  # 2 * num_layers + 1
-            "num_piecewise_capturable_graphs_seen": 3,  # 1 + num_layers
-            "num_backend_compilations":
-            3,  # num_piecewise_capturable_graphs_seen
-            "num_cudagraph_caputured":
-            6  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
-        }
-    else:
-        kwargs = {
-            "num_graphs_seen": 1,  # one graph for the model
-            "num_piecewise_graphs_seen": 5,  # 2 * num_layers + 1
-            "num_piecewise_capturable_graphs_seen": 3,  # 1 + num_layers
-            "num_backend_compilations":
-            3,  # num_piecewise_capturable_graphs_seen
-            "num_cudagraph_captured":
-            6  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
-        }
-
+    kwargs = {
+        "num_graphs_seen": 1,  # one graph for the model
+        "num_piecewise_graphs_seen": 5,  # 2 * num_layers + 1
+        "num_piecewise_capturable_graphs_seen": 3,  # 1 + num_layers
+        "num_backend_compilations": 3,  # num_piecewise_capturable_graphs_seen
+        "num_cudagraph_captured":
+        6  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+    }
     with compilation_counter.expect(kwargs):

         model(inputs)
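The two deleted branches were identical except for one key: vLLM 0.9.0 spelled the counter field `num_cudagraph_caputured`, while 0.9.1 renames it to `num_cudagraph_captured`, so dropping 0.9.0 support collapses the version check into a single dict. A minimal sketch of the assertion pattern, assuming `compilation_counter.expect` accepts the expected counter deltas and verifies them when the `with` block exits, as the test's usage implies:

from vllm.compilation.counter import compilation_counter

# Expected counter deltas for one model with two piecewise layers
# (values mirror the test above).
expected = {
    "num_graphs_seen": 1,
    "num_piecewise_graphs_seen": 5,  # 2 * num_layers + 1
    "num_piecewise_capturable_graphs_seen": 3,  # 1 + num_layers
    "num_backend_compilations": 3,
    "num_cudagraph_captured": 6,
}

# expect() snapshots the counters on entry and asserts the deltas on exit.
with compilation_counter.expect(expected):
    model(inputs)  # `model` and `inputs` as constructed in the test above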

tests/singlecard/test_scheduler.py

Lines changed: 18 additions & 43 deletions
@@ -31,7 +31,6 @@
 from vllm.v1.structured_output import StructuredOutputManager

 from vllm_ascend.core.scheduler import AscendScheduler
-from vllm_ascend.utils import vllm_version_is

 EOS_TOKEN_ID = 50256

@@ -87,27 +86,15 @@ def create_scheduler(
     vllm_config = VllmConfig(scheduler_config=scheduler_config,
                              model_config=model_config,
                              cache_config=cache_config)
-
-    if vllm_version_is("0.9.0"):
-        kv_cache_config = KVCacheConfig(
-            num_blocks=10000,  # A large number of blocks to hold all requests
-            tensors={},
-            kv_cache_groups=[
-                KVCacheGroupSpec(['layer'],
-                                 FullAttentionSpec(16, 1, 1, torch.float32,
-                                                   False))
-            ],
-        )
-    else:
-        kv_cache_config = KVCacheConfig(
-            num_blocks=10000,  # A large number of blocks to hold all requests
-            kv_cache_tensors=[KVCacheTensor(size=1024, shared_by=[1])],
-            kv_cache_groups=[
-                KVCacheGroupSpec(['layer'],
-                                 FullAttentionSpec(16, 1, 1, torch.float32,
-                                                   False, None))
-            ],
-        )
+    kv_cache_config = KVCacheConfig(
+        num_blocks=10000,  # A large number of blocks to hold all requests
+        kv_cache_tensors=[KVCacheTensor(size=1024, shared_by=[1])],
+        kv_cache_groups=[
+            KVCacheGroupSpec(['layer'],
+                             FullAttentionSpec(16, 1, 1, torch.float32, False,
+                                               None))
+        ],
+    )
     cache_config.num_gpu_blocks = 10000
     return AscendScheduler(
         vllm_config,
@@ -135,27 +122,15 @@ def create_requests(num_requests: int,
     else:
         mm_position = None
         mm_inputs = None
-    if vllm_version_is("0.9.0"):
-        request = Request(
-            request_id=f"{i}",
-            prompt_token_ids=[i] * num_tokens,
-            sampling_params=sampling_params,
-            multi_modal_inputs=mm_inputs,
-            multi_modal_placeholders=mm_position,
-            multi_modal_hashes=None,
-            arrival_time=0,
-            eos_token_id=EOS_TOKEN_ID,
-        )
-    else:
-        request = Request(
-            request_id=f"{i}",
-            prompt_token_ids=[i] * num_tokens,
-            sampling_params=sampling_params,
-            multi_modal_inputs=mm_inputs,
-            multi_modal_placeholders=mm_position,
-            multi_modal_hashes=None,
-            eos_token_id=EOS_TOKEN_ID,
-        )
+    request = Request(
+        request_id=f"{i}",
+        prompt_token_ids=[i] * num_tokens,
+        sampling_params=sampling_params,
+        multi_modal_inputs=mm_inputs,
+        multi_modal_placeholders=mm_position,
+        multi_modal_hashes=None,
+        eos_token_id=EOS_TOKEN_ID,
+    )
     requests.append(request)
     return requests
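Both rewritten call sites follow the same pattern: the `vllm_version_is("0.9.0")` guard and its 0.9.0-only arguments (`tensors={}` on `KVCacheConfig`, `arrival_time=0` on `Request`) are deleted, leaving the 0.9.1 form unconditional. A minimal sketch of the compatibility-gate pattern being retired, assuming `vllm_ascend.utils.vllm_version_is(v)` returns True when the installed vLLM matches version `v` (guard shown for illustration only):

from vllm_ascend.utils import vllm_version_is

# Before this commit, call sites branched on the installed vLLM version.
if vllm_version_is("0.9.0"):
    # 0.9.0 API, e.g. KVCacheConfig(tensors={}, ...): no longer supported.
    raise RuntimeError("vLLM 0.9.0 is not supported on the main branch")
# The 0.9.1 API (shown in the diff above) is now used unconditionally.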
