Commit 4f59644

Authored by wangxiyuan
[CI] Upgrade vllm to 0.9.1 (#1165)
1. Upgrade vLLM to 0.9.1; 0.9.0 is no longer supported on the main branch. Keep the docs on 0.9.0 until the first 0.9.1 release is published.
2. Disable the V0 test for PRs.
3. Move the actionlint check to the lint job.

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
1 parent e46dc14 commit 4f59644
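For context on item 2 of the message: the V0-engine test step is now gated on the triggering event, so it only runs in the scheduled (nightly) workflow and is skipped on pull requests. Below is a minimal sketch of that pattern as applied in .github/workflows/vllm_ascend_test.yaml; the step name, the if: condition, the matrix values, and the VLLM_USE_V1 flag come from the diff further down, while the job name and the run: command are illustrative placeholders only.

jobs:
  e2e-test:                              # illustrative job name
    strategy:
      matrix:
        os: [linux-arm64-npu-1, linux-arm64-npu-4]
        vllm_version: [main, v0.9.1]     # v0.9.0 dropped from the test matrix
    runs-on: ${{ matrix.os }}
    steps:
      - name: Run vllm-project/vllm-ascend test on V0 engine
        # Only the scheduled (nightly) run exercises the V0 engine; PR runs skip this step.
        if: ${{ github.event_name == 'schedule' }}
        env:
          VLLM_USE_V1: 0
        run: |
          echo "run V0 engine tests here"   # placeholder for the real test command in the workflow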

19 files changed: +72, -320 lines changed

.github/workflows/accuracy_test.yaml

Lines changed: 2 additions & 3 deletions
@@ -34,8 +34,7 @@ on:
       # Current supported vLLM versions
       options:
         - main
-        - v0.9.0.1
-        - v0.9.0
+        - v0.9.1
         - v0.7.3
     vllm-ascend-version:
       description: 'vllm-ascend version:'
@@ -159,7 +158,7 @@ jobs:
         repository: vllm-project/vllm
         path: ./vllm-empty
         # Please also update this when bump matched version
-        ref: ${{ github.event.inputs.vllm-version || 'v0.9.0' }}
+        ref: ${{ github.event.inputs.vllm-version || 'v0.9.1' }}

     - name: Install vllm-project/vllm from source
       working-directory: ./vllm-empty

.github/workflows/actionlint.yml

Lines changed: 0 additions & 53 deletions
This file was deleted.

.github/workflows/nightly_benchmarks.yaml

Lines changed: 1 addition & 1 deletion
@@ -50,7 +50,7 @@ jobs:
     strategy:
       matrix:
         include:
-          - vllm_branch: v0.9.0
+          - vllm_branch: v0.9.1
             vllm_ascend_branch: main
     container:
       image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10

.github/workflows/vllm_ascend_test.yaml

Lines changed: 12 additions & 1 deletion
@@ -33,6 +33,9 @@ on:
       - '!benchmarks/**'
       - 'tools/mypy.sh'
       - 'mypy.ini'
+      - '.github/workflows/*.ya?ml'
+      - '.github/workflows/actionlint.*'
+      - '.github/workflows/matchers/actionlint.json'

 # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
 # declared as "shell: bash -el {0}" on steps that need to be properly activated.
@@ -87,6 +90,13 @@ jobs:
           repository: vllm-project/vllm
           path: vllm-empty

+      - name: Actionlint Check
+        env:
+          SHELLCHECK_OPTS: --exclude=SC2046,SC2006,SC2086
+        run: |
+          echo "::add-matcher::.github/workflows/matchers/actionlint.json"
+          tools/actionlint.sh -color
+
       - name: Install vllm-project/vllm from source
         working-directory: vllm-empty
         run: |
@@ -105,7 +115,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-arm64-npu-1, linux-arm64-npu-4]
-        vllm_version: [main, v0.9.0]
+        vllm_version: [main, v0.9.1]
     concurrency:
       group: >
         ${{
@@ -193,6 +203,7 @@ jobs:
         fi

       - name: Run vllm-project/vllm-ascend test on V0 engine
+        if: ${{ github.event_name == 'schedule' }}
         env:
           VLLM_USE_V1: 0
         run: |

.github/workflows/vllm_ascend_test_long_term.yaml

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-arm64-npu-1, linux-arm64-npu-4]
-        vllm_version: [main, v0.9.0]
+        vllm_version: [main, v0.9.1]
     name: vLLM Ascend long term test
     runs-on: ${{ matrix.os }}
     container:

.github/workflows/vllm_ascend_test_pd.yaml

Lines changed: 1 addition & 1 deletion
@@ -41,7 +41,7 @@ jobs:
     if: ${{ contains(github.event.pull_request.labels.*.name, 'pd-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' }}
     strategy:
       matrix:
-        vllm_verison: [main, v0.9.0]
+        vllm_verison: [main, v0.9.1]
     name: vLLM Ascend prefilling decoding disaggregation test
     runs-on: linux-arm64-npu-static-8

Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}

 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.9.0
+ARG VLLM_TAG=v0.9.1
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \

Dockerfile.openEuler

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/

 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.9.0
+ARG VLLM_TAG=v0.9.1

 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.

tests/singlecard/compile/test_simple.py

Lines changed: 8 additions & 24 deletions
@@ -14,8 +14,6 @@
                              set_current_vllm_config)
 from vllm.utils import direct_register_custom_op

-from vllm_ascend.utils import vllm_version_is
-
 global_counter = 0

 # create a library to hold the custom op
@@ -93,28 +91,14 @@ def test_simple_piecewise_compile():
     model = SillyModel(vllm_config=vllm_config, prefix="")

     inputs = torch.randn(100).npu()
-
-    if vllm_version_is("0.9.0"):
-        kwargs = {
-            "num_graphs_seen": 1,  # one graph for the model
-            "num_piecewise_graphs_seen": 5,  # 2 * num_layers + 1
-            "num_piecewise_capturable_graphs_seen": 3,  # 1 + num_layers
-            "num_backend_compilations":
-            3,  # num_piecewise_capturable_graphs_seen
-            "num_cudagraph_caputured":
-            6  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
-        }
-    else:
-        kwargs = {
-            "num_graphs_seen": 1,  # one graph for the model
-            "num_piecewise_graphs_seen": 5,  # 2 * num_layers + 1
-            "num_piecewise_capturable_graphs_seen": 3,  # 1 + num_layers
-            "num_backend_compilations":
-            3,  # num_piecewise_capturable_graphs_seen
-            "num_cudagraph_captured":
-            6  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
-        }
-
+    kwargs = {
+        "num_graphs_seen": 1,  # one graph for the model
+        "num_piecewise_graphs_seen": 5,  # 2 * num_layers + 1
+        "num_piecewise_capturable_graphs_seen": 3,  # 1 + num_layers
+        "num_backend_compilations": 3,  # num_piecewise_capturable_graphs_seen
+        "num_cudagraph_captured":
+        6  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+    }
     with compilation_counter.expect(kwargs):

         model(inputs)

tests/singlecard/test_scheduler.py

Lines changed: 18 additions & 43 deletions
@@ -31,7 +31,6 @@
 from vllm.v1.structured_output import StructuredOutputManager

 from vllm_ascend.core.scheduler import AscendScheduler
-from vllm_ascend.utils import vllm_version_is

 EOS_TOKEN_ID = 50256

@@ -87,27 +86,15 @@ def create_scheduler(
     vllm_config = VllmConfig(scheduler_config=scheduler_config,
                              model_config=model_config,
                              cache_config=cache_config)
-
-    if vllm_version_is("0.9.0"):
-        kv_cache_config = KVCacheConfig(
-            num_blocks=10000,  # A large number of blocks to hold all requests
-            tensors={},
-            kv_cache_groups=[
-                KVCacheGroupSpec(['layer'],
-                                 FullAttentionSpec(16, 1, 1, torch.float32,
-                                                   False))
-            ],
-        )
-    else:
-        kv_cache_config = KVCacheConfig(
-            num_blocks=10000,  # A large number of blocks to hold all requests
-            kv_cache_tensors=[KVCacheTensor(size=1024, shared_by=[1])],
-            kv_cache_groups=[
-                KVCacheGroupSpec(['layer'],
-                                 FullAttentionSpec(16, 1, 1, torch.float32,
-                                                   False, None))
-            ],
-        )
+    kv_cache_config = KVCacheConfig(
+        num_blocks=10000,  # A large number of blocks to hold all requests
+        kv_cache_tensors=[KVCacheTensor(size=1024, shared_by=[1])],
+        kv_cache_groups=[
+            KVCacheGroupSpec(['layer'],
+                             FullAttentionSpec(16, 1, 1, torch.float32, False,
+                                               None))
+        ],
+    )
     cache_config.num_gpu_blocks = 10000
     return AscendScheduler(
         vllm_config,
@@ -135,27 +122,15 @@ def create_requests(num_requests: int,
         else:
             mm_position = None
             mm_inputs = None
-        if vllm_version_is("0.9.0"):
-            request = Request(
-                request_id=f"{i}",
-                prompt_token_ids=[i] * num_tokens,
-                sampling_params=sampling_params,
-                multi_modal_inputs=mm_inputs,
-                multi_modal_placeholders=mm_position,
-                multi_modal_hashes=None,
-                arrival_time=0,
-                eos_token_id=EOS_TOKEN_ID,
-            )
-        else:
-            request = Request(
-                request_id=f"{i}",
-                prompt_token_ids=[i] * num_tokens,
-                sampling_params=sampling_params,
-                multi_modal_inputs=mm_inputs,
-                multi_modal_placeholders=mm_position,
-                multi_modal_hashes=None,
-                eos_token_id=EOS_TOKEN_ID,
-            )
+        request = Request(
+            request_id=f"{i}",
+            prompt_token_ids=[i] * num_tokens,
+            sampling_params=sampling_params,
+            multi_modal_inputs=mm_inputs,
+            multi_modal_placeholders=mm_position,
+            multi_modal_hashes=None,
+            eos_token_id=EOS_TOKEN_ID,
+        )
         requests.append(request)
     return requests
