Upgrade vLLM to v0.9.2

Yikun · Yikun · commit c6ffd0a1e10d · 2025-07-07T23:21:17.000+08:00
Signed-off-by: Yikun Jiang <yikunkero@gmail.com>
diff --git a/.github/workflows/accuracy_test.yaml b/.github/workflows/accuracy_test.yaml
@@ -37,6 +37,7 @@ on:
         # Current supported vLLM versions
         options:
           - main
+          - v0.9.2
           - v0.9.1
           - v0.7.3
       vllm-ascend-version:
@@ -163,7 +164,7 @@ jobs:
           repository: vllm-project/vllm
           path: ./vllm-empty
           # Please also update this when bump matched version
-          ref: ${{ github.event.inputs.vllm-version || 'v0.9.1' }}
+          ref: ${{ github.event.inputs.vllm-version || 'v0.9.2rc2' }}
 
       - name: Install vllm-project/vllm from source
         working-directory: ./vllm-empty
diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml
@@ -50,7 +50,7 @@ jobs:
     strategy:
       matrix:
         include:
-          - vllm_branch: v0.9.1
+          - vllm_branch: v0.9.2rc2
             vllm_ascend_branch: main
             vllm_use_v1: 1
       max-parallel: 1
diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml
@@ -144,7 +144,7 @@ jobs:
         VLLM_USE_MODELSCOPE: True
     strategy:
       matrix:
-        vllm_version: [main, v0.9.1]
+        vllm_version: [main, v0.9.2rc2]
     steps:
       - name: Install packages
         run: |
@@ -201,7 +201,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-arm64-npu-1]
-        vllm_version: [main, v0.9.1]
+        vllm_version: [main, v0.9.2rc2]
     name: singlecard e2e test
     runs-on: ${{ matrix.os }}
     container:
@@ -302,7 +302,7 @@ jobs:
       max-parallel: 1
       matrix:
         os: [linux-arm64-npu-4]
-        vllm_version: [main, v0.9.1]
+        vllm_version: [main, v0.9.2rc2]
     name: multicard e2e test
     runs-on: ${{ matrix.os }}
     container:
diff --git a/.github/workflows/vllm_ascend_test_long_term.yaml b/.github/workflows/vllm_ascend_test_long_term.yaml
@@ -43,7 +43,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-arm64-npu-1, linux-arm64-npu-4]
-        vllm_version: [main, v0.9.1]
+        vllm_version: [main, v0.9.2rc2]
     name: vLLM Ascend long term test
     runs-on: ${{ matrix.os }}
     container:
diff --git a/.github/workflows/vllm_ascend_test_pd.yaml b/.github/workflows/vllm_ascend_test_pd.yaml
@@ -44,7 +44,7 @@ jobs:
         vllm_verison: [
             # revert me when V1 disaggregation prefill is merged in main
             # main, 
-            v0.9.1
+            v0.9.2rc2
           ]
     name: vLLM Ascend prefilling decoding disaggregation test
     runs-on: linux-arm64-npu-static-8
diff --git a/Dockerfile b/Dockerfile
@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.9.1
+ARG VLLM_TAG=v0.9.2rc2
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
diff --git a/Dockerfile.310p b/Dockerfile.310p
@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.9.1
+ARG VLLM_TAG=v0.9.2rc2
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
diff --git a/Dockerfile.310p.openEuler b/Dockerfile.310p.openEuler
@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.9.1
+ARG VLLM_TAG=v0.9.2rc2
 
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
diff --git a/Dockerfile.openEuler b/Dockerfile.openEuler
@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.9.1
+ARG VLLM_TAG=v0.9.2rc2
 
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
diff --git a/docs/source/community/versioning_policy.md b/docs/source/community/versioning_policy.md
@@ -74,8 +74,8 @@ Usually, each minor version of vLLM (such as 0.7) will correspond to a vLLM Asce
 
 | Branch     | Status       | Note                                 |
 |------------|--------------|--------------------------------------|
-| main       | Maintained   | CI commitment for vLLM main branch and vLLM 0.9.x branch   |
-| v0.9.1-dev | Maintained   | CI commitment for vLLM 0.9.0 and 0.9.1 version |
+| main       | Maintained   | CI commitment for vLLM main branch and vLLM 0.9.2 branch   |
+| v0.9.1-dev | Maintained   | CI commitment for vLLM 0.9.1 version |
 | v0.7.3-dev | Maintained   | CI commitment for vLLM 0.7.3 version |
 | v0.7.1-dev | Unmaintained | Replaced by v0.7.3-dev               |
 
diff --git a/docs/source/user_guide/graph_mode.md b/docs/source/user_guide/graph_mode.md
@@ -12,7 +12,7 @@ From v0.9.1rc1 with V1 Engine, vLLM Ascend will run models in graph mode by defa
 
 There are two kinds for graph mode supported by vLLM Ascend:
 - **ACLGraph**: This is the default graph mode supported by vLLM Ascend. In v0.9.1rc1, only Qwen series models are well tested.
-- **TorchAirGraph**: This is the GE graph mode. In v0.9.1rc1, only DeepSeek series models are supported. In v0.9.1rc2, we also support PanguProMoe with torchair.
+- **TorchAirGraph**: This is the GE graph mode. In v0.9.1rc1, only DeepSeek series models are supported. In v0.9.2rc2, we also support PanguProMoe with torchair.
 
 ## Using ACLGraph
 ACLGraph is enabled by default. Take Qwen series models as an example, just set to use V1 Engine is enough.

Original file line number	Diff line number	Diff line change
`@@ -44,7 +44,7 @@ jobs:`
`44`	`44`	`vllm_verison: [`
`45`	`45`	`# revert me when V1 disaggregation prefill is merged in main`
`46`	`46`	`# main,`
`47`		`- v0.9.1`
	`47`	`+ v0.9.2rc2`
`48`	`48`	`]`
`49`	`49`	`name: vLLM Ascend prefilling decoding disaggregation test`
`50`	`50`	`runs-on: linux-arm64-npu-static-8`