diff --git a/.github/workflows/accuracy_test.yaml b/.github/workflows/accuracy_test.yaml index 4acb9f670b..6086bbb457 100644 --- a/.github/workflows/accuracy_test.yaml +++ b/.github/workflows/accuracy_test.yaml @@ -37,6 +37,7 @@ on: # Current supported vLLM versions options: - main + - v0.9.2 - v0.9.1 - v0.7.3 vllm-ascend-version: @@ -163,7 +164,7 @@ jobs: repository: vllm-project/vllm path: ./vllm-empty # Please also update this when bump matched version - ref: ${{ github.event.inputs.vllm-version || 'v0.9.1' }} + ref: ${{ github.event.inputs.vllm-version || 'v0.9.2' }} - name: Install vllm-project/vllm from source working-directory: ./vllm-empty diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index c23cc724ef..6644e6f9ef 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -50,7 +50,7 @@ jobs: strategy: matrix: include: - - vllm_branch: v0.9.1 + - vllm_branch: v0.9.2 vllm_ascend_branch: main vllm_use_v1: 1 max-parallel: 1 diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml index b82659be21..236b10f13c 100644 --- a/.github/workflows/vllm_ascend_test.yaml +++ b/.github/workflows/vllm_ascend_test.yaml @@ -138,13 +138,13 @@ jobs: if: ${{ needs.lint.result == 'success' || github.event_name == 'push' }} runs-on: ubuntu-latest container: - image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10 + image: quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10 env: VLLM_LOGGING_LEVEL: ERROR VLLM_USE_MODELSCOPE: True strategy: matrix: - vllm_version: [main, v0.9.1] + vllm_version: [main, v0.9.2] steps: - name: Install packages run: | @@ -201,7 +201,7 @@ jobs: max-parallel: 2 matrix: os: [linux-arm64-npu-1] - vllm_version: [main, v0.9.1] + vllm_version: [main, v0.9.2] name: singlecard e2e test runs-on: ${{ matrix.os }} container: @@ -302,7 +302,7 @@ jobs: max-parallel: 1 matrix: os: [linux-arm64-npu-4] - 
vllm_version: [main, v0.9.1] + vllm_version: [main, v0.9.2] name: multicard e2e test runs-on: ${{ matrix.os }} container: diff --git a/.github/workflows/vllm_ascend_test_long_term.yaml b/.github/workflows/vllm_ascend_test_long_term.yaml index bcf166941d..9a33b3aca8 100644 --- a/.github/workflows/vllm_ascend_test_long_term.yaml +++ b/.github/workflows/vllm_ascend_test_long_term.yaml @@ -43,7 +43,7 @@ jobs: max-parallel: 2 matrix: os: [linux-arm64-npu-1, linux-arm64-npu-4] - vllm_version: [main, v0.9.1] + vllm_version: [main, v0.9.2] name: vLLM Ascend long term test runs-on: ${{ matrix.os }} container: diff --git a/Dockerfile b/Dockerfile index e6ec009d28..f839568257 100644 --- a/Dockerfile +++ b/Dockerfile @@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.9.1 +ARG VLLM_TAG=v0.9.2 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \ diff --git a/Dockerfile.310p b/Dockerfile.310p index fffe73eeeb..4342ce1f4d 100644 --- a/Dockerfile.310p +++ b/Dockerfile.310p @@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.9.1 +ARG VLLM_TAG=v0.9.2 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. 
RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \ diff --git a/Dockerfile.310p.openEuler b/Dockerfile.310p.openEuler index da4718cb29..7d4b23cf6b 100644 --- a/Dockerfile.310p.openEuler +++ b/Dockerfile.310p.openEuler @@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/ # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.9.1 +ARG VLLM_TAG=v0.9.2 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. diff --git a/Dockerfile.openEuler b/Dockerfile.openEuler index 4e414e04b7..eba0dcb366 100644 --- a/Dockerfile.openEuler +++ b/Dockerfile.openEuler @@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/ # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.9.1 +ARG VLLM_TAG=v0.9.2 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. 
diff --git a/docs/source/community/versioning_policy.md b/docs/source/community/versioning_policy.md index e20fd1534f..386b2b04c7 100644 --- a/docs/source/community/versioning_policy.md +++ b/docs/source/community/versioning_policy.md @@ -74,8 +74,8 @@ Usually, each minor version of vLLM (such as 0.7) will correspond to a vLLM Asce | Branch | Status | Note | |------------|--------------|--------------------------------------| -| main | Maintained | CI commitment for vLLM main branch and vLLM 0.9.x branch | -| v0.9.1-dev | Maintained | CI commitment for vLLM 0.9.0 and 0.9.1 version | +| main | Maintained | CI commitment for vLLM main branch and vLLM v0.9.2 version | +| v0.9.1-dev | Maintained | CI commitment for vLLM 0.9.1 version | | v0.7.3-dev | Maintained | CI commitment for vLLM 0.7.3 version | | v0.7.1-dev | Unmaintained | Replaced by v0.7.3-dev | diff --git a/docs/source/user_guide/graph_mode.md b/docs/source/user_guide/graph_mode.md index a390bdbcc9..77e91dd62a 100644 --- a/docs/source/user_guide/graph_mode.md +++ b/docs/source/user_guide/graph_mode.md @@ -12,7 +12,7 @@ From v0.9.1rc1 with V1 Engine, vLLM Ascend will run models in graph mode by defa There are two kinds for graph mode supported by vLLM Ascend: - **ACLGraph**: This is the default graph mode supported by vLLM Ascend. In v0.9.1rc1, only Qwen series models are well tested. -- **TorchAirGraph**: This is the GE graph mode. In v0.9.1rc1, only DeepSeek series models are supported. In v0.9.1rc2, we also support PanguProMoe with torchair. +- **TorchAirGraph**: This is the GE graph mode. In v0.9.1rc1, only DeepSeek series models are supported. ## Using ACLGraph ACLGraph is enabled by default. Take Qwen series models as an example, just set to use V1 Engine is enough.