
Commit 715da90

Revert "[DP][V1] Fix rank set in DP scenario & Bump torch-npu version to 2.5.1.post1.dev20250528 (vllm-project#1235)"

This reverts commit 96fa7ff.

1 parent 327ad9c · commit 715da90
19 files changed: +47 −114 lines

.github/workflows/accuracy_test.yaml
Lines changed: 0 additions & 2 deletions

@@ -173,8 +173,6 @@ jobs:

       - name: Install vllm-project/vllm-ascend
         working-directory: ./vllm-ascend
-        env:
-          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
         run: |
           pip install -r requirements-dev.txt
           pip install -e .

.github/workflows/image_openeuler.yml
Lines changed: 0 additions & 6 deletions

@@ -19,12 +19,6 @@ on:
       - '.github/workflows/image_openeuler.yml'
       - 'Dockerfile.openEuler'
      - 'vllm_ascend/**'
-      - 'setup.py'
-      - 'pyproject.toml'
-      - 'requirements.txt'
-      - 'cmake/**'
-      - 'CMakeLists.txt'
-      - 'csrc/**'
   push:
     # Publish image when tagging, the Dockerfile in tag will be build as tag image
     branches:

.github/workflows/image_ubuntu.yml
Lines changed: 0 additions & 6 deletions

@@ -19,12 +19,6 @@ on:
       - '.github/workflows/image_ubuntu.yml'
       - 'Dockerfile'
      - 'vllm_ascend/**'
-      - 'setup.py'
-      - 'pyproject.toml'
-      - 'requirements.txt'
-      - 'cmake/**'
-      - 'CMakeLists.txt'
-      - 'csrc/**'
   push:
     # Publish image when tagging, the Dockerfile in tag will be build as tag image
     branches:

.github/workflows/nightly_benchmarks.yaml
Lines changed: 0 additions & 2 deletions

@@ -115,8 +115,6 @@ jobs:
           VLLM_TARGET_DEVICE=empty pip install -e .

       - name: Install vllm-project/vllm-ascend
-        env:
-          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
         run: |
           pip install -e .
           pip install -r benchmarks/requirements-bench.txt

.github/workflows/vllm_ascend_test.yaml
Lines changed: 0 additions & 5 deletions

@@ -165,7 +165,6 @@ jobs:

       - name: Install vllm-project/vllm-ascend
         run: |
-          export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
           export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
           python3 -m pip install -r requirements-dev.txt --extra-index https://download.pytorch.org/whl/cpu/
           python3 -m pip install -v . --extra-index https://download.pytorch.org/whl/cpu/

@@ -239,8 +238,6 @@ jobs:
           VLLM_TARGET_DEVICE=empty pip install -e .

       - name: Install vllm-project/vllm-ascend
-        env:
-          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
         run: |
           pip install -r requirements-dev.txt
           pip install -v -e .

@@ -338,8 +335,6 @@ jobs:
           VLLM_TARGET_DEVICE=empty pip install -e .

       - name: Install vllm-project/vllm-ascend
-        env:
-          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
         run: |
           pip install -r requirements-dev.txt
           pip install -v -e .

.github/workflows/vllm_ascend_test_long_term.yaml
Lines changed: 0 additions & 2 deletions

@@ -88,8 +88,6 @@ jobs:
           VLLM_TARGET_DEVICE=empty pip install -e .

       - name: Install vllm-project/vllm-ascend
-        env:
-          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
         run: |
           pip install -r requirements-dev.txt
           pip install -v -e .

.github/workflows/vllm_ascend_test_pd.yaml
Lines changed: 0 additions & 2 deletions

@@ -97,8 +97,6 @@ jobs:
           VLLM_TARGET_DEVICE=empty pip install -e .

       - name: Install vllm-project/vllm-ascend
-        env:
-          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
         run: |
           pip install -r requirements-dev.txt
           pip install -v -e .

Dockerfile
Lines changed: 1 addition & 2 deletions

@@ -46,8 +46,7 @@ RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm

 # Install vllm-ascend
 # Append `libascend_hal.so` path (devlib) to LD_LIBRARY_PATH
-RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
-    source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
+RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
     source /usr/local/Ascend/nnal/atb/set_env.sh && \
     export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
     python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \

Dockerfile.openEuler
Lines changed: 1 addition & 2 deletions

@@ -43,8 +43,7 @@ RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/ -
     python3 -m pip cache purge

 # Install vllm-ascend
-RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
-    source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
+RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
     source /usr/local/Ascend/nnal/atb/set_env.sh && \
     export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
     python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \

README.md
Lines changed: 1 addition & 1 deletion

@@ -38,7 +38,7 @@ By using vLLM Ascend plugin, popular open-source models, including Transformer-l
 - Software:
   * Python >= 3.9, < 3.12
   * CANN >= 8.1.RC1
-  * PyTorch >= 2.5.1, torch-npu >= 2.5.1.post1.dev20250528
+  * PyTorch >= 2.5.1, torch-npu >= 2.5.1
   * vLLM (the same version as vllm-ascend)

 ## Getting Started

README.zh.md
Lines changed: 1 addition & 1 deletion

@@ -39,7 +39,7 @@ vLLM 昇腾插件 (`vllm-ascend`) 是一个由社区维护的让vLLM在Ascend NP
 - 软件:
   * Python >= 3.9, < 3.12
   * CANN >= 8.1.RC1
-  * PyTorch >= 2.5.1, torch-npu >= 2.5.1.post1.dev20250528
+  * PyTorch >= 2.5.1, torch-npu >= 2.5.1
   * vLLM (与vllm-ascend版本一致)

 ## 开始使用

docs/source/installation.md
Lines changed: 5 additions & 6 deletions

@@ -9,11 +9,11 @@ This document describes how to install vllm-ascend manually.
 - A hardware with Ascend NPU. It's usually the Atlas 800 A2 series.
 - Software:

-  | Software      | Supported version          | Note                                    |
-  |---------------|----------------------------|-----------------------------------------|
-  | CANN          | >= 8.1.RC1                 | Required for vllm-ascend and torch-npu  |
-  | torch-npu     | >= 2.5.1.post1.dev20250528 | Required for vllm-ascend                |
-  | torch         | >= 2.5.1                   | Required for torch-npu and vllm         |
+  | Software  | Supported version | Note                                    |
+  |-----------|-------------------|-----------------------------------------|
+  | CANN      | >= 8.1.RC1        | Required for vllm-ascend and torch-npu  |
+  | torch-npu | >= 2.5.1          | Required for vllm-ascend                |
+  | torch     | >= 2.5.1          | Required for torch-npu and vllm         |

 You have 2 way to install:
 - **Using pip**: first prepare env manually or via CANN image, then install `vllm-ascend` using pip.

@@ -156,7 +156,6 @@ cd ..
 # Install vLLM Ascend
 git clone --depth 1 --branch |vllm_ascend_version| https://github.com/vllm-project/vllm-ascend.git
 cd vllm-ascend
-export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
 pip install -v -e .
 cd ..
 ```
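Note: after this revert, the documented floor is plain torch-npu 2.5.1 again. A quick sanity check against the table above can be done with stock Python tooling; this snippet is illustrative only and is not part of the repository:

# Hypothetical post-install check for the versions listed in the table above.
from importlib.metadata import version, PackageNotFoundError

for pkg, floor in (("torch", "2.5.1"), ("torch-npu", "2.5.1")):
    try:
        print(f"{pkg}: installed {version(pkg)}, required >= {floor}")
    except PackageNotFoundError:
        print(f"{pkg}: not installed")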

pyproject.toml
Lines changed: 1 addition & 1 deletion

@@ -12,7 +12,7 @@ requires = [
     "scipy",
     "setuptools>=64",
     "setuptools-scm>=8",
-    "torch-npu==2.5.1.post1.dev20250528",
+    "torch-npu==2.5.1",
     "torch>=2.5.1",
     "torchvision<0.21.0",
     "wheel",

requirements.txt
Lines changed: 1 addition & 5 deletions

@@ -10,6 +10,7 @@ pyyaml
 scipy
 setuptools>=64
 setuptools-scm>=8
+torch-npu==2.5.1
 torch>=2.5.1
 torchvision<0.21.0
 wheel

@@ -20,8 +21,3 @@ quart

 # Required for N-gram speculative decoding
 numba
-
-# Install torch_npu
---pre
---extra-index-url https://mirrors.huaweicloud.com/ascend/repos/pypi
-torch-npu==2.5.1.post1.dev20250528

setup.py
Lines changed: 1 addition & 1 deletion

@@ -152,7 +152,7 @@ def configure(self, ext: CMakeExtension) -> None:
             # if pybind11 is installed via pip
             pybind11_cmake_path = (subprocess.check_output(
                 [python_executable, "-m", "pybind11",
-                 "--cmakedir"]).decode().strip())
+                 "--cmake"]).decode().strip())
         except subprocess.CalledProcessError as e:
             # else specify pybind11 path installed from source code on CI container
             raise RuntimeError(f"CMake configuration failed: {e}")

tests/multicard/test_data_parallel.py

Lines changed: 0 additions & 66 deletions
This file was deleted.

vllm_ascend/patch/__init__.py
Lines changed: 11 additions & 2 deletions

@@ -47,7 +47,16 @@
 #    Related PR (if no, explain why):
 #    Future Plan:
 #      Remove those patch when vllm merged them
-# 2. `vllm.config.ParallelConfig.get_next_dp_init_port`
+# 2. `vllm.v1.engine.core.DPEngineCoreProc._init_data_parallel`
+#    Why:
+#      There is some bug for ASCEND_RT_VISIBLE_DEVICES usage.
+#    How:
+#      The ASCEND_RT_VISIBLE_DEVICES related code is dropped.
+#    Related PR (if no, explain why):
+#      No, this is a bug for vllm ascend
+#    Future Plan:
+#      Remove this patch once ASCEND_RT_VISIBLE_DEVICES bug is fixed.
+# 3. `vllm.config.ParallelConfig.get_next_dp_init_port`
 #    Why:
 #      vllm doesn't support get port from environment.
 #    How:

@@ -56,7 +65,7 @@
 #      Need a PR to vllm to support get port from environment.
 #    Future Plan:
 #      Remove those patch when vllm merged them
-# 3. `vllm.config.ParallelConfig.ParallelConfig.stateless_init_dp_group`
+# 4. `vllm.config.ParallelConfig.ParallelConfig.stateless_init_dp_group`
 #    Why:
 #      vLLM use gloo backend by default to initialize stateless dp process gourp, but we want to use hccl here to
 #      get better performance
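The numbered entries above document monkey patches: vllm-ascend rebinds attributes on vLLM classes at import time instead of forking the upstream code. A minimal, self-contained sketch of that pattern, using a hypothetical stand-in class rather than vLLM's real DPEngineCoreProc:

# Sketch of the monkey-patch pattern described above; EngineCoreProc is a
# hypothetical stand-in, not vLLM's actual class.
class EngineCoreProc:
    def _init_data_parallel(self, config):
        raise RuntimeError("upstream behavior to be replaced")

def _patched_init_data_parallel(self, config):
    # Replacement behavior; rebinding on the class affects every instance.
    self.dp_ready = True

EngineCoreProc._init_data_parallel = _patched_init_data_parallel

proc = EngineCoreProc()
proc._init_data_parallel(config=None)
assert proc.dp_ready

The real registration happens in patch_distributed.py below, where DPEngineCoreProc._init_data_parallel is assigned in exactly this way.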

vllm_ascend/patch/platform/patch_common/patch_distributed.py
Lines changed: 17 additions & 1 deletion

@@ -22,9 +22,10 @@
 import vllm.distributed
 import vllm.envs as envs
 from torch.distributed import ProcessGroup
-from vllm.config import ParallelConfig
+from vllm.config import ParallelConfig, VllmConfig
 from vllm.distributed.utils import \
     stateless_init_torch_distributed_process_group
+from vllm.v1.engine.core import DPEngineCoreProc

 from vllm_ascend.utils import NullHandle, is_310p

@@ -81,7 +82,22 @@ def stateless_init_dp_group(self) -> "ProcessGroup":
     return dp_group


+def _init_data_parallel(self, vllm_config: VllmConfig):
+    # Configure NPUs and stateless process group for data parallel.
+    dp_rank = vllm_config.parallel_config.data_parallel_rank
+    dp_size = vllm_config.parallel_config.data_parallel_size
+    local_dp_rank = vllm_config.parallel_config.data_parallel_rank_local
+
+    assert dp_size > 1
+    assert 0 <= local_dp_rank <= dp_rank < dp_size
+
+    self.local_dp_rank = local_dp_rank
+    self.dp_group = vllm_config.parallel_config.stateless_init_dp_group()
+    self.current_wave = 0
+
+
 vllm.distributed.parallel_state.destroy_model_parallel = ascend_destroy_model_parallel
+DPEngineCoreProc._init_data_parallel = _init_data_parallel
 ParallelConfig.get_next_dp_init_port = parallel_config_get_dp_port
 ParallelConfig.stateless_init_dp_group = stateless_init_dp_group

vllm_ascend/worker/worker_v1.py
Lines changed: 7 additions & 1 deletion

@@ -74,6 +74,12 @@ def __init__(
             distributed_init_method=distributed_init_method,
             is_driver_worker=is_driver_worker)

+        # NOTE(Yizhou): Since we do not set ASCEND_RT_VISIBLE_DEVICES in
+        # vllm_ascend, we need to set the device id manually.
+        local_dp_rank = self.vllm_config.parallel_config.data_parallel_rank_local
+        world_size = self.vllm_config.parallel_config.world_size
+        self.local_rank_across_dp = local_dp_rank * world_size + self.local_rank
+
         # Try to import mindie_turbo to accelerate vLLM inference.
         try_register_lib(
             "mindie_turbo",

@@ -117,7 +123,7 @@ def initialize_cache(self, num_gpu_blocks: int,

     def init_device(self):
         if self.device_config.device.type == "npu":
-            self.device = torch.device(f"npu:{self.local_rank}")
+            self.device = torch.device(f"npu:{self.local_rank_across_dp}")
             NPUPlatform.set_device(self.device)
             NPUPlatform.empty_cache()
             self.init_npu_memory = NPUPlatform.mem_get_info()[0]
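The NOTE(Yizhou) comment encodes a flat device index: each data-parallel replica owns a contiguous block of world_size NPUs, so the device id becomes local_dp_rank * world_size + local_rank. A small worked example (the replica and rank counts are illustrative, not taken from this commit):

# Worked example of the local_rank_across_dp arithmetic from worker_v1.py.
def local_rank_across_dp(local_dp_rank: int, world_size: int, local_rank: int) -> int:
    # Replica i owns devices [i * world_size, (i + 1) * world_size).
    return local_dp_rank * world_size + local_rank

for dp in range(2):        # two data-parallel replicas on one node
    for rank in range(4):  # four workers per replica
        print(f"dp={dp}, local_rank={rank} -> npu:{local_rank_across_dp(dp, 4, rank)}")

With two replicas of four workers each, replica 0 maps to npu:0 through npu:3 and replica 1 to npu:4 through npu:7, which is the device that init_device now selects via local_rank_across_dp.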
