
Commit cacde11

MengqingCao authored and wxsIcey committed
[DP][V1] Fix rank set in DP scenario & Bump torch-npu version to 2.5.1.post1.dev20250528 (vllm-project#1235)
1. Fix the rank set in the DP scenario. The new PoC version of torch-npu supports setting `ASCEND_RT_VISIBLE_DEVICES` dynamically, so we can use the rank set in `DPEngineCoreProc` directly instead of calculating the local rank across DP by hand in the patched `_init_data_parallel`.
   Closes: vllm-project#1170

2. Bump the torch-npu version to 2.5.1.post1.dev20250528.
   Closes: vllm-project#1242
   Closes: vllm-project#1232

CI passed with the newly added test.

---------

Signed-off-by: MengqingCao <cmq0113@163.com>
Signed-off-by: Icey <1790571317@qq.com>
Co-authored-by: Icey <1790571317@qq.com>
1 parent b40af9b commit cacde11
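To illustrate the first fix: since the new torch-npu allows `ASCEND_RT_VISIBLE_DEVICES` to be set dynamically per process, each DP engine process can derive its visible NPUs directly from its DP rank. A minimal sketch of the idea (the helper name and the contiguous device-slicing scheme are illustrative assumptions, not the patch's verbatim code):

```python
import os


def set_visible_npu_devices(dp_rank: int, tp_size: int) -> None:
    """Expose one contiguous slice of NPUs to this DP engine process.

    Hypothetical helper: with torch-npu >= 2.5.1.post1.dev20250528,
    ASCEND_RT_VISIBLE_DEVICES can be changed at runtime, so the rank
    from DPEngineCoreProc is enough to pick the device slice, with no
    hand-computed local rank needed.
    """
    first = dp_rank * tp_size
    devices = ",".join(str(d) for d in range(first, first + tp_size))
    os.environ["ASCEND_RT_VISIBLE_DEVICES"] = devices


# Example: DP rank 1 with tensor-parallel size 2 sees NPUs "2,3".
set_visible_npu_devices(dp_rank=1, tp_size=2)
```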

File tree

3 files changed: +69 −12 lines changed

  pyproject.toml
  tests/multicard/test_data_parallel.py
  vllm_ascend/patch/__init__.py

pyproject.toml

Lines changed: 1 addition & 1 deletion

@@ -12,7 +12,7 @@ requires = [
     "scipy",
     "setuptools>=64",
     "setuptools-scm>=8",
-    "torch-npu==2.5.1",
+    "torch-npu==2.5.1.post1.dev20250528",
     "torch>=2.5.1",
     "torchvision<0.21.0",
     "wheel",

tests/multicard/test_data_parallel.py

Lines changed: 66 additions & 0 deletions

@@ -0,0 +1,66 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+# Copyright 2023 The vLLM team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""
+Compare the outputs of vLLM with and without data parallelism.
+
+Run `pytest tests/multicard/test_data_parallel.py`.
+"""
+
+import os
+
+import pytest
+
+from tests.conftest import VllmRunner
+from tests.model_utils import check_outputs_equal
+
+MODELS = ["Qwen/Qwen2.5-0.5B-Instruct"]
+
+
+@pytest.mark.skipif(os.getenv("VLLM_USE_V1") == "0",
+                    reason="Data parallel is only supported on v1")
+@pytest.mark.parametrize("model", MODELS)
+@pytest.mark.parametrize("max_tokens", [32])
+def test_data_parallel_correctness(
+    model: str,
+    max_tokens: int,
+) -> None:
+    example_prompts = [
+        "Hello, my name is", "The president of the United States is",
+        "The capital of France is", "The future of AI is"
+    ]
+
+    with VllmRunner(model_name=model,
+                    max_model_len=1024,
+                    max_num_seqs=16,
+                    data_parallel_size=2,
+                    distributed_executor_backend="mp") as vllm_model:
+        vllm_dp_outputs = vllm_model.generate_greedy(example_prompts,
+                                                     max_tokens)
+
+    with VllmRunner(
+            model_name=model,
+            max_model_len=1024,
+            max_num_seqs=16,
+    ) as vllm_model:
+        vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
+
+    check_outputs_equal(
+        outputs_0_lst=vllm_outputs,
+        outputs_1_lst=vllm_dp_outputs,
+        name_0="vllm_outputs",
+        name_1="vllm_dp_outputs",
+    )

vllm_ascend/patch/__init__.py

Lines changed: 2 additions & 11 deletions

@@ -47,16 +47,7 @@
 #    Related PR (if no, explain why):
 #    Future Plan:
 #       Remove those patch when vllm merged them
-# 2. `vllm.v1.engine.core.DPEngineCoreProc._init_data_parallel`
-#    Why:
-#       There is some bug for ASCEND_RT_VISIBLE_DEVICES usage.
-#    How:
-#       The ASCEND_RT_VISIBLE_DEVICES related code is dropped.
-#    Related PR (if no, explain why):
-#       No, this is a bug for vllm ascend
-#    Future Plan:
-#       Remove this patch once ASCEND_RT_VISIBLE_DEVICES bug is fixed.
-# 3. `vllm.config.ParallelConfig.get_next_dp_init_port`
+# 2. `vllm.config.ParallelConfig.get_next_dp_init_port`
 #    Why:
 #       vllm doesn't support get port from environment.
 #    How:
@@ -65,7 +56,7 @@
 #       Need a PR to vllm to support get port from environment.
 #    Future Plan:
 #       Remove those patch when vllm merged them
-# 4. `vllm.config.ParallelConfig.ParallelConfig.stateless_init_dp_group`
+# 3. `vllm.config.ParallelConfig.ParallelConfig.stateless_init_dp_group`
 #    Why:
 #       vLLM use gloo backend by default to initialize stateless dp process group, but we want to use hccl here to
 #       get better performance
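For context, here is a minimal sketch of the two patches that remain after this commit (illustrative only: the environment variable name `VLLM_DP_MASTER_PORT` and the exact import path are assumptions, not the repo's verbatim code):

```python
import os

from vllm.distributed.utils import (
    stateless_init_torch_distributed_process_group)


def get_next_dp_init_port(self) -> int:
    # Patch 2: prefer a port supplied via the environment
    # (VLLM_DP_MASTER_PORT is an assumed name for illustration),
    # falling back to vLLM's incrementing port counter.
    env_port = os.environ.get("VLLM_DP_MASTER_PORT")
    if env_port is not None:
        return int(env_port)
    port = self.data_parallel_master_port
    self.data_parallel_master_port += 1
    return port


def stateless_init_dp_group(self):
    # Patch 3: build the stateless DP process group with the "hccl"
    # backend for better performance on Ascend, instead of vLLM's
    # default "gloo".
    return stateless_init_torch_distributed_process_group(
        self.data_parallel_master_ip,
        self.get_next_dp_init_port(),
        self.data_parallel_rank,
        self.data_parallel_size,
        backend="hccl")
```

Both functions would then be monkey-patched onto `vllm.config.ParallelConfig`.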

 (0)