Skip to content

Commit 722fce0

Browse files
tlrmchlsmth, patrickvonplaten
authored and committed
[Bugfix][EP+DP] Fix internode check (vllm-project#19112)
Signed-off-by: Tyler Michael Smith <tysmith@redhat.com>
Signed-off-by: Patrick von Platen <patrick.v.platen@gmail.com>
1 parent c16103f commit 722fce0

File tree

2 files changed

+1
-8
lines changed

2 files changed

+1
-8
lines changed

vllm/distributed/device_communicators/all2all.py

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -84,10 +84,6 @@ def __init__(self, cpu_group):
8484
assert has_pplx, "pplx_kernels not found. Please follow https://github.com/vllm-project/vllm/blob/main/tools/ep_kernels/README.md to install pplx_kernels." # noqa
8585
super().__init__(cpu_group)
8686

87-
# TODO(tms): Disable pplx-a2a intranode as it fails with the error:
88-
# failed: cuda error /app/pplx/csrc/all_to_all/intranode.cpp:84 'invalid resource handle' # noqa
89-
self.internode = True
90-
9187
if self.internode:
9288
# inter-node communication needs nvshmem,
9389
# intra-node communication uses p2p mapping directly
@@ -178,7 +174,6 @@ def _make_all2all_kwargs(self) -> dict[Any, Any]:
178174
num_rdma_bytes = 1024 * 1024 * 1024
179175
num_qps_per_rank = self.num_sms // 2
180176
else:
181-
assert self.intranode
182177
num_rdma_bytes = 0
183178
num_qps_per_rank = 1
184179

@@ -243,7 +238,6 @@ def _make_all2all_kwargs(
243238
if self.internode:
244239
num_rdma_bytes = 1024 * 1024 * 1024
245240
else:
246-
assert self.intranode
247241
num_rdma_bytes = deep_ep.Buffer.get_low_latency_rdma_size_hint(
248242
num_max_dispatch_tokens_per_rank=max_num_tokens_per_dp_rank,
249243
hidden=token_hidden_size,

vllm/distributed/device_communicators/base_device_communicator.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,7 @@ def __init__(self, cpu_group):
4949

5050
# all2all communication often has separate implementations for
5151
# intra-node and inter-node communication
52-
self.intranode = in_the_same_node_as(cpu_group, source_rank=0)
53-
self.internode = not self.intranode
52+
self.internode = not all(in_the_same_node_as(cpu_group, source_rank=0))
5453

5554
def get_handle(self, kwargs):
5655
# get a handle for the all2all communication,

0 commit comments

Comments
 (0)