From d975f9e08d6bac1476ce576b7fd48fa0d0c2f62f Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Fri, 30 May 2025 01:01:10 +0000 Subject: [PATCH 1/4] Turn off pplx intranode Signed-off-by: Tyler Michael Smith --- vllm/distributed/device_communicators/all2all.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vllm/distributed/device_communicators/all2all.py b/vllm/distributed/device_communicators/all2all.py index a250ec89cd5b..b95ee6450f7e 100644 --- a/vllm/distributed/device_communicators/all2all.py +++ b/vllm/distributed/device_communicators/all2all.py @@ -83,6 +83,9 @@ def __init__(self, cpu_group): assert has_pplx, "pplx_kernels not found. Please follow https://github.com/vllm-project/vllm/blob/main/tools/ep_kernels/README.md to install pplx_kernels." # noqa super().__init__(cpu_group) + # Intranode doesn't work yet. + self.internode = True + if self.internode: # inter-node communication needs nvshmem, # intra-node communication uses p2p mapping directly From 3ef240a0c5d7afd6a17054ed5b554fc0174b1507 Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Fri, 30 May 2025 01:13:00 +0000 Subject: [PATCH 2/4] group_name Signed-off-by: Tyler Michael Smith --- vllm/model_executor/layers/fused_moe/layer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index 838a7c24b642..f530dbc336ec 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -269,7 +269,7 @@ def init_prepare_finalize(self, moe: MoEConfig, hidden_dim_scale_bytes=(0 if moe.in_dtype.itemsize != 1 else ( (moe.hidden_dim + moe.block_size - 1) // moe.block_size * torch.float32.itemsize)), - group_name=all2all_manager.cpu_group.group_name, + #group_name=all2all_manager.cpu_group.group_name, ) handle = all2all_manager.get_handle(all_to_all_args) From c6626822024225d4c1164e148086ed1eda8a781a Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Mon, 2 Jun 2025 14:32:50 -0400 Subject: [PATCH 3/4] Cleanup Signed-off-by: Tyler Michael Smith --- vllm/distributed/device_communicators/all2all.py | 3 ++- vllm/model_executor/layers/fused_moe/layer.py | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/vllm/distributed/device_communicators/all2all.py b/vllm/distributed/device_communicators/all2all.py index b95ee6450f7e..7177754a3711 100644 --- a/vllm/distributed/device_communicators/all2all.py +++ b/vllm/distributed/device_communicators/all2all.py @@ -83,7 +83,8 @@ def __init__(self, cpu_group): assert has_pplx, "pplx_kernels not found. Please follow https://github.com/vllm-project/vllm/blob/main/tools/ep_kernels/README.md to install pplx_kernels." # noqa super().__init__(cpu_group) - # Intranode doesn't work yet. + # TODO(tms): Disable pplx-a2a intranode as it fails with the error: + # failed: cuda error /app/pplx/csrc/all_to_all/intranode.cpp:84 'invalid resource handle' # noqa self.internode = True if self.internode: diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index 1ac65b92c119..adb9190700f4 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -269,9 +269,12 @@ def init_prepare_finalize(self, moe: MoEConfig, hidden_dim_scale_bytes=(0 if moe.in_dtype.itemsize != 1 else ( (moe.hidden_dim + moe.block_size - 1) // moe.block_size * torch.float32.itemsize)), - #group_name=all2all_manager.cpu_group.group_name, ) + # Intranode pplx a2a takes a group name while internode does not. + if not all2all_manager.internode: + all_to_all_args["group_name"] = all2all_manager.cpu_group.group_name + handle = all2all_manager.get_handle(all_to_all_args) prepare_finalize = PplxPrepareAndFinalize( From 73167212f0e724e88e2daf6c2243ec53d6204bd0 Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Mon, 2 Jun 2025 14:44:57 -0400 Subject: [PATCH 4/4] precommit Signed-off-by: Tyler Michael Smith --- vllm/model_executor/layers/fused_moe/layer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index adb9190700f4..1e193c909f61 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -273,7 +273,8 @@ def init_prepare_finalize(self, moe: MoEConfig, # Intranode pplx a2a takes a group name while internode does not. if not all2all_manager.internode: - all_to_all_args["group_name"] = all2all_manager.cpu_group.group_name + all_to_all_args[ + "group_name"] = all2all_manager.cpu_group.group_name handle = all2all_manager.get_handle(all_to_all_args)