From d975f9e08d6bac1476ce576b7fd48fa0d0c2f62f Mon Sep 17 00:00:00 2001
From: Tyler Michael Smith <tysmith@redhat.com>
Date: Fri, 30 May 2025 01:01:10 +0000
Subject: [PATCH 1/4] Turn off pplx intranode

Signed-off-by: Tyler Michael Smith <tysmith@redhat.com>
---
 vllm/distributed/device_communicators/all2all.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vllm/distributed/device_communicators/all2all.py b/vllm/distributed/device_communicators/all2all.py
index a250ec89cd5b..b95ee6450f7e 100644
--- a/vllm/distributed/device_communicators/all2all.py
+++ b/vllm/distributed/device_communicators/all2all.py
@@ -83,6 +83,9 @@ def __init__(self, cpu_group):
         assert has_pplx, "pplx_kernels not found. Please follow https://github.com/vllm-project/vllm/blob/main/tools/ep_kernels/README.md to install pplx_kernels."  # noqa
         super().__init__(cpu_group)
 
+        # Intranode doesn't work yet.
+        self.internode = True
+
         if self.internode:
             # inter-node communication needs nvshmem,
             # intra-node communication uses p2p mapping directly

From 3ef240a0c5d7afd6a17054ed5b554fc0174b1507 Mon Sep 17 00:00:00 2001
From: Tyler Michael Smith <tysmith@redhat.com>
Date: Fri, 30 May 2025 01:13:00 +0000
Subject: [PATCH 2/4] Stop passing group_name in pplx all-to-all args

Signed-off-by: Tyler Michael Smith <tysmith@redhat.com>
---
 vllm/model_executor/layers/fused_moe/layer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
index 838a7c24b642..f530dbc336ec 100644
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -269,7 +269,7 @@ def init_prepare_finalize(self, moe: MoEConfig,
                 hidden_dim_scale_bytes=(0 if moe.in_dtype.itemsize != 1 else (
                     (moe.hidden_dim + moe.block_size - 1) // moe.block_size *
                     torch.float32.itemsize)),
-                group_name=all2all_manager.cpu_group.group_name,
+                #group_name=all2all_manager.cpu_group.group_name,
             )
 
             handle = all2all_manager.get_handle(all_to_all_args)

From c6626822024225d4c1164e148086ed1eda8a781a Mon Sep 17 00:00:00 2001
From: Tyler Michael Smith <tyler@neuralmagic.com>
Date: Mon, 2 Jun 2025 14:32:50 -0400
Subject: [PATCH 3/4] Cleanup

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
---
 vllm/distributed/device_communicators/all2all.py | 3 ++-
 vllm/model_executor/layers/fused_moe/layer.py    | 5 ++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/vllm/distributed/device_communicators/all2all.py b/vllm/distributed/device_communicators/all2all.py
index b95ee6450f7e..7177754a3711 100644
--- a/vllm/distributed/device_communicators/all2all.py
+++ b/vllm/distributed/device_communicators/all2all.py
@@ -83,7 +83,8 @@ def __init__(self, cpu_group):
         assert has_pplx, "pplx_kernels not found. Please follow https://github.com/vllm-project/vllm/blob/main/tools/ep_kernels/README.md to install pplx_kernels."  # noqa
         super().__init__(cpu_group)
 
-        # Intranode doesn't work yet.
+        # TODO(tms): Re-enable pplx-a2a intranode once this error is fixed:
+        # failed: cuda error /app/pplx/csrc/all_to_all/intranode.cpp:84 'invalid resource handle' # noqa
         self.internode = True
 
         if self.internode:
diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
index 1ac65b92c119..adb9190700f4 100644
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -269,9 +269,12 @@ def init_prepare_finalize(self, moe: MoEConfig,
                 hidden_dim_scale_bytes=(0 if moe.in_dtype.itemsize != 1 else (
                     (moe.hidden_dim + moe.block_size - 1) // moe.block_size *
                     torch.float32.itemsize)),
-                #group_name=all2all_manager.cpu_group.group_name,
             )
 
+            # Intranode pplx a2a takes a group name while internode does not.
+            if not all2all_manager.internode:
+                all_to_all_args["group_name"] = all2all_manager.cpu_group.group_name
+
             handle = all2all_manager.get_handle(all_to_all_args)
 
             prepare_finalize = PplxPrepareAndFinalize(

From 73167212f0e724e88e2daf6c2243ec53d6204bd0 Mon Sep 17 00:00:00 2001
From: Tyler Michael Smith <tyler@neuralmagic.com>
Date: Mon, 2 Jun 2025 14:44:57 -0400
Subject: [PATCH 4/4] Wrap long group_name assignment for pre-commit

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
---
 vllm/model_executor/layers/fused_moe/layer.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
index adb9190700f4..1e193c909f61 100644
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -273,7 +273,8 @@ def init_prepare_finalize(self, moe: MoEConfig,
 
             # Intranode pplx a2a takes a group name while internode does not.
             if not all2all_manager.internode:
-                all_to_all_args["group_name"] = all2all_manager.cpu_group.group_name
+                all_to_all_args[
+                    "group_name"] = all2all_manager.cpu_group.group_name
 
             handle = all2all_manager.get_handle(all_to_all_args)