Skip to content

Commit ab275d4

Browse files
pzhan9 authored and facebook-github-bot committed
Pass comm_actor_ref in ActorMeshRef (#426)
Summary:
Pull Request resolved: #426

tl;dr: the `comm actor`'s world ID can be different from the `ProcMesh`'s `world_id`, so we should not attest it.

The `comm actor`, which is spawned through `ProcessAllocator`, gets its world ID from a uuid:
https://www.internalfb.com/code/fbsource/[a29e91672b216c626792ec9406e77a922b9d88dc]/fbcode/monarch/hyperactor_mesh/src/alloc/process.rs?lines=108-109%2C420

`ProcMesh` gets its `world_id` from the `Alloc` it uses:
https://www.internalfb.com/code/fbsource/[a29e91672b216c626792ec9406e77a922b9d88dc]/fbcode/monarch/hyperactor_mesh/src/proc_mesh.rs?lines=95%2C99-101%2C300%2C302%2C317

`MastAllocator` uses `RemoteProcessAlloc`, and explicitly uses the `task_group_name` as its `world_id`:
https://www.internalfb.com/code/fbsource/[a29e91672b216c626792ec9406e77a922b9d88dc]/fbcode/monarch/hyperactor_meta/src/alloc.rs?lines=274-275%2C284%2C394%2C397%2C399

For example, this is a log I get from running a test. `ProcMesh` has `test_task_group`, while the comm actor has `_1C8Rf4TR6jZe`.

> I0703 09:09:51.966694 416137 fbcode/monarch/hyperactor_mesh/src/actor_mesh.rs:154] binding actor mesh ProcMesh { world_id: WorldId("test_task_group"), shape: Shape { labels: ["hosts", "gpus"], slice: Slice { offset: 0, sizes: [1, 2], strides: [2, 1] } }, ranks: [(ProcId(WorldId("_1C8Rf4TR6jZe"), 0), (Unix(Bound("vujWCqQQ55kCU2NQQlCC2q3N" (abstract))), ActorRef { actor_id: ActorId(ProcId(WorldId("_1C8Rf4TR6jZe"), 0), "mesh", 0), phantom: PhantomData<hyperactor_mesh::proc_mesh::mesh_agent::MeshAgent> })), (ProcId(WorldId("_1C8Rf4TR6jZe"), 1), (Unix(Bound("vujWCqQQ55kCU2NQQlCC2q3N" (abstract))), ActorRef { actor_id: ActorId(ProcId(WorldId("_1C8Rf4TR6jZe"), 1), "mesh", 0), phantom: PhantomData<hyperactor_mesh::proc_mesh::mesh_agent::MeshAgent> }))], client: Mailbox { inner: State { actor_id: ActorId(ProcId(WorldId("test_task_group_manager"), 0), "client", 0), open_ports: [17082645012990790806], next_port: 1036 } }, comm_actors: [ActorRef { actor_id: ActorId(ProcId(WorldId("_1C8Rf4TR6jZe"), 0), "comm", 0), phantom: PhantomData<hyperactor_mesh::comm::CommActor> }, ActorRef { actor_id: ActorId(ProcId(WorldId("_1C8Rf4TR6jZe"), 1), "comm", 0), phantom: PhantomData<hyperactor_mesh::comm::CommActor> }] }

Reviewed By: mariusae

Differential Revision: D77737951

fbshipit-source-id: 4f70aad14e984b1bc89bea79bf3a3ed8218048e0
1 parent 04af59f commit ab275d4

File tree

2 files changed

+17
-11
lines changed

2 files changed

+17
-11
lines changed

hyperactor_mesh/src/actor_mesh.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -155,6 +155,7 @@ pub trait ActorMesh: Mesh {
155155
),
156156
self.shape().clone(),
157157
self.proc_mesh().shape().clone(),
158+
self.proc_mesh().comm_actor().clone(),
158159
)
159160
}
160161
}

hyperactor_mesh/src/reference.rs

Lines changed: 16 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -11,12 +11,9 @@ use std::cmp::PartialOrd;
1111
use std::hash::Hash;
1212
use std::marker::PhantomData;
1313

14-
use hyperactor::ActorId;
1514
use hyperactor::ActorRef;
1615
use hyperactor::Named;
17-
use hyperactor::ProcId;
1816
use hyperactor::RemoteHandles;
19-
use hyperactor::WorldId;
2017
use hyperactor::actor::RemoteActor;
2118
use hyperactor::cap;
2219
use hyperactor::message::Castable;
@@ -79,6 +76,8 @@ pub struct ActorMeshRef<A: RemoteActor> {
7976
shape: Shape,
8077
/// The shape of the underlying Proc Mesh.
8178
proc_mesh_shape: Shape,
79+
/// The reference to the comm actor of the underlying Proc Mesh.
80+
comm_actor_ref: ActorRef<CommActor>,
8281
phantom: PhantomData<A>,
8382
}
8483

@@ -87,11 +86,17 @@ impl<A: RemoteActor> ActorMeshRef<A> {
8786
/// typed reference. This is usually invoked to provide a guarantee
8887
/// that an externally-provided mesh ID (e.g., through a command
8988
/// line argument) is a valid reference.
90-
pub(crate) fn attest(mesh_id: ActorMeshId, shape: Shape, proc_mesh_shape: Shape) -> Self {
89+
pub(crate) fn attest(
90+
mesh_id: ActorMeshId,
91+
shape: Shape,
92+
proc_mesh_shape: Shape,
93+
comm_actor_ref: ActorRef<CommActor>,
94+
) -> Self {
9195
Self {
9296
mesh_id,
9397
shape,
9498
proc_mesh_shape,
99+
comm_actor_ref,
95100
phantom: PhantomData,
96101
}
97102
}
@@ -127,21 +132,16 @@ impl<A: RemoteActor> ActorMeshRef<A> {
127132
where
128133
A: RemoteHandles<M> + RemoteHandles<IndexedErasedUnbound<M>>,
129134
{
130-
let world_id = WorldId(self.mesh_id.0.0.clone());
131-
let comm_actor_id = ActorId(ProcId(world_id, 0), "comm".to_string(), 0);
132-
133135
actor_mesh_cast::<M, A>(
134136
caps,
135137
self.shape(),
136138
self.proc_mesh_shape(),
137139
self.name(),
138140
caps.mailbox().actor_id(),
139-
&ActorRef::<CommActor>::attest(comm_actor_id),
141+
&self.comm_actor_ref,
140142
selection,
141143
message,
142-
)?;
143-
144-
Ok(())
144+
)
145145
}
146146
}
147147

@@ -151,6 +151,7 @@ impl<A: RemoteActor> Clone for ActorMeshRef<A> {
151151
mesh_id: self.mesh_id.clone(),
152152
shape: self.shape.clone(),
153153
proc_mesh_shape: self.proc_mesh_shape.clone(),
154+
comm_actor_ref: self.comm_actor_ref.clone(),
154155
phantom: PhantomData,
155156
}
156157
}
@@ -211,6 +212,7 @@ mod tests {
211212
mesh_id: ActorMeshId,
212213
shape: Shape,
213214
proc_mesh_shape: Shape,
215+
comm_actor_ref: ActorRef<CommActor>,
214216
}
215217

216218
#[async_trait]
@@ -223,6 +225,7 @@ mod tests {
223225
params.mesh_id,
224226
params.shape,
225227
params.proc_mesh_shape,
228+
params.comm_actor_ref,
226229
),
227230
})
228231
}
@@ -284,6 +287,7 @@ mod tests {
284287
),
285288
shape: ping_proc_mesh.shape().clone(),
286289
proc_mesh_shape: ping_proc_mesh.shape().clone(),
290+
comm_actor_ref: ping_proc_mesh.comm_actor().clone(),
287291
},
288292
)
289293
.await
@@ -301,6 +305,7 @@ mod tests {
301305
),
302306
shape: pong_proc_mesh.shape().clone(),
303307
proc_mesh_shape: pong_proc_mesh.shape().clone(),
308+
comm_actor_ref: pong_proc_mesh.comm_actor().clone(),
304309
},
305310
)
306311
.await

0 commit comments

Comments
 (0)