Skip to content

Commit 0a24bba

Browse files
benjipelletier authored and facebook-github-bot committed
Support single asyncio runtime for python actors (#413)
Summary:
Pull Request resolved: #413

Support two asyncio runtime modes in monarch-hyperactor:
* shared asyncio event loop
* per-actor asyncio event loop (default)

This lets us scale past ~1000 Python actors with the LocalAllocator, since in shared mode all actors run their coroutines on one shared event-loop thread instead of each actor spawning its own.

exported-using-ghexport

Reviewed By: mariusae

Differential Revision: D77636974

fbshipit-source-id: a74d12776ee3551b470074dddab380a472caabb7
1 parent d07cbce commit 0a24bba

File tree

4 files changed

+63
-23
lines changed

4 files changed

+63
-23
lines changed

monarch_hyperactor/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ anyhow = "1.0.98"
1212
async-trait = "0.1.86"
1313
bincode = "1.3.3"
1414
clap = { version = "4.5.38", features = ["derive", "env", "string", "unicode", "wrap_help"] }
15+
erased-serde = "0.3.27"
1516
fbinit = { version = "0.2.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" }
1617
hyperactor = { version = "0.0.0", path = "../hyperactor" }
1718
hyperactor_extension = { version = "0.0.0", path = "../hyperactor_extension" }

monarch_hyperactor/src/actor.rs

Lines changed: 41 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use std::future::Future;
1111
use std::future::pending;
1212
use std::pin::Pin;
1313
use std::sync::Arc;
14+
use std::sync::OnceLock;
1415

1516
use async_trait::async_trait;
1617
use hyperactor::Actor;
@@ -41,6 +42,7 @@ use serde_bytes::ByteBuf;
4142
use tokio::sync::Mutex;
4243
use tokio::sync::oneshot;
4344

45+
use crate::config::SHARED_ASYNCIO_RUNTIME;
4446
use crate::mailbox::EitherPortRef;
4547
use crate::mailbox::PyMailbox;
4648
use crate::proc::InstanceWrapper;
@@ -275,8 +277,20 @@ pub(super) struct PythonActor {
275277
pub(super) actor: PyObject,
276278

277279
/// Stores a reference to the Python event loop to run Python coroutines on.
278-
/// We give each PythonActor its own even loop in its own thread.
279-
task_locals: pyo3_async_runtimes::TaskLocals,
280+
/// This is None when using single runtime mode, Some when using per-actor mode.
281+
task_locals: Option<pyo3_async_runtimes::TaskLocals>,
282+
}
283+
284+
impl PythonActor {
285+
/// Get the TaskLocals to use for this actor.
286+
/// Returns either the shared TaskLocals or this actor's own TaskLocals based on configuration.
287+
fn get_task_locals(&self, py: Python) -> &pyo3_async_runtimes::TaskLocals {
288+
self.task_locals.as_ref().unwrap_or_else(|| {
289+
// Use shared TaskLocals
290+
static SHARED_TASK_LOCALS: OnceLock<pyo3_async_runtimes::TaskLocals> = OnceLock::new();
291+
Python::allow_threads(py, || SHARED_TASK_LOCALS.get_or_init(create_task_locals))
292+
})
293+
}
280294
}
281295

282296
#[async_trait]
@@ -289,32 +303,36 @@ impl Actor for PythonActor {
289303
let class_type: &Bound<'_, PyType> = unpickled.downcast()?;
290304
let actor: PyObject = class_type.call0()?.into_py_any(py)?;
291305

292-
// Release the GIL so that the thread spawned below can acquire it.
293-
let task_locals = Python::allow_threads(py, || {
294-
let (tx, rx) = std::sync::mpsc::channel();
295-
let _ = std::thread::spawn(move || {
296-
Python::with_gil(|py| {
297-
let asyncio = Python::import(py, "asyncio").unwrap();
298-
let event_loop = asyncio.call_method0("new_event_loop").unwrap();
299-
asyncio
300-
.call_method1("set_event_loop", (event_loop.clone(),))
301-
.unwrap();
302-
303-
let task_locals = pyo3_async_runtimes::TaskLocals::new(event_loop.clone())
304-
.copy_context(py)
305-
.unwrap();
306-
tx.send(task_locals).unwrap();
307-
event_loop.call_method0("run_forever").unwrap();
308-
});
309-
});
310-
rx.recv().unwrap()
311-
});
306+
// Only create per-actor TaskLocals if not using shared runtime
307+
let task_locals = (!hyperactor::config::global::get(SHARED_ASYNCIO_RUNTIME))
308+
.then(|| Python::allow_threads(py, create_task_locals));
312309

313310
Ok(Self { actor, task_locals })
314311
})?)
315312
}
316313
}
317314

315+
/// Create a new TaskLocals with its own asyncio event loop in a dedicated thread.
316+
fn create_task_locals() -> pyo3_async_runtimes::TaskLocals {
317+
let (tx, rx) = std::sync::mpsc::channel();
318+
let _ = std::thread::spawn(move || {
319+
Python::with_gil(|py| {
320+
let asyncio = Python::import(py, "asyncio").unwrap();
321+
let event_loop = asyncio.call_method0("new_event_loop").unwrap();
322+
asyncio
323+
.call_method1("set_event_loop", (event_loop.clone(),))
324+
.unwrap();
325+
326+
let task_locals = pyo3_async_runtimes::TaskLocals::new(event_loop.clone())
327+
.copy_context(py)
328+
.unwrap();
329+
tx.send(task_locals).unwrap();
330+
event_loop.call_method0("run_forever").unwrap();
331+
});
332+
});
333+
rx.recv().unwrap()
334+
}
335+
318336
// [Panics in async endpoints]
319337
// This class exists to solve a deadlock when an async endpoint calls into some
320338
// Rust code that panics.
@@ -403,7 +421,7 @@ impl Handler<PythonMessage> for PythonActor {
403421
};
404422

405423
pyo3_async_runtimes::into_future_with_locals(
406-
&self.task_locals,
424+
self.get_task_locals(py),
407425
awaitable.into_bound(py),
408426
)
409427
.map_err(|err| err.into())

monarch_hyperactor/src/config.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
//! Configuration for Monarch Hyperactor.
10+
//!
11+
//! This module provides monarch-specific configuration attributes that extend
12+
//! the base hyperactor configuration system.
13+
14+
use hyperactor::attrs::declare_attrs;
15+
16+
// Declare monarch-specific configuration keys
17+
declare_attrs! {
18+
/// Use a single asyncio runtime for all Python actors, rather than one per actor
19+
pub attr SHARED_ASYNCIO_RUNTIME: bool = false;
20+
}

monarch_hyperactor/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ pub mod actor_mesh;
1313
pub mod alloc;
1414
pub mod bootstrap;
1515
pub mod channel;
16+
pub mod config;
1617
pub mod mailbox;
1718
pub mod ndslice;
1819
pub mod proc;

0 commit comments

Comments
 (0)