diff --git a/examples/grpo_actor.py b/examples/grpo_actor.py index 5992f473..52c5dde1 100644 --- a/examples/grpo_actor.py +++ b/examples/grpo_actor.py @@ -13,9 +13,8 @@ import torch import torch.nn as nn import torch.optim as optim -from monarch.actor_mesh import Actor, ActorMeshRef, endpoint -from monarch.proc_mesh import proc_mesh +from monarch.actor import Actor, ActorMeshRef, endpoint, proc_mesh from monarch.rdma import RDMABuffer from torch.distributions import Categorical, kl_divergence diff --git a/examples/notebooks/ping_pong.ipynb b/examples/notebooks/ping_pong.ipynb index e6ae862d..6675587e 100644 --- a/examples/notebooks/ping_pong.ipynb +++ b/examples/notebooks/ping_pong.ipynb @@ -16,7 +16,7 @@ "metadata": {}, "source": [ "## Hello World\n", - "Actors are spawned in Process meshes via the `monarch.proc_mesh` API. For those familiar with distributed systems, it can be helpful to think of each Actor as a server with endpoints that can be called." + "Actors are spawned in Process meshes via the `monarch.actor` API. For those familiar with distributed systems, it can be helpful to think of each Actor as a server with endpoints that can be called." 
] }, { @@ -43,8 +43,8 @@ "source": [ "import asyncio\n", "\n", - "from monarch.proc_mesh import proc_mesh, ProcMesh\n", - "from monarch.actor_mesh import Actor, endpoint, current_rank\n", + "from monarch.actor import proc_mesh, ProcMesh\n", + "from monarch.actor import Actor, endpoint, current_rank\n", "\n", "NUM_ACTORS=4\n", "\n", @@ -168,8 +168,8 @@ "source": [ "import asyncio\n", "\n", - "from monarch.proc_mesh import proc_mesh, ProcMesh\n", - "from monarch.actor_mesh import Actor, endpoint, current_rank\n", + "from monarch.actor import proc_mesh, ProcMesh\n", + "from monarch.actor import Actor, endpoint, current_rank\n", "\n", "class ExampleActor(Actor):\n", " def __init__(self, actor_name):\n", diff --git a/examples/notebooks/spmd_ddp.ipynb b/examples/notebooks/spmd_ddp.ipynb index c9e930dd..6036bb42 100644 --- a/examples/notebooks/spmd_ddp.ipynb +++ b/examples/notebooks/spmd_ddp.ipynb @@ -24,8 +24,8 @@ "import torch.nn as nn\n", "import torch.optim as optim\n", "\n", - "from monarch.proc_mesh import proc_mesh\n", - "from monarch.actor_mesh import Actor, current_rank, endpoint\n", + "from monarch.actor import proc_mesh\n", + "from monarch.actor import Actor, current_rank, endpoint\n", "\n", "from torch.nn.parallel import DistributedDataParallel as DDP\n", "\n", diff --git a/python/monarch/_src/actor/proc_mesh.py b/python/monarch/_src/actor/proc_mesh.py index d2745013..d0c3fc79 100644 --- a/python/monarch/_src/actor/proc_mesh.py +++ b/python/monarch/_src/actor/proc_mesh.py @@ -22,8 +22,6 @@ TypeVar, ) -from monarch._rust_bindings import has_tensor_engine - from monarch._rust_bindings.hyperactor_extension.alloc import ( # @manual=//monarch/monarch_extension:monarch_extension # @manual=//monarch/monarch_extension:monarch_extension Alloc, AllocConstraints, @@ -44,11 +42,21 @@ from monarch._src.actor.future import Future from monarch._src.actor.shape import MeshTrait +HAS_TENSOR_ENGINE = False +try: + # TODO: while the tensor_engine submodule doesn't exist yet, use 
the + # availability of monarch.rdma as a proxy. + # type: ignore + from monarch.rdma import RDMAManager # @manual + + HAS_TENSOR_ENGINE = True +except ImportError: + pass + if TYPE_CHECKING: Tensor = Any DeviceMesh = Any - RDMAManager = Any T = TypeVar("T") @@ -84,10 +92,7 @@ def __init__( self._auto_reload_actor: Optional[AutoReloadActor] = None self._maybe_device_mesh: Optional["DeviceMesh"] = _device_mesh self._stopped = False - if _mock_shape is None and has_tensor_engine(): - # type: ignore[21] - from monarch.rdma import RDMAManager # @manual - + if _mock_shape is None and HAS_TENSOR_ENGINE: # type: ignore[21] self._rdma_manager = self._spawn_blocking("rdma_manager", RDMAManager) @@ -192,7 +197,7 @@ async def _spawn_nonblocking( @property def _device_mesh(self) -> "DeviceMesh": - if not has_tensor_engine(): + if not HAS_TENSOR_ENGINE: raise RuntimeError( "DeviceMesh is not available because tensor_engine was not compiled (USE_TENSOR_ENGINE=0)" ) @@ -308,7 +313,7 @@ def local_proc_mesh(*, gpus: Optional[int] = None, hosts: int = 1) -> Future[Pro ) -_BOOTSTRAP_MAIN = "monarch.bootstrap_main" +_BOOTSTRAP_MAIN = "monarch._src.actor.bootstrap_main" def _get_bootstrap_args() -> tuple[str, Optional[list[str]], dict[str, str]]: diff --git a/python/monarch/_testing.py b/python/monarch/_testing.py index 2e9a9475..052992ff 100644 --- a/python/monarch/_testing.py +++ b/python/monarch/_testing.py @@ -14,12 +14,12 @@ import monarch_supervisor from monarch._src.actor.shape import NDSlice +from monarch.actor import proc_mesh, ProcMesh from monarch.common.client import Client from monarch.common.device_mesh import DeviceMesh from monarch.common.invocation import DeviceException, RemoteException from monarch.controller.backend import ProcessBackend from monarch.mesh_controller import spawn_tensor_engine -from monarch.proc_mesh import proc_mesh, ProcMesh from monarch.python_local_mesh import PythonLocalContext from monarch.rust_local_mesh import ( local_mesh, diff --git 
a/python/monarch/actor/__init__.py b/python/monarch/actor/__init__.py index 582a6f07..a2720198 100644 --- a/python/monarch/actor/__init__.py +++ b/python/monarch/actor/__init__.py @@ -12,27 +12,34 @@ Accumulator, Actor, ActorError, + ActorMeshRef, current_actor_name, current_rank, current_size, endpoint, MonarchContext, + Point, + send, ValueMesh, ) from monarch._src.actor.future import Future -from monarch._src.actor.proc_mesh import proc_mesh, ProcMesh +from monarch._src.actor.proc_mesh import local_proc_mesh, proc_mesh, ProcMesh __all__ = [ "Accumulator", "Actor", "ActorError", + "ActorMeshRef", "current_actor_name", "current_rank", "current_size", "endpoint", + "Future", + "local_proc_mesh", "MonarchContext", - "ValueMesh", + "Point", "proc_mesh", "ProcMesh", - "Future", + "send", + "ValueMesh", ] diff --git a/python/monarch/mesh_controller.py b/python/monarch/mesh_controller.py index 78c7739b..04225fe1 100644 --- a/python/monarch/mesh_controller.py +++ b/python/monarch/mesh_controller.py @@ -34,8 +34,8 @@ from monarch._rust_bindings.monarch_hyperactor.proc import ( # @manual=//monarch/monarch_extension:monarch_extension ActorId, ) +from monarch._src.actor.actor_mesh import Port, PortTuple from monarch._src.actor.shape import NDSlice -from monarch.actor_mesh import Port, PortTuple from monarch.common import messages from monarch.common.controller_api import TController from monarch.common.invocation import Seq diff --git a/python/tests/error_test_binary.py b/python/tests/error_test_binary.py index 28024dba..c3a58df4 100644 --- a/python/tests/error_test_binary.py +++ b/python/tests/error_test_binary.py @@ -13,8 +13,7 @@ from monarch._rust_bindings.monarch_extension.panic import panicking_function -from monarch.actor_mesh import Actor, endpoint, send -from monarch.proc_mesh import proc_mesh +from monarch.actor import Actor, endpoint, proc_mesh, send class ErrorActor(Actor): diff --git a/python/tests/test_actor_error.py b/python/tests/test_actor_error.py index 
27dd918e..0f5c523e 100644 --- a/python/tests/test_actor_error.py +++ b/python/tests/test_actor_error.py @@ -10,8 +10,7 @@ import pytest from monarch._rust_bindings.monarch_hyperactor.proc_mesh import ProcEvent -from monarch.actor_mesh import Actor, ActorError, endpoint, send -from monarch.proc_mesh import local_proc_mesh, proc_mesh +from monarch.actor import Actor, ActorError, endpoint, local_proc_mesh, proc_mesh class ExceptionActor(Actor): diff --git a/python/tests/test_allocator.py b/python/tests/test_allocator.py index 7de4b3b8..e875826e 100644 --- a/python/tests/test_allocator.py +++ b/python/tests/test_allocator.py @@ -38,8 +38,14 @@ StaticRemoteAllocInitializer, TorchXRemoteAllocInitializer, ) -from monarch.actor_mesh import Actor, current_rank, current_size, endpoint, ValueMesh -from monarch.proc_mesh import ProcMesh +from monarch.actor import ( + Actor, + current_rank, + current_size, + endpoint, + ProcMesh, + ValueMesh, +) from monarch.tools.mesh_spec import MeshSpec, ServerSpec from monarch.tools.network import get_sockaddr diff --git a/python/tests/test_python_actors.py b/python/tests/test_python_actors.py index c8052a80..db597bb9 100644 --- a/python/tests/test_python_actors.py +++ b/python/tests/test_python_actors.py @@ -12,8 +12,6 @@ from types import ModuleType from unittest.mock import AsyncMock, patch -import monarch - import pytest import torch @@ -23,6 +21,7 @@ from monarch.actor import ( Accumulator, Actor, + ActorError, current_actor_name, current_rank, current_size, @@ -545,7 +544,7 @@ def _patch_output(msg): breakpoints = await debug_client.list.call_one() assert len(breakpoints) == 0 - with pytest.raises(monarch.actor.ActorError, match="ValueError: bad rank"): + with pytest.raises(ActorError, match="ValueError: bad rank"): await fut diff --git a/python/tests/test_tensor_engine.py b/python/tests/test_tensor_engine.py index 6098c32c..a7e5122b 100644 --- a/python/tests/test_tensor_engine.py +++ b/python/tests/test_tensor_engine.py @@ -8,8 +8,8 
@@ import pytest import torch from monarch import remote +from monarch.actor import proc_mesh from monarch.mesh_controller import spawn_tensor_engine -from monarch.proc_mesh import proc_mesh two_gpu = pytest.mark.skipif( diff --git a/setup.py b/setup.py index cd509910..4e0ed2f6 100644 --- a/setup.py +++ b/setup.py @@ -166,7 +166,7 @@ def run(self): entry_points={ "console_scripts": [ "monarch=monarch.tools.cli:main", - "monarch_bootstrap=monarch.bootstrap_main:invoke_main", + "monarch_bootstrap=monarch._src.actor.bootstrap_main:invoke_main", ], }, rust_extensions=rust_extensions,