[Refactor] Rename weight updaters

Vincent Moens · Vincent Moens · commit efe938956f87 · 2025-04-07T10:33:33.000+01:00
ghstack-source-id: 8889046 Pull Request resolved: #2892
diff --git a/.github/unittest/linux/scripts/environment.yml b/.github/unittest/linux/scripts/environment.yml
@@ -17,6 +17,7 @@ dependencies:
     - pytest-instafail
     - pytest-rerunfailures
     - pytest-timeout
+    - pytest-asyncio
     - expecttest
     - pyyaml
     - scipy
diff --git a/.github/unittest/linux_distributed/scripts/environment.yml b/.github/unittest/linux_distributed/scripts/environment.yml
@@ -16,6 +16,7 @@ dependencies:
     - pytest-mock
     - pytest-instafail
     - pytest-rerunfailures
+    - pytest-asyncio
     - expecttest
     - pyyaml
     - scipy
diff --git a/.github/unittest/linux_libs/scripts_ataridqn/environment.yml b/.github/unittest/linux_libs/scripts_ataridqn/environment.yml
@@ -15,6 +15,7 @@ dependencies:
     - pytest-instafail
     - pytest-rerunfailures
     - pytest-error-for-skips
+    - pytest-asyncio
     - expecttest
     - pyyaml
     - scipy
diff --git a/.github/unittest/linux_libs/scripts_brax/environment.yml b/.github/unittest/linux_libs/scripts_brax/environment.yml
@@ -13,6 +13,7 @@ dependencies:
     - pytest-instafail
     - pytest-rerunfailures
     - pytest-error-for-skips
+    - pytest-asyncio
     - expecttest
     - pyyaml
     - scipy
diff --git a/.github/unittest/linux_libs/scripts_chess/environment.yml b/.github/unittest/linux_libs/scripts_chess/environment.yml
@@ -13,6 +13,7 @@ dependencies:
     - pytest-instafail
     - pytest-rerunfailures
     - pytest-error-for-skips
+    - pytest-asyncio
     - expecttest
     - pyyaml
     - scipy
diff --git a/.github/unittest/linux_libs/scripts_d4rl/environment.yml b/.github/unittest/linux_libs/scripts_d4rl/environment.yml
@@ -13,6 +13,7 @@ dependencies:
     - pytest-instafail
     - pytest-rerunfailures
     - pytest-error-for-skips
+    - pytest-asyncio
     - expecttest
     - pyyaml
     - scipy
diff --git a/.github/unittest/linux_libs/scripts_envpool/environment.yml b/.github/unittest/linux_libs/scripts_envpool/environment.yml
@@ -15,6 +15,7 @@ dependencies:
     - pytest-instafail
     - pytest-rerunfailures
     - pytest-error-for-skips
+    - pytest-asyncio
     - expecttest
     - pyyaml
     - scipy
diff --git a/.github/unittest/linux_libs/scripts_gen-dgrl/environment.yml b/.github/unittest/linux_libs/scripts_gen-dgrl/environment.yml
@@ -13,6 +13,7 @@ dependencies:
     - pytest-instafail
     - pytest-rerunfailures
     - pytest-error-for-skips
+    - pytest-asyncio
     - expecttest
     - pyyaml
     - scipy
diff --git a/.github/unittest/linux_libs/scripts_gym/environment.yml b/.github/unittest/linux_libs/scripts_gym/environment.yml
@@ -19,6 +19,7 @@ dependencies:
     - pytest-instafail
     - pytest-rerunfailures
     - pytest-error-for-skips
+    - pytest-asyncio
     - expecttest
     - pyyaml
     - scipy
diff --git a/.github/unittest/linux_libs/scripts_habitat/environment.yml b/.github/unittest/linux_libs/scripts_habitat/environment.yml
@@ -13,6 +13,7 @@ dependencies:
     - pytest-instafail
     - pytest-error-for-skips
     - pytest-rerunfailures
+    - pytest-asyncio
     - expecttest
     - pyyaml
     - scipy==1.9.1
diff --git a/.github/unittest/linux_libs/scripts_jumanji/environment.yml b/.github/unittest/linux_libs/scripts_jumanji/environment.yml
@@ -13,6 +13,7 @@ dependencies:
     - pytest-instafail
     - pytest-rerunfailures
     - pytest-error-for-skips
+    - pytest-asyncio
     - expecttest
     - pyyaml
     - scipy
diff --git a/.github/unittest/linux_libs/scripts_llm/environment.yml b/.github/unittest/linux_libs/scripts_llm/environment.yml
@@ -13,6 +13,7 @@ dependencies:
     - pytest-instafail
     - pytest-rerunfailures
     - pytest-error-for-skips
+    - pytest-asyncio
     - expecttest
     - pyyaml
     - scipy
diff --git a/.github/unittest/linux_libs/scripts_meltingpot/environment.yml b/.github/unittest/linux_libs/scripts_meltingpot/environment.yml
@@ -12,4 +12,5 @@ dependencies:
     - pytest-instafail
     - pytest-rerunfailures
     - pytest-error-for-skips
+    - pytest-asyncio
     - expecttest
diff --git a/.github/unittest/linux_libs/scripts_minari/environment.yml b/.github/unittest/linux_libs/scripts_minari/environment.yml
@@ -13,6 +13,7 @@ dependencies:
     - pytest-instafail
     - pytest-rerunfailures
     - pytest-error-for-skips
+    - pytest-asyncio
     - expecttest
     - pyyaml
     - scipy
diff --git a/.github/unittest/linux_libs/scripts_openx/environment.yml b/.github/unittest/linux_libs/scripts_openx/environment.yml
@@ -13,6 +13,7 @@ dependencies:
     - pytest-instafail
     - pytest-rerunfailures
     - pytest-error-for-skips
+    - pytest-asyncio
     - expecttest
     - pyyaml
     - scipy
diff --git a/.github/unittest/linux_libs/scripts_robohive/environment.yml b/.github/unittest/linux_libs/scripts_robohive/environment.yml
@@ -19,6 +19,7 @@ dependencies:
     - pytest-instafail
     - pytest-rerunfailures
     - pytest-error-for-skips
+    - pytest-asyncio
     - expecttest
     - pyyaml
     - scipy
diff --git a/.github/unittest/linux_libs/scripts_roboset/environment.yml b/.github/unittest/linux_libs/scripts_roboset/environment.yml
@@ -13,6 +13,7 @@ dependencies:
     - pytest-instafail
     - pytest-rerunfailures
     - pytest-error-for-skips
+    - pytest-asyncio
     - expecttest
     - pyyaml
     - scipy
diff --git a/.github/unittest/linux_libs/scripts_sklearn/environment.yml b/.github/unittest/linux_libs/scripts_sklearn/environment.yml
@@ -13,6 +13,7 @@ dependencies:
     - pytest-instafail
     - pytest-rerunfailures
     - pytest-error-for-skips
+    - pytest-asyncio
     - expecttest
     - pyyaml
     - scipy
diff --git a/.github/unittest/linux_libs/scripts_smacv2/environment.yml b/.github/unittest/linux_libs/scripts_smacv2/environment.yml
@@ -14,6 +14,7 @@ dependencies:
     - pytest-instafail
     - pytest-rerunfailures
     - pytest-error-for-skips
+    - pytest-asyncio
     - expecttest
     - pyyaml
     - numpy==1.23.0
diff --git a/.github/unittest/linux_libs/scripts_vd4rl/environment.yml b/.github/unittest/linux_libs/scripts_vd4rl/environment.yml
@@ -13,6 +13,7 @@ dependencies:
     - pytest-instafail
     - pytest-rerunfailures
     - pytest-error-for-skips
+    - pytest-asyncio
     - expecttest
     - pyyaml
     - scipy
diff --git a/.github/unittest/linux_libs/scripts_vmas/environment.yml b/.github/unittest/linux_libs/scripts_vmas/environment.yml
@@ -19,6 +19,7 @@ dependencies:
     - pytest-instafail
     - pytest-rerunfailures
     - pytest-error-for-skips
+    - pytest-asyncio
     - expecttest
     - pyyaml
     - scipy
diff --git a/docs/source/reference/collectors.rst b/docs/source/reference/collectors.rst
@@ -126,16 +126,16 @@ mechanism for updating policy weights across different devices and processes, ac
 Local and Remote Weight Updaters
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-The weight synchronization process is facilitated by two main components: :class:`~torchrl.collectors.LocalWeightUpdaterBase`
-and :class:`~torchrl.collectors.RemoteWeightUpdaterBase`. These base classes provide a structured interface for
+The weight synchronization process is facilitated by two main components: :class:`~torchrl.collectors.WeightUpdateReceiverBase`
+and :class:`~torchrl.collectors.WeightUpdateSenderBase`. These base classes provide a structured interface for
 implementing custom weight update logic, allowing users to tailor the synchronization process to their specific needs.
 
-- :class:`~torchrl.collectors.LocalWeightUpdaterBase`: This component is responsible for updating the policy weights on
+- :class:`~torchrl.collectors.WeightUpdateReceiverBase`: This component is responsible for updating the policy weights on
   the local inference worker. It is particularly useful when the training and inference occur on the same machine but on
   different devices. Users can extend this class to define how weights are fetched from a server and applied locally.
   It is also the extension point for collectors where the workers need to ask for weight updates (in contrast with
   situations where the server decides when to update the worker policies).
-- :class:`~torchrl.collectors.RemoteWeightUpdaterBase`: This component handles the distribution of policy weights to
+- :class:`~torchrl.collectors.WeightUpdateSenderBase`: This component handles the distribution of policy weights to
   remote inference workers. It is essential in distributed systems where multiple workers need to be kept in sync with
   the central policy. Users can extend this class to implement custom logic for synchronizing weights across a network of
   devices or processes.
@@ -153,8 +153,8 @@ Default Implementations
 
 For common scenarios, the API provides default implementations of these updaters, such as
 :class:`~torchrl.collectors.VanillaLocalWeightUpdater`, :class:`~torchrl.collectors.MultiProcessedRemoteWeightUpdate`,
-:class:`~torchrl.collectors.RayRemoteWeightUpdater`, :class:`~torchrl.collectors.RPCRemoteWeightUpdater`, and
-:class:`~torchrl.collectors.DistributedRemoteWeightUpdater`.
+:class:`~torchrl.collectors.RayWeightUpdater`, :class:`~torchrl.collectors.RPCWeightUpdateSender`, and
+:class:`~torchrl.collectors.DistributedWeightUpdateSender`.
 These implementations cover a range of typical deployment configurations, from single-device setups to large-scale
 distributed systems.
 
@@ -180,13 +180,13 @@ scenarios, ensuring that their policies remain up-to-date and performant.
     :toctree: generated/
     :template: rl_template.rst
 
-    LocalWeightUpdaterBase
-    RemoteWeightUpdaterBase
+    WeightUpdateReceiverBase
+    WeightUpdateSenderBase
     VanillaLocalWeightUpdater
     MultiProcessedRemoteWeightUpdate
-    RayRemoteWeightUpdater
-    DistributedRemoteWeightUpdater
-    RPCRemoteWeightUpdater
+    RayWeightUpdater
+    DistributedWeightUpdateSender
+    RPCWeightUpdateSender
 
 Collectors and replay buffers interoperability
 ----------------------------------------------
diff --git a/examples/collectors/mp_collector_mps.py b/examples/collectors/mp_collector_mps.py
@@ -11,29 +11,29 @@
 ----------------------
 
 This script demonstrates a weight update in TorchRL.
-The script uses a custom `MPSRemoteWeightUpdater` class to update the weights of a policy network across multiple workers.
+The script uses a custom `MPSWeightUpdateSender` class to update the weights of a policy network across multiple workers.
 
 Key Features
 ------------
 
 - Multi-Worker Setup: The script creates two worker processes that collect data from a Gym environment
   ("Pendulum-v1") using a policy network.
 - MPS (Metal Performance Shaders) Device: The policy network is placed on an MPS device.
-- Custom Weight Updater: The `MPSRemoteWeightUpdater` class is used to update the policy weights across workers. This
+- Custom Weight Updater: The `MPSWeightUpdateSender` class is used to update the policy weights across workers. This
   class is necessary because MPS tensors cannot be sent over a pipe due to serialization/pickling issues in PyTorch.
 
 Workaround for MPS Tensor Serialization Issue
 ---------------------------------------------
 
 In PyTorch, MPS tensors cannot be serialized or pickled, which means they cannot be sent over a pipe or shared between
-processes. To work around this issue, the MPSRemoteWeightUpdater class sends the policy weights on the CPU device
+processes. To work around this issue, the MPSWeightUpdateSender class sends the policy weights on the CPU device
 instead of the MPS device. The local workers then copy the weights from the CPU device to the MPS device.
 
 Script Flow
 -----------
 
 1. Initialize the environment, policy network, and collector.
-2. Update the policy weights using the MPSRemoteWeightUpdater.
+2. Update the policy weights using the MPSWeightUpdateSender.
 3. Collect data from the environment using the policy network.
 4. Zero out the policy weights after a few iterations.
 5. Verify that the updated policy weights are being used by checking the actions generated by the policy network.
@@ -45,12 +45,12 @@ class is necessary because MPS tensors cannot be sent over a pipe due to seriali
 from tensordict import TensorDictBase
 from tensordict.nn import TensorDictModule
 from torch import nn
-from torchrl.collectors import MultiSyncDataCollector, RemoteWeightUpdaterBase
+from torchrl.collectors import MultiSyncDataCollector, WeightUpdateSenderBase
 
 from torchrl.envs.libs.gym import GymEnv
 
 
-class MPSRemoteWeightUpdater(RemoteWeightUpdaterBase):
+class MPSWeightUpdaterBase(WeightUpdateSenderBase):
     def __init__(self, policy_weights, num_workers):
         # Weights are on mps device, which cannot be shared
         self.policy_weights = policy_weights.data
@@ -101,7 +101,7 @@ def policy_factory(device=device):
         reset_at_each_iter=False,
         device=device,
         storing_device="cpu",
-        remote_weight_updater=MPSRemoteWeightUpdater(policy_weights, 2),
+        weight_update_sender=MPSWeightUpdaterBase(policy_weights, 2),
         # use_buffers=False,
         # cat_results="stack",
     )
diff --git a/setup.py b/setup.py
@@ -216,6 +216,7 @@ def _main(argv):
             "scipy",
             "pytest-mock",
             "pytest-cov",
+            "pytest-asyncio",
             "pytest-benchmark",
             "pytest-rerunfailures",
             "pytest-error-for-skips",
diff --git a/test/test_collector.py b/test/test_collector.py
@@ -43,8 +43,8 @@
 )
 from torchrl.collectors import (
     aSyncDataCollector,
-    RemoteWeightUpdaterBase,
     SyncDataCollector,
+    WeightUpdateSenderBase,
 )
 from torchrl.collectors.collectors import (
     _Interruptor,
@@ -3498,7 +3498,7 @@ def __deepcopy_error__(*args, **kwargs):
 
 
 class TestPolicyFactory:
-    class MPSRemoteWeightUpdater(RemoteWeightUpdaterBase):
+    class MPSWeightUpdaterBase(WeightUpdateSenderBase):
         def __init__(self, policy_weights, num_workers):
             # Weights are on mps device, which cannot be shared
             self.policy_weights = policy_weights.data
@@ -3542,7 +3542,7 @@ def test_weight_update(self):
             reset_at_each_iter=False,
             device=device,
             storing_device="cpu",
-            remote_weight_updater=self.MPSRemoteWeightUpdater(policy_weights, 2),
+            weight_update_sender=self.MPSWeightUpdaterBase(policy_weights, 2),
         )
 
         collector.update_policy_weights_()
@@ -3683,10 +3683,9 @@ def _run_collector_test(self, total_steps, rb, policy, tokenizer):
                 assert len(stack["text"][i]) < len(stack["next", "text"][i])
         assert collector._frames >= total_steps
 
-    def test_llm_collector_start(self, vllm_instance):
-        asyncio.run(self._async_run_collector_test(vllm_instance))
-
-    async def _async_run_collector_test(self, vllm_instance):
+    @pytest.mark.slow
+    @pytest.mark.asyncio
+    async def test_llm_collector_start(self, vllm_instance):
         total_steps = 20
         policy = vLLMWrapper(vllm_instance)
         vllm_instance.get_tokenizer()
@@ -3708,28 +3707,29 @@ async def _async_run_collector_test(self, vllm_instance):
             replay_buffer=rb,
             total_steps=total_steps,
         )
+        torchrl_logger.info("starting")
         collector.start()
 
-        i = 0
-        wait = 0
+        j = 0
         while True:
-            while not len(rb):
+            if not len(rb):
                 await asyncio.sleep(1)  # Use asyncio.sleep instead of time.sleep
-                wait += 1
-                if wait > 20:
-                    raise RuntimeError
             sample = rb.sample(10)
-            for i in range(sample.numel()):
+            assert sample.ndim == 1
+            for i in range(10):
                 # Check that there are more chars in the next step
                 assert len(sample["text"][i]) < len(sample["next", "text"][i])
             assert not sample._has_exclusive_keys, sample
-            await asyncio.sleep(0.1)  # Use asyncio.sleep instead of time.sleep
-            i += 1
-            if i == 5:
+            j += 1
+            if j == 5:
                 break
         assert collector._frames >= total_steps
 
-        await collector.async_shutdown()
+        try:
+            # Bound the shutdown wait so a hung collector cannot stall the test run.
+            await asyncio.wait_for(collector.async_shutdown(), timeout=30)
+        except asyncio.TimeoutError:
+            torchrl_logger.info("Collector shutdown timed out")
 
     @pytest.mark.slow
     @pytest.mark.parametrize("rb", [False, True])
diff --git a/torchrl/collectors/__init__.py b/torchrl/collectors/__init__.py
@@ -13,20 +13,20 @@
     SyncDataCollector,
 )
 from .weight_update import (
-    LocalWeightUpdaterBase,
-    MultiProcessedRemoteWeightUpdate,
-    RayRemoteWeightUpdater,
-    RemoteWeightUpdaterBase,
-    VanillaLocalWeightUpdater,
+    MultiProcessedWeightUpdate,
+    RayWeightUpdater,
+    VanillaWeightUpdater,
+    WeightUpdateReceiverBase,
+    WeightUpdateSenderBase,
 )
 
 __all__ = [
     "RandomPolicy",
-    "LocalWeightUpdaterBase",
-    "RemoteWeightUpdaterBase",
-    "VanillaLocalWeightUpdater",
-    "RayRemoteWeightUpdater",
-    "MultiProcessedRemoteWeightUpdate",
+    "WeightUpdateReceiverBase",
+    "WeightUpdateSenderBase",
+    "VanillaWeightUpdater",
+    "RayWeightUpdater",
+    "MultiProcessedWeightUpdate",
     "aSyncDataCollector",
     "DataCollectorBase",
     "MultiaSyncDataCollector",
diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py
diff --git a/torchrl/collectors/distributed/__init__.py b/torchrl/collectors/distributed/__init__.py
diff --git a/torchrl/collectors/distributed/generic.py b/torchrl/collectors/distributed/generic.py
diff --git a/torchrl/collectors/distributed/ray.py b/torchrl/collectors/distributed/ray.py
diff --git a/torchrl/collectors/distributed/rpc.py b/torchrl/collectors/distributed/rpc.py
diff --git a/torchrl/collectors/distributed/sync.py b/torchrl/collectors/distributed/sync.py
diff --git a/torchrl/collectors/llm.py b/torchrl/collectors/llm.py
diff --git a/torchrl/collectors/utils.py b/torchrl/collectors/utils.py
diff --git a/torchrl/collectors/weight_update.py b/torchrl/collectors/weight_update.py