[BugFix] Fix get_original_weights in collectors

Vincent Moens · Vincent Moens · commit 01399e097850 · 2025-05-13T11:05:41.000+01:00
ghstack-source-id: bf77b22 Pull-Request-resolved: #2951
diff --git a/sota-implementations/sac/utils.py b/sota-implementations/sac/utils.py
@@ -128,9 +128,7 @@ def make_collector(cfg, train_env, actor_model_explore, compile_mode):
     device = cfg.collector.device
     if device in ("", None):
         if torch.cuda.is_available():
-            if torch.cuda.device_count() < 2:
-                raise RuntimeError("Requires >= 2 GPUs")
-            device = torch.device("cuda:1")
+            device = torch.device("cuda:0")
         else:
             device = torch.device("cpu")
     collector = SyncDataCollector(
@@ -158,7 +156,9 @@ def make_collector_async(
     device = cfg.collector.device
     if device in ("", None):
         if torch.cuda.is_available():
-            device = torch.device("cuda:0")
+            if torch.cuda.device_count() < 2:
+                raise RuntimeError("Requires >= 2 GPUs")
+            device = torch.device("cuda:1")
         else:
             device = torch.device("cpu")
 
diff --git a/test/test_collector.py b/test/test_collector.py
@@ -3419,7 +3419,7 @@ def test_collector_rb_multisync(
             assert len(rb) == pred_len
         collector.shutdown()
         assert len(rb) == 256
-        if not extend_buffer:
+        if extend_buffer:
             steps_counts = rb["step_count"].squeeze().split(16)
             collector_ids = rb["collector", "traj_ids"].squeeze().split(16)
             for step_count, ids in zip(steps_counts, collector_ids):
diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py
@@ -208,7 +208,7 @@ def _get_policy_and_device(
             return policy, None
 
         if isinstance(policy, nn.Module):
-            param_and_buf = TensorDict.from_module(policy, as_module=True).data
+            param_and_buf = TensorDict.from_module(policy, as_module=True)
         else:
             # Because we want to reach the warning
             param_and_buf = TensorDict()
@@ -231,19 +231,25 @@ def _get_policy_and_device(
             return policy, None
 
         # Create a stateless policy, then populate this copy with params on device
-        def get_original_weights(policy):
+        def get_original_weights(policy=policy):
             td = TensorDict.from_module(policy)
             return td.data
 
         # We need to use ".data" otherwise buffers may disappear from the `get_original_weights` function
         with param_and_buf.data.to("meta").to_module(policy):
-            policy = deepcopy(policy)
+            policy_new_device = deepcopy(policy)
 
-        param_and_buf.apply(
+        param_and_buf_new_device = param_and_buf.apply(
             functools.partial(_map_weight, policy_device=policy_device),
             filter_empty=False,
-        ).to_module(policy)
-        return policy, get_original_weights
+        )
+        param_and_buf_new_device.to_module(policy_new_device)
+        # Sanity check
+        if set(TensorDict.from_module(policy_new_device).keys(True, True)) != set(
+            get_original_weights().keys(True, True)
+        ):
+            raise RuntimeError("Failed to map weights. The weight sets mismatch.")
+        return policy_new_device, get_original_weights
 
     def start(self):
         """Starts the collector for asynchronous data collection.
@@ -1976,17 +1982,17 @@ def __init__(
             for policy_device, env_maker, env_maker_kwargs in _zip_strict(
                 self.policy_device, self.create_env_fn, self.create_env_kwargs
             ):
-                (policy_copy, get_weights_fn,) = self._get_policy_and_device(
+                (policy_new_device, get_weights_fn,) = self._get_policy_and_device(
                     policy=policy,
                     policy_device=policy_device,
                     env_maker=env_maker,
                     env_maker_kwargs=env_maker_kwargs,
                 )
-                if type(policy_copy) is not type(policy):
-                    policy = policy_copy
+                if type(policy_new_device) is not type(policy):
+                    policy = policy_new_device
                 weights = (
-                    TensorDict.from_module(policy_copy).data
-                    if isinstance(policy_copy, nn.Module)
+                    TensorDict.from_module(policy_new_device).data
+                    if isinstance(policy_new_device, nn.Module)
                     else TensorDict()
                 )
                 self._policy_weights_dict[policy_device] = weights