Commit 450a380

[BugFix] Avoid collision of "step_count" key from transform and collector (#868)
1 parent b07fd35 commit 450a380

15 files changed: +194, -139 lines
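The commit moves the bookkeeping entries written by data collectors ("step_count", "traj_ids", "mask") into a nested "collector" sub-tensordict, so they can no longer collide with the root-level "step_count" entry written by the StepCounter transform. A minimal sketch of the resulting key layout, with made-up values (depending on the torchrl version, TensorDict may need to be imported from torchrl.data instead of tensordict):

import torch
from tensordict import TensorDict

batch = 4
td = TensorDict(
    {
        # hypothetical values, shapes chosen only for illustration
        "step_count": torch.arange(batch),  # e.g. written by the StepCounter transform
        ("collector", "step_count"): torch.arange(batch),  # written by the collector
        ("collector", "mask"): torch.ones(batch, dtype=torch.bool),
    },
    batch_size=[batch],
)

# tuple keys address entries of the nested "collector" sub-tensordict;
# keys(True) lists nested keys as well, so membership tests keep working
assert ("collector", "mask") in td.keys(True)
assert td.get(("collector", "step_count")).shape == torch.Size([batch])
assert td["step_count"] is not td["collector", "step_count"]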

examples/td3/td3.py

Lines changed: 3 additions & 3 deletions
@@ -273,10 +273,10 @@ def main(cfg: "DictConfig"): # noqa: F821
         pbar.update(tensordict.numel())

         # extend the replay buffer with the new data
-        if "mask" in tensordict.keys():
+        if ("collector", "mask") in tensordict.keys(True):
             # if multi-step, a mask is present to help filter padded values
-            current_frames = tensordict["mask"].sum()
-            tensordict = tensordict[tensordict.get("mask").squeeze(-1)]
+            current_frames = tensordict["collector", "mask"].sum()
+            tensordict = tensordict[tensordict.get(("collector", "mask")).squeeze(-1)]
         else:
             tensordict = tensordict.view(-1)
             current_frames = tensordict.numel()
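In the example above, multi-step collection pads trajectories to a common length, and the nested ("collector", "mask") entry marks the frames that are real. A hedged sketch of the same filtering on a stand-alone tensordict (the data, shapes and the "reward" key are assumptions, not taken from the example; the example additionally squeezes a trailing singleton dimension because its mask has shape [*batch, 1]):

import torch
from tensordict import TensorDict

# assumed batch: 2 trajectories of 5 steps, the second padded after step 3
data = TensorDict(
    {
        "reward": torch.randn(2, 5, 1),
        ("collector", "mask"): torch.tensor(
            [[True, True, True, True, True], [True, True, True, False, False]]
        ),
    },
    batch_size=[2, 5],
)

if ("collector", "mask") in data.keys(True):
    # boolean indexing flattens the batch and drops the padded frames
    current_frames = data["collector", "mask"].sum()
    data = data[data.get(("collector", "mask"))]
else:
    data = data.view(-1)
    current_frames = data.numel()

assert int(current_frames) == 8
assert data.batch_size == torch.Size([8])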

test/test_collector.py

Lines changed: 14 additions & 10 deletions
@@ -316,7 +316,7 @@ def make_env():
     )
     for _data in collector:
         continue
-    steps = _data["step_count"][..., 1:]
+    steps = _data["collector", "step_count"][..., 1:]
     done = _data["done"][..., :-1, :].squeeze(-1)
     # we don't want just one done
     assert done.sum() > 3
@@ -375,7 +375,7 @@ def make_env(seed):
         break

     assert (d["done"].sum(-2) >= 1).all()
-    assert torch.unique(d["traj_ids"], dim=-1).shape[-1] == 1
+    assert torch.unique(d["collector", "traj_ids"], dim=-1).shape[-1] == 1

     del collector

@@ -426,12 +426,15 @@ def make_env(seed):
         break

     assert d.ndimension() == 2
-    assert d["mask"].shape == d.shape
-    assert d["step_count"].shape == d.shape
-    assert d["traj_ids"].shape == d.shape
+    assert d["collector", "mask"].shape == d.shape
+    assert d["collector", "step_count"].shape == d.shape
+    assert d["collector", "traj_ids"].shape == d.shape
     for traj in d.unbind(0):
-        assert traj["traj_ids"].unique().numel() == 1
-        assert (traj["step_count"][1:] - traj["step_count"][:-1] == 1).all()
+        assert traj["collector", "traj_ids"].unique().numel() == 1
+        assert (
+            traj["collector", "step_count"][1:] - traj["collector", "step_count"][:-1]
+            == 1
+        ).all()

     del collector

@@ -986,17 +989,18 @@ def test_collector_output_keys(collector_class, init_random_frames, explicit_spe
     keys = {
         "action",
         "done",
+        "collector",
         "hidden1",
         "hidden2",
-        "mask",
+        ("collector", "mask"),
         ("next", "hidden1"),
         ("next", "hidden2"),
         ("next", "observation"),
         "next",
         "observation",
         "reward",
-        "step_count",
-        "traj_ids",
+        ("collector", "step_count"),
+        ("collector", "traj_ids"),
     }
     b = next(iter(collector))
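These assertions read the collector's bookkeeping from the nested keys and check that each split trajectory is internally consistent. A stand-alone sketch of the same per-trajectory checks on hand-built data (values and shapes are illustrative; no collector is constructed here):

import torch
from tensordict import TensorDict

# two fake trajectories, each 4 steps long
d = TensorDict(
    {
        ("collector", "traj_ids"): torch.tensor([[0, 0, 0, 0], [1, 1, 1, 1]]),
        ("collector", "step_count"): torch.tensor([[0, 1, 2, 3], [0, 1, 2, 3]]),
        ("collector", "mask"): torch.ones(2, 4, dtype=torch.bool),
    },
    batch_size=[2, 4],
)

assert d["collector", "mask"].shape == d.shape
for traj in d.unbind(0):
    # each row holds a single trajectory with consecutive step counts
    assert traj["collector", "traj_ids"].unique().numel() == 1
    steps = traj["collector", "step_count"]
    assert (steps[1:] - steps[:-1] == 1).all()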

test/test_cost.py

Lines changed: 13 additions & 7 deletions
@@ -7,6 +7,8 @@
 import re
 from copy import deepcopy

+from packaging import version as pack_version
+
 _has_functorch = True
 try:
     import functorch as ft  # noqa
@@ -273,7 +275,7 @@ def _create_seq_mock_data_dqn(
                     "observation": next_obs.masked_fill_(~mask.unsqueeze(-1), 0.0)
                 },
                 "done": done,
-                "mask": mask,
+                "collector": {"mask": mask},
                 "reward": reward.masked_fill_(~mask.unsqueeze(-1), 0.0),
                 "action": action.masked_fill_(~mask.unsqueeze(-1), 0.0),
                 "action_value": action_value.masked_fill_(~mask.unsqueeze(-1), 0.0),
@@ -507,7 +509,7 @@ def _create_seq_mock_data_ddpg(
                     "observation": next_obs.masked_fill_(~mask.unsqueeze(-1), 0.0)
                 },
                 "done": done,
-                "mask": mask,
+                "collector": {"mask": mask},
                 "reward": reward.masked_fill_(~mask.unsqueeze(-1), 0.0),
                 "action": action.masked_fill_(~mask.unsqueeze(-1), 0.0),
             },
@@ -735,7 +737,7 @@ def _create_seq_mock_data_td3(
                 "observation": obs * mask.to(obs.dtype),
                 "next": {"observation": next_obs * mask.to(obs.dtype)},
                 "done": done,
-                "mask": mask,
+                "collector": {"mask": mask},
                 "reward": reward * mask.to(obs.dtype),
                 "action": action * mask.to(obs.dtype),
             },
@@ -1012,7 +1014,7 @@ def _create_seq_mock_data_sac(
                     "observation": next_obs.masked_fill_(~mask.unsqueeze(-1), 0.0)
                 },
                 "done": done,
-                "mask": mask,
+                "collector": {"mask": mask},
                 "reward": reward.masked_fill_(~mask.unsqueeze(-1), 0.0),
                 "action": action.masked_fill_(~mask.unsqueeze(-1), 0.0),
             },
@@ -1441,7 +1443,7 @@ def _create_seq_mock_data_redq(
                     "observation": next_obs.masked_fill_(~mask.unsqueeze(-1), 0.0)
                 },
                 "done": done,
-                "mask": mask,
+                "collector": {"mask": mask},
                 "reward": reward.masked_fill_(~mask.unsqueeze(-1), 0.0),
                 "action": action.masked_fill_(~mask.unsqueeze(-1), 0.0),
             },
@@ -1880,7 +1882,7 @@ def _create_seq_mock_data_ppo(
                     "observation": next_obs.masked_fill_(~mask.unsqueeze(-1), 0.0)
                 },
                 "done": done,
-                "mask": mask,
+                "collector": {"mask": mask},
                 "reward": reward.masked_fill_(~mask.unsqueeze(-1), 0.0),
                 "action": action.masked_fill_(~mask.unsqueeze(-1), 0.0),
                 "sample_log_prob": (torch.randn_like(action[..., 1]) / 10).masked_fill_(
@@ -2035,6 +2037,8 @@ def test_ppo_shared(self, loss_class, device, advantage):
     @pytest.mark.parametrize("advantage", ("gae", "td", "td_lambda"))
     @pytest.mark.parametrize("device", get_available_devices())
     def test_ppo_diff(self, loss_class, device, gradient_mode, advantage):
+        if pack_version.parse(torch.__version__) > pack_version.parse("1.14"):
+            raise pytest.skip("make_functional_with_buffers needs to be changed")
         torch.manual_seed(self.seed)
         td = self._create_seq_mock_data_ppo(device=device)

@@ -2153,7 +2157,7 @@ def _create_seq_mock_data_a2c(
                     "observation": next_obs.masked_fill_(~mask.unsqueeze(-1), 0.0)
                 },
                 "done": done,
-                "mask": mask,
+                "collector": {"mask": mask},
                 "reward": reward.masked_fill_(~mask.unsqueeze(-1), 0.0),
                 "action": action.masked_fill_(~mask.unsqueeze(-1), 0.0),
                 "sample_log_prob": torch.randn_like(action[..., 1]).masked_fill_(
@@ -2245,6 +2249,8 @@ def test_a2c(self, device, gradient_mode, advantage):
     @pytest.mark.parametrize("advantage", ("gae", "td", "td_lambda"))
     @pytest.mark.parametrize("device", get_available_devices())
     def test_a2c_diff(self, device, gradient_mode, advantage):
+        if pack_version.parse(torch.__version__) > pack_version.parse("1.14"):
+            raise pytest.skip("make_functional_with_buffers needs to be changed")
         torch.manual_seed(self.seed)
         td = self._create_seq_mock_data_a2c(device=device)
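The mock-data helpers now nest the mask through a plain dict, "collector": {"mask": mask}, which TensorDict converts into a sub-tensordict; the entry it creates is the same one the tuple key ("collector", "mask") addresses elsewhere in this commit. A small sketch under that assumption:

import torch
from tensordict import TensorDict

mask = torch.ones(3, 5, dtype=torch.bool)

# nesting through a plain dict ...
td_a = TensorDict({"collector": {"mask": mask}}, batch_size=[3, 5])
# ... and through a tuple key produce the same nested entry
td_b = TensorDict({("collector", "mask"): mask}, batch_size=[3, 5])

assert (td_a.get(("collector", "mask")) == td_b.get(("collector", "mask"))).all()
assert td_a["collector"].batch_size == torch.Size([3, 5])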

test/test_postprocs.py

Lines changed: 23 additions & 16 deletions
@@ -41,7 +41,7 @@ def test_multistep(n, key, device, T=11):
             "done": done,
             "reward": torch.randn(1, T, 1, device=device).expand(b, T, 1)
             * mask.to(torch.float),
-            "mask": mask,
+            "collector": {"mask": mask},
         },
         batch_size=(b, T),
     ).to(device)
@@ -98,28 +98,28 @@ def create_fake_trajs(
         traj_len=200,
     ):
         traj_ids = torch.arange(num_workers)
-        steps_count = torch.zeros(num_workers)
+        step_count = torch.zeros(num_workers)
         workers = torch.arange(num_workers)

         out = []
         for _ in range(traj_len):
-            done = steps_count == traj_ids  # traj_id 0 has 0 steps, 1 has 1 step etc.
+            done = step_count == traj_ids  # traj_id 0 has 0 steps, 1 has 1 step etc.

             td = TensorDict(
                 source={
-                    "traj_ids": traj_ids,
+                    ("collector", "traj_ids"): traj_ids,
                     "a": traj_ids.clone().unsqueeze(-1),
-                    "steps_count": steps_count,
+                    ("collector", "step_count"): step_count,
                     "workers": workers,
                     "done": done.unsqueeze(-1),
                 },
                 batch_size=[num_workers],
             )
             out.append(td.clone())
-            steps_count += 1
+            step_count += 1

             traj_ids[done] = traj_ids.max() + torch.arange(1, done.sum() + 1)
-            steps_count[done] = 0
+            step_count[done] = 0

         out = torch.stack(out, 1).contiguous()
         return out
@@ -132,22 +132,29 @@ def test_splits(self, num_workers, traj_len):
         assert trajs.shape[0] == num_workers
         assert trajs.shape[1] == traj_len
         split_trajs = split_trajectories(trajs)
-        assert split_trajs.shape[0] == split_trajs.get("traj_ids").max() + 1
-        assert split_trajs.shape[1] == split_trajs.get("steps_count").max() + 1
+        assert (
+            split_trajs.shape[0] == split_trajs.get(("collector", "traj_ids")).max() + 1
+        )
+        assert (
+            split_trajs.shape[1]
+            == split_trajs.get(("collector", "step_count")).max() + 1
+        )

-        assert split_trajs.get("mask").sum() == num_workers * traj_len
+        assert split_trajs.get(("collector", "mask")).sum() == num_workers * traj_len

         assert split_trajs.get("done").sum(1).max() == 1
-        out_mask = split_trajs[split_trajs.get("mask")]
+        out_mask = split_trajs[split_trajs.get(("collector", "mask"))]
         for i in range(split_trajs.shape[0]):
-            traj_id_split = split_trajs[i].get("traj_ids")[split_trajs[i].get("mask")]
+            traj_id_split = split_trajs[i].get(("collector", "traj_ids"))[
+                split_trajs[i].get(("collector", "mask"))
+            ]
             assert 1 == len(traj_id_split.unique())

         for w in range(num_workers):
             assert (out_mask.get("workers") == w).sum() == traj_len
             # Assert that either the chain is not done XOR if it is it must have the desired length (equal to traj id by design)
-            for i in range(split_trajs.get("traj_ids").max()):
-                idx_traj_id = out_mask.get("traj_ids") == i
+            for i in range(split_trajs.get(("collector", "traj_ids")).max()):
+                idx_traj_id = out_mask.get(("collector", "traj_ids")) == i
                 # (!=) == (xor)
                 c1 = (idx_traj_id.sum() - 1 == i) and (
                     out_mask.get("done")[idx_traj_id].sum() == 1
@@ -162,8 +169,8 @@ def test_splits(self, num_workers, traj_len):
             )

         assert (
-            split_trajs.get("traj_ids").unique().numel()
-            == split_trajs.get("traj_ids").max() + 1
+            split_trajs.get(("collector", "traj_ids")).unique().numel()
+            == split_trajs.get(("collector", "traj_ids")).max() + 1
         )

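split_trajectories now reads trajectory ids from ("collector", "traj_ids"), pads each trajectory to the longest one, and writes the validity mask to ("collector", "mask"). A hedged sketch that mirrors the fake data these tests build (the import path and the minimal set of required input keys are assumptions based on the test, and padding behaviour may differ across torchrl versions):

import torch
from tensordict import TensorDict
from torchrl.collectors.utils import split_trajectories

# 1 worker, 5 consecutive steps covering two trajectories (ids 0 and 1)
td = TensorDict(
    {
        ("collector", "traj_ids"): torch.tensor([[0, 0, 1, 1, 1]]),
        ("collector", "step_count"): torch.tensor([[0, 1, 0, 1, 2]]),
        "done": torch.tensor([[0, 1, 0, 0, 1]], dtype=torch.bool).unsqueeze(-1),
        "observation": torch.randn(1, 5, 3),
    },
    batch_size=[1, 5],
)

split = split_trajectories(td)
# one row per trajectory, padded to the longest trajectory (3 steps here)
assert split.shape[0] == split.get(("collector", "traj_ids")).max() + 1
assert split.get(("collector", "mask")).sum() == 5  # number of real frames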

test/test_trainer.py

Lines changed: 18 additions & 7 deletions
@@ -762,14 +762,14 @@ def test_masking():
     batch = 10
     td = TensorDict(
         {
-            "mask": torch.zeros(batch, dtype=torch.bool).bernoulli_(),
+            ("collector", "mask"): torch.zeros(batch, dtype=torch.bool).bernoulli_(),
             "tensor": torch.randn(batch, 51),
         },
         [batch],
     )
     td_out = trainer._process_batch_hook(td)
-    assert td_out.shape[0] == td.get("mask").sum()
-    assert (td["tensor"][td["mask"]] == td_out["tensor"]).all()
+    assert td_out.shape[0] == td.get(("collector", "mask")).sum()
+    assert (td["tensor"][td[("collector", "mask")]] == td_out["tensor"]).all()


 class TestSubSampler:
@@ -989,10 +989,13 @@ def test_countframes(self):
         count_frames = CountFramesLog(frame_skip=frame_skip)
         count_frames.register(trainer)
         td = TensorDict(
-            {"mask": torch.zeros(batch, dtype=torch.bool).bernoulli_()}, [batch]
+            {("collector", "mask"): torch.zeros(batch, dtype=torch.bool).bernoulli_()},
+            [batch],
         )
         trainer._pre_steps_log_hook(td)
-        assert count_frames.frame_count == td.get("mask").sum() * frame_skip
+        assert (
+            count_frames.frame_count == td.get(("collector", "mask")).sum() * frame_skip
+        )

     @pytest.mark.parametrize(
         "backend",
@@ -1037,13 +1040,21 @@ def _make_countframe_and_trainer(tmpdirname):
         with tempfile.TemporaryDirectory() as tmpdirname, tempfile.TemporaryDirectory() as tmpdirname2:
             trainer, count_frames, file = _make_countframe_and_trainer(tmpdirname)
             td = TensorDict(
-                {"mask": torch.zeros(batch, dtype=torch.bool).bernoulli_()}, [batch]
+                {
+                    ("collector", "mask"): torch.zeros(
+                        batch, dtype=torch.bool
+                    ).bernoulli_()
+                },
+                [batch],
             )
             trainer._pre_steps_log_hook(td)
             trainer.save_trainer(True)
             trainer2, count_frames2, _ = _make_countframe_and_trainer(tmpdirname2)
             trainer2.load_from_file(file)
-            assert count_frames2.frame_count == td.get("mask").sum() * frame_skip
+            assert (
+                count_frames2.frame_count
+                == td.get(("collector", "mask")).sum() * frame_skip
+            )
             assert state_dict_has_been_called[0]
             assert load_state_dict_has_been_called[0]
             CountFramesLog.state_dict = CountFramesLog_state_dict
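test_masking exercises the trainer's batch-processing hook, which drops masked-out rows before they reach the loss. A stand-alone sketch of the masking the hook is expected to perform (the trainer wiring and hook registration are not reproduced here):

import torch
from tensordict import TensorDict

batch = 10
td = TensorDict(
    {
        ("collector", "mask"): torch.zeros(batch, dtype=torch.bool).bernoulli_(),
        "tensor": torch.randn(batch, 51),
    },
    [batch],
)

# what the process-batch hook is expected to do: keep only the valid rows
td_out = td[td.get(("collector", "mask"))]

assert td_out.shape[0] == td.get(("collector", "mask")).sum()
assert (td["tensor"][td[("collector", "mask")]] == td_out["tensor"]).all()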

test/test_transforms.py

Lines changed: 1 addition & 1 deletion
@@ -1832,7 +1832,7 @@ def test_step_counter(self, max_steps, device, batch, reset_workers):
         while max_steps is None or i < max_steps:
             step_counter._step(td)
             i += 1
-            assert torch.all(td.get("step_count") == i)
+            assert torch.all(td.get("step_count") == i), (td.get("step_count"), i)
             if max_steps is None:
                 break
         if max_steps is not None:
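With the collector keys moved into the nested "collector" entry, the StepCounter transform keeps sole ownership of the root "step_count" key. A hedged usage sketch (GymEnv and gym's CartPole-v1 are assumptions; any torchrl environment would behave the same way):

from torchrl.envs import TransformedEnv
from torchrl.envs.libs.gym import GymEnv
from torchrl.envs.transforms import StepCounter

# wrap an environment so every step increments a per-episode counter
env = TransformedEnv(GymEnv("CartPole-v1"), StepCounter(max_steps=100))
rollout = env.rollout(5)

# the transform owns the root "step_count" entry of the rollout,
# while collectors now write their own counter under ("collector", "step_count"),
# so the two no longer overwrite each other
assert "step_count" in rollout.keys()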
