[Quality] Simplify empty TensorDict construction across the codebase (drop the redundant `{}` source argument)

Vincent Moens · Vincent Moens · commit a4c1ee3b3978 · 2024-11-14T18:04:01.000Z
ghstack-source-id: 9e280d9
Pull Request resolved: #2565
diff --git a/examples/rlhf/utils.py b/examples/rlhf/utils.py
@@ -85,7 +85,7 @@ def log(self, model):
 
 class TrainLogger:
     def __init__(self, size: int, log_interval: int, logger: Logger):
-        self.data = TensorDict({}, [size])
+        self.data = TensorDict(batch_size=[size])
         self.counter = 0
         self.log_interval = log_interval
         self.logger = logger
diff --git a/sota-implementations/a2c/a2c_atari.py b/sota-implementations/a2c/a2c_atari.py
@@ -144,7 +144,7 @@ def main(cfg: "DictConfig"):  # noqa: F821
                 }
             )
 
-        losses = TensorDict({}, batch_size=[num_mini_batches])
+        losses = TensorDict(batch_size=[num_mini_batches])
         training_start = time.time()
 
         # Compute GAE
diff --git a/sota-implementations/a2c/a2c_mujoco.py b/sota-implementations/a2c/a2c_mujoco.py
@@ -128,7 +128,7 @@ def main(cfg: "DictConfig"):  # noqa: F821
                 }
             )
 
-        losses = TensorDict({}, batch_size=[num_mini_batches])
+        losses = TensorDict(batch_size=[num_mini_batches])
         training_start = time.time()
 
         # Compute GAE
diff --git a/sota-implementations/cql/cql_online.py b/sota-implementations/cql/cql_online.py
@@ -127,7 +127,7 @@ def main(cfg: "DictConfig"):  # noqa: F821
         # optimization steps
         training_start = time.time()
         if collected_frames >= init_random_frames:
-            log_loss_td = TensorDict({}, [num_updates])
+            log_loss_td = TensorDict(batch_size=[num_updates])
             for j in range(num_updates):
                 # sample from replay buffer
                 sampled_tensordict = replay_buffer.sample()
diff --git a/sota-implementations/impala/impala_multi_node_ray.py b/sota-implementations/impala/impala_multi_node_ray.py
@@ -184,7 +184,7 @@ def main(cfg: "DictConfig"):  # noqa: F821
                     logger.log_scalar(key, value, collected_frames)
             continue
 
-        losses = TensorDict({}, batch_size=[sgd_updates])
+        losses = TensorDict(batch_size=[sgd_updates])
         training_start = time.time()
         for j in range(sgd_updates):
 
diff --git a/sota-implementations/impala/impala_multi_node_submitit.py b/sota-implementations/impala/impala_multi_node_submitit.py
@@ -176,7 +176,7 @@ def main(cfg: "DictConfig"):  # noqa: F821
                     logger.log_scalar(key, value, collected_frames)
             continue
 
-        losses = TensorDict({}, batch_size=[sgd_updates])
+        losses = TensorDict(batch_size=[sgd_updates])
         training_start = time.time()
         for j in range(sgd_updates):
 
diff --git a/sota-implementations/impala/impala_single_node.py b/sota-implementations/impala/impala_single_node.py
@@ -154,7 +154,7 @@ def main(cfg: "DictConfig"):  # noqa: F821
                     logger.log_scalar(key, value, collected_frames)
             continue
 
-        losses = TensorDict({}, batch_size=[sgd_updates])
+        losses = TensorDict(batch_size=[sgd_updates])
         training_start = time.time()
         for j in range(sgd_updates):
 
diff --git a/sota-implementations/ppo/ppo_atari.py b/sota-implementations/ppo/ppo_atari.py
@@ -138,7 +138,7 @@ def main(cfg: "DictConfig"):  # noqa: F821
     cfg_logger_num_test_episodes = cfg.logger.num_test_episodes
     cfg_optim_max_grad_norm = cfg.optim.max_grad_norm
     cfg.loss.clip_epsilon = cfg_loss_clip_epsilon
-    losses = TensorDict({}, batch_size=[cfg_loss_ppo_epochs, num_mini_batches])
+    losses = TensorDict(batch_size=[cfg_loss_ppo_epochs, num_mini_batches])
 
     for i, data in enumerate(collector):
 
diff --git a/sota-implementations/ppo/ppo_mujoco.py b/sota-implementations/ppo/ppo_mujoco.py
@@ -125,7 +125,7 @@ def main(cfg: "DictConfig"):  # noqa: F821
     cfg_loss_clip_epsilon = cfg.loss.clip_epsilon
     cfg_logger_test_interval = cfg.logger.test_interval
     cfg_logger_num_test_episodes = cfg.logger.num_test_episodes
-    losses = TensorDict({}, batch_size=[cfg_loss_ppo_epochs, num_mini_batches])
+    losses = TensorDict(batch_size=[cfg_loss_ppo_epochs, num_mini_batches])
 
     for i, data in enumerate(collector):
 
diff --git a/sota-implementations/sac/sac.py b/sota-implementations/sac/sac.py
@@ -126,7 +126,7 @@ def main(cfg: "DictConfig"):  # noqa: F821
         # Optimization steps
         training_start = time.time()
         if collected_frames >= init_random_frames:
-            losses = TensorDict({}, batch_size=[num_updates])
+            losses = TensorDict(batch_size=[num_updates])
             for i in range(num_updates):
                 # Sample from replay buffer
                 sampled_tensordict = replay_buffer.sample()
diff --git a/test/mocking_classes.py b/test/mocking_classes.py
@@ -492,7 +492,7 @@ def _reset(self, tensordict: TensorDictBase = None) -> TensorDictBase:
         self.counter += 1
         state = torch.zeros(self.size) + self.counter
         if tensordict is None:
-            tensordict = TensorDict({}, self.batch_size, device=self.device)
+            tensordict = TensorDict(batch_size=self.batch_size, device=self.device)
         tensordict = tensordict.empty().set(self.out_key, self._get_out_obs(state))
         tensordict = tensordict.set(self._out_key, self._get_out_obs(state))
         tensordict.set("done", torch.zeros(*tensordict.shape, 1, dtype=torch.bool))
@@ -595,7 +595,7 @@ def _reset(self, tensordict: TensorDictBase) -> TensorDictBase:
         self.step_count = 0
         # state = torch.zeros(self.size) + self.counter
         if tensordict is None:
-            tensordict = TensorDict({}, self.batch_size, device=self.device)
+            tensordict = TensorDict(batch_size=self.batch_size, device=self.device)
 
         tensordict = tensordict.empty()
         tensordict.update(self.observation_spec.rand())
diff --git a/test/test_env.py b/test/test_env.py
@@ -1420,7 +1420,7 @@ def test_steptensordict(
             tds[1]["but", "not", "this", "one"] = torch.ones(2)
             tds[0]["next", "this", "one"] = torch.ones(2) * 2
             tensordict = LazyStackedTensorDict.lazy_stack(tds, 0)
-        next_tensordict = TensorDict({}, [4]) if has_out else None
+        next_tensordict = TensorDict(batch_size=[4]) if has_out else None
         if has_out and lazy_stack:
             next_tensordict = LazyStackedTensorDict.lazy_stack(
                 next_tensordict.unbind(0), 0
@@ -1550,9 +1550,9 @@ def test_nested(
         nested_key = ("data",)
         td = TensorDict(
             {
-                nested_key: TensorDict({}, nested_batch_size),
+                nested_key: TensorDict(batch_size=nested_batch_size),
                 "next": {
-                    nested_key: TensorDict({}, nested_batch_size),
+                    nested_key: TensorDict(batch_size=nested_batch_size),
                 },
             },
             td_batch_size,
@@ -1670,7 +1670,7 @@ def test_nested_partially(
         # Nested only in root
         td = TensorDict(
             {
-                nested_key: TensorDict({}, nested_batch_size),
+                nested_key: TensorDict(batch_size=nested_batch_size),
                 "next": {},
             },
             td_batch_size,
@@ -1711,7 +1711,7 @@ def test_nested_partially(
         # Nested only in next
         td = TensorDict(
             {
-                "next": {nested_key: TensorDict({}, nested_batch_size)},
+                "next": {nested_key: TensorDict(batch_size=nested_batch_size)},
             },
             td_batch_size,
         )
diff --git a/test/test_modules.py b/test/test_modules.py
@@ -375,7 +375,7 @@ def test_CEM_model_free_env(self, device, batch_size, seed=1):
             num_candidates=100,
             top_k=2,
         )
-        td = env.reset(TensorDict({}, batch_size=batch_size).to(device))
+        td = env.reset(TensorDict(batch_size=batch_size).to(device))
         td_copy = td.clone()
         td = planner(td)
         assert (
@@ -408,7 +408,7 @@ def test_MPPI(self, device, batch_size, seed=1):
             num_candidates=100,
             top_k=2,
         )
-        td = env.reset(TensorDict({}, batch_size=batch_size).to(device))
+        td = env.reset(TensorDict(batch_size=batch_size).to(device))
         td_copy = td.clone()
         td = planner(td)
         assert (
diff --git a/test/test_rb.py b/test/test_rb.py
@@ -3317,7 +3317,7 @@ def _make_storage(self, storage_type, data_type):
             return LazyMemmapStorage(max_size=100)
         if storage_type is TensorStorage:
             if data_type is TensorDict:
-                return TensorStorage(TensorDict({}, [100]))
+                return TensorStorage(TensorDict(batch_size=[100]))
             elif data_type is torch.Tensor:
                 return TensorStorage(torch.zeros(100))
             else:
diff --git a/test/test_transforms.py b/test/test_transforms.py
@@ -3945,7 +3945,7 @@ def test_nested(self, skip=4):
     def test_transform_model(self):
         t = FrameSkipTransform(2)
         t = nn.Sequential(t, nn.Identity())
-        tensordict = TensorDict({}, [])
+        tensordict = TensorDict()
         with pytest.raises(
             RuntimeError,
             match="FrameSkipTransform can only be used when appended to a transformed env",
@@ -4252,15 +4252,15 @@ def test_transform_compose(self):
 
     def test_transform_model(self):
         t = nn.Sequential(NoopResetEnv(), nn.Identity())
-        td = TensorDict({}, [])
+        td = TensorDict()
         t(td)
 
     @pytest.mark.parametrize("rbclass", [ReplayBuffer, TensorDictReplayBuffer])
     def test_transform_rb(self, rbclass):
         t = NoopResetEnv()
         rb = rbclass(storage=LazyTensorStorage(10))
         rb.append_transform(t)
-        td = TensorDict({}, [10])
+        td = TensorDict(batch_size=[10])
         rb.extend(td)
         rb.sample(1)
 
@@ -6917,7 +6917,7 @@ def test_transform_no_env(self):
     def test_transform_model(self):
         t = TensorDictPrimer(mykey=Unbounded([3]))
         model = nn.Sequential(t, nn.Identity())
-        td = TensorDict({}, [])
+        td = TensorDict()
         model(td)
         assert "mykey" in td.keys()
 
@@ -7507,7 +7507,7 @@ def test_transform_model(self):
         action_dim = 5
         t = gSDENoise(state_dim=state_dim, action_dim=action_dim, shape=(2,))
         model = nn.Sequential(t, nn.Identity())
-        td = TensorDict({}, [])
+        td = TensorDict()
         model(td)
         assert "_eps_gSDE" in td.keys()
         assert (td["_eps_gSDE"] != 0.0).all()
@@ -9736,7 +9736,7 @@ def test_transform_model(self):
         with pytest.raises(
             NotImplementedError, match="InitTracker cannot be executed without a parent"
         ):
-            td = TensorDict({}, [])
+            td = TensorDict()
             chain = nn.Sequential(InitTracker())
             chain(td)
 
@@ -10169,7 +10169,7 @@ def test_trans_parallel_env_check(self, maybe_fork_ParallelEnv):
     def test_transform_no_env(self):
         t = ActionMask()
         with pytest.raises(RuntimeError, match="parent cannot be None"):
-            t._call(TensorDict({}, []))
+            t._call(TensorDict())
 
     def test_transform_compose(self):
         env = self._env_class()
@@ -10197,7 +10197,7 @@ def test_transform_env(self):
     def test_transform_model(self):
         t = ActionMask()
         with pytest.raises(RuntimeError, match=FORWARD_NOT_IMPLEMENTED.format(type(t))):
-            t(TensorDict({}, []))
+            t(TensorDict())
 
     def test_transform_rb(self):
         t = ActionMask()
@@ -10526,18 +10526,12 @@ def make_env():
 
     def test_transform_no_env(self):
         t = DeviceCastTransform("cpu:1", "cpu:0")
-        assert t._call(TensorDict({}, [], device="cpu:0")).device == torch.device(
-            "cpu:1"
-        )
+        assert t._call(TensorDict(device="cpu:0")).device == torch.device("cpu:1")
 
     def test_transform_compose(self):
         t = Compose(DeviceCastTransform("cpu:1", "cpu:0"))
-        assert t._call(TensorDict({}, [], device="cpu:0")).device == torch.device(
-            "cpu:1"
-        )
-        assert t._inv_call(TensorDict({}, [], device="cpu:1")).device == torch.device(
-            "cpu:0"
-        )
+        assert t._call(TensorDict(device="cpu:0")).device == torch.device("cpu:1")
+        assert t._inv_call(TensorDict(device="cpu:1")).device == torch.device("cpu:0")
 
     def test_transform_env(self):
         env = ContinuousActionVecMockEnv(device="cpu:0")
@@ -10550,7 +10544,7 @@ def test_transform_env(self):
     def test_transform_model(self):
         t = Compose(DeviceCastTransform("cpu:1", "cpu:0"))
         m = nn.Sequential(t)
-        assert t(TensorDict({}, [], device="cpu:0")).device == torch.device("cpu:1")
+        assert t(TensorDict(device="cpu:0")).device == torch.device("cpu:1")
 
     @pytest.mark.parametrize("rbclass", [ReplayBuffer, TensorDictReplayBuffer])
     @pytest.mark.parametrize("storage", [TensorStorage, LazyTensorStorage])
@@ -10574,9 +10568,7 @@ def test_transform_rb(self, rbclass, storage):
 
     def test_transform_inverse(self):
         t = DeviceCastTransform("cpu:1", "cpu:0")
-        assert t._inv_call(TensorDict({}, [], device="cpu:1")).device == torch.device(
-            "cpu:0"
-        )
+        assert t._inv_call(TensorDict(device="cpu:1")).device == torch.device("cpu:0")
 
 
 class TestPermuteTransform(TransformBase):
@@ -10804,12 +10796,12 @@ def make():
     def test_transform_no_env(self):
         t = EndOfLifeTransform()
         with pytest.raises(RuntimeError, match=t.NO_PARENT_ERR.format(type(t))):
-            t._step(TensorDict({}, []), TensorDict({}, []))
+            t._step(TensorDict(), TensorDict())
 
     def test_transform_compose(self):
         t = EndOfLifeTransform()
         with pytest.raises(RuntimeError, match=t.NO_PARENT_ERR.format(type(t))):
-            Compose(t)._step(TensorDict({}, []), TensorDict({}, []))
+            Compose(t)._step(TensorDict(), TensorDict())
 
     @pytest.mark.parametrize("eol_key", ["eol_key", ("nested", "eol")])
     @pytest.mark.parametrize("lives_key", ["lives_key", ("nested", "lives")])
@@ -10838,7 +10830,7 @@ def test_transform_env(self, eol_key, lives_key):
     def test_transform_model(self):
         t = EndOfLifeTransform()
         with pytest.raises(RuntimeError, match=FORWARD_NOT_IMPLEMENTED.format(type(t))):
-            nn.Sequential(t)(TensorDict({}, []))
+            nn.Sequential(t)(TensorDict())
 
     def test_transform_rb(self):
         pass
@@ -11286,7 +11278,7 @@ def _reset(self, tensordict):
 
         def _step(self, tensordict):
             return (
-                TensorDict({}, batch_size=[])
+                TensorDict()
                 .update(self.observation_spec.rand())
                 .update(self.full_done_spec.zero())
                 .update(self.full_reward_spec.rand())
@@ -11378,7 +11370,7 @@ def test_transform_inverse(self):
         t.inv(td)
         assert len(td.keys()) != 0
         env = TransformedEnv(self.DummyEnv(), RemoveEmptySpecs())
-        td2 = env.transform.inv(TensorDict({}, []))
+        td2 = env.transform.inv(TensorDict())
         assert ("state", "sub") in td2.keys(True)
 
 
diff --git a/torchrl/data/datasets/atari_dqn.py b/torchrl/data/datasets/atari_dqn.py
@@ -598,7 +598,7 @@ def _download_and_proc_split(
     @classmethod
     def _preproc_run(cls, path, gz_files, run):
         files = gz_files[run]
-        td = TensorDict({}, [])
+        td = TensorDict()
         path = Path(path)
         for file in files:
             name = str(Path(file).parts[-1]).split(".")[0]
diff --git a/torchrl/data/datasets/minari_data.py b/torchrl/data/datasets/minari_data.py
@@ -245,7 +245,7 @@ def _download_and_preproc(self):
             minari.download_dataset(dataset_id=self.dataset_id)
             parent_dir = Path(tmpdir) / self.dataset_id / "data"
 
-            td_data = TensorDict({}, [])
+            td_data = TensorDict()
             total_steps = 0
             torchrl_logger.info("first read through data to create data structure...")
             h5_data = PersistentTensorDict.from_h5(parent_dir / "main_data.hdf5")
diff --git a/torchrl/data/datasets/roboset.py b/torchrl/data/datasets/roboset.py
@@ -246,7 +246,7 @@ def _download_and_preproc(self):
             return self._preproc_h5(h5_data_files)
 
     def _preproc_h5(self, h5_data_files):
-        td_data = TensorDict({}, [])
+        td_data = TensorDict()
         total_steps = 0
         torchrl_logger.info(
             f"first read through data files {h5_data_files} to create data structure..."
diff --git a/torchrl/data/replay_buffers/samplers.py b/torchrl/data/replay_buffers/samplers.py
@@ -2173,7 +2173,7 @@ def sample(self, storage, batch_size):
             [
                 TensorDict.from_dict(info, batch_dims=samples.ndim - 1)
                 if info
-                else TensorDict({}, [])
+                else TensorDict()
                 for info in infos
             ]
         )
diff --git a/torchrl/data/replay_buffers/storages.py b/torchrl/data/replay_buffers/storages.py
@@ -317,9 +317,7 @@ def load_state_dict(self, state_dict):
             if isinstance(elt, torch.Tensor):
                 self._storage.append(elt)
             elif isinstance(elt, (dict, OrderedDict)):
-                self._storage.append(
-                    TensorDict({}, []).load_state_dict(elt, strict=False)
-                )
+                self._storage.append(TensorDict().load_state_dict(elt, strict=False))
             else:
                 raise TypeError(
                     f"Objects of type {type(elt)} are not supported by ListStorage.load_state_dict"
@@ -675,9 +673,7 @@ def load_state_dict(self, state_dict):
             if is_tensor_collection(self._storage):
                 self._storage.load_state_dict(_storage, strict=False)
             elif self._storage is None:
-                self._storage = TensorDict({}, []).load_state_dict(
-                    _storage, strict=False
-                )
+                self._storage = TensorDict().load_state_dict(_storage, strict=False)
             else:
                 raise RuntimeError(
                     f"Cannot copy a storage of type {type(_storage)} onto another of type {type(self._storage)}. If your storage is pytree-based, use the dumps/load API instead."
@@ -1193,9 +1189,7 @@ def load_state_dict(self, state_dict):
                     "It is preferable to load a storage onto a"
                     "pre-allocated one whenever possible."
                 )
-                self._storage = TensorDict({}, []).load_state_dict(
-                    _storage, strict=False
-                )
+                self._storage = TensorDict().load_state_dict(_storage, strict=False)
                 self._storage.memmap_()
             else:
                 raise RuntimeError(
diff --git a/torchrl/data/replay_buffers/utils.py b/torchrl/data/replay_buffers/utils.py
diff --git a/torchrl/envs/libs/robohive.py b/torchrl/envs/libs/robohive.py
diff --git a/torchrl/envs/model_based/common.py b/torchrl/envs/model_based/common.py
diff --git a/torchrl/envs/transforms/transforms.py b/torchrl/envs/transforms/transforms.py
diff --git a/torchrl/envs/utils.py b/torchrl/envs/utils.py
diff --git a/torchrl/modules/tensordict_module/rnn.py b/torchrl/modules/tensordict_module/rnn.py
diff --git a/tutorials/sphinx-tutorials/pendulum.py b/tutorials/sphinx-tutorials/pendulum.py
diff --git a/tutorials/sphinx-tutorials/torchrl_envs.py b/tutorials/sphinx-tutorials/torchrl_envs.py

File: sota-implementations/a2c/a2c_atari.py
Original file line number	Diff line number	Diff line change
`@@ -144,7 +144,7 @@ def main(cfg: "DictConfig"): # noqa: F821`
`144`	`144`	`}`
`145`	`145`	`)`
`146`	`146`
`147`		`- losses = TensorDict({}, batch_size=[num_mini_batches])`
	`147`	`+ losses = TensorDict(batch_size=[num_mini_batches])`
`148`	`148`	`training_start = time.time()`
`149`	`149`
`150`	`150`	`# Compute GAE`
File: sota-implementations/a2c/a2c_mujoco.py
Original file line number	Diff line number	Diff line change
`@@ -128,7 +128,7 @@ def main(cfg: "DictConfig"): # noqa: F821`
`128`	`128`	`}`
`129`	`129`	`)`
`130`	`130`
`131`		`- losses = TensorDict({}, batch_size=[num_mini_batches])`
	`131`	`+ losses = TensorDict(batch_size=[num_mini_batches])`
`132`	`132`	`training_start = time.time()`
`133`	`133`
`134`	`134`	`# Compute GAE`