[Refactor] Minor refactorings to envs (#872)

vmoens · web-flow · commit 37e0c5369f2f · 2023-01-27T16:12:42.000Z
diff --git a/torchrl/envs/common.py b/torchrl/envs/common.py
@@ -376,12 +376,19 @@ def step(self, tensordict: TensorDictBase) -> TensorDictBase:
         # sanity check
         self._assert_tensordict_shape(tensordict)
 
-        tensordict.is_locked = True  # make sure _step does not modify the tensordict
+        tensordict.lock()  # make sure _step does not modify the tensordict
         tensordict_out = self._step(tensordict)
-        tensordict.is_locked = False
+        if tensordict_out is tensordict:
+            raise RuntimeError(
+                "EnvBase._step should return outplace changes to the input "
+                "tensordict. Consider emptying the TensorDict first (e.g. tensordict.empty() or "
+                "tensordict.select()) inside _step before writing new tensors onto this new instance."
+            )
+        tensordict.unlock()
+
         obs_keys = set(self.observation_spec.keys())
         tensordict_out_select = tensordict_out.select(*obs_keys)
-        tensordict_out = tensordict_out.exclude(*obs_keys)
+        tensordict_out = tensordict_out.exclude(*obs_keys, inplace=True)
         tensordict_out.set("next", tensordict_out_select)
 
         reward = tensordict_out.get("reward")
@@ -409,12 +416,6 @@ def step(self, tensordict: TensorDictBase) -> TensorDictBase:
             done = done.view(expected_done_shape)
             tensordict_out.set("done", done)
 
-        if tensordict_out is tensordict:
-            raise RuntimeError(
-                "EnvBase._step should return outplace changes to the input "
-                "tensordict. Consider emptying the TensorDict first (e.g. tensordict.empty() or "
-                "tensordict.select()) inside _step before writing new tensors onto this new instance."
-            )
         if self.run_type_checks:
             for key in self._select_observation_keys(tensordict_out):
                 obs = tensordict_out.get(key)
@@ -432,7 +433,6 @@ def step(self, tensordict: TensorDictBase) -> TensorDictBase:
                 )
         tensordict.update(tensordict_out, inplace=self._inplace_update)
 
-        del tensordict_out
         return tensordict
 
     def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
@@ -726,8 +726,6 @@ def _to_tensor(
             value = torch.as_tensor(value, device=device)
         else:
             value = value.to(device)
-        # if dtype is not None:
-        #     value = value.to(dtype)
         return value
 
     def close(self):
diff --git a/torchrl/envs/gym_like.py b/torchrl/envs/gym_like.py
@@ -177,7 +177,7 @@ def _step(self, tensordict: TensorDictBase) -> TensorDictBase:
         action = tensordict.get("action")
         action_np = self.read_action(action)
 
-        reward = self.reward_spec.zero(self.batch_size)
+        reward = self.reward_spec.zero()
         for _ in range(self.wrapper_frame_skip):
             obs, _reward, done, *info = self._output_transform(
                 self._env.step(action_np)
@@ -200,7 +200,7 @@ def _step(self, tensordict: TensorDictBase) -> TensorDictBase:
                 )
 
             if _reward is None:
-                _reward = self.reward_spec.zero(self.batch_size)
+                _reward = self.reward_spec.zero()
 
             reward = self.read_reward(reward, _reward)