pytorch
diff --git a/test/mocking_classes.py b/test/mocking_classes.py
Lines changed: 108 additions & 1 deletion
diff --git a/test/test_env.py b/test/test_env.py
Lines changed: 143 additions & 15 deletions
diff --git a/test/test_specs.py b/test/test_specs.py
Lines changed: 7 additions & 0 deletions
diff --git a/torchrl/_utils.py b/torchrl/_utils.py
Lines changed: 12 additions & 6 deletions
diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py
Lines changed: 1 addition & 1 deletion
@@ -8,6 +8,8 @@
 import string
 from typing import Dict, List, Optional
 
+import numpy as np
+
 import torch
 import torch.nn as nn
 from tensordict import TensorDict, TensorDictBase
@@ -26,6 +28,7 @@
     Unbounded,
 )
 from torchrl.data.utils import consolidate_spec
+from torchrl.envs import Transform
 from torchrl.envs.common import EnvBase
 from torchrl.envs.model_based.common import ModelBasedEnvBase
 from torchrl.envs.utils import (
@@ -34,7 +37,6 @@
     MarlGroupMapType,
 )
 
-
 spec_dict = {
     "bounded": Bounded,
     "one_hot": OneHot,
@@ -2268,3 +2270,108 @@ def _step(self, tensordict: TensorDictBase, **kwargs) -> TensorDict:
 
     def _set_seed(self, seed: Optional[int]):
         ...
+
+
+@tensorclass()
+class TC:
+    """Tensorclass pairing a string field (``field0``) with a tensor field (``field1``)."""
+
+    field0: str
+    field1: torch.Tensor
+
+
+class EnvWithTensorClass(CountingEnv):
+    """Counting env whose observations carry an extra ``"tc"`` entry of type ``TC``.
+
+    The entry starts at ``TC("0", zeros)`` on reset; every step increments
+    both the string-encoded integer and the tensor field by one.
+    """
+
+    tc_cls = TC
+
+    def __init__(self, **kwargs):
+        """Build the parent env, then register the spec of the ``"tc"`` entry."""
+        super().__init__(**kwargs)
+        batch_size = self.batch_size
+        tc_spec = Composite(
+            field0=NonTensor(example_data="an observation!", shape=batch_size),
+            field1=Unbounded(shape=batch_size),
+            shape=batch_size,
+            data_cls=TC,
+        )
+        self.observation_spec["tc"] = tc_spec
+
+    def _reset(self, tensordict: TensorDictBase, **kwargs) -> TensorDictBase:
+        """Reset the parent env and write the initial ``"tc"`` value."""
+        reset_td = super()._reset(tensordict, **kwargs)
+        initial_tc = TC("0", torch.zeros(self.batch_size))
+        reset_td["tc"] = initial_tc
+        return reset_td
+
+    def _step(self, tensordict: TensorDictBase, **kwargs) -> TensorDictBase:
+        """Step the parent env and write the incremented ``"tc"`` value."""
+        next_td = super()._step(tensordict, **kwargs)
+        # Fall back to a zero-valued TC when the input carries no "tc" entry.
+        current = tensordict.get("tc", TC("0", 0))
+        text = current.field0
+        if text is None:
+            text = "0"
+        value = current.field1
+        if value is None:
+            value = torch.zeros(self.batch_size)
+        next_td["tc"] = TC(str(int(text) + 1), value + 1)
+        return next_td
+
+
+@tensorclass
+class History:
+    """Tensorclass holding one history item: a ``role`` string and a ``content`` string."""
+
+    role: str
+    content: str
+
+
+class HistoryTransform(Transform):
+    """A mocking class to record history.
+
+    The transform maintains a ``"history"`` entry made of stacked
+    :class:`History` items: two items are written at reset and one more item
+    is appended at every step. Device consistency with the parent env is
+    asserted along the way.
+    """
+
+    def transform_observation_spec(self, observation_spec: Composite) -> Composite:
+        """Add the ``"history"`` spec and check its device against the parent's.
+
+        Args:
+            observation_spec: the observation spec to extend in place.
+
+        Returns:
+            The same spec, with a ``"history"`` composite entry whose leading
+            dimension is variable (``-1``).
+        """
+        defaults = {
+            "role": NonTensor(
+                example_data="a role!",
+                shape=(-1,),
+            ),
+            "content": NonTensor(
+                example_data="a content!",
+                shape=(-1,),
+            ),
+        }
+        observation_spec["history"] = Composite(
+            defaults,
+            shape=(-1,),
+            data_cls=History,
+        )
+        assert observation_spec.device == self.parent.device
+        assert observation_spec["history"].device == self.parent.device
+        return observation_spec
+
+    def _reset(
+        self, tensordict: TensorDictBase, tensordict_reset: TensorDictBase
+    ) -> TensorDictBase:
+        """Write the initial two-item history into ``tensordict_reset``."""
+        assert tensordict_reset.device == self.parent.device
+        tensordict_reset["history"] = torch.stack(
+            [
+                History(role="system", content="0"),
+                History(role="user", content="1"),
+            ]
+        )
+        assert tensordict_reset["history"].device == self.parent.device
+        return tensordict_reset
+
+    def _step(
+        self, tensordict: TensorDictBase, next_tensordict: TensorDictBase
+    ) -> TensorDictBase:
+        """Append one item to the history and write it into ``next_tensordict``.
+
+        The new item gets a randomly chosen role and a content equal to the
+        last recorded content, read as an integer, plus one.
+        """
+        assert next_tensordict.device == self.parent.device
+        history = tensordict["history"]
+        local_history = History(
+            role=np.random.choice(["user", "system", "assistant"]),
+            content=str(int(history.content[-1]) + 1),
+            device=history.device,
+        )
+        # Rebuild the stack from the existing items plus the new one.
+        history = torch.stack(list(history.unbind(0)) + [local_history])
+        assert isinstance(history, History)
+        next_tensordict["history"] = history
+        assert next_tensordict["history"].device == self.parent.device, (
+            next_tensordict["history"],
+            self.parent.device,
+        )
+        return next_tensordict
@@ -42,6 +42,7 @@
     CatFrames,
     CatTensors,
     ChessEnv,
+    ConditionalSkip,
     DoubleToFloat,
     EnvBase,
     EnvCreator,
@@ -70,6 +71,7 @@
     check_marl_grouping,
     make_composite_from_td,
     MarlGroupMapType,
+    RandomPolicy,
     step_mdp,
 )
 from torchrl.modules import Actor, ActorCriticOperator, MLP, SafeModule, ValueOperator
@@ -131,6 +133,7 @@
         EnvWithMetadata,
         HeterogeneousCountingEnv,
         HeterogeneousCountingEnvPolicy,
+        HistoryTransform,
         MockBatchedLockedEnv,
         MockBatchedUnLockedEnv,
         MockSerialEnv,
@@ -170,6 +173,7 @@
         EnvWithMetadata,
         HeterogeneousCountingEnv,
         HeterogeneousCountingEnvPolicy,
+        HistoryTransform,
         MockBatchedLockedEnv,
         MockBatchedUnLockedEnv,
         MockSerialEnv,
@@ -3629,8 +3633,11 @@ def test_serial(self, bwad, use_buffers):
     def test_parallel(self, bwad, use_buffers):
         N = 50
         env = ParallelEnv(2, EnvWithMetadata, use_buffers=use_buffers)
-        r = env.rollout(N, break_when_any_done=bwad)
-        assert r.get("non_tensor").tolist() == [list(range(N))] * 2
+        try:
+            r = env.rollout(N, break_when_any_done=bwad)
+            assert r.get("non_tensor").tolist() == [list(range(N))] * 2
+        finally:
+            # Close the env even when the rollout or the assertion fails.
+            env.close(raise_if_closed=False)
 
     class AddString(Transform):
         def __init__(self):
@@ -3662,19 +3669,22 @@ def test_partial_reset(self, batched):
                 env = ParallelEnv(2, [env0, env1], mp_start_method=mp_ctx)
             else:
                 env = SerialEnv(2, [env0, env1])
-            s = env.reset()
-            i = 0
-            for i in range(10):  # noqa: B007
-                s, s_ = env.step_and_maybe_reset(
-                    s.set("action", torch.ones(2, 1, dtype=torch.int))
-                )
-                if s.get(("next", "done")).any():
-                    break
-                s = s_
-            assert i == 5
-            assert (s["next", "done"] == torch.tensor([[True], [False]])).all()
-            assert s_["string"] == ["0", "6"]
-            assert s["next", "string"] == ["6", "6"]
+            try:
+                s = env.reset()
+                i = 0
+                for i in range(10):  # noqa: B007
+                    s, s_ = env.step_and_maybe_reset(
+                        s.set("action", torch.ones(2, 1, dtype=torch.int))
+                    )
+                    if s.get(("next", "done")).any():
+                        break
+                    s = s_
+                assert i == 5
+                assert (s["next", "done"] == torch.tensor([[True], [False]])).all()
+                assert s_["string"] == ["0", "6"]
+                assert s["next", "string"] == ["6", "6"]
+            finally:
+                env.close(raise_if_closed=False)
 
     @pytest.mark.skipif(not _has_transformers, reason="transformers required")
     def test_str2str_env_tokenizer(self):
@@ -4182,6 +4192,124 @@ def test_serial_partial_step_and_maybe_reset(self, use_buffers, device, env_devi
             assert (td[3].get("next") != 0).any()
 
 
+class TestEnvWithHistory:
+    """Tests for envs wrapped with ``HistoryTransform``.
+
+    Covers the plain env, a variant that also uses ``ConditionalSkip``,
+    batched (serial / parallel) versions of both, and data collection.
+    """
+
+    @pytest.fixture(autouse=True, scope="class")
+    def set_capture(self):
+        """Run the class's tests with both context managers set to ``False``."""
+        with set_capture_non_tensor_stack(False), set_auto_unwrap_transformed_env(
+            False
+        ):
+            yield
+
+    def _make_env(self, device, max_steps=10):
+        """Return a ``CountingEnv`` with a ``HistoryTransform`` appended."""
+        return CountingEnv(device=device, max_steps=max_steps).append_transform(
+            HistoryTransform()
+        )
+
+    def _make_skipping_env(self, device, max_steps=10):
+        """Return the history env with a ``ConditionalSkip`` and a ``StepCounter``."""
+        env = self._make_env(device=device, max_steps=max_steps)
+        # skip every 3 steps
+        env = env.append_transform(
+            ConditionalSkip(lambda td: ((td["step_count"] % 3) == 2))
+        )
+        env = TransformedEnv(env, StepCounter())
+        return env
+
+    @pytest.mark.parametrize("device", [None, "cpu"])
+    def test_env_history_base(self, device):
+        env = self._make_env(device)
+        env.check_env_specs()
+
+    @pytest.mark.parametrize("device", [None, "cpu"])
+    def test_skipping_history_env(self, device):
+        env = self._make_skipping_env(device)
+        env.check_env_specs()
+        # Only checks that a long rollout runs; the result is not inspected.
+        env.rollout(100)
+
+    @pytest.mark.parametrize("device_env", [None, "cpu"])
+    @pytest.mark.parametrize("device", [None, "cpu"])
+    @pytest.mark.parametrize("batch_cls", [SerialEnv, "parallel"])
+    @pytest.mark.parametrize("consolidate", [False, True])
+    def test_env_history_base_batched(
+        self, device, device_env, batch_cls, maybe_fork_ParallelEnv, consolidate
+    ):
+        if batch_cls == "parallel":
+            batch_cls = maybe_fork_ParallelEnv
+        env = batch_cls(
+            2,
+            lambda: self._make_env(device_env),
+            device=device,
+            consolidate=consolidate,
+        )
+        try:
+            assert not env._use_buffers
+            env.check_env_specs(break_when_any_done="both")
+        finally:
+            env.close(raise_if_closed=False)
+
+    @pytest.mark.parametrize("device_env", [None, "cpu"])
+    @pytest.mark.parametrize("device", [None, "cpu"])
+    @pytest.mark.parametrize("batch_cls", [SerialEnv, "parallel"])
+    @pytest.mark.parametrize("consolidate", [False, True])
+    def test_skipping_history_env_batched(
+        self, device, device_env, batch_cls, maybe_fork_ParallelEnv, consolidate
+    ):
+        if batch_cls == "parallel":
+            batch_cls = maybe_fork_ParallelEnv
+        env = batch_cls(
+            2,
+            lambda: self._make_skipping_env(device_env),
+            device=device,
+            consolidate=consolidate,
+        )
+        try:
+            env.check_env_specs()
+        finally:
+            env.close(raise_if_closed=False)
+
+    @pytest.mark.parametrize("device_env", [None, "cpu"])
+    @pytest.mark.parametrize("collector_cls", [SyncDataCollector])
+    def test_env_history_base_collector(self, device_env, collector_cls):
+        env = self._make_env(device_env)
+        collector = collector_cls(
+            env, RandomPolicy(env.full_action_spec), total_frames=35, frames_per_batch=5
+        )
+        try:
+            for d in collector:
+                # Consecutive frames must agree: the root history of frame
+                # i + 1 starts like the "next" history of frame i.
+                for i in range(d.shape[0] - 1):
+                    assert (
+                        d[i + 1]["history"].content[0]
+                        == d[i]["next", "history"].content[0]
+                    )
+        finally:
+            # Release the collector even when an assertion fails.
+            collector.shutdown()
+
+    @pytest.mark.parametrize("device_env", [None, "cpu"])
+    @pytest.mark.parametrize("collector_cls", [SyncDataCollector])
+    def test_skipping_history_env_collector(self, device_env, collector_cls):
+        env = self._make_skipping_env(device_env, max_steps=10)
+        collector = collector_cls(
+            env,
+            lambda td: td.update(env.full_action_spec.one()),
+            total_frames=35,
+            frames_per_batch=5,
+        )
+        # NOTE(review): `count` appears to mirror the step counter that drives
+        # the ConditionalSkip predicate; the extra increment after each batch
+        # presumably accounts for index 0, which the inner loop never visits.
+        count = 1
+        try:
+            for d in collector:
+                for k in range(1, 5):
+                    if len(d[k]["history"].content) == 2:
+                        # A two-item history is what a reset produces.
+                        count = 1
+                        continue
+                    if count % 3 == 2:
+                        # Skipped step: the history must be left untouched.
+                        assert (
+                            d[k]["next", "history"].content
+                            == d[k - 1]["next", "history"].content
+                        ), (d["next", "history"].content, k, count)
+                    else:
+                        # Regular step: the last content is incremented by one.
+                        assert d[k]["next", "history"].content[-1] == str(
+                            int(d[k - 1]["next", "history"].content[-1]) + 1
+                        ), (d["next", "history"].content, k, count)
+                    count += 1
+                count += 1
+        finally:
+            # Release the collector even when an assertion fails.
+            collector.shutdown()
+
+
 if __name__ == "__main__":
     args, unknown = argparse.ArgumentParser().parse_known_args()
     pytest.main([__file__, "--capture", "no", "--exitfirst"] + unknown)
@@ -3912,6 +3912,13 @@ def test_example_data_ineq(self):
         nts1 = NonTensor(shape=(3, 4), example_data="example_data 2")
         assert nts0 != nts1
 
+    def test_device_cast(self):
+        """A ``NonTensor`` set in a cpu ``Composite`` must report the cpu device."""
+        expected = torch.device("cpu")
+        comp = Composite(device="cpu")
+        # The leaf is created first without a device, then with an explicit one.
+        for leaf_device in (None, "cpu"):
+            comp["nontensor"] = NonTensor(device=leaf_device)
+            assert comp["nontensor"].device == expected
+
 
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="not cuda device")
 def test_device_ordinal():
 
@@ -162,14 +162,20 @@ def erase():
 def _check_for_faulty_process(processes):
     terminate = False
     for p in processes:
-        if not p.is_alive():
+        if not p._closed and not p.is_alive():
             terminate = True
             for _p in processes:
-                if _p.is_alive():
-                    _p.terminate()
-                    _p.close()
-        if terminate:
-            break
+                _p: mp.Process
+                if not _p._closed and _p.is_alive():
+                    try:
+                        _p.terminate()
+                    except Exception:
+                        _p.kill()
+                    finally:
+                        time.sleep(0.1)
+                        _p.close()
+            if terminate:
+                break
     if terminate:
         raise RuntimeError(
             "At least one process failed. Check for more infos in the log."
 
@@ -1057,7 +1057,7 @@ def cuda_check(tensor: torch.Tensor):
                 # This may be a bit dangerous as `torch.device("cuda")` may not have a precise
                 # device associated, whereas `tensor.device` always has
                 for spec in self.env.specs.values(True, True):
-                    if spec.device.type == "cuda":
+                    if spec.device is not None and spec.device.type == "cuda":
                         if ":" not in str(spec.device):
                             raise RuntimeError(
                                 "A cuda spec did not have a device associated. Make sure to "