Commit 873779a

[Feature] RandomCropTensorDict transform (#908)
1 parent 705f70f commit 873779a

File tree: 9 files changed, +253 -16 lines changed

docs/source/reference/data.rst
Lines changed: 16 additions & 0 deletions

@@ -55,6 +55,22 @@ The following mean sampling latency improvements over using ListStorage were found
 | :class:`LazyMemmapStorage`    | 3.44x     |
 +-------------------------------+-----------+

+Storing trajectories
+~~~~~~~~~~~~~~~~~~~~
+
+It is not too difficult to store trajectories in the replay buffer.
+One element to pay attention to is that the size of the replay buffer is always
+the size of the leading dimension of the storage: in other words, creating a
+replay buffer with a storage of size 1M when storing multidimensional data
+does not mean storing 1M frames but 1M trajectories.
+
+When sampling trajectories, it may be desirable to sample sub-trajectories
+to diversify learning or make the sampling more efficient.
+To do this, we provide a custom :class:`torchrl.envs.Transform` class named
+:class:`torchrl.envs.RandomCropTensorDict`. Here is an example of how this class
+can be used:
+
+  >>>

 TensorSpec
 ----------
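The doctest above is left as a bare ">>>" placeholder in this commit. A minimal usage sketch, inferred from the tests added to test/test_transforms.py further down rather than from the documentation itself (the argument order, crop length first and then the batch dimension to crop along, and the key names are assumptions):

import torch
from tensordict import TensorDict
from torchrl.envs import RandomCropTensorDict

# a batch of 3 trajectories, each 20 steps long (assumed layout)
data = TensorDict(
    {"obs": torch.randn(3, 20, 5), "action": torch.randn(3, 20, 2)},
    batch_size=[3, 20],
)
# crop random sub-trajectories of length 11 along the last batch dimension,
# mirroring RandomCropTensorDict(11, -1) in test_crop_dim1
crop = RandomCropTensorDict(11, -1)
cropped = crop(data)
print(cropped.shape)  # expected: torch.Size([3, 11])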

docs/source/reference/envs.rst
Lines changed: 1 addition & 0 deletions

@@ -270,6 +270,7 @@ to be able to create this other composition:
     ObservationTransform
     PinMemoryTransform
     R3MTransform
+    RandomCropTensorDict
     Resize
     RewardClipping
     RewardScaling

test/test_rb.py
Lines changed: 14 additions & 11 deletions

@@ -382,7 +382,8 @@ def test_prototype_prb(priority_key, contiguous, device):


 @pytest.mark.parametrize("stack", [False, True])
-def test_replay_buffer_trajectories(stack):
+@pytest.mark.parametrize("reduction", ["min", "max", "median", "mean"])
+def test_replay_buffer_trajectories(stack, reduction):
     traj_td = TensorDict(
         {"obs": torch.randn(3, 4, 5), "actions": torch.randn(3, 4, 2)},
         batch_size=[3, 4],
@@ -395,22 +396,23 @@ def test_replay_buffer_trajectories(stack):
             5,
             alpha=0.7,
             beta=0.9,
+            reduction=reduction,
         ),
         priority_key="td_error",
     )
     rb.extend(traj_td)
     sampled_td = rb.sample(3)
-    sampled_td.set("td_error", torch.rand(3))
+    sampled_td.set("td_error", torch.rand(sampled_td.shape))
     rb.update_tensordict_priority(sampled_td)
     sampled_td = rb.sample(3, include_info=True)
     assert (sampled_td.get("_weight") > 0).all()
-    assert sampled_td.batch_size == torch.Size([3])
+    assert sampled_td.batch_size == torch.Size([3, 4])

-    # set back the trajectory length
-    sampled_td_filtered = sampled_td.to_tensordict().exclude(
-        "_weight", "index", "td_error"
-    )
-    sampled_td_filtered.batch_size = [3, 4]
+    # # set back the trajectory length
+    # sampled_td_filtered = sampled_td.to_tensordict().exclude(
+    #     "_weight", "index", "td_error"
+    # )
+    # sampled_td_filtered.batch_size = [3, 4]


 @pytest.mark.parametrize(
@@ -660,7 +662,8 @@ def test_prb(priority_key, contiguous, device):


 @pytest.mark.parametrize("stack", [False, True])
-def test_rb_trajectories(stack):
+@pytest.mark.parametrize("reduction", ["min", "max", "mean", "median"])
+def test_rb_trajectories(stack, reduction):
     traj_td = TensorDict(
         {"obs": torch.randn(3, 4, 5), "actions": torch.randn(3, 4, 2)},
         batch_size=[3, 4],
@@ -676,11 +679,11 @@ def test_rb_trajectories(stack):
     )
     rb.extend(traj_td)
     sampled_td = rb.sample(3)
-    sampled_td.set("td_error", torch.rand(3))
+    sampled_td.set("td_error", torch.rand(3, 4))
     rb.update_tensordict_priority(sampled_td)
     sampled_td = rb.sample(3, include_info=True)
     assert (sampled_td.get("_weight") > 0).all()
-    assert sampled_td.batch_size == torch.Size([3])
+    assert sampled_td.batch_size == torch.Size([3, 4])

     # set back the trajectory length
     sampled_td_filtered = sampled_td.to_tensordict().exclude(

test/test_transforms.py
Lines changed: 62 additions & 0 deletions

@@ -56,6 +56,7 @@
     ParallelEnv,
     PinMemoryTransform,
     R3MTransform,
+    RandomCropTensorDict,
     Resize,
     RewardClipping,
     RewardScaling,
@@ -6168,6 +6169,67 @@ def test_clone_parent_compose(transform):
     assert env.transform[1].parent.base_env is base_env1


+class TestCroSeq:
+    def test_crop_dim1(self):
+        tensordict = TensorDict(
+            {
+                "a": torch.arange(20).view(1, 1, 1, 20).expand(3, 4, 2, 20),
+                "b": TensorDict(
+                    {"c": torch.arange(20).view(1, 1, 1, 20, 1).expand(3, 4, 2, 20, 1)},
+                    [3, 4, 2, 20, 1],
+                ),
+            },
+            [3, 4, 2, 20],
+        )
+        t = RandomCropTensorDict(11, -1)
+        tensordict_crop = t(tensordict)
+        assert tensordict_crop.shape == torch.Size([3, 4, 2, 11])
+        assert tensordict_crop["b"].shape == torch.Size([3, 4, 2, 11, 1])
+        assert (
+            tensordict_crop["a"][:, :, :, :-1] + 1 == tensordict_crop["a"][:, :, :, 1:]
+        ).all()
+
+    def test_crop_dim2(self):
+        tensordict = TensorDict(
+            {"a": torch.arange(20).view(1, 1, 20, 1).expand(3, 4, 20, 2)},
+            [3, 4, 20, 2],
+        )
+        t = RandomCropTensorDict(11, -2)
+        tensordict_crop = t(tensordict)
+        assert tensordict_crop.shape == torch.Size([3, 4, 11, 2])
+        assert (
+            tensordict_crop["a"][:, :, :-1] + 1 == tensordict_crop["a"][:, :, 1:]
+        ).all()
+
+    def test_crop_error(self):
+        tensordict = TensorDict(
+            {"a": torch.arange(20).view(1, 1, 20, 1).expand(3, 4, 20, 2)},
+            [3, 4, 20, 2],
+        )
+        t = RandomCropTensorDict(21, -2)
+        with pytest.raises(RuntimeError, match="Cannot sample trajectories of length"):
+            _ = t(tensordict)
+
+    @pytest.mark.parametrize("mask_key", ("mask", ("collector", "mask")))
+    def test_crop_mask(self, mask_key):
+        a = torch.arange(20).view(1, 1, 20, 1).expand(3, 4, 20, 2).clone()
+        mask = a < 21
+        mask[0] = a[0] < 15
+        mask[1] = a[1] < 16
+        mask[2] = a[2] < 14
+        tensordict = TensorDict(
+            {"a": a, mask_key: mask},
+            [3, 4, 20, 2],
+        )
+        t = RandomCropTensorDict(15, -2, mask_key=mask_key)
+        with pytest.raises(RuntimeError, match="Cannot sample trajectories of length"):
+            _ = t(tensordict)
+        t = RandomCropTensorDict(13, -2, mask_key=mask_key)
+        tensordict_crop = t(tensordict)
+        assert tensordict_crop.shape == torch.Size([3, 4, 13, 2])
+        assert tensordict_crop[mask_key].all()
+
+
 if __name__ == "__main__":
     args, unknown = argparse.ArgumentParser().parse_known_args()
     pytest.main([__file__, "--capture", "no", "--exitfirst"] + unknown)
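A short note on the numbers in test_crop_mask, since they are easy to misread: the mask marks which steps of each trajectory are valid, so the three rows above allow sub-trajectories of at most 15, 16 and 14 steps. A crop can only succeed if it fits the shortest row, which is why length 15 is expected to raise while 13 passes. A hedged sketch of that arithmetic (a reading of the test, not a statement about the transform's internals):

# valid steps per trajectory row in test_crop_mask, after the mask assignments
valid_lengths = [15, 16, 14]
max_crop = min(valid_lengths)  # 14: the longest crop that fits every row
assert 15 > max_crop   # RandomCropTensorDict(15, ...) is expected to raise
assert 13 <= max_crop  # RandomCropTensorDict(13, ...) is expected to succeed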

torchrl/data/replay_buffers/replay_buffers.py
Lines changed: 54 additions & 3 deletions

@@ -10,6 +10,7 @@

 import torch
 from tensordict.tensordict import LazyStackedTensorDict, TensorDictBase
+from tensordict.utils import expand_right

 from torchrl.data.utils import DEVICE_TYPING

@@ -351,7 +352,11 @@ def _get_priority(self, tensordict: TensorDictBase) -> Optional[torch.Tensor]:
             tensordict = tensordict.clone(recurse=False)
             tensordict.batch_size = []
         try:
-            priority = tensordict.get(self.priority_key).item()
+            priority = tensordict.get(self.priority_key)
+            if priority.numel() > 1:
+                priority = _reduce(priority, self._sampler.reduction)
+            else:
+                priority = priority.item()
         except ValueError:
             raise ValueError(
                 f"Found a priority key of size"
@@ -378,7 +383,16 @@ def extend(self, tensordicts: Union[List, TensorDictBase]) -> torch.Tensor:
             tensordicts = tensordicts.clone(recurse=False)
         else:
             tensordicts = tensordicts.contiguous()
+        # we keep track of the batch size to reinstantiate it when sampling
+        if "_batch_size" in tensordicts.keys():
+            raise KeyError(
+                "conflicting key '_batch_size'. Consider removing from data."
+            )
+        shape = torch.tensor(tensordicts.batch_size[1:]).expand(
+            tensordicts.batch_size[0], tensordicts.batch_dims - 1
+        )
         tensordicts.batch_size = tensordicts.batch_size[:1]
+        tensordicts.set("_batch_size", shape)
         tensordicts.set(
             "index",
             torch.zeros(
@@ -406,7 +420,13 @@ def update_tensordict_priority(self, data: TensorDictBase) -> None:
             dtype=torch.float,
             device=data.device,
         )
-        self.update_priority(data.get("index"), priority)
+        # if the index shape does not match the priority shape, we have expanded it.
+        # we just take the first value
+        index = data.get("index")
+        while index.shape != priority.shape:
+            # reduce index
+            index = index[..., 0]
+        self.update_priority(index, priority)

     def sample(
         self, batch_size: int, include_info: bool = False, return_info: bool = False
@@ -429,6 +449,18 @@ def sample(
         if include_info:
             for k, v in info.items():
                 data.set(k, torch.tensor(v, device=data.device), inplace=True)
+        if "_batch_size" in data.keys():
+            # we need to reset the batch-size
+            shape = data.pop("_batch_size")
+            shape = shape[0]
+            shape = torch.Size([data.shape[0], *shape])
+            # we may need to update some values in the data
+            for key, value in data.items():
+                if value.ndim >= len(shape):
+                    continue
+                value = expand_right(value, shape)
+                data.set(key, value)
+            data.batch_size = shape
         if return_info:
             return data, info
         return data
@@ -462,6 +494,9 @@ class TensorDictPrioritizedReplayBuffer(TensorDictReplayBuffer):
             using multithreading.
         transform (Transform, optional): Transform to be executed when sample() is called.
             To chain transforms use the :obj:`Compose` class.
+        reduction (str, optional): the reduction method for multidimensional
+            tensordicts (i.e. stored trajectories). Can be one of "max", "min",
+            "median" or "mean".
     """

     def __init__(
@@ -475,10 +510,13 @@ def __init__(
         pin_memory: bool = False,
         prefetch: Optional[int] = None,
         transform: Optional["Transform"] = None,  # noqa-F821
+        reduction: Optional[str] = "max",
     ) -> None:
         if storage is None:
             storage = ListStorage(max_size=1_000)
-        sampler = PrioritizedSampler(storage.max_size, alpha, beta, eps)
+        sampler = PrioritizedSampler(
+            storage.max_size, alpha, beta, eps, reduction=reduction
+        )
         super(TensorDictPrioritizedReplayBuffer, self).__init__(
             priority_key=priority_key,
             storage=storage,
@@ -539,3 +577,16 @@ def __call__(self, list_of_tds):
         else:
             torch.stack(list_of_tds, 0, out=self.out)
         return self.out
+
+
+def _reduce(tensor: torch.Tensor, reduction: str):
+    """Reduces a tensor given the reduction method."""
+    if reduction == "max":
+        return tensor.max().item()
+    elif reduction == "min":
+        return tensor.min().item()
+    elif reduction == "mean":
+        return tensor.mean().item()
+    elif reduction == "median":
+        return tensor.median().item()
+    raise NotImplementedError(f"Unknown reduction method {reduction}")
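To make the new reduction argument concrete, here is a hedged sketch modeled on the test_rb_trajectories changes above. The calls and arguments visible in this diff (alpha, beta, priority_key, reduction, extend, sample, update_tensordict_priority, the ListStorage fallback) are taken from the commit; everything else, including keyword-only construction, is an assumption:

import torch
from tensordict import TensorDict
from torchrl.data import TensorDictPrioritizedReplayBuffer

# per-trajectory priority = mean of the per-step "td_error" values
rb = TensorDictPrioritizedReplayBuffer(
    alpha=0.7,
    beta=0.9,
    priority_key="td_error",
    reduction="mean",
)
# three trajectories of four steps each: the buffer holds 3 items, not 12 frames
traj = TensorDict(
    {"obs": torch.randn(3, 4, 5), "actions": torch.randn(3, 4, 2)},
    batch_size=[3, 4],
)
rb.extend(traj)
sampled = rb.sample(3)  # the stored batch size [3, 4] should be restored here
sampled.set("td_error", torch.rand(sampled.shape))
rb.update_tensordict_priority(sampled)  # multi-element priorities reduced via "mean"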

torchrl/data/replay_buffers/samplers.py
Lines changed: 7 additions & 2 deletions

@@ -132,8 +132,11 @@ class PrioritizedSampler(Sampler):
         alpha (float): exponent α determines how much prioritization is used,
             with α = 0 corresponding to the uniform case.
         beta (float): importance sampling negative exponent.
-        eps (float): delta added to the priorities to ensure that the buffer
-            does not contain null priorities.
+        eps (float, optional): delta added to the priorities to ensure that the buffer
+            does not contain null priorities. Defaults to 1e-8.
+        reduction (str, optional): the reduction method for multidimensional
+            tensordicts (i.e. stored trajectories). Can be one of "max", "min",
+            "median" or "mean".

     """

@@ -144,6 +147,7 @@ def __init__(
         beta: float,
         eps: float = 1e-8,
         dtype: torch.dtype = torch.float,
+        reduction: str = "max",
     ) -> None:
         if alpha <= 0:
             raise ValueError(
@@ -156,6 +160,7 @@ def __init__(
         self._alpha = alpha
         self._beta = beta
         self._eps = eps
+        self.reduction = reduction
         if dtype in (torch.float, torch.FloatType, torch.float32):
             self._sum_tree = SumSegmentTreeFp32(self._max_capacity)
             self._min_tree = MinSegmentTreeFp32(self._max_capacity)

torchrl/envs/__init__.py
Lines changed: 1 addition & 0 deletions

@@ -26,6 +26,7 @@
     ObservationTransform,
     PinMemoryTransform,
     R3MTransform,
+    RandomCropTensorDict,
     Resize,
     RewardClipping,
     RewardScaling,

torchrl/envs/transforms/__init__.py
Lines changed: 1 addition & 0 deletions

@@ -22,6 +22,7 @@
     ObservationNorm,
     ObservationTransform,
     PinMemoryTransform,
+    RandomCropTensorDict,
     Resize,
     RewardClipping,
     RewardScaling,
