
Commit 6633611

Author: Vincent Moens (committed)
Update (base update)
[ghstack-poisoned]
1 parent 256a700 commit 6633611

File tree

4 files changed: +193 −16 lines


torchrl/data/tensor_specs.py

Lines changed: 40 additions & 8 deletions
@@ -2457,6 +2457,7 @@ def __init__(
         shape: Union[torch.Size, int] = _DEFAULT_SHAPE,
         device: Optional[DEVICE_TYPING] = None,
         dtype: torch.dtype | None = None,
+        example_data: Any = None,
         **kwargs,
     ):
         if isinstance(shape, int):
@@ -2467,6 +2468,7 @@ def __init__(
         super().__init__(
             shape=shape, space=None, device=device, dtype=dtype, domain=domain, **kwargs
         )
+        self.example_data = example_data

     def cardinality(self) -> Any:
         raise RuntimeError("Cannot enumerate a NonTensorSpec.")
@@ -2485,30 +2487,46 @@ def to(self, dest: Union[torch.dtype, DEVICE_TYPING]) -> NonTensor:
         dest_device = torch.device(dest)
         if dest_device == self.device and dest_dtype == self.dtype:
             return self
-        return self.__class__(shape=self.shape, device=dest_device, dtype=None)
+        return self.__class__(
+            shape=self.shape,
+            device=dest_device,
+            dtype=None,
+            example_data=self.example_data,
+        )

     def clone(self) -> NonTensor:
-        return self.__class__(shape=self.shape, device=self.device, dtype=self.dtype)
+        return self.__class__(
+            shape=self.shape,
+            device=self.device,
+            dtype=self.dtype,
+            example_data=self.example_data,
+        )

     def rand(self, shape=None):
         if shape is None:
             shape = ()
         return NonTensorData(
-            data=None, batch_size=(*shape, *self._safe_shape), device=self.device
+            data=self.example_data,
+            batch_size=(*shape, *self._safe_shape),
+            device=self.device,
         )

     def zero(self, shape=None):
         if shape is None:
             shape = ()
         return NonTensorData(
-            data=None, batch_size=(*shape, *self._safe_shape), device=self.device
+            data=self.example_data,
+            batch_size=(*shape, *self._safe_shape),
+            device=self.device,
         )

     def one(self, shape=None):
         if shape is None:
             shape = ()
         return NonTensorData(
-            data=None, batch_size=(*shape, *self._safe_shape), device=self.device
+            data=self.example_data,
+            batch_size=(*shape, *self._safe_shape),
+            device=self.device,
         )

     def is_in(self, val: Any) -> bool:
@@ -2533,23 +2551,36 @@ def expand(self, *shape):
             raise ValueError(
                 f"The last elements of the expanded shape must match the current one. Got shape={shape} while self.shape={self.shape}."
             )
-        return self.__class__(shape=shape, device=self.device, dtype=None)
+        return self.__class__(
+            shape=shape, device=self.device, dtype=None, example_data=self.example_data
+        )

     def _reshape(self, shape):
-        return self.__class__(shape=shape, device=self.device, dtype=self.dtype)
+        return self.__class__(
+            shape=shape,
+            device=self.device,
+            dtype=self.dtype,
+            example_data=self.example_data,
+        )

     def _unflatten(self, dim, sizes):
         shape = torch.zeros(self.shape, device="meta").unflatten(dim, sizes).shape
         return self.__class__(
             shape=shape,
             device=self.device,
             dtype=self.dtype,
+            example_data=self.example_data,
         )

     def __getitem__(self, idx: SHAPE_INDEX_TYPING):
         """Indexes the current TensorSpec based on the provided index."""
         indexed_shape = _size(_shape_indexing(self.shape, idx))
-        return self.__class__(shape=indexed_shape, device=self.device, dtype=self.dtype)
+        return self.__class__(
+            shape=indexed_shape,
+            device=self.device,
+            dtype=self.dtype,
+            example_data=self.example_data,
+        )

     def unbind(self, dim: int = 0):
         orig_dim = dim
@@ -2565,6 +2596,7 @@ def unbind(self, dim: int = 0):
                 shape=shape,
                 device=self.device,
                 dtype=self.dtype,
+                example_data=self.example_data,
             )
             for i in range(self.shape[dim])
         )
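For context, a minimal sketch of what the new example_data field enables, assuming the spec class is exposed as torchrl.data.NonTensor (the "a string" payload is purely illustrative):

from torchrl.data import NonTensor

# example_data rides along with the spec so that rand()/zero()/one() can
# return a meaningful payload instead of None.
spec = NonTensor(shape=(3,), example_data="a string")
sample = spec.rand()  # NonTensorData(data="a string", batch_size=[3])
assert sample.data == "a string"

# The diff above threads example_data through every method that rebuilds the spec:
assert spec.clone().example_data == "a string"
assert spec.expand(2, 3).example_data == "a string"
assert spec[0].example_data == "a string"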

torchrl/envs/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -94,6 +94,7 @@
     TargetReturn,
     TensorDictPrimer,
     TimeMaxPool,
+    Tokenizer,
     ToTensorImage,
     TrajCounter,
     Transform,

torchrl/envs/transforms/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -55,6 +55,7 @@
    TargetReturn,
    TensorDictPrimer,
    TimeMaxPool,
+    Tokenizer,
    ToTensorImage,
    TrajCounter,
    Transform,
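With these two re-exports in place, the new transform should resolve from either namespace; a quick sanity check, assuming this commit is installed:

from torchrl.envs import Tokenizer
from torchrl.envs.transforms import Tokenizer as TokenizerAlias

assert Tokenizer is TokenizerAlias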

torchrl/envs/transforms/transforms.py

Lines changed: 151 additions & 8 deletions
@@ -795,6 +795,17 @@ def input_spec(self) -> TensorSpec:
         input_spec = self.__dict__.get("_input_spec", None)
         return input_spec

+    def rand_action(self, tensordict: Optional[TensorDictBase] = None) -> TensorDict:
+        if self.base_env.rand_action is not EnvBase.rand_action:
+            # TODO: this will fail if the transform modifies the input.
+            # For instance, if PendulumEnv overrides rand_action and we build an
+            # env = PendulumEnv().append_transform(ActionDiscretizer(num_intervals=4)),
+            # env.rand_action will NOT produce a discrete action!
+            # Getting a discrete action would require coding the inverse transform of an action within
+            # ActionDiscretizer (i.e., float->int, not int->float).
+            return self.base_env.rand_action(tensordict)
+        return super().rand_action(tensordict)
+
     def _step(self, tensordict: TensorDictBase) -> TensorDictBase:
         # No need to clone here because inv does it already
         # tensordict = tensordict.clone(False)
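A hedged companion sketch of the dispatch rule above: the override is only meaningful when the wrapped env's class actually redefines rand_action. Note that comparing through the class, as below, avoids pitting a bound method against a plain function (the helper name is hypothetical):

from torchrl.envs import EnvBase

def has_custom_rand_action(env: EnvBase) -> bool:
    # True when env's class overrides EnvBase.rand_action, i.e. when a
    # TransformedEnv should defer to the base env's sampler.
    return type(env).rand_action is not EnvBase.rand_action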
@@ -4415,10 +4426,12 @@ class UnaryTransform(Transform):
     Args:
         in_keys (sequence of NestedKey): the keys of inputs to the unary operation.
         out_keys (sequence of NestedKey): the keys of the outputs of the unary operation.
-        fn (Callable): the function to use as the unary operation. If it accepts
-            a non-tensor input, it must also accept ``None``.
+        in_keys_inv (sequence of NestedKey, optional): the keys of inputs to the unary operation during the inverse call.
+        out_keys_inv (sequence of NestedKey, optional): the keys of the outputs of the unary operation during the inverse call.

     Keyword Args:
+        fn (Callable): the function to use as the unary operation. If it accepts
+            a non-tensor input, it must also accept ``None``.
         use_raw_nontensor (bool, optional): if ``False``, data is extracted from
             :class:`~tensordict.NonTensorData`/:class:`~tensordict.NonTensorStack` inputs before ``fn`` is called
             on them. If ``True``, the raw :class:`~tensordict.NonTensorData`/:class:`~tensordict.NonTensorStack`
@@ -4489,11 +4502,18 @@ def __init__(
         self,
         in_keys: Sequence[NestedKey],
         out_keys: Sequence[NestedKey],
-        fn: Callable,
+        in_keys_inv: Sequence[NestedKey] | None = None,
+        out_keys_inv: Sequence[NestedKey] | None = None,
         *,
+        fn: Callable,
         use_raw_nontensor: bool = False,
     ):
-        super().__init__(in_keys=in_keys, out_keys=out_keys)
+        super().__init__(
+            in_keys=in_keys,
+            out_keys=out_keys,
+            in_keys_inv=in_keys_inv,
+            out_keys_inv=out_keys_inv,
+        )
         self._fn = fn
         self._use_raw_nontensor = use_raw_nontensor

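A small usage sketch of the reworked signature, where fn is now keyword-only and inverse keys are supported (the key names are hypothetical; assumes UnaryTransform is exported from torchrl.envs.transforms):

from torchrl.envs.transforms import UnaryTransform

t = UnaryTransform(
    in_keys=["observation"],        # forward path: applied on step/reset outputs
    out_keys=["observation_str"],
    in_keys_inv=["action_str"],     # inverse path: applied on inputs via inv
    out_keys_inv=["action"],
    fn=str,                         # accepts non-tensor input, and also None
)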
@@ -4508,13 +4528,49 @@ def _apply_transform(self, value):
             value = value.tolist()
         return self._fn(value)

+    def _inv_apply_transform(self, state: torch.Tensor) -> torch.Tensor:
+        if not self._use_raw_nontensor:
+            if isinstance(state, NonTensorData):
+                if state.dim() == 0:
+                    state = state.get("data")
+                else:
+                    state = state.tolist()
+            elif isinstance(state, NonTensorStack):
+                state = state.tolist()
+        return self._fn(state)
+
     def _reset(
         self, tensordict: TensorDictBase, tensordict_reset: TensorDictBase
     ) -> TensorDictBase:
         with _set_missing_tolerance(self, True):
             tensordict_reset = self._call(tensordict_reset)
         return tensordict_reset

+    def transform_input_spec(self, input_spec: Composite) -> Composite:
+        input_spec = input_spec.clone()
+
+        # Make a generic input from the spec, call the transform with that
+        # input, and then generate the output spec from the output.
+        zero_input_ = input_spec.zero()
+        test_input = zero_input_["full_action_spec"].update(
+            zero_input_["full_state_spec"]
+        )
+        test_output = self.inv(test_input)
+        test_input_spec = make_composite_from_td(
+            test_output, unsqueeze_null_shapes=False
+        )
+
+        input_spec["full_action_spec"] = self.transform_action_spec(
+            input_spec["full_action_spec"],
+            test_input_spec,
+        )
+        if "full_state_spec" in input_spec.keys():
+            input_spec["full_state_spec"] = self.transform_state_spec(
+                input_spec["full_state_spec"],
+                test_input_spec,
+            )
+        return input_spec
+
     def transform_output_spec(self, output_spec: Composite) -> Composite:
         output_spec = output_spec.clone()

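The probing above relies on make_composite_from_td to turn the tensordict produced by self.inv back into a spec. A hand-rolled, simplified equivalent of that inference step (flat keys only; Composite and Unbounded are assumed to be the spec aliases in torchrl.data):

import torch
from tensordict import TensorDict
from torchrl.data import Composite, Unbounded

def composite_from_td(td: TensorDict) -> Composite:
    # Infer a spec from a probe tensordict's dtypes and shapes. The real
    # helper also handles nesting, devices, and non-tensor entries.
    return Composite(
        {key: Unbounded(shape=val.shape, dtype=val.dtype) for key, val in td.items()},
        shape=td.batch_size,
    )

probe = TensorDict({"action": torch.zeros(4, dtype=torch.int64)}, batch_size=[])
spec = composite_from_td(probe)  # Composite with an Unbounded "action" entry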
@@ -4575,19 +4631,31 @@ def transform_done_spec(
     ) -> TensorSpec:
         return self._transform_spec(done_spec, test_output_spec)

+    def transform_action_spec(
+        self, action_spec: TensorSpec, test_input_spec: TensorSpec
+    ) -> TensorSpec:
+        return self._transform_spec(action_spec, test_input_spec)
+
+    def transform_state_spec(
+        self, state_spec: TensorSpec, test_input_spec: TensorSpec
+    ) -> TensorSpec:
+        return self._transform_spec(state_spec, test_input_spec)
+

 class Hash(UnaryTransform):
     r"""Adds a hash value to a tensordict.

     Args:
         in_keys (sequence of NestedKey): the keys of the values to hash.
         out_keys (sequence of NestedKey): the keys of the resulting hashes.
+        in_keys_inv (sequence of NestedKey, optional): the keys of the values to hash during the inverse call.
+        out_keys_inv (sequence of NestedKey, optional): the keys of the resulting hashes during the inverse call.
+
+    Keyword Args:
         hash_fn (Callable, optional): the hash function to use. If ``seed`` is given,
             the hash function must accept it as its second argument. Default is
             ``Hash.reproducible_hash``.
         seed (optional): seed to use for the hash function, if it requires one.
-
-    Keyword Args:
         use_raw_nontensor (bool, optional): if ``False``, data is extracted from
             :class:`~tensordict.NonTensorData`/:class:`~tensordict.NonTensorStack` inputs before ``fn`` is called
             on them. If ``True``, the raw :class:`~tensordict.NonTensorData`/:class:`~tensordict.NonTensorStack`
@@ -4673,9 +4741,11 @@ def __init__(
         self,
         in_keys: Sequence[NestedKey],
         out_keys: Sequence[NestedKey],
+        in_keys_inv: Sequence[NestedKey] | None = None,
+        out_keys_inv: Sequence[NestedKey] | None = None,
+        *,
         hash_fn: Callable = None,
         seed: Any | None = None,
-        *,
         use_raw_nontensor: bool = False,
     ):
         if hash_fn is None:
@@ -4686,6 +4756,8 @@ def __init__(
         super().__init__(
             in_keys=in_keys,
             out_keys=out_keys,
+            in_keys_inv=in_keys_inv,
+            out_keys_inv=out_keys_inv,
             fn=self.call_hash_fn,
             use_raw_nontensor=use_raw_nontensor,
         )
@@ -4714,7 +4786,7 @@ def reproducible_hash(cls, string, seed=None):
         if seed is not None:
             seeded_string = seed + string
         else:
-            seeded_string = string
+            seeded_string = str(string)

         # Create a new SHA-256 hash object
         hash_object = hashlib.sha256()
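The switch to str(string) means non-string payloads (e.g. a list coming off a NonTensorStack) no longer break the unseeded path. A standalone re-implementation of the classmethod's logic, for illustration only:

import hashlib
import torch

def reproducible_hash(string, seed=None):
    # Mirrors Hash.reproducible_hash as patched above: SHA-256 of the
    # (optionally seeded) string, returned as a 32-byte uint8 tensor.
    seeded_string = seed + string if seed is not None else str(string)
    hash_object = hashlib.sha256()
    hash_object.update(seeded_string.encode("utf-8"))
    return torch.frombuffer(bytearray(hash_object.digest()), dtype=torch.uint8)

h = reproducible_hash("hello")
assert h.shape == (32,)
assert torch.equal(h, reproducible_hash("hello"))  # deterministic across calls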
@@ -4728,6 +4800,77 @@ def reproducible_hash(cls, string, seed=None):
         return torch.frombuffer(hash_bytes, dtype=torch.uint8)


+class Tokenizer(UnaryTransform):
+    r"""Applies a tokenization operation on the specified inputs.
+
+    Args:
+        in_keys (sequence of NestedKey): the keys of inputs to the tokenization operation.
+        out_keys (sequence of NestedKey): the keys of the outputs of the tokenization operation.
+        in_keys_inv (sequence of NestedKey, optional): the keys of inputs to the tokenization operation during the inverse call.
+        out_keys_inv (sequence of NestedKey, optional): the keys of the outputs of the tokenization operation during the inverse call.
+
+    Keyword Args:
+        tokenizer (transformers.PretrainedTokenizerBase or str, optional): the tokenizer to use. If ``None``,
+            "bert-base-uncased" will be used by default. If a string is provided, it should be the name of a
+            pre-trained tokenizer.
+        use_raw_nontensor (bool, optional): if ``False``, data is extracted from
+            :class:`~tensordict.NonTensorData`/:class:`~tensordict.NonTensorStack` inputs before the tokenization
+            function is called on them. If ``True``, the raw :class:`~tensordict.NonTensorData`/:class:`~tensordict.NonTensorStack`
+            inputs are given directly to the tokenization function, which must support those inputs. Default is ``False``.
+        additional_tokens (List[str], optional): list of additional tokens to add to the tokenizer's vocabulary.
+    """
+
+    def __init__(
+        self,
+        in_keys: Sequence[NestedKey],
+        out_keys: Sequence[NestedKey],
+        in_keys_inv: Sequence[NestedKey] | None = None,
+        out_keys_inv: Sequence[NestedKey] | None = None,
+        *,
+        tokenizer: "transformers.PretrainedTokenizerBase" = None,  # noqa: F821
+        use_raw_nontensor: bool = False,
+        additional_tokens: List[str] | None = None,
+    ):
+        if tokenizer is None:
+            from transformers import AutoTokenizer
+
+            tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
+        elif isinstance(tokenizer, str):
+            from transformers import AutoTokenizer
+
+            tokenizer = AutoTokenizer.from_pretrained(tokenizer)
+
+        self.tokenizer = tokenizer
+        if additional_tokens:
+            self.tokenizer.add_tokens(additional_tokens)
+        super().__init__(
+            in_keys=in_keys,
+            out_keys=out_keys,
+            in_keys_inv=in_keys_inv,
+            out_keys_inv=out_keys_inv,
+            fn=self.call_tokenizer_fn,
+            use_raw_nontensor=use_raw_nontensor,
+        )
+
+    @property
+    def device(self):
+        if "_device" in self.__dict__:
+            return self._device
+        parent = self.parent
+        if parent is None:
+            return None
+        device = parent.device
+        self._device = device
+        return device
+
+    def call_tokenizer_fn(self, value: str | List[str]):
+        device = self.device
+        out = self.tokenizer.encode(value, return_tensors="pt")
+        if device is not None and out.device != device:
+            out = out.to(device)
+        return out
+
+
 class Stack(Transform):
     """Stacks tensors and tensordicts.

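Finally, a hedged usage sketch of the new transform (requires the transformers package and downloads bert-base-uncased on first use; the key names are illustrative):

from torchrl.envs import Tokenizer

tok = Tokenizer(in_keys=["text"], out_keys=["tokens"])

# call_tokenizer_fn is the fn wired into UnaryTransform above; unattached to
# an env, the device property returns None and the ids stay on CPU.
ids = tok.call_tokenizer_fn("the quick brown fox")
print(ids.shape)  # torch.Size([1, seq_len]) of token ids

# Typical use is appending it to an env so string entries get tokenized:
# env = base_env.append_transform(Tokenizer(in_keys=["text"], out_keys=["tokens"]))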