# -*- coding: utf-8 -*-
"""
Using pretrained models
=======================
This tutorial explains how to use pretrained models in TorchRL.
"""
##############################################################################
# At the end of this tutorial, you will be capable of using pretrained models
# for efficient image representation, and of fine-tuning them.
#
# TorchRL provides pretrained models that can be used either as transforms or as
# components of the policy. As the semantics are the same, they can be used
# interchangeably in one context or the other. In this tutorial, we will be using
# R3M (https://arxiv.org/abs/2203.12601), but other models (e.g. VIP) will work
# equally well.
#
import torch.cuda
from tensordict.nn import TensorDictSequential
from torch import nn
from torchrl.envs import R3MTransform, TransformedEnv
from torchrl.envs.libs.gym import GymEnv
from torchrl.modules import Actor

device = "cuda:0" if torch.cuda.device_count() else "cpu"

##############################################################################
# Let us first create an environment. For the sake of simplicity, we will be using
# a common gym environment. In practice, this will work in more challenging,
# embodied AI contexts (e.g. have a look at our Habitat wrappers).
#
base_env = GymEnv("Ant-v4", from_pixels=True, device=device)

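##############################################################################
# Since we passed from_pixels=True, the observation spec should contain a "pixels"
# entry, which is what the R3M transform will consume below. A quick check (the
# exact spec layout may vary across TorchRL versions):
#
print(base_env.observation_spec)
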
##############################################################################
# Let us fetch our pretrained model. We ask for the pretrained version of the model
# through the download=True flag. By default this is turned off.
# Next, we will append our transform to the environment. In practice, each batch of
# data collected will go through the transform and be mapped onto an "r3m_vec" entry
# in the output tensordict. Our policy, consisting of a small MLP with a single
# hidden layer, will then read this vector and compute the corresponding action.
#
r3m = R3MTransform("resnet50", in_keys=["pixels"], download=True).to(device)
env_transformed = TransformedEnv(base_env, r3m)
net = nn.Sequential(
    nn.LazyLinear(128), nn.Tanh(), nn.Linear(128, base_env.action_spec.shape[-1])
)
policy = Actor(net, in_keys=["r3m_vec"])

##############################################################################
# Let's check the number of parameter tensors of the policy (the lazy layer has
# not been initialized yet, so we count tensors rather than individual elements):
#
print("number of param tensors:", len(list(policy.parameters())))

##############################################################################
# We collect a rollout of 32 steps and print its output:
#
rollout = env_transformed.rollout(32, policy)
print("rollout with transform:", rollout)

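##############################################################################
# As a quick sanity check, we can look at the embedding entry itself. The exact
# feature size depends on the chosen backbone, so the shape printed below is
# indicative rather than guaranteed:
#
print("r3m_vec shape:", rollout["r3m_vec"].shape)
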
##############################################################################
# For fine-tuning, we integrate the transform in the policy after switching it to
# training mode. In practice, it may be wiser to restrict training to a subset of
# the parameters (say the last layer of the MLP); a possible way of doing this is
# sketched after this cell.
#
r3m.train()
policy = TensorDictSequential(r3m, policy)
print("number of param tensors after r3m is integrated:", len(list(policy.parameters())))

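##############################################################################
# As a sketch of the "subset of the parameters" idea mentioned above, one could
# freeze the R3M backbone and only optimize the small policy head. The optimizer
# choice below is an illustrative assumption, not part of the original recipe:
#
from torch.optim import Adam  # illustrative choice of optimizer

# freeze the pretrained backbone
for p in r3m.parameters():
    p.requires_grad_(False)
# optimize only the parameters that remain trainable (the MLP head)
optimizer = Adam([p for p in policy.parameters() if p.requires_grad], lr=3e-4)
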
##############################################################################
# Again, we collect a rollout, this time with R3M as part of the policy. The
# structure of the output has changed slightly: the environment now returns pixels
# (and not an embedding), while the embedding "r3m_vec" is an intermediate result
# of our policy.
#
rollout = base_env.rollout(32, policy)
print("rollout, fine tuning:", rollout)

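##############################################################################
# We can verify this by listing the top-level keys of the rollout. Assuming the
# default key names, it should contain both the raw "pixels" coming from the
# environment and the "r3m_vec" entry written by the policy:
#
print("rollout keys:", sorted(rollout.keys()))
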
##############################################################################
# The ease with which we have swapped the transform from the env to the policy
# is due to the fact that both behave like a TensorDictModule: they have a set of
# `"in_keys"` and `"out_keys"` that make it easy to read and write outputs in
# different contexts.
#
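# For instance, we can inspect how data flows through each of them (the key names
# shown are the defaults used in this tutorial):
#
print("r3m:", r3m.in_keys, "->", r3m.out_keys)
print("policy:", policy.in_keys, "->", policy.out_keys)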