pytorch
diff --git a/‎.circleci/unittest/linux_examples/scripts/run_test.sh
Lines changed: 88 additions & 0 deletions b/‎.circleci/unittest/linux_examples/scripts/run_test.sh
Lines changed: 88 additions & 0 deletions
diff --git a/‎docs/source/conf.py
Lines changed: 9 additions & 8 deletions b/‎docs/source/conf.py
Lines changed: 9 additions & 8 deletions
diff --git a/‎docs/source/reference/envs.rst
Lines changed: 50 additions & 9 deletions b/‎docs/source/reference/envs.rst
Lines changed: 50 additions & 9 deletions
diff --git a/‎examples/a2c/config.yaml
Lines changed: 1 addition & 0 deletions b/‎examples/a2c/config.yaml
Lines changed: 1 addition & 0 deletions
diff --git a/‎examples/ddpg/ddpg.py
Lines changed: 7 additions & 14 deletions b/‎examples/ddpg/ddpg.py
Lines changed: 7 additions & 14 deletions
diff --git a/‎examples/dqn/dqn.py
Lines changed: 9 additions & 18 deletions b/‎examples/dqn/dqn.py
Lines changed: 9 additions & 18 deletions
diff --git a/‎examples/dreamer/dreamer_utils.py
Lines changed: 12 additions & 8 deletions b/‎examples/dreamer/dreamer_utils.py
Lines changed: 12 additions & 8 deletions
diff --git a/‎examples/ppo/ppo.py
Lines changed: 7 additions & 14 deletions b/‎examples/ppo/ppo.py
Lines changed: 7 additions & 14 deletions
@@ -26,6 +26,93 @@ export MKL_THREADING_LAYER=GNU
 
 python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test.py -v --durations 20
 python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test_deps.py -v --durations 20
+
+# With batched environments
+python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py \
+  total_frames=48 \
+  init_random_frames=10 \
+  batch_size=10 \
+  frames_per_batch=16 \
+  num_workers=4 \
+  env_per_collector=2 \
+  collector_devices=cuda:0 \
+  optim_steps_per_batch=1 \
+  record_video=True \
+  record_frames=4 \
+  buffer_size=120
+python .circleci/unittest/helpers/coverage_run_parallel.py examples/a2c/a2c.py \
+  total_frames=48 \
+  batch_size=10 \
+  frames_per_batch=16 \
+  num_workers=4 \
+  env_per_collector=2 \
+  collector_devices=cuda:0 \
+  optim_steps_per_batch=1 \
+  record_video=True \
+  record_frames=4 \
+  logger=csv
+python .circleci/unittest/helpers/coverage_run_parallel.py examples/dqn/dqn.py \
+  total_frames=48 \
+  init_random_frames=10 \
+  batch_size=10 \
+  frames_per_batch=16 \
+  num_workers=4 \
+  env_per_collector=2 \
+  collector_devices=cuda:0 \
+  optim_steps_per_batch=1 \
+  record_video=True \
+  record_frames=4 \
+  buffer_size=120
+python .circleci/unittest/helpers/coverage_run_parallel.py examples/redq/redq.py \
+  total_frames=48 \
+  init_random_frames=10 \
+  batch_size=10 \
+  frames_per_batch=16 \
+  num_workers=4 \
+  env_per_collector=2 \
+  collector_devices=cuda:0 \
+  optim_steps_per_batch=1 \
+  record_video=True \
+  record_frames=4 \
+  buffer_size=120
+python .circleci/unittest/helpers/coverage_run_parallel.py examples/sac/sac.py \
+  total_frames=48 \
+  init_random_frames=10 \
+  batch_size=10 \
+  frames_per_batch=16 \
+  num_workers=4 \
+  env_per_collector=2 \
+  collector_devices=cuda:0 \
+  optim_steps_per_batch=1 \
+  record_video=True \
+  record_frames=4 \
+  buffer_size=120
+python .circleci/unittest/helpers/coverage_run_parallel.py examples/ppo/ppo.py \
+  total_frames=48 \
+  batch_size=10 \
+  frames_per_batch=16 \
+  num_workers=4 \
+  env_per_collector=2 \
+  collector_devices=cuda:0 \
+  optim_steps_per_batch=1 \
+  record_video=True \
+  record_frames=4 \
+  lr_scheduler=
+python .circleci/unittest/helpers/coverage_run_parallel.py examples/dreamer/dreamer.py \
+  total_frames=48 \
+  init_random_frames=10 \
+  batch_size=10 \
+  frames_per_batch=200 \
+  num_workers=4 \
+  env_per_collector=2 \
+  collector_devices=cuda:0 \
+  optim_steps_per_batch=1 \
+  record_video=True \
+  record_frames=4 \
+  buffer_size=120 \
+  rssm_hidden_dim=17
+
+# With single envs
 python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py \
   total_frames=48 \
   init_random_frames=10 \
@@ -109,5 +196,6 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/dreamer/drea
   record_frames=4 \
   buffer_size=120 \
   rssm_hidden_dim=17
+
 coverage combine
 coverage xml -i
@@ -71,6 +71,15 @@
     "myst_parser",
 ]
 
+intersphinx_mapping = {
+    "torch": ("https://pytorch.org/docs/stable/", None),
+    "tensordict": ("https://pytorch-labs.github.io/tensordict/", None),
+    "torchrl": ("https://pytorch.org/rl/", None),
+    "torchaudio": ("https://pytorch.org/audio/stable/", None),
+    "torchtext": ("https://pytorch.org/text/stable/", None),
+    "torchvision": ("https://pytorch.org/vision/stable/", None),
+}
+
 sphinx_gallery_conf = {
     "examples_dirs": "reference/generated/tutorials/",  # path to your example scripts
     "gallery_dirs": "tutorials",  # path to where to save gallery generated output
@@ -162,14 +171,6 @@
 ]
 
 
-# Example configuration for intersphinx: refer to the Python standard library.
-intersphinx_mapping = {
-    "python": ("https://docs.python.org/3/", None),
-    "torch": ("https://pytorch.org/docs/stable/", None),
-    "numpy": ("https://numpy.org/doc/stable/", None),
-}
-
-
 aafig_default_options = {"scale": 1.5, "aspect": 1.0, "proportional": True}
 
 # -- Generate knowledge base references -----------------------------------
 
@@ -9,8 +9,8 @@ The goal is to be able to swap environments in an experiment with little or no e
 even if these environments are simulated using different libraries.
 TorchRL offers some out-of-the-box environment wrappers under :obj:`torchrl.envs.libs`,
 which we hope can be easily imitated for other libraries.
-The parent class :obj:`EnvBase` is a :obj:`torch.nn.Module` subclass that implements
-some typical environment methods using :obj:`TensorDict` as a data organiser. This allows this
+The parent class :class:`torchrl.envs.EnvBase` is a :class:`torch.nn.Module` subclass that implements
+some typical environment methods using :class:`tensordict.TensorDict` as a data organiser. This allows this
 class to be generic and to handle an arbitrary number of input and outputs, as well as
 nested or batched data structures.
 
@@ -25,10 +25,10 @@ Each env will have the following attributes:
   This is especially useful for transforms (see below). For parametric environments (e.g.
   model-based environments), the device does represent the hardware that will be used to
   compute the operations.
-- :obj:`env.observation_spec`: a :obj:`CompositeSpec` object containing all the observation key-spec pairs.
-- :obj:`env.input_spec`: a :obj:`CompositeSpec` object containing all the input keys (:obj:`"action"` and others).
-- :obj:`env.action_spec`: a :obj:`TensorSpec` object representing the action spec.
-- :obj:`env.reward_spec`: a :obj:`TensorSpec` object representing the reward spec.
+- :obj:`env.observation_spec`: a :class:`torchrl.data.CompositeSpec` object containing all the observation key-spec pairs.
+- :obj:`env.input_spec`: a :class:`torchrl.data.CompositeSpec` object containing all the input keys (:obj:`"action"` and others).
+- :obj:`env.action_spec`: a :class:`torchrl.data.TensorSpec` object representing the action spec.
+- :obj:`env.reward_spec`: a :class:`torchrl.data.TensorSpec` object representing the reward spec.
 
 Importantly, the environment spec shapes should *not* contain the batch size, e.g.
 an environment with :obj:`env.batch_size == torch.Size([4])` should not have
@@ -38,9 +38,9 @@ an :obj:`env.action_spec` with shape :obj:`torch.Size([4, action_size])` but sim
 With these, the following methods are implemented:
 
 - :obj:`env.reset(tensordict)`: a reset method that may (but not necessarily requires to) take
-  a :obj:`TensorDict` input. It return the first tensordict of a rollout, usually
+  a :class:`tensordict.TensorDict` input. It returns the first tensordict of a rollout, usually
   containing a :obj:`"done"` state and a set of observations.
-- :obj:`env.step(tensordict)`: a step method that takes a :obj:`TensorDict` input
+- :obj:`env.step(tensordict)`: a step method that takes a :class:`tensordict.TensorDict` input
   containing an input action as well as other inputs (for model-based or stateless
   environments, for instance).
 - :obj:`env.set_seed(integer)`: a seeding method that will return the next seed
@@ -51,7 +51,7 @@ With these, the following methods are implemented:
 - :obj:`env.rollout(max_steps, policy)`: executes a rollout in the environment for
   a maximum number of steps :obj:`max_steps` and using a policy :obj:`policy`.
   The policy should be coded using a :obj:`SafeModule` (or any other
-  :obj:`TensorDict`-compatible module).
+  :class:`tensordict.TensorDict`-compatible module).
 
 
 .. autosummary::
@@ -204,6 +204,47 @@ in the environment. The keys to be included in this inverse transform are passed
 
         >>> env.append_transform(DoubleToFloat(in_keys_inv=["action"]))  # will map the action from float32 to float64 before calling the base_env.step
 
+Cloning transforms
+~~~~~~~~~~~~~~~~~~
+
+Because transforms appended to an environment are "registered" to this environment
+through the ``transform.parent`` property, when manipulating transforms we should keep
+in mind that the parent may come and go following what is being done with the transform.
+Here are some examples: if we get a single transform from a :class:`Compose` object,
+this transform will keep its parent:
+
+  >>> third_transform = env.transform[2]
+  >>> assert third_transform.parent is not None
+
+This means that using this transform for another environment is prohibited, as
+the other environment would replace the parent and this may lead to unexpected
+behaviours. Fortunately, the :class:`Transform` class comes with a :func:`clone`
+method that will erase the parent while keeping the identity of all the
+registered buffers:
+
+  >>> TransformedEnv(base_env, third_transform)  # raises an Exception as third_transform already has a parent
+  >>> TransformedEnv(base_env, third_transform.clone())  # works
+
+On a single process or if the buffers are placed in shared memory, this will
+result in all the cloned transforms keeping the same behaviour even if the
+buffers are changed in place (which is what will happen with the :class:`CatFrames`
+transform, for instance). In distributed settings, this may not hold and one
+should be careful about the expected behaviour of the cloned transforms in this
+context.
+Finally, notice that indexing multiple transforms from a :class:`Compose` transform
+may also result in loss of parenthood for these transforms: the reason is that
+indexing a :class:`Compose` transform results in another :class:`Compose` transform
+that does not have a parent environment. Hence, we have to clone the sub-transforms
+to be able to create this other composition:
+
+  >>> env = TransformedEnv(base_env, Compose(transform1, transform2, transform3))
+  >>> last_two = env.transform[-2:]
+  >>> assert isinstance(last_two, Compose)
+  >>> assert last_two.parent is None
+  >>> assert last_two[0] is not transform2
+  >>> assert isinstance(last_two[0], type(transform2))  # and the buffers will match
+  >>> assert last_two[1] is not transform3
+  >>> assert isinstance(last_two[1], type(transform3))  # and the buffers will match
 
 .. autosummary::
     :toctree: generated/
 
@@ -2,6 +2,7 @@
 env_library: gym  # env_library used for the simulated environment.
 env_name: HalfCheetah-v4  # name of the environment to be created. Default=Humanoid-v2
 frame_skip: 2  # frame_skip for the environment.
+batch_transform: True
 
 # Logger
 logger: wandb  # recorder type to be used. One of 'tensorboard', 'wandb' or 'csv'
 
@@ -159,23 +159,16 @@ def main(cfg: "DictConfig"):  # noqa: F821
         logger=logger,
         use_env_creator=False,
     )()
-
-    # remove video recorder from recorder to have matching state_dict keys
-    if cfg.record_video:
-        recorder_rm = TransformedEnv(recorder.base_env)
-        for transform in recorder.transform:
-            if not isinstance(transform, VideoRecorder):
-                recorder_rm.append_transform(transform.clone())
-    else:
-        recorder_rm = recorder
-
     if isinstance(create_env_fn, ParallelEnv):
-        recorder_rm.load_state_dict(create_env_fn.state_dict()["worker0"])
-        create_env_fn.close()
+        raise NotImplementedError("This behaviour is deprecated")
     elif isinstance(create_env_fn, EnvCreator):
-        recorder_rm.load_state_dict(create_env_fn().state_dict())
+        recorder.transform[1:].load_state_dict(create_env_fn().transform.state_dict())
+    elif isinstance(create_env_fn, TransformedEnv):
+        recorder.transform = create_env_fn.transform.clone()
     else:
-        recorder_rm.load_state_dict(create_env_fn.state_dict())
+        raise NotImplementedError(f"Unsupported env type {type(create_env_fn)}")
+    if logger is not None and video_tag:
+        recorder.insert_transform(0, VideoRecorder(logger=logger, tag=video_tag))
 
     # reset reward scaling
     for t in recorder.transform:
 
@@ -112,10 +112,6 @@ def main(cfg: "DictConfig"):  # noqa: F821
         make_env=create_env_fn,
         actor_model_explore=model_explore,
         cfg=cfg,
-        # make_env_kwargs=[
-        #     {"device": device} if device >= 0 else {}
-        #     for device in args.env_rendering_devices
-        # ],
     )
 
     replay_buffer = make_replay_buffer(device, cfg)
@@ -126,24 +122,19 @@ def main(cfg: "DictConfig"):  # noqa: F821
         norm_obs_only=True,
         obs_norm_state_dict=obs_norm_state_dict,
         logger=logger,
+        use_env_creator=False,
     )()
-
-    # remove video recorder from recorder to have matching state_dict keys
-    if cfg.record_video:
-        recorder_rm = TransformedEnv(recorder.base_env)
-        for transform in recorder.transform:
-            if not isinstance(transform, VideoRecorder):
-                recorder_rm.append_transform(transform.clone())
-    else:
-        recorder_rm = recorder
-
     if isinstance(create_env_fn, ParallelEnv):
-        recorder_rm.load_state_dict(create_env_fn.state_dict()["worker0"])
-        create_env_fn.close()
+        raise NotImplementedError("This behaviour is deprecated")
     elif isinstance(create_env_fn, EnvCreator):
-        recorder_rm.load_state_dict(create_env_fn().state_dict())
+        recorder.transform[1:].load_state_dict(create_env_fn().transform.state_dict())
+    elif isinstance(create_env_fn, TransformedEnv):
+        recorder.transform = create_env_fn.transform.clone()
     else:
-        recorder_rm.load_state_dict(create_env_fn.state_dict())
+        raise NotImplementedError(f"Unsupported env type {type(create_env_fn)}")
+    if logger is not None and video_tag:
+        recorder.insert_transform(0, VideoRecorder(logger=logger, tag=video_tag))
+
     # reset reward scaling
     for t in recorder.transform:
         if isinstance(t, RewardScaling):
 
@@ -91,17 +91,19 @@ def make_env_transforms(
             env.append_transform(GrayScale())
         env.append_transform(FlattenObservation(0, -3, allow_positive_dim=True))
         env.append_transform(CatFrames(N=cfg.catframes, in_keys=["pixels"], dim=-3))
-        if stats is None:
+        if stats is None and obs_norm_state_dict is None:
             obs_stats = {
-                "loc": torch.zeros(env.observation_spec["pixels"].shape),
-                "scale": torch.ones(env.observation_spec["pixels"].shape),
+                "loc": torch.zeros(()),
+                "scale": torch.ones(()),
             }
+        elif stats is None and obs_norm_state_dict is not None:
+            obs_stats = obs_norm_state_dict
         else:
             obs_stats = stats
         obs_stats["standard_normal"] = True
         obs_norm = ObservationNorm(**obs_stats, in_keys=["pixels"])
-        if obs_norm_state_dict:
-            obs_norm.load_state_dict(obs_norm_state_dict)
+        # if obs_norm_state_dict:
+        #     obs_norm.load_state_dict(obs_norm_state_dict)
         env.append_transform(obs_norm)
     if norm_rewards:
         reward_scaling = 1.0
@@ -125,8 +127,10 @@ def make_env_transforms(
     )
 
     default_dict = {
-        "state": UnboundedContinuousTensorSpec(cfg.state_dim),
-        "belief": UnboundedContinuousTensorSpec(cfg.rssm_hidden_dim),
+        "state": UnboundedContinuousTensorSpec(shape=(*env.batch_size, cfg.state_dim)),
+        "belief": UnboundedContinuousTensorSpec(
+            shape=(*env.batch_size, cfg.rssm_hidden_dim)
+        ),
     }
     env.append_transform(
         TensorDictPrimer(random=False, default_value=0, **default_dict)
@@ -417,6 +421,6 @@ class EnvConfig:
     # Disables grayscale transform.
     max_frames_per_traj: int = 1000
     # Number of steps before a reset of the environment is called (if it has not been flagged as done before).
-    batch_transform: bool = False
+    batch_transform: bool = True
     # if True, the transforms will be applied to the parallel env, and not to each individual env.\
     image_size: int = 84
@@ -132,23 +132,16 @@ def main(cfg: "DictConfig"):  # noqa: F821
         logger=logger,
         use_env_creator=False,
     )()
-
-    # remove video recorder from recorder to have matching state_dict keys
-    if cfg.record_video:
-        recorder_rm = TransformedEnv(recorder.base_env)
-        for transform in recorder.transform:
-            if not isinstance(transform, VideoRecorder):
-                recorder_rm.append_transform(transform.clone())
-    else:
-        recorder_rm = recorder
-
     if isinstance(create_env_fn, ParallelEnv):
-        recorder_rm.load_state_dict(create_env_fn.state_dict()["worker0"])
-        create_env_fn.close()
+        raise NotImplementedError("This behaviour is deprecated")
     elif isinstance(create_env_fn, EnvCreator):
-        recorder_rm.load_state_dict(create_env_fn().state_dict())
+        recorder.transform[1:].load_state_dict(create_env_fn().transform.state_dict())
+    elif isinstance(create_env_fn, TransformedEnv):
+        recorder.transform = create_env_fn.transform.clone()
     else:
-        recorder_rm.load_state_dict(create_env_fn.state_dict())
+        raise NotImplementedError(f"Unsupported env type {type(create_env_fn)}")
+    if logger is not None and video_tag:
+        recorder.insert_transform(0, VideoRecorder(logger=logger, tag=video_tag))
 
     # reset reward scaling
     for t in recorder.transform: