
Commit d28a8c3

vmoens and tcbegley authored
[Refactor] Relying on the standalone tensordict -- phase 1 (#650)
* init
* amend
* amend
* lint and other
* quickfix
* lint
* [Refactor] Relying on the standalone tensordict -- phase 1 updates (#665)
* Install tensordict in GitHub Actions
* Clean up remaining references to torchrl.data.tensordict
* Use in td.keys() for membership checks
* Rerun CI
* Rerun CI
* amend
* amend
* amend
* lint

Co-authored-by: Tom Begley <tomcbegley@gmail.com>
1 parent 278e9be · commit d28a8c3
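For readers skimming the diff, here is a minimal sketch of what this phase-1 migration looks like in downstream code. It is illustrative only and not taken from the diff itself: imports move from `torchrl.data.tensordict` to the standalone `tensordict` package, and key membership is checked against `td.keys()`, as noted in the commit message.

```python
# Hypothetical before/after sketch of the phase-1 migration (not part of this diff).

# Before: TensorDict lived inside torchrl
# from torchrl.data.tensordict import TensorDict

# After: TensorDict comes from the standalone package,
# installed in CI via `pip install git+https://github.com/pytorch-labs/tensordict`
import torch
from tensordict import TensorDict

td = TensorDict({"observation": torch.zeros(4, 3)}, batch_size=[4])

# Membership checks now go through td.keys().
assert "observation" in td.keys()
```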

File tree

87 files changed (+335, -9492 lines)

.circleci/unittest/linux/scripts/install.sh

Lines changed: 3 additions & 0 deletions
@@ -39,5 +39,8 @@ python -c "import functorch"
 # install snapshot
 pip install git+https://github.com/pytorch/torchsnapshot

+# install tensordict
+pip install git+https://github.com/pytorch-labs/tensordict
+
 printf "* Installing torchrl\n"
 python setup.py develop

.circleci/unittest/linux_libs/scripts_habitat/install.sh

Lines changed: 3 additions & 0 deletions
@@ -37,6 +37,9 @@ else
 pip3 install --pre torch --extra-index-url https://download.pytorch.org/whl/nightly/cu116 --force-reinstall
 fi

+# install tensordict
+pip install git+https://github.com/pytorch-labs/tensordict
+
 # smoke test
 python -c "import functorch"

.circleci/unittest/linux_olddeps/scripts_gym_0_13/install.sh

Lines changed: 3 additions & 0 deletions
@@ -41,5 +41,8 @@ else
 conda install pytorch==1.10.0 torchvision==0.11.0 torchaudio==0.10.0 cudatoolkit=11.3 -c pytorch -c conda-forge -y
 fi

+# install tensordict
+pip install git+https://github.com/pytorch-labs/tensordict
+
 printf "* Installing torchrl\n"
 python setup.py develop

.circleci/unittest/linux_optdeps/scripts/install.sh

Lines changed: 3 additions & 0 deletions
@@ -35,6 +35,9 @@ else
 pip3 install --pre torch --extra-index-url https://download.pytorch.org/whl/nightly/cu113
 fi

+# install tensordict
+pip install git+https://github.com/pytorch-labs/tensordict
+
 # smoke test
 python -c "import functorch"

.circleci/unittest/linux_stable/scripts/install.sh

Lines changed: 3 additions & 0 deletions
@@ -33,6 +33,9 @@ else
 pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
 fi

+# install tensordict
+pip install git+https://github.com/pytorch-labs/tensordict
+
 # smoke test
 python -c "import torch;import functorch"

.github/workflows/docs.yml

Lines changed: 3 additions & 0 deletions
@@ -54,6 +54,9 @@ jobs:
       shell: bash
       run: |
         conda run -n build_binary python -m pip install --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
+    - name: Install tensordict
+      run: |
+        python3 -mpip install git+https://github.com/pytorch-labs/tensordict.git
     - name: Install TorchRL
       run: |
         conda run -n build_binary python -m pip install -e .

.github/workflows/nightly_build.yml

Lines changed: 3 additions & 0 deletions
@@ -217,6 +217,9 @@ jobs:
       run: |
         export PATH="/opt/python/${{ matrix.python_version[1] }}/bin:$PATH"
         python3 -mpip install --upgrade pip
+    - name: Install tensordict
+      run: |
+        python3 -mpip install git+https://github.com/pytorch-labs/tensordict.git
     - name: Install test dependencies
       run: |
         export PATH="/opt/python/${{ matrix.python_version[1] }}/bin:$PATH"

.github/workflows/wheels.yml

Lines changed: 3 additions & 0 deletions
@@ -99,6 +99,9 @@ jobs:
     - name: Upgrade pip
       run: |
         python3 -mpip install --upgrade pip
+    - name: Install tensordict
+      run: |
+        python3 -mpip install git+https://github.com/pytorch-labs/tensordict.git
     - name: Install test dependencies
       run: |
         python3 -mpip install numpy pytest pytest-cov codecov unittest-xml-reporting pillow>=4.1.1 scipy av networkx expecttest pyyaml

README.md

Lines changed: 116 additions & 103 deletions
@@ -31,116 +31,129 @@ On the low-level end, torchrl comes with a set of highly re-usable functionals f

 TorchRL aims at (1) a high modularity and (2) good runtime performance.

-## Features
+## TensorDict as a common data carrier for RL

-On the high-level end, TorchRL provides:
-- [`TensorDict`](torchrl/data/tensordict/tensordict.py),
+TorchRL relies on [`TensorDict`](https://github.com/pytorch-labs/tensordict/),
 a convenient data structure<sup>(1)</sup> to pass data from
 one object to another without friction.
 `TensorDict` makes it easy to re-use pieces of code across environments, models and
 algorithms. For instance, here's how to code a rollout in TorchRL:
-<details>
-  <summary>Code</summary>
-
-```diff
-- obs, done = env.reset()
-+ tensordict = env.reset()
-policy = TensorDictModule(
-    model,
-    in_keys=["observation_pixels", "observation_vector"],
-    out_keys=["action"],
-)
-out = []
-for i in range(n_steps):
--     action, log_prob = policy(obs)
--     next_obs, reward, done, info = env.step(action)
--     out.append((obs, next_obs, action, log_prob, reward, done))
--     obs = next_obs
-+     tensordict = policy(tensordict)
-+     tensordict = env.step(tensordict)
-+     out.append(tensordict)
-+     tensordict = step_mdp(tensordict) # renames next_observation_* keys to observation_*
-- obs, next_obs, action, log_prob, reward, done = [torch.stack(vals, 0) for vals in zip(*out)]
-+ out = torch.stack(out, 0) # TensorDict supports multiple tensor operations
-```
-TensorDict abstracts away the input / output signatures of the modules, env, collectors, replay buffers and losses of the library, allowing its primitives
-to be easily recycled across settings.
-Here's another example of an off-policy training loop in TorchRL (assuming that a data collector, a replay buffer, a loss and an optimizer have been instantiated):
-
-```diff
-- for i, (obs, next_obs, action, hidden_state, reward, done) in enumerate(collector):
-+ for i, tensordict in enumerate(collector):
--     replay_buffer.add((obs, next_obs, action, log_prob, reward, done))
-+     replay_buffer.add(tensordict)
-    for j in range(num_optim_steps):
--         obs, next_obs, action, hidden_state, reward, done = replay_buffer.sample(batch_size)
--         loss = loss_fn(obs, next_obs, action, hidden_state, reward, done)
-+         tensordict = replay_buffer.sample(batch_size)
-+         loss = loss_fn(tensordict)
-        loss.backward()
-        optim.step()
-        optim.zero_grad()
-```
-Again, this training loop can be re-used across algorithms as it makes a minimal number of assumptions about the structure of the data.
+<details>
+  <summary>Code</summary>
+
+```diff
+- obs, done = env.reset()
++ tensordict = env.reset()
+policy = TensorDictModule(
+    model,
+    in_keys=["observation_pixels", "observation_vector"],
+    out_keys=["action"],
+)
+out = []
+for i in range(n_steps):
+-     action, log_prob = policy(obs)
+-     next_obs, reward, done, info = env.step(action)
+-     out.append((obs, next_obs, action, log_prob, reward, done))
+-     obs = next_obs
++     tensordict = policy(tensordict)
++     tensordict = env.step(tensordict)
++     out.append(tensordict)
++     tensordict = step_mdp(tensordict) # renames next_observation_* keys to observation_*
+- obs, next_obs, action, log_prob, reward, done = [torch.stack(vals, 0) for vals in zip(*out)]
++ out = torch.stack(out, 0) # TensorDict supports multiple tensor operations
+```
+</details>
+TensorDict abstracts away the input / output signatures of the modules, env, collectors, replay buffers and losses of the library, allowing its primitives
+to be easily recycled across settings.
+Here's another example of an off-policy training loop in TorchRL (assuming that a data collector, a replay buffer, a loss and an optimizer have been instantiated):
+
+<details>
+  <summary>Code</summary>
+
+```diff
+- for i, (obs, next_obs, action, hidden_state, reward, done) in enumerate(collector):
++ for i, tensordict in enumerate(collector):
+-     replay_buffer.add((obs, next_obs, action, log_prob, reward, done))
++     replay_buffer.add(tensordict)
+    for j in range(num_optim_steps):
+-         obs, next_obs, action, hidden_state, reward, done = replay_buffer.sample(batch_size)
+-         loss = loss_fn(obs, next_obs, action, hidden_state, reward, done)
++         tensordict = replay_buffer.sample(batch_size)
++         loss = loss_fn(tensordict)
+        loss.backward()
+        optim.step()
+        optim.zero_grad()
+```
+Again, this training loop can be re-used across algorithms as it makes a minimal number of assumptions about the structure of the data.
+</details>
+
+TensorDict supports multiple tensor operations on its device and shape
+(the shape of TensorDict, or its batch size, is the common arbitrary N first dimensions of all its contained tensors):
+
+<details>
+  <summary>Code</summary>
+
+```python
+# stack and cat
+tensordict = torch.stack(list_of_tensordicts, 0)
+tensordict = torch.cat(list_of_tensordicts, 0)
+# reshape
+tensordict = tensordict.view(-1)
+tensordict = tensordict.permute(0, 2, 1)
+tensordict = tensordict.unsqueeze(-1)
+tensordict = tensordict.squeeze(-1)
+# indexing
+tensordict = tensordict[:2]
+tensordict[:, 2] = sub_tensordict
+# device and memory location
+tensordict.cuda()
+tensordict.to("cuda:1")
+tensordict.share_memory_()
+```
+</details>
+
+Check our TorchRL-specific [TensorDict tutorial](tutorials/tensordict.ipynb) for more information.
+
+The associated [`TensorDictModule` class](torchrl/modules/tensordict_module/common.py) which is [functorch](https://github.com/pytorch/functorch)-compatible!
+
+<details>
+  <summary>Code</summary>
+
+```diff
+transformer_model = nn.Transformer(nhead=16, num_encoder_layers=12)
++ td_module = TensorDictModule(transformer_model, in_keys=["src", "tgt"], out_keys=["out"])
+src = torch.rand((10, 32, 512))
+tgt = torch.rand((20, 32, 512))
++ tensordict = TensorDict({"src": src, "tgt": tgt}, batch_size=[20, 32])
+- out = transformer_model(src, tgt)
++ td_module(tensordict)
++ out = tensordict["out"]
+```
+
+The `TensorDictSequential` class allows to branch sequences of `nn.Module` instances in a highly modular way.
+For instance, here is an implementation of a transformer using the encoder and decoder blocks:
+```python
+encoder_module = TransformerEncoder(...)
+encoder = TensorDictModule(encoder_module, in_keys=["src", "src_mask"], out_keys=["memory"])
+decoder_module = TransformerDecoder(...)
+decoder = TensorDictModule(decoder_module, in_keys=["tgt", "memory"], out_keys=["output"])
+transformer = TensorDictSequential(encoder, decoder)
+assert transformer.in_keys == ["src", "src_mask", "tgt"]
+assert transformer.out_keys == ["memory", "output"]
+```
+
+`TensorDictSequential` allows to isolate subgraphs by querying a set of desired input / output keys:
+```python
+transformer.select_subsequence(out_keys=["memory"]) # returns the encoder
+transformer.select_subsequence(in_keys=["tgt", "memory"]) # returns the decoder
+```
+</details>
+
+The corresponding [tutorial](tutorials/tensordictmodule.ipynb) provides more context about its features.

-TensorDict supports multiple tensor operations on its device and shape
-(the shape of TensorDict, or its batch size, is the common arbitrary N first dimensions of all its contained tensors):
-```python
-# stack and cat
-tensordict = torch.stack(list_of_tensordicts, 0)
-tensordict = torch.cat(list_of_tensordicts, 0)
-# reshape
-tensordict = tensordict.view(-1)
-tensordict = tensordict.permute(0, 2, 1)
-tensordict = tensordict.unsqueeze(-1)
-tensordict = tensordict.squeeze(-1)
-# indexing
-tensordict = tensordict[:2]
-tensordict[:, 2] = sub_tensordict
-# device and memory location
-tensordict.cuda()
-tensordict.to("cuda:1")
-tensordict.share_memory_()
-```
-</details>

-Check our [TensorDict tutorial](tutorials/tensordict.ipynb) for more information.

-- An associated [`TensorDictModule` class](torchrl/modules/tensordict_module/common.py) which is [functorch](https://github.com/pytorch/functorch)-compatible!
-<details>
-  <summary>Code</summary>
-
-```diff
-transformer_model = nn.Transformer(nhead=16, num_encoder_layers=12)
-+ td_module = TensorDictModule(transformer_model, in_keys=["src", "tgt"], out_keys=["out"])
-src = torch.rand((10, 32, 512))
-tgt = torch.rand((20, 32, 512))
-+ tensordict = TensorDict({"src": src, "tgt": tgt}, batch_size=[20, 32])
-- out = transformer_model(src, tgt)
-+ td_module(tensordict)
-+ out = tensordict["out"]
-```
-
-The `TensorDictSequential` class allows to branch sequences of `nn.Module` instances in a highly modular way.
-For instance, here is an implementation of a transformer using the encoder and decoder blocks:
-```python
-encoder_module = TransformerEncoder(...)
-encoder = TensorDictModule(encoder_module, in_keys=["src", "src_mask"], out_keys=["memory"])
-decoder_module = TransformerDecoder(...)
-decoder = TensorDictModule(decoder_module, in_keys=["tgt", "memory"], out_keys=["output"])
-transformer = TensorDictSequential(encoder, decoder)
-assert transformer.in_keys == ["src", "src_mask", "tgt"]
-assert transformer.out_keys == ["memory", "output"]
-```
-
-`TensorDictSequential` allows to isolate subgraphs by querying a set of desired input / output keys:
-```python
-transformer.select_subsequence(out_keys=["memory"]) # returns the encoder
-transformer.select_subsequence(in_keys=["tgt", "memory"]) # returns the decoder
-```
-</details>
-
-The corresponding [tutorial](tutorials/tensordictmodule.ipynb) provides more context about its features.
+## Features

 - a generic [trainer class](torchrl/trainers/trainers.py)<sup>(1)</sup> that
   executes the aforementioned training loop. Through a hooking mechanism,
@@ -242,7 +255,7 @@ algorithms. For instance, here's how to code a rollout in TorchRL:
 ```
 </details>

-- various tools for distributed learning (e.g. [memory mapped tensors](torchrl/data/tensordict/memmap.py))<sup>(2)</sup>;
+- various tools for distributed learning (e.g. [memory mapped tensors](https://github.com/pytorch-labs/tensordict/blob/main/tensordict/memmap.py))<sup>(2)</sup>;
 - various [architectures](torchrl/modules/models/) and models (e.g. [actor-critic](torchrl/modules/tensordict_module/actors.py))<sup>(1)</sup>:
   <details>
     <summary>Code</summary>
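The second README hunk above repoints the memory-mapped tensors link at the standalone tensordict repository. As a rough, hypothetical illustration of that feature (not part of this diff), a TensorDict built with the standalone package can be moved to memory-mapped storage in place:

```python
# Hedged sketch: assumes the standalone tensordict package is installed
# (pip install git+https://github.com/pytorch-labs/tensordict).
import torch
from tensordict import TensorDict

td = TensorDict(
    {"observation": torch.zeros(8, 3), "reward": torch.zeros(8, 1)},
    batch_size=[8],
)

# Convert every entry to a memory-mapped tensor so other processes
# (e.g. distributed data collectors) can read the same buffers without copies.
td.memmap_()
assert td.is_memmap()
```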

benchmarks/storage/benchmark_sample_latency_over_rpc.py

Lines changed: 1 addition & 1 deletion
@@ -18,6 +18,7 @@

 import torch
 import torch.distributed.rpc as rpc
+from tensordict import TensorDict
 from torchrl.data.replay_buffers.rb_prototype import RemoteTensorDictReplayBuffer
 from torchrl.data.replay_buffers.samplers import RandomSampler
 from torchrl.data.replay_buffers.storages import (
@@ -26,7 +27,6 @@
     ListStorage,
 )
 from torchrl.data.replay_buffers.writers import RoundRobinWriter
-from torchrl.data.tensordict import TensorDict

 RETRY_LIMIT = 2
 RETRY_DELAY_SECS = 3
