
Commit c78df1b
Author: Vincent Moens
Parents: 9233c50 + d009835

Update (base update)

[ghstack-poisoned]

File tree: 10 files changed, +483 −93 lines

.github/workflows/nightly_build.yml

Lines changed: 20 additions & 19 deletions
@@ -21,11 +21,6 @@ on:
     branches:
       - "nightly"
 
-env:
-  ACTIONS_RUNNER_FORCED_INTERNAL_NODE_VERSION: node16
-  ACTIONS_RUNNER_FORCE_ACTIONS_NODE_VERSION: node16
-  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true # https://github.com/actions/checkout/issues/1809
-
 concurrency:
   # Documentation suggests ${{ github.head_ref }}, but that's only available on pull_request/pull_request_target triggers, so using ${{ github.ref }}.
   # On master, we want all builds to complete even if merging happens faster to make it easier to discover at which point something broke.
@@ -41,12 +36,15 @@ jobs:
       matrix:
         python_version: [["3.9", "cp39-cp39"], ["3.10", "cp310-cp310"], ["3.11", "cp311-cp311"], ["3.12", "cp312-cp312"]]
         cuda_support: [["", "cpu", "cpu"]]
-    container: pytorch/manylinux-${{ matrix.cuda_support[2] }}
     steps:
       - name: Checkout torchrl
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         env:
           AGENT_TOOLSDIRECTORY: "/opt/hostedtoolcache"
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python_version[0] }}
       - name: Install PyTorch nightly
         run: |
           export PATH="/opt/python/${{ matrix.python_version[1] }}/bin:$PATH"
@@ -67,7 +65,7 @@ jobs:
           python3 -mpip install auditwheel
           auditwheel show dist/*
       - name: Upload wheel for the test-wheel job
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: torchrl-linux-${{ matrix.python_version[0] }}_${{ matrix.cuda_support[2] }}.whl
           path: dist/*.whl
@@ -81,12 +79,15 @@ jobs:
       matrix:
         python_version: [["3.9", "cp39-cp39"], ["3.10", "cp310-cp310"], ["3.11", "cp311-cp311"], ["3.12", "cp312-cp312"]]
         cuda_support: [["", "cpu", "cpu"]]
-    container: pytorch/manylinux-${{ matrix.cuda_support[2] }}
     steps:
       - name: Checkout torchrl
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python_version[0] }}
       - name: Download built wheels
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: torchrl-linux-${{ matrix.python_version[0] }}_${{ matrix.cuda_support[2] }}.whl
           path: /tmp/wheels
@@ -121,7 +122,7 @@ jobs:
         env:
           AGENT_TOOLSDIRECTORY: "/opt/hostedtoolcache"
       - name: Checkout torchrl
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
       - name: Install PyTorch Nightly
         run: |
           export PATH="/opt/python/${{ matrix.python_version[1] }}/bin:$PATH"
@@ -138,7 +139,7 @@ jobs:
           export PATH="/opt/python/${{ matrix.python_version[1] }}/bin:$PATH"
           python3 -mpip install numpy pytest pillow>=4.1.1 scipy networkx expecttest pyyaml
       - name: Download built wheels
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: torchrl-linux-${{ matrix.python_version[0] }}_${{ matrix.cuda_support[2] }}.whl
           path: /tmp/wheels
@@ -179,7 +180,7 @@ jobs:
         with:
           python-version: ${{ matrix.python_version[1] }}
       - name: Checkout torchrl
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
       - name: Install PyTorch nightly
         shell: bash
         run: |
@@ -193,7 +194,7 @@ jobs:
             --package_name torchrl-nightly \
             --python-tag=${{ matrix.python-tag }}
       - name: Upload wheel for the test-wheel job
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: torchrl-win-${{ matrix.python_version[0] }}.whl
           path: dist/*.whl
@@ -212,7 +213,7 @@ jobs:
         with:
           python-version: ${{ matrix.python_version[1] }}
       - name: Checkout torchrl
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
       - name: Install PyTorch Nightly
         shell: bash
         run: |
@@ -229,7 +230,7 @@ jobs:
         run: |
           python3 -mpip install git+https://github.com/pytorch/tensordict.git
       - name: Download built wheels
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: torchrl-win-${{ matrix.python_version[0] }}.whl
           path: wheels
@@ -265,9 +266,9 @@ jobs:
         python_version: [["3.9", "3.9"], ["3.10", "3.10.3"], ["3.11", "3.11"], ["3.12", "3.12"]]
     steps:
       - name: Checkout torchrl
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
       - name: Download built wheels
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: torchrl-win-${{ matrix.python_version[0] }}.whl
           path: wheels

examples/agents/recurrent_actor.py

Lines changed: 205 additions & 0 deletions
@@ -0,0 +1,205 @@ (new file)

# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.


"""
This example shows how to build an actor that uses an RNN backbone.

It is based on snippets from the DQN-with-RNN tutorial.

There are two main APIs to be aware of when using RNNs, and dedicated notes on both can be found at the end
of this example: the `set_recurrent_mode` context manager and the `make_tensordict_primer` method.

"""
from collections import OrderedDict

import torch
from tensordict.nn import TensorDictModule as Mod, TensorDictSequential as Seq
from torch import nn

from torchrl.envs import (
    Compose,
    GrayScale,
    GymEnv,
    InitTracker,
    ObservationNorm,
    Resize,
    RewardScaling,
    StepCounter,
    ToTensorImage,
    TransformedEnv,
)
from torchrl.modules import ConvNet, LSTMModule, MLP, QValueModule, set_recurrent_mode

# Define the device to use for computations (GPU if available, otherwise CPU)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Create a transformed environment based on the CartPole-v1 gym environment
env = TransformedEnv(
    GymEnv("CartPole-v1", from_pixels=True, device=device),
    # Apply a series of transformations to the environment:
    # 1. Convert observations to tensor images
    # 2. Convert images to grayscale
    # 3. Resize images to 84x84 pixels
    # 4. Keep track of the step count
    # 5. Track which steps follow a reset (needed by the LSTM)
    # 6. Scale rewards by a factor of 0.1
    # 7. Normalize observations to zero mean and unit variance (the statistics are adapted dynamically below)
    Compose(
        ToTensorImage(),
        GrayScale(),
        Resize(84, 84),
        StepCounter(),
        InitTracker(),
        RewardScaling(loc=0.0, scale=0.1),
        ObservationNorm(standard_normal=True, in_keys=["pixels"]),
    ),
)

# Initialize the normalization statistics of the ObservationNorm transform
env.transform[-1].init_stats(1000, reduce_dim=[0, 1, 2], cat_dim=0, keep_dims=[0])

# Reset the environment to get an initial observation
td = env.reset()

# Define a feature extractor that takes pixel observations as input
# and outputs an embedding vector
feature = Mod(
    ConvNet(
        num_cells=[32, 32, 64],
        squeeze_output=True,
        aggregator_class=nn.AdaptiveAvgPool2d,
        aggregator_kwargs={"output_size": (1, 1)},
        device=device,
    ),
    in_keys=["pixels"],
    out_keys=["embed"],
)

# Get the size of the embedding vector produced by the feature extractor
with torch.no_grad():
    n_cells = feature(env.reset())["embed"].shape[-1]

# Define an LSTM module that takes the embedding vector as input and outputs
# a new embedding vector
lstm = LSTMModule(
    input_size=n_cells,
    hidden_size=128,
    device=device,
    in_key="embed",
    out_key="embed",
)

# Define a multi-layer perceptron (MLP) that maps the LSTM output
# to action values
mlp = MLP(
    out_features=2,
    num_cells=[64],
    device=device,
)

# Initialize the bias of the last layer of the MLP to zero
mlp[-1].bias.data.fill_(0.0)

# Wrap the MLP in a TensorDictModule to handle input/output keys
mlp = Mod(mlp, in_keys=["embed"], out_keys=["action_value"])

# Define a Q-value module that picks the greedy action from the action values
# and records the corresponding chosen action value
qval = QValueModule(action_space=None, spec=env.action_spec)

# Add a TensorDictPrimer to the environment so that it is aware of the
# supplementary inputs and outputs (the recurrent states) during rollout execution.
# This is necessary when using batched environments or parallel data collection
env.append_transform(lstm.make_tensordict_primer())

# Create a sequential module that combines the feature extractor, LSTM, MLP and Q-value modules
policy = Seq(OrderedDict(feature=feature, lstm=lstm, mlp=mlp, qval=qval))

# Roll out the policy in the environment for up to 100 steps
rollout = env.rollout(100, policy)
print(rollout)

# Example output (here the rollout terminated early, after 10 steps):
#
# TensorDict(
#     fields={
#         action: Tensor(shape=torch.Size([10, 2]), device=cpu, dtype=torch.int64, is_shared=False),
#         action_value: Tensor(shape=torch.Size([10, 2]), device=cpu, dtype=torch.float32, is_shared=False),
#         chosen_action_value: Tensor(shape=torch.Size([10, 1]), device=cpu, dtype=torch.float32, is_shared=False),
#         done: Tensor(shape=torch.Size([10, 1]), device=cpu, dtype=torch.bool, is_shared=False),
#         embed: Tensor(shape=torch.Size([10, 128]), device=cpu, dtype=torch.float32, is_shared=False),
#         is_init: Tensor(shape=torch.Size([10, 1]), device=cpu, dtype=torch.bool, is_shared=False),
#         next: TensorDict(
#             fields={
#                 done: Tensor(shape=torch.Size([10, 1]), device=cpu, dtype=torch.bool, is_shared=False),
#                 is_init: Tensor(shape=torch.Size([10, 1]), device=cpu, dtype=torch.bool, is_shared=False),
#                 pixels: Tensor(shape=torch.Size([10, 1, 84, 84]), device=cpu, dtype=torch.float32, is_shared=False),
#                 recurrent_state_c: Tensor(shape=torch.Size([10, 1, 128]), device=cpu, dtype=torch.float32, is_shared=False),
#                 recurrent_state_h: Tensor(shape=torch.Size([10, 1, 128]), device=cpu, dtype=torch.float32, is_shared=False),
#                 reward: Tensor(shape=torch.Size([10, 1]), device=cpu, dtype=torch.float32, is_shared=False),
#                 step_count: Tensor(shape=torch.Size([10, 1]), device=cpu, dtype=torch.int64, is_shared=False),
#                 terminated: Tensor(shape=torch.Size([10, 1]), device=cpu, dtype=torch.bool, is_shared=False),
#                 truncated: Tensor(shape=torch.Size([10, 1]), device=cpu, dtype=torch.bool, is_shared=False)},
#             batch_size=torch.Size([10]),
#             device=cpu,
#             is_shared=False),
#         pixels: Tensor(shape=torch.Size([10, 1, 84, 84]), device=cpu, dtype=torch.float32, is_shared=False),
#         recurrent_state_c: Tensor(shape=torch.Size([10, 1, 128]), device=cpu, dtype=torch.float32, is_shared=False),
#         recurrent_state_h: Tensor(shape=torch.Size([10, 1, 128]), device=cpu, dtype=torch.float32, is_shared=False),
#         step_count: Tensor(shape=torch.Size([10, 1]), device=cpu, dtype=torch.int64, is_shared=False),
#         terminated: Tensor(shape=torch.Size([10, 1]), device=cpu, dtype=torch.bool, is_shared=False),
#         truncated: Tensor(shape=torch.Size([10, 1]), device=cpu, dtype=torch.bool, is_shared=False)},
#     batch_size=torch.Size([10]),
#     device=cpu,
#     is_shared=False)
#

# Notes:
# 1. make_tensordict_primer
#
# make_tensordict_primer creates a TensorDictPrimer object that makes the environment aware
# of the supplementary inputs and outputs (the recurrent states) during rollout execution.
# This is necessary when using batched environments or parallel data collection, as the recurrent states
# need to be shared across processes and handled properly.
#
# In other words, make_tensordict_primer adds the LSTM's hidden states to the environment's specs,
# allowing the environment to carry the recurrent states through rollouts. Without it, the policy
# would not be able to use the LSTM's memory buffers correctly, leading to poorly defined behaviors,
# especially in parallel settings.
#
# By adding the TensorDictPrimer to the environment, you ensure that the policy can correctly use the
# LSTM's recurrent states, even when running in parallel or batched environments. This is why
# env.append_transform(lstm.make_tensordict_primer()) is called before creating the policy and rolling it
# out in the environment.
#
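# A minimal illustration (using the `lstm` module defined above): the primer is
# an ordinary transform, and printing it shows the recurrent_state_h /
# recurrent_state_c entries it registers in the environment's specs.
primer = lstm.make_tensordict_primer()
print(primer)
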
# 2. Using the LSTM to process multiple steps at once.
#
# When set_recurrent_mode("recurrent") is used, the LSTM processes the entire input tensordict as a sequence,
# using its recurrent connections to maintain state across time steps. This mode may use CuDNN to accelerate
# the processing of the sequence on CUDA devices. The behavior in this mode is akin to torch.nn.LSTM, where
# the LSTM expects the input data to be organized in batches of sequences.
#
# When set_recurrent_mode("sequential") is used, on the other hand, the LSTM processes each step in the input
# tensordict independently, without maintaining any state across time steps. This mode makes the LSTM behave
# like torch.nn.LSTMCell, where each input is treated as a separate, independent element.
#
# In the code below, set_recurrent_mode("recurrent") is used to process a tensordict of shape [T], where T
# is the number of steps. This lets the LSTM use its recurrent connections to maintain state across the entire
# sequence.
#
# In contrast, set_recurrent_mode("sequential") is used to process a single step taken from the tensordict
# (i.e., rollout[0]). In this case, the LSTM does not use its recurrent connections and simply processes the
# step as an independent input.

with set_recurrent_mode("recurrent"):
    # Process a tensordict of shape [T], where T is the number of steps
    print(policy(rollout))

with set_recurrent_mode("sequential"):
    # Process a single step taken from the rollout
    print(policy(rollout[0]))
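A minimal follow-up sketch, using the `policy` and `rollout` objects defined above (the stacked batch is hypothetical, built here only for illustration): recurrent mode also handles batches of trajectories, such as the [B, T]-shaped samples a replay buffer would yield, since the time dimension is expected last.

# Stack two copies of the trajectory into a hypothetical [2, 10] batch.
batch = torch.stack([rollout, rollout.clone()])
with set_recurrent_mode("recurrent"):
    # The LSTM treats the last batch dimension as time and maintains state
    # across each sequence in the batch.
    out = policy(batch.clone())
print(out["chosen_action_value"].shape)  # expected: torch.Size([2, 10, 1])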

sota-implementations/decision_transformer/utils.py

Lines changed: 1 addition & 1 deletion
@@ -109,7 +109,7 @@ def make_transformed_env(base_env, env_cfg, obs_loc, obs_std, train=False):
     )
 
     # copy action from the input tensordict to the output
-    transformed_env.append_transform(TensorDictPrimer(action=base_env.action_spec))
+    transformed_env.append_transform(TensorDictPrimer(base_env.full_action_spec))
 
     transformed_env.append_transform(DoubleToFloat())
     obsnorm = ObservationNorm(
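The change above swaps the keyword form of TensorDictPrimer for the positional form. A minimal sketch of the difference, under the assumption that TensorDictPrimer accepts either key=spec keyword arguments or a composite spec positionally (Pendulum-v1 is used purely for illustration):

from torchrl.envs import GymEnv, TensorDictPrimer

base_env = GymEnv("Pendulum-v1")

# Keyword form: hard-codes the entry name "action", regardless of how the
# action key is actually named or nested in the env's specs.
primer_kw = TensorDictPrimer(action=base_env.action_spec)

# Positional form: full_action_spec is a composite spec that carries its own
# (possibly nested) key names, so they are preserved as-is.
primer_full = TensorDictPrimer(base_env.full_action_spec)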
