
Commit ae63a5d

init
1 parent ed051bc commit ae63a5d

File tree

3 files changed: +53 -14 lines changed


.github/unittest/windows_optdepts/scripts/unittest.sh

Lines changed: 34 additions & 3 deletions
@@ -14,6 +14,12 @@ env_dir="${root_dir}/env"

 cd "${root_dir}"

+echo "=== Starting Windows CI setup ==="
+echo "Current directory: $(pwd)"
+echo "Python version: $PYTHON_VERSION"
+echo "CU_VERSION: $CU_VERSION"
+echo "TORCH_VERSION: $TORCH_VERSION"
+
 eval "$($(which conda) shell.bash hook)" && set -x

 # Create test environment at ./env
@@ -28,11 +34,12 @@ echo $(which python)
 echo $(python --version)
 echo $(conda info -e)

-
+echo "=== Installing test dependencies ==="
 python -m pip install hypothesis future cloudpickle pytest pytest-cov pytest-mock pytest-instafail pytest-rerunfailures expecttest pyyaml scipy coverage

 # =================================== Install =================================================

+echo "=== Installing PyTorch and dependencies ==="

 # TODO, refactor the below logic to make it easy to understand how to get correct cuda_version.
 if [ "${CU_VERSION:-}" == cpu ] ; then
@@ -56,8 +63,8 @@ else
 cudatoolkit="${cuda_toolkit_pckg}=${version}"
 fi

-
 # submodules
+echo "=== Updating git submodules ==="
 git submodule sync && git submodule update --init --recursive
 python -m pip install "numpy<2.0"

@@ -92,6 +99,7 @@ fi
 #python -m pip install pip --upgrade

 # install tensordict
+echo "=== Installing tensordict ==="
 if [[ "$RELEASE" == 0 ]]; then
 conda install anaconda::cmake -y

@@ -103,11 +111,13 @@ else
 fi

 # smoke test
+echo "=== Testing tensordict import ==="
 python -c """
 from tensordict import TensorDict
 print('successfully imported tensordict')
 """

+echo "=== Setting up CUDA environment ==="
 source "$this_dir/set_cuda_envs.sh"

 printf "* Installing torchrl\n"
@@ -117,13 +127,15 @@ whatsinside=$(ls -rtlh ./torchrl)
 echo $whatsinside

 # smoke test
+echo "=== Testing torchrl import ==="
 python -c """
 from torchrl.data import ReplayBuffer
 print('successfully imported torchrl')
 """

 # =================================== Run =================================================

+echo "=== Setting up test environment ==="
 source "$this_dir/set_cuda_envs.sh"

 # we don't use torchsnapshot
@@ -132,5 +144,24 @@ export MAX_IDLE_COUNT=60
 export BATCHED_PIPE_TIMEOUT=60
 export LAZY_LEGACY_OP=False

+echo "=== Collecting environment info ==="
 python -m torch.utils.collect_env
-pytest --junitxml=test-results/junit.xml -v --durations 200 --ignore test/test_distributed.py --ignore test/test_rlhf.py
+
+echo "=== Starting pytest execution ==="
+echo "Current working directory: $(pwd)"
+echo "Python executable: $(which python)"
+echo "Pytest executable: $(which pytest)"
+
+# Create test-results directory if it doesn't exist
+mkdir -p test-results
+
+# Run pytest with explicit error handling
+set +e # Don't exit on error for pytest
+pytest --junitxml=test-results/junit.xml -v --durations 200 --ignore test/test_distributed.py --ignore test/test_rlhf.py --ignore test/llm
+PYTEST_EXIT_CODE=$?
+set -e # Re-enable exit on error
+
+echo "=== Pytest completed with exit code: $PYTEST_EXIT_CODE ==="
+
+# Exit with pytest's exit code
+exit $PYTEST_EXIT_CODE
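The new tail of the script wraps pytest in set +e / set -e so the shell can capture and log pytest's exit code, then still propagate it with an explicit exit instead of aborting immediately under set -e. A minimal Python sketch of the same propagation idea (illustrative only, not part of the commit; it assumes pytest is installed and uses pytest.main(), which returns the CLI exit code):

import os
import sys

import pytest

# Make sure the report directory exists, mirroring `mkdir -p test-results`.
os.makedirs("test-results", exist_ok=True)

# Run the suite with the same junit report and skip list as the CI script.
exit_code = pytest.main(
    [
        "--junitxml=test-results/junit.xml",
        "-v",
        "--ignore=test/test_distributed.py",
        "--ignore=test/test_rlhf.py",
        "--ignore=test/llm",
    ]
)

# Propagate pytest's status so the CI job reports the real test outcome.
sys.exit(int(exit_code))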

torchrl/envs/llm/transforms/reason.py

Lines changed: 0 additions & 1 deletion
@@ -153,7 +153,6 @@ def _step(
         Returns:
             The modified next_tensordict
         """
-        print("Reward", next_tensordict["reward"])
         # Handle batch dimensions
         if next_tensordict.batch_dims >= 1:
             ntds = []

torchrl/envs/transforms/transforms.py

Lines changed: 19 additions & 10 deletions
@@ -11155,6 +11155,9 @@ def __init__(self, out_keys: Sequence[NestedKey] = None, time_key: str = "time")
         self.last_inv_time = None
         self.last_call_time = None
         self.last_reset_time = None
+        self.time_step_key = self.out_keys[0]
+        self.time_policy_key = self.out_keys[1]
+        self.time_reset_key = self.out_keys[2]

     def _reset_env_preprocess(self, tensordict: TensorDictBase) -> TensorDictBase:
         self.last_reset_time = self.last_inv_time = time.time()
@@ -11182,13 +11185,17 @@ def _reset(
         time_elapsed = torch.tensor(
             current_time - self.last_reset_time, device=tensordict.device
         )
-        self._maybe_expand_and_set(self.out_keys[2], time_elapsed, tensordict_reset)
         self._maybe_expand_and_set(
-            self.out_keys[0], time_elapsed * 0, tensordict_reset
+            self.time_reset_key, time_elapsed, tensordict_reset
+        )
+        self._maybe_expand_and_set(
+            self.time_step_key, time_elapsed * 0, tensordict_reset
         )
         self.last_call_time = current_time
         # Placeholder
-        self._maybe_expand_and_set(self.out_keys[1], time_elapsed * 0, tensordict_reset)
+        self._maybe_expand_and_set(
+            self.time_policy_key, time_elapsed * 0, tensordict_reset
+        )
         return tensordict_reset

     def _inv_call(self, tensordict: TensorDictBase) -> TensorDictBase:
@@ -11197,7 +11204,7 @@ def _inv_call(self, tensordict: TensorDictBase) -> TensorDictBase:
         time_elapsed = torch.tensor(
             current_time - self.last_call_time, device=tensordict.device
         )
-        self._maybe_expand_and_set(self.out_keys[1], time_elapsed, tensordict)
+        self._maybe_expand_and_set(self.time_policy_key, time_elapsed, tensordict)
         self.last_inv_time = current_time
         return tensordict

@@ -11209,23 +11216,25 @@ def _step(
         time_elapsed = torch.tensor(
             current_time - self.last_inv_time, device=tensordict.device
         )
-        self._maybe_expand_and_set(self.out_keys[0], time_elapsed, next_tensordict)
         self._maybe_expand_and_set(
-            self.out_keys[2], time_elapsed * 0, next_tensordict
+            self.time_step_key, time_elapsed, next_tensordict
+        )
+        self._maybe_expand_and_set(
+            self.time_reset_key, time_elapsed * 0, next_tensordict
         )
         self.last_call_time = current_time
         # presumbly no need to worry about batch size incongruencies here
-        next_tensordict.set(self.out_keys[1], tensordict.get(self.out_keys[1]))
+        next_tensordict.set(self.time_policy_key, tensordict.get(self.time_policy_key))
         return next_tensordict

     def transform_observation_spec(self, observation_spec: TensorSpec) -> TensorSpec:
-        observation_spec[self.out_keys[0]] = Unbounded(
+        observation_spec[self.time_step_key] = Unbounded(
             shape=observation_spec.shape, device=observation_spec.device
         )
-        observation_spec[self.out_keys[1]] = Unbounded(
+        observation_spec[self.time_policy_key] = Unbounded(
             shape=observation_spec.shape, device=observation_spec.device
         )
-        observation_spec[self.out_keys[2]] = Unbounded(
+        observation_spec[self.time_reset_key] = Unbounded(
             shape=observation_spec.shape, device=observation_spec.device
         )
         return observation_spec
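The transforms.py change binds the positional entries of out_keys to named attributes once in __init__, so the later reads in _reset, _inv_call, _step, and transform_observation_spec are self-describing instead of index-based. A minimal standalone sketch of that pattern (hypothetical class and key names, not TorchRL's actual implementation):

from typing import Dict, Sequence


class TimerSketch:
    def __init__(self, out_keys: Sequence[str] = ("time_step", "time_policy", "time_reset")):
        self.out_keys = list(out_keys)
        # Bind each position to a descriptive attribute up front, so the rest
        # of the class never has to remember which index means what.
        self.time_step_key = self.out_keys[0]
        self.time_policy_key = self.out_keys[1]
        self.time_reset_key = self.out_keys[2]

    def step(self, elapsed: float) -> Dict[str, float]:
        # Readers can tell which value lands under which key without counting indices.
        return {self.time_step_key: elapsed, self.time_reset_key: 0.0}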

0 commit comments
