
Commit ae63a5d

init
1 parent ed051bc commit ae63a5d

File tree

3 files changed: +53 -14 lines changed


.github/unittest/windows_optdepts/scripts/unittest.sh

Lines changed: 34 additions & 3 deletions
@@ -14,6 +14,12 @@ env_dir="${root_dir}/env"

 cd "${root_dir}"

+echo "=== Starting Windows CI setup ==="
+echo "Current directory: $(pwd)"
+echo "Python version: $PYTHON_VERSION"
+echo "CU_VERSION: $CU_VERSION"
+echo "TORCH_VERSION: $TORCH_VERSION"
+
 eval "$($(which conda) shell.bash hook)" && set -x

 # Create test environment at ./env
@@ -28,11 +34,12 @@ echo $(which python)
 echo $(python --version)
 echo $(conda info -e)

-
+echo "=== Installing test dependencies ==="
 python -m pip install hypothesis future cloudpickle pytest pytest-cov pytest-mock pytest-instafail pytest-rerunfailures expecttest pyyaml scipy coverage

 # =================================== Install =================================================

+echo "=== Installing PyTorch and dependencies ==="

 # TODO, refactor the below logic to make it easy to understand how to get correct cuda_version.
 if [ "${CU_VERSION:-}" == cpu ] ; then
@@ -56,8 +63,8 @@ else
 cudatoolkit="${cuda_toolkit_pckg}=${version}"
 fi

-
 # submodules
+echo "=== Updating git submodules ==="
 git submodule sync && git submodule update --init --recursive
 python -m pip install "numpy<2.0"

@@ -92,6 +99,7 @@ fi
 #python -m pip install pip --upgrade

 # install tensordict
+echo "=== Installing tensordict ==="
 if [[ "$RELEASE" == 0 ]]; then
 conda install anaconda::cmake -y

@@ -103,11 +111,13 @@ else
 fi

 # smoke test
+echo "=== Testing tensordict import ==="
 python -c """
 from tensordict import TensorDict
 print('successfully imported tensordict')
 """

+echo "=== Setting up CUDA environment ==="
 source "$this_dir/set_cuda_envs.sh"

 printf "* Installing torchrl\n"
@@ -117,13 +127,15 @@ whatsinside=$(ls -rtlh ./torchrl)
 echo $whatsinside

 # smoke test
+echo "=== Testing torchrl import ==="
 python -c """
 from torchrl.data import ReplayBuffer
 print('successfully imported torchrl')
 """

 # =================================== Run =================================================

+echo "=== Setting up test environment ==="
 source "$this_dir/set_cuda_envs.sh"

 # we don't use torchsnapshot
@@ -132,5 +144,24 @@ export MAX_IDLE_COUNT=60
 export BATCHED_PIPE_TIMEOUT=60
 export LAZY_LEGACY_OP=False

+echo "=== Collecting environment info ==="
 python -m torch.utils.collect_env
-pytest --junitxml=test-results/junit.xml -v --durations 200 --ignore test/test_distributed.py --ignore test/test_rlhf.py
+
+echo "=== Starting pytest execution ==="
+echo "Current working directory: $(pwd)"
+echo "Python executable: $(which python)"
+echo "Pytest executable: $(which pytest)"
+
+# Create test-results directory if it doesn't exist
+mkdir -p test-results
+
+# Run pytest with explicit error handling
+set +e # Don't exit on error for pytest
+pytest --junitxml=test-results/junit.xml -v --durations 200 --ignore test/test_distributed.py --ignore test/test_rlhf.py --ignore test/llm
+PYTEST_EXIT_CODE=$?
+set -e # Re-enable exit on error
+
+echo "=== Pytest completed with exit code: $PYTEST_EXIT_CODE ==="
+
+# Exit with pytest's exit code
+exit $PYTEST_EXIT_CODE
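The new tail of the script wraps pytest in set +e / set -e so the shell can capture and log pytest's exit code, then still propagate it with an explicit exit instead of aborting immediately under set -e. A minimal Python sketch of the same propagation idea (illustrative only, not part of the commit; it assumes pytest is installed and uses pytest.main(), which returns the CLI exit code):

import os
import sys

import pytest

# Make sure the report directory exists, mirroring `mkdir -p test-results`.
os.makedirs("test-results", exist_ok=True)

# Run the suite with the same junit report and skip list as the CI script.
exit_code = pytest.main(
    [
        "--junitxml=test-results/junit.xml",
        "-v",
        "--ignore=test/test_distributed.py",
        "--ignore=test/test_rlhf.py",
        "--ignore=test/llm",
    ]
)

# Propagate pytest's status so the CI job reports the real test outcome.
sys.exit(int(exit_code))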

torchrl/envs/llm/transforms/reason.py

Lines changed: 0 additions & 1 deletion
@@ -153,7 +153,6 @@ def _step(
         Returns:
             The modified next_tensordict
         """
-        print("Reward", next_tensordict["reward"])
         # Handle batch dimensions
         if next_tensordict.batch_dims >= 1:
             ntds = []

torchrl/envs/transforms/transforms.py

Lines changed: 19 additions & 10 deletions
@@ -11155,6 +11155,9 @@ def __init__(self, out_keys: Sequence[NestedKey] = None, time_key: str = "time")
         self.last_inv_time = None
         self.last_call_time = None
         self.last_reset_time = None
+        self.time_step_key = self.out_keys[0]
+        self.time_policy_key = self.out_keys[1]
+        self.time_reset_key = self.out_keys[2]

     def _reset_env_preprocess(self, tensordict: TensorDictBase) -> TensorDictBase:
         self.last_reset_time = self.last_inv_time = time.time()
@@ -11182,13 +11185,17 @@ def _reset(
         time_elapsed = torch.tensor(
             current_time - self.last_reset_time, device=tensordict.device
         )
-        self._maybe_expand_and_set(self.out_keys[2], time_elapsed, tensordict_reset)
         self._maybe_expand_and_set(
-            self.out_keys[0], time_elapsed * 0, tensordict_reset
+            self.time_reset_key, time_elapsed, tensordict_reset
+        )
+        self._maybe_expand_and_set(
+            self.time_step_key, time_elapsed * 0, tensordict_reset
         )
         self.last_call_time = current_time
         # Placeholder
-        self._maybe_expand_and_set(self.out_keys[1], time_elapsed * 0, tensordict_reset)
+        self._maybe_expand_and_set(
+            self.time_policy_key, time_elapsed * 0, tensordict_reset
+        )
         return tensordict_reset

     def _inv_call(self, tensordict: TensorDictBase) -> TensorDictBase:
@@ -11197,7 +11204,7 @@ def _inv_call(self, tensordict: TensorDictBase) -> TensorDictBase:
         time_elapsed = torch.tensor(
             current_time - self.last_call_time, device=tensordict.device
         )
-        self._maybe_expand_and_set(self.out_keys[1], time_elapsed, tensordict)
+        self._maybe_expand_and_set(self.time_policy_key, time_elapsed, tensordict)
         self.last_inv_time = current_time
         return tensordict

@@ -11209,23 +11216,25 @@ def _step(
         time_elapsed = torch.tensor(
             current_time - self.last_inv_time, device=tensordict.device
         )
-        self._maybe_expand_and_set(self.out_keys[0], time_elapsed, next_tensordict)
         self._maybe_expand_and_set(
-            self.out_keys[2], time_elapsed * 0, next_tensordict
+            self.time_step_key, time_elapsed, next_tensordict
+        )
+        self._maybe_expand_and_set(
+            self.time_reset_key, time_elapsed * 0, next_tensordict
         )
         self.last_call_time = current_time
         # presumbly no need to worry about batch size incongruencies here
-        next_tensordict.set(self.out_keys[1], tensordict.get(self.out_keys[1]))
+        next_tensordict.set(self.time_policy_key, tensordict.get(self.time_policy_key))
         return next_tensordict

     def transform_observation_spec(self, observation_spec: TensorSpec) -> TensorSpec:
-        observation_spec[self.out_keys[0]] = Unbounded(
+        observation_spec[self.time_step_key] = Unbounded(
             shape=observation_spec.shape, device=observation_spec.device
         )
-        observation_spec[self.out_keys[1]] = Unbounded(
+        observation_spec[self.time_policy_key] = Unbounded(
             shape=observation_spec.shape, device=observation_spec.device
         )
-        observation_spec[self.out_keys[2]] = Unbounded(
+        observation_spec[self.time_reset_key] = Unbounded(
             shape=observation_spec.shape, device=observation_spec.device
         )
         return observation_spec
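The transforms.py change binds the positional entries of out_keys to named attributes once in __init__, so the later reads in _reset, _inv_call, _step, and transform_observation_spec are self-describing instead of index-based. A minimal standalone sketch of that pattern (hypothetical class and key names, not TorchRL's actual implementation):

from typing import Dict, Sequence


class TimerSketch:
    def __init__(self, out_keys: Sequence[str] = ("time_step", "time_policy", "time_reset")):
        self.out_keys = list(out_keys)
        # Bind each position to a descriptive attribute up front, so the rest
        # of the class never has to remember which index means what.
        self.time_step_key = self.out_keys[0]
        self.time_policy_key = self.out_keys[1]
        self.time_reset_key = self.out_keys[2]

    def step(self, elapsed: float) -> Dict[str, float]:
        # Readers can tell which value lands under which key without counting indices.
        return {self.time_step_key: elapsed, self.time_reset_key: 0.0}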

0 commit comments
