diff --git a/.github/unittest/windows_optdepts/scripts/unittest.sh b/.github/unittest/windows_optdepts/scripts/unittest.sh index f115c231388..634059af1e2 100755 --- a/.github/unittest/windows_optdepts/scripts/unittest.sh +++ b/.github/unittest/windows_optdepts/scripts/unittest.sh @@ -14,6 +14,12 @@ env_dir="${root_dir}/env" cd "${root_dir}" +echo "=== Starting Windows CI setup ===" +echo "Current directory: $(pwd)" +echo "Python version: $PYTHON_VERSION" +echo "CU_VERSION: $CU_VERSION" +echo "TORCH_VERSION: $TORCH_VERSION" + eval "$($(which conda) shell.bash hook)" && set -x # Create test environment at ./env @@ -28,11 +34,12 @@ echo $(which python) echo $(python --version) echo $(conda info -e) - +echo "=== Installing test dependencies ===" python -m pip install hypothesis future cloudpickle pytest pytest-cov pytest-mock pytest-instafail pytest-rerunfailures expecttest pyyaml scipy coverage # =================================== Install ================================================= +echo "=== Installing PyTorch and dependencies ===" # TODO, refactor the below logic to make it easy to understand how to get correct cuda_version. if [ "${CU_VERSION:-}" == cpu ] ; then @@ -56,8 +63,8 @@ else cudatoolkit="${cuda_toolkit_pckg}=${version}" fi - # submodules +echo "=== Updating git submodules ===" git submodule sync && git submodule update --init --recursive python -m pip install "numpy<2.0" @@ -92,6 +99,7 @@ fi #python -m pip install pip --upgrade # install tensordict +echo "=== Installing tensordict ===" if [[ "$RELEASE" == 0 ]]; then conda install anaconda::cmake -y @@ -103,11 +111,13 @@ else fi # smoke test +echo "=== Testing tensordict import ===" python -c """ from tensordict import TensorDict print('successfully imported tensordict') """ +echo "=== Setting up CUDA environment ===" source "$this_dir/set_cuda_envs.sh" printf "* Installing torchrl\n" @@ -117,6 +127,7 @@ whatsinside=$(ls -rtlh ./torchrl) echo $whatsinside # smoke test +echo "=== Testing torchrl import ===" python -c """ from torchrl.data import ReplayBuffer print('successfully imported torchrl') @@ -124,6 +135,7 @@ print('successfully imported torchrl') # =================================== Run ================================================= +echo "=== Setting up test environment ===" source "$this_dir/set_cuda_envs.sh" # we don't use torchsnapshot @@ -132,5 +144,24 @@ export MAX_IDLE_COUNT=60 export BATCHED_PIPE_TIMEOUT=60 export LAZY_LEGACY_OP=False +echo "=== Collecting environment info ===" python -m torch.utils.collect_env -pytest --junitxml=test-results/junit.xml -v --durations 200 --ignore test/test_distributed.py --ignore test/test_rlhf.py + +echo "=== Starting pytest execution ===" +echo "Current working directory: $(pwd)" +echo "Python executable: $(which python)" +echo "Pytest executable: $(which pytest)" + +# Create test-results directory if it doesn't exist +mkdir -p test-results + +# Run pytest with explicit error handling +set +e # Don't exit on error for pytest +pytest --junitxml=test-results/junit.xml -v --durations 200 --ignore test/test_distributed.py --ignore test/test_rlhf.py --ignore test/llm +PYTEST_EXIT_CODE=$? +set -e # Re-enable exit on error + +echo "=== Pytest completed with exit code: $PYTEST_EXIT_CODE ===" + +# Exit with pytest's exit code +exit $PYTEST_EXIT_CODE diff --git a/torchrl/envs/llm/transforms/reason.py b/torchrl/envs/llm/transforms/reason.py index a5c67cfd91c..6890d45b80e 100644 --- a/torchrl/envs/llm/transforms/reason.py +++ b/torchrl/envs/llm/transforms/reason.py @@ -153,7 +153,6 @@ def _step( Returns: The modified next_tensordict """ - print("Reward", next_tensordict["reward"]) # Handle batch dimensions if next_tensordict.batch_dims >= 1: ntds = [] diff --git a/torchrl/envs/transforms/transforms.py b/torchrl/envs/transforms/transforms.py index a00747d7e02..9a9682e52da 100644 --- a/torchrl/envs/transforms/transforms.py +++ b/torchrl/envs/transforms/transforms.py @@ -11155,6 +11155,9 @@ def __init__(self, out_keys: Sequence[NestedKey] = None, time_key: str = "time") self.last_inv_time = None self.last_call_time = None self.last_reset_time = None + self.time_step_key = self.out_keys[0] + self.time_policy_key = self.out_keys[1] + self.time_reset_key = self.out_keys[2] def _reset_env_preprocess(self, tensordict: TensorDictBase) -> TensorDictBase: self.last_reset_time = self.last_inv_time = time.time() @@ -11182,13 +11185,17 @@ def _reset( time_elapsed = torch.tensor( current_time - self.last_reset_time, device=tensordict.device ) - self._maybe_expand_and_set(self.out_keys[2], time_elapsed, tensordict_reset) self._maybe_expand_and_set( - self.out_keys[0], time_elapsed * 0, tensordict_reset + self.time_reset_key, time_elapsed, tensordict_reset + ) + self._maybe_expand_and_set( + self.time_step_key, time_elapsed * 0, tensordict_reset ) self.last_call_time = current_time # Placeholder - self._maybe_expand_and_set(self.out_keys[1], time_elapsed * 0, tensordict_reset) + self._maybe_expand_and_set( + self.time_policy_key, time_elapsed * 0, tensordict_reset + ) return tensordict_reset def _inv_call(self, tensordict: TensorDictBase) -> TensorDictBase: @@ -11197,7 +11204,7 @@ def _inv_call(self, tensordict: TensorDictBase) -> TensorDictBase: time_elapsed = torch.tensor( current_time - self.last_call_time, device=tensordict.device ) - self._maybe_expand_and_set(self.out_keys[1], time_elapsed, tensordict) + self._maybe_expand_and_set(self.time_policy_key, time_elapsed, tensordict) self.last_inv_time = current_time return tensordict @@ -11209,23 +11216,25 @@ def _step( time_elapsed = torch.tensor( current_time - self.last_inv_time, device=tensordict.device ) - self._maybe_expand_and_set(self.out_keys[0], time_elapsed, next_tensordict) self._maybe_expand_and_set( - self.out_keys[2], time_elapsed * 0, next_tensordict + self.time_step_key, time_elapsed, next_tensordict + ) + self._maybe_expand_and_set( + self.time_reset_key, time_elapsed * 0, next_tensordict ) self.last_call_time = current_time # presumbly no need to worry about batch size incongruencies here - next_tensordict.set(self.out_keys[1], tensordict.get(self.out_keys[1])) + next_tensordict.set(self.time_policy_key, tensordict.get(self.time_policy_key)) return next_tensordict def transform_observation_spec(self, observation_spec: TensorSpec) -> TensorSpec: - observation_spec[self.out_keys[0]] = Unbounded( + observation_spec[self.time_step_key] = Unbounded( shape=observation_spec.shape, device=observation_spec.device ) - observation_spec[self.out_keys[1]] = Unbounded( + observation_spec[self.time_policy_key] = Unbounded( shape=observation_spec.shape, device=observation_spec.device ) - observation_spec[self.out_keys[2]] = Unbounded( + observation_spec[self.time_reset_key] = Unbounded( shape=observation_spec.shape, device=observation_spec.device ) return observation_spec