Skip to content

Commit afd2533

Browse files
author
Vincent Moens
committed
[CI] Fix nightly and benchmark CIs
ghstack-source-id: f39b257
Pull-Request-resolved: #2930
1 parent b7eda32 commit afd2533

File tree

13 files changed

+240
-111
lines changed

13 files changed

+240
-111
lines changed

.github/scripts/pre-build-script-win.sh

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,3 @@
33
pip install --upgrade setuptools
44

55
export TORCHRL_BUILD_VERSION=0.8.0
6-
7-
${CONDA_RUN} pip install "pybind11[global]"
8-
${CONDA_RUN} conda install anaconda::cmake -y
9-
${CONDA_RUN} pip install git+https://github.com/pytorch/tensordict.git -U

.github/scripts/td_script.sh

Lines changed: 5 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -9,29 +9,20 @@ ARCH=${ARCH:-} # This sets ARCH to an empty string if it's not defined
99
if pip list | grep -q torch; then
1010
echo "Torch is installed."
1111

12-
${CONDA_RUN} pip install "pybind11[global]"
12+
# ${CONDA_RUN} conda install 'anaconda::cmake>=3.22' -y
1313

14-
if conda list cmake | grep -q 'cmake'; then
15-
echo "CMake is already installed."
16-
else
17-
echo "CMake is not installed. Installing now..."
18-
${CONDA_RUN} conda install anaconda::cmake -y --no-update-deps
19-
fi
14+
${CONDA_RUN} pip install "pybind11[global]"
2015

2116
${CONDA_RUN} pip install git+https://github.com/pytorch/tensordict.git -U --no-deps
2217
elif [[ -n "${SMOKE_TEST_SCRIPT:-}" ]]; then
2318
${CONDA_RUN} ${PIP_INSTALL_TORCH}
2419
# TODO: revert when nightlies of tensordict are fixed
2520
# if [[ "$ARCH" == "aarch64" ]]; then
2621

27-
${CONDA_RUN} pip install "pybind11[global]"
2822

29-
if conda list cmake | grep -q 'cmake'; then
30-
echo "CMake is already installed."
31-
else
32-
echo "CMake is not installed. Installing now..."
33-
${CONDA_RUN} conda install anaconda::cmake -y --no-update-deps
34-
fi
23+
# ${CONDA_RUN} conda install 'anaconda::cmake>=3.22' -y
24+
25+
${CONDA_RUN} pip install "pybind11[global]"
3526

3627
${CONDA_RUN} pip install git+https://github.com/pytorch/tensordict.git -U --no-deps
3728
else

.github/unittest/linux_libs/scripts_gym/setup_env.sh

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,7 @@ this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
1212
apt-get update && apt-get install -y git wget gcc g++
1313

1414
apt-get install -y libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev libsdl2-dev libsdl2-2.0-0
15-
apt-get install -y libegl-dev libegl
16-
apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2 xvfb
15+
apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2 xvfb libegl-dev libx11-dev freeglut3-dev
1716

1817
git config --global --add safe.directory '*'
1918
root_dir="$(git rev-parse --show-toplevel)"
@@ -92,7 +91,7 @@ conda env config vars set \
9291
# make env variables apparent
9392
conda deactivate && conda activate "${env_dir}"
9493

95-
pip install pip --upgrade
94+
# pip install pip --upgrade
9695

9796
conda env update --file "${this_dir}/environment.yml" --prune
9897
#conda install -c conda-forge fltk -y

.github/unittest/linux_olddeps/scripts_gym_0_13/environment.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@ channels:
22
- pytorch
33
- defaults
44
dependencies:
5-
- pip<25.0
5+
- pip
66
- protobuf
77
- pip:
88
- hypothesis

.github/unittest/linux_olddeps/scripts_gym_0_13/setup_env.sh

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -15,11 +15,11 @@ printf "* Installing vim - git - wget\n"
1515
apt-get install -y vim git wget
1616

1717
printf "* Installing glfw - glew - osmesa part 1\n"
18-
apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2 xvfb
18+
apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2 xvfb libx11-dev libegl-dev
19+
20+
#printf "* Installing glfw - glew - osmesa part 2\n"
21+
#apt-get install -y libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev libsdl2-dev libsdl2-2.0-0
1922

20-
printf "* Installing glfw - glew - osmesa part 2\n"
21-
apt-get install -y libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev libsdl2-dev libsdl2-2.0-0 libegl-dev libx11-dev
22-
#
2323
if [ "${CU_VERSION:-}" == cpu ] ; then
2424
# solves version `GLIBCXX_3.4.29' not found for tensorboard
2525
# apt-get install -y gcc-4.9

.github/unittest/windows_optdepts/scripts/environment.yml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,6 @@ dependencies:
1313
- pytest-instafail
1414
- pytest-rerunfailures
1515
- expecttest
16-
- pybind11[global]
1716
- pyyaml
1817
- scipy
1918
- coverage

.github/unittest/windows_optdepts/scripts/unittest.sh

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -93,10 +93,11 @@ fi
9393

9494
# install tensordict
9595
if [[ "$RELEASE" == 0 ]]; then
96-
git clone https://github.com/pytorch/tensordict
97-
cd tensordict
98-
python setup.py develop
99-
cd ..
96+
conda install anaconda::cmake -y
97+
98+
python -m pip install "pybind11[global]"
99+
100+
python -m pip install git+https://github.com/pytorch/tensordict
100101
else
101102
pip3 install tensordict
102103
fi

.github/workflows/benchmarks.yml

Lines changed: 48 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -23,11 +23,19 @@ jobs:
2323

2424
benchmark_cpu:
2525
name: CPU Pytest benchmark
26-
runs-on: ubuntu-20.04
26+
runs-on: linux.g5.4xlarge.nvidia.cpu
27+
defaults:
28+
run:
29+
shell: bash -l {0}
30+
container:
31+
image: nvidia/cuda:12.3.0-base-ubuntu22.04
32+
options: --cpus all
2733
steps:
2834
- name: Who triggered this?
2935
run: |
3036
echo "Action triggered by ${{ github.event.pull_request.html_url }}"
37+
- name: Check ldd --version
38+
run: ldd --version
3139
- name: Checkout
3240
uses: actions/checkout@v4
3341
with:
@@ -38,24 +46,54 @@ jobs:
3846
python-version: '3.10'
3947
- name: Setup Environment
4048
run: |
41-
python3.10 -m venv ./py310
49+
export TZ=Europe/London
50+
export DEBIAN_FRONTEND=noninteractive # tzdata bug
51+
apt-get update -y
52+
apt-get install software-properties-common cmake -y
53+
add-apt-repository ppa:git-core/candidate -y
54+
apt-get update -y
55+
apt-get upgrade -y
56+
apt-get -y install libglu1-mesa libgl1-mesa-glx libosmesa6 gcc curl g++ unzip wget libglfw3-dev libgles2-mesa-dev libglew-dev sudo git cmake libz-dev libpython3.10-dev
57+
- name: Setup git
58+
run: git config --global --add safe.directory /__w/rl/rl
59+
- name: setup Path
60+
run: |
61+
echo /usr/local/bin >> $GITHUB_PATH
62+
- name: Setup benchmarks
63+
run: |
64+
echo "BASE_SHA=$(echo ${{ github.event.pull_request.base.sha }} | cut -c1-8)" >> $GITHUB_ENV
65+
echo "HEAD_SHA=$(echo ${{ github.event.pull_request.head.sha }} | cut -c1-8)" >> $GITHUB_ENV
66+
echo "BASELINE_JSON=$(mktemp)" >> $GITHUB_ENV
67+
echo "CONTENDER_JSON=$(mktemp)" >> $GITHUB_ENV
68+
echo "PR_COMMENT=$(mktemp)" >> $GITHUB_ENV
69+
- name: Run
70+
run: |
71+
python3.10 -m venv --system-site-packages ./py310
4272
source ./py310/bin/activate
73+
export PYTHON_INCLUDE_DIR=/usr/include/python3.10
4374
44-
python3 -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu -U
45-
python3 -m pip install git+https://github.com/pytorch/tensordict
46-
python3 setup.py develop
47-
python3 -m pip install pytest pytest-benchmark
48-
python3 -m pip install "gym[accept-rom-license,atari]"
49-
python3 -m pip install "dm_control" "mujoco"
75+
python3.10 -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu128 -U
76+
python3.10 -m pip install ninja pytest pytest-benchmark mujoco dm_control "gym[accept-rom-license,atari]"
77+
python3 -m pip install "pybind11[global]"
78+
python3.10 -m pip install git+https://github.com/pytorch/tensordict
79+
python3.10 setup.py develop
80+
81+
# test import
82+
python3 -c """import torch
83+
assert torch.cuda.device_count()
84+
"""
5085
5186
cd benchmarks/
5287
export TORCHDYNAMO_INLINE_INBUILT_NN_MODULES=1
5388
export COMPOSITE_LP_AGGREGATE=0
89+
export CUDA_VISIBLE_DEVICES=
5490
export TD_GET_DEFAULTS_TO_NONE=1
5591
python3 -m pytest -vvv --rank 0 --benchmark-json output.json --ignore test_collectors_benchmark.py
5692
- name: Store benchmark results
5793
uses: benchmark-action/github-action-benchmark@v1
5894
if: ${{ github.ref == 'refs/heads/main' || github.event_name == 'workflow_dispatch' }}
95+
env:
96+
GIT_WORK_TREE: /__w/rl/rl
5997
with:
6098
name: CPU Benchmark Results
6199
tool: 'pytest'
@@ -68,7 +106,6 @@ jobs:
68106
gh-pages-branch: gh-pages
69107
auto-push: true
70108

71-
72109
benchmark_gpu:
73110
name: GPU Pytest benchmark
74111
runs-on: linux.g5.4xlarge.nvidia.gpu
@@ -121,7 +158,8 @@ jobs:
121158
export PYTHON_INCLUDE_DIR=/usr/include/python3.10
122159
123160
python3.10 -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu128 -U
124-
python3.10 -m pip install cmake ninja pytest pytest-benchmark mujoco dm_control "gym[accept-rom-license,atari]"
161+
python3.10 -m pip install ninja pytest pytest-benchmark mujoco dm_control "gym[accept-rom-license,atari]"
162+
python3 -m pip install "pybind11[global]"
125163
python3.10 -m pip install git+https://github.com/pytorch/tensordict
126164
python3.10 setup.py develop
127165

.github/workflows/benchmarks_pr.yml

Lines changed: 55 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -14,11 +14,30 @@ jobs:
1414

1515
benchmark_cpu:
1616
name: CPU Pytest benchmark
17-
runs-on: ubuntu-20.04
17+
runs-on: linux.g5.4xlarge.nvidia.cpu
18+
defaults:
19+
run:
20+
shell: bash -l {0}
21+
container:
22+
image: nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
23+
options: --cpus all
1824
steps:
25+
- name: Set GITHUB_BRANCH environment variable
26+
run: |
27+
if [ "${{ github.event_name }}" == "push" ]; then
28+
export GITHUB_BRANCH=${{ github.event.branch }}
29+
elif [ "${{ github.event_name }}" == "pull_request" ]; then
30+
export GITHUB_BRANCH=${{ github.event.pull_request.head.ref }}
31+
else
32+
echo "Unsupported event type"
33+
exit 1
34+
fi
35+
echo "GITHUB_BRANCH=$GITHUB_BRANCH" >> $GITHUB_ENV
1936
- name: Who triggered this?
2037
run: |
2138
echo "Action triggered by ${{ github.event.pull_request.html_url }}"
39+
- name: Check ldd --version
40+
run: ldd --version
2241
- name: Checkout
2342
uses: actions/checkout@v4
2443
with:
@@ -27,28 +46,50 @@ jobs:
2746
uses: actions/setup-python@v4
2847
with:
2948
python-version: '3.10'
49+
- name: Setup Environment
50+
run: |
51+
export TZ=Europe/London
52+
export DEBIAN_FRONTEND=noninteractive # tzdata bug
53+
apt-get update -y
54+
apt-get install software-properties-common cmake -y
55+
add-apt-repository ppa:git-core/candidate -y
56+
apt-get update -y
57+
apt-get upgrade -y
58+
apt-get -y install libglu1-mesa libgl1-mesa-glx libosmesa6 gcc curl g++ unzip wget libglfw3-dev libgles2-mesa-dev libglew-dev sudo git cmake libz-dev libpython3.10-dev
59+
- name: Setup git
60+
run: git config --global --add safe.directory /__w/rl/rl
61+
- name: setup Path
62+
run: |
63+
echo /usr/local/bin >> $GITHUB_PATH
3064
- name: Setup benchmarks
3165
run: |
3266
echo "BASE_SHA=$(echo ${{ github.event.pull_request.base.sha }} | cut -c1-8)" >> $GITHUB_ENV
3367
echo "HEAD_SHA=$(echo ${{ github.event.pull_request.head.sha }} | cut -c1-8)" >> $GITHUB_ENV
3468
echo "BASELINE_JSON=$(mktemp)" >> $GITHUB_ENV
3569
echo "CONTENDER_JSON=$(mktemp)" >> $GITHUB_ENV
3670
echo "PR_COMMENT=$(mktemp)" >> $GITHUB_ENV
37-
- name: Setup Environment and tests
71+
- name: Run
3872
run: |
39-
python3.10 -m venv ./py310
73+
python3.10 -m venv --system-site-packages ./py310
4074
source ./py310/bin/activate
75+
export PYTHON_INCLUDE_DIR=/usr/include/python3.10
76+
77+
python3.10 -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu128 -U
78+
python3.10 -m pip install ninja pytest pytest-benchmark mujoco dm_control "gym[accept-rom-license,atari]"
79+
python3.10 -m pip install "pybind11[global]"
80+
python3.10 -m pip install git+https://github.com/pytorch/tensordict
81+
python3.10 setup.py develop
82+
# python3.10 -m pip install git+https://github.com/pytorch/rl@$GITHUB_BRANCH
83+
84+
# test import
85+
python3 -c """import torch
86+
assert torch.cuda.device_count()
87+
"""
4188
42-
python3 -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu -U
43-
python3 -m pip install git+https://github.com/pytorch/tensordict
44-
python3 setup.py develop
45-
python3 -m pip install pytest pytest-benchmark
46-
python3 -m pip install "gym[accept-rom-license,atari]"
47-
python3 -m pip install "dm_control" "mujoco"
48-
4989
cd benchmarks/
5090
export TORCHDYNAMO_INLINE_INBUILT_NN_MODULES=1
5191
export COMPOSITE_LP_AGGREGATE=0
92+
export CUDA_VISIBLE_DEVICES=
5293
export TD_GET_DEFAULTS_TO_NONE=1
5394
RUN_BENCHMARK="python3 -m pytest -vvv --rank 0 --ignore test_collectors_benchmark.py --benchmark-json "
5495
git checkout ${{ github.event.pull_request.base.sha }}
@@ -57,6 +98,8 @@ jobs:
5798
$RUN_BENCHMARK ${{ env.CONTENDER_JSON }}
5899
- name: Publish results
59100
uses: apbard/pytest-benchmark-commenter@v3
101+
env:
102+
GIT_WORK_TREE: /__w/rl/rl
60103
with:
61104
token: ${{ secrets.GITHUB_TOKEN }}
62105
benchmark-file: ${{ env.CONTENDER_JSON }}
@@ -130,7 +173,8 @@ jobs:
130173
export PYTHON_INCLUDE_DIR=/usr/include/python3.10
131174
132175
python3.10 -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu128 -U
133-
python3.10 -m pip install cmake ninja pytest pytest-benchmark mujoco dm_control "gym[accept-rom-license,atari]"
176+
python3.10 -m pip install ninja pytest pytest-benchmark mujoco dm_control "gym[accept-rom-license,atari]"
177+
python3.10 -m pip install "pybind11[global]"
134178
python3.10 -m pip install git+https://github.com/pytorch/tensordict
135179
python3.10 setup.py develop
136180
# python3.10 -m pip install git+https://github.com/pytorch/rl@$GITHUB_BRANCH

0 commit comments

Comments
 (0)