Skip to content

Commit d7ea591

Browse files
committed
Update gpu flavor to cuda 11.2
1 parent 687b28e commit d7ea591

File tree

1 file changed

+57
-85
lines changed

1 file changed

+57
-85
lines changed

gpu-flavor/Dockerfile

Lines changed: 57 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -8,26 +8,27 @@ ENV WORKSPACE_FLAVOR=$ARG_WORKSPACE_FLAVOR
88
USER root
99

1010
### NVIDIA CUDA BASE ###
11-
# https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/10.1/ubuntu18.04-x86_64/base/Dockerfile
12-
RUN apt-get update && apt-get install -y --no-install-recommends gnupg2 curl ca-certificates && \
13-
curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub | apt-key add - && \
14-
echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \
15-
echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list && \
11+
# https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/11.2.2/ubuntu20.04-x86_64/base/Dockerfile
12+
RUN apt-get update && apt-get install -y --no-install-recommends \
13+
gnupg2 curl ca-certificates && \
14+
curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub | apt-key add - && \
15+
echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \
16+
echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list && \
1617
# Cleanup - cannot use cleanup script here, otherwise too much is removed
1718
apt-get clean && \
1819
rm -rf $HOME/.cache/* && \
1920
rm -rf /tmp/* && \
2021
rm -rf /var/lib/apt/lists/*
2122

22-
ENV CUDA_VERSION 11.2.1
23-
ENV CUDA_PKG_VERSION 11-2=$CUDA_VERSION-1
24-
ENV CUDART_VERSION 11-2=$CUDA_VERSION46-1
23+
ENV CUDA_VERSION 11.2.2
24+
#ENV CUDA_PKG_VERSION 11-2=$CUDA_VERSION-1
25+
#ENV CUDART_VERSION 11-2=$CUDA_VERSION46-1
2526

2627
# For libraries in the cuda-compat-* package: https://docs.nvidia.com/cuda/eula/index.html#attachment-a
2728
RUN apt-get update && apt-get install -y --no-install-recommends \
28-
cuda-cudart-$CUDART_VERSION \
29-
cuda-compat-11-2 && \
30-
ln -s cuda-11.2 /usr/local/cuda && \
29+
cuda-cudart-11-2=11.2.152-1 \
30+
cuda-compat-11-2 \
31+
&& ln -s cuda-11.2 /usr/local/cuda && \
3132
rm -rf /var/lib/apt/lists/* && \
3233
# Cleanup - cannot use cleanup script here, otherwise too much is removed
3334
apt-get clean && \
@@ -36,111 +37,101 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
3637
rm -rf /var/lib/apt/lists/*
3738

3839
# Required for nvidia-docker v1
39-
RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
40-
echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf
40+
RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf \
41+
&& echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf
4142

4243
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
43-
ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64:${LD_LIBRARY_PATH}
44+
ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64
4445

4546
# nvidia-container-runtime
4647
# https://github.com/NVIDIA/nvidia-container-runtime#environment-variables-oci-spec
4748
# nvidia-container-runtime
4849
ENV NVIDIA_VISIBLE_DEVICES all
4950
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
50-
ENV NVIDIA_REQUIRE_CUDA "cuda>=10.1 brand=tesla,driver>=396,driver<397 brand=tesla,driver>=410,driver<411 brand=tesla,driver>=418,driver<419"
51+
ENV NVIDIA_REQUIRE_CUDA "cuda>=11.2 brand=tesla,driver>=418,driver<419 brand=tesla,driver>=440,driver<441 driver>=450"
5152

5253
### CUDA RUNTIME ###
53-
# https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/10.1/ubuntu18.04-x86_64/runtime/Dockerfile
54+
# https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/11.2.2/ubuntu20.04-x86_64/runtime/Dockerfile
5455

5556
ENV NCCL_VERSION 2.8.4
5657

5758
RUN apt-get update && apt-get install -y --no-install-recommends \
58-
cuda-libraries-$CUDA_PKG_VERSION \
59-
libnpp-11-2=11.3.2.139-1 \
60-
cuda-nvtx-11-2=11.2.67-1 \
61-
libcublas-11-2=11.4.1.1026-1 \
62-
libcusparse-11-2=11.4.0.135-1 \
63-
libnccl2=$NCCL_VERSION-1+cuda11.2 \
64-
&& apt-mark hold libnccl2 \
59+
cuda-libraries-11-2=11.2.2-1 \
60+
libnpp-11-2=11.3.2.152-1 \
61+
cuda-nvtx-11-2=11.2.152-1 \
62+
libcublas-11-2=11.4.1.1043-1 \
63+
libcusparse-11-2=11.4.1.1152-1 \
64+
libnccl2=$NCCL_VERSION-1+cuda11.2 \
65+
&& rm -rf /var/lib/apt/lists/* \
6566
# Cleanup - cannot use cleanup script here, otherwise too much is removed
6667
&& apt-get clean \
6768
&& rm -rf $HOME/.cache/* \
6869
&& rm -rf /tmp/* \
6970
&& rm -rf /var/lib/apt/lists/*
7071

71-
# apt from auto upgrading the cublas package. See https://gitlab.com/nvidia/container-images/cuda/-/issues/88
72-
RUN apt-mark hold libcublas10
72+
RUN apt-mark hold libcublas-11-2 libnccl2
7373

7474
### END CUDA RUNTIME ###
7575

7676
### CUDA DEVEL ###
77-
# https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/10.1/ubuntu18.04-x86_64/devel/Dockerfile
77+
# https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/11.2.2/ubuntu20.04-x86_64/devel/Dockerfile
7878
RUN apt-get update && apt-get install -y --no-install-recommends \
79-
libtinfo5 libncursesw5 \
80-
cuda-cudart-dev-$CUDART_VERSION \
81-
cuda-nvml-dev-11-2=11.2.67-1 \
82-
cuda-command-line-tools-$CUDA_PKG_VERSION \
83-
libnpp-dev-11-2=11.3.2.139-1 \
84-
cuda-libraries-dev-$CUDA_PKG_VERSION \
85-
cuda-minimal-build-$CUDA_PKG_VERSION \
86-
libcublas-dev-11-2=11.4.1.1026-1 \
87-
libcusparse-dev-11-2=11.4.0.135-1 \
88-
libnpp-dev-11-2=11.3.2.139-1 \
89-
libnccl-dev=$NCCL_VERSION-1+cuda11.2 && \
90-
apt-mark hold libnccl-dev && \
79+
libtinfo5 libncursesw5 \
80+
cuda-cudart-dev-11-2=11.2.152-1 \
81+
cuda-command-line-tools-11-2=11.2.2-1 \
82+
cuda-minimal-build-11-2=11.2.2-1 \
83+
cuda-libraries-dev-11-2=11.2.2-1 \
84+
cuda-nvml-dev-11-2=11.2.152-1 \
85+
libnpp-dev-11-2=11.3.2.152-1 \
86+
libnccl-dev=2.8.4-1+cuda11.2 \
87+
libcublas-dev-11-2=11.4.1.1043-1 \
88+
libcusparse-dev-11-2=11.4.1.1152-1 && \
9189
# Cleanup - cannot use cleanup script here, otherwise too much is removed
9290
apt-get clean && \
9391
rm -rf $HOME/.cache/* && \
9492
rm -rf /tmp/* && \
9593
rm -rf /var/lib/apt/lists/*
9694

9795
# Keep apt from auto upgrading the cublas package. See https://gitlab.com/nvidia/container-images/cuda/-/issues/88
98-
RUN apt-mark hold libcublas-dev
99-
96+
RUN apt-mark hold libcublas-dev-11-2 libnccl-dev
10097
ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs
10198

99+
102100
### END CUDA DEVEL ###
103101

104-
### CUDANN7 DEVEL ###
105-
# https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/10.1/ubuntu18.04-x86_64/devel/cudnn7/Dockerfile
102+
### CUDNN8 DEVEL ###
103+
# https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/11.2.2/ubuntu20.04-x86_64/devel/cudnn8/Dockerfile
106104

107-
ENV CUDNN_VERSION 8.1.0.77
105+
ENV CUDNN_VERSION 8.1.1.33
108106
LABEL com.nvidia.cudnn.version="${CUDNN_VERSION}"
109107

110-
RUN apt-get update && \
111-
apt-get install -y --no-install-recommends \
112-
libcudnn7=$CUDNN_VERSION-1+cuda11.2 \
113-
libcudnn7-dev=$CUDNN_VERSION-1+cuda11.2 && \
114-
apt-mark hold libcudnn8 && \
108+
RUN apt-get update && apt-get install -y --no-install-recommends \
109+
libcudnn8=$CUDNN_VERSION-1+cuda11.2 \
110+
libcudnn8-dev=$CUDNN_VERSION-1+cuda11.2 \
111+
&& apt-mark hold libcudnn8 && \
115112
# Cleanup
116113
apt-get clean && \
117114
rm -rf /root/.cache/* && \
118115
rm -rf /tmp/* && \
119116
rm -rf /var/lib/apt/lists/*
120117

121-
### END CUDANN7 ###
118+
### END CUDNN8 ###
122119

123120
# Link Cupti:
124121
ENV LD_LIBRARY_PATH ${LD_LIBRARY_PATH}:/usr/local/cuda/extras/CUPTI/lib64
125122

126-
# Install TensorRT. Requires that libcudnn8 is installed above.
127-
# https://www.tensorflow.org/install/gpu#ubuntu_1804_cuda_101
128-
RUN apt-get update && apt-get install -y --no-install-recommends \
129-
libnvinfer7=7.1.3-1+cuda11.0 \
130-
libnvinfer-dev=7.1.3-1+cuda11.0 \
131-
libnvinfer-plugin7=7.1.3-1+cuda11.0 && \
132-
# Cleanup
133-
clean-layer.sh
134-
135123
### GPU DATA SCIENCE LIBRARIES ###
136124

137125
RUN \
138126
apt-get update && \
139127
apt-get install -y libomp-dev libopenblas-base && \
140-
# Not needed? Install cuda-toolkit (e.g. for pytorch: https://pytorch.org/): https://anaconda.org/anaconda/cudatoolkit
141-
conda install -y cudatoolkit=11.0.221 -c pytorch && \
128+
# Install pytorch gpu
129+
# uninstall cpu only packages via conda
130+
conda remove --force -y pytorch cpuonly && \
131+
# https://pytorch.org/get-started/locally/
132+
conda install -y pytorch cudatoolkit=11.2 -c pytorch -c nvidia && \
142133
# Install cupy: https://cupy.chainer.org/
143-
pip install --no-cache-dir cupy-cuda112 && \
134+
pip install --no-cache-dir cupy-cuda111 && \
144135
# Install pycuda: https://pypi.org/project/pycuda
145136
pip install --no-cache-dir pycuda && \
146137
# Install gpu utils libs
@@ -149,25 +140,19 @@ RUN \
149140
pip install --no-cache-dir scikit-cuda && \
150141
# Install tensorflow gpu
151142
pip uninstall -y tensorflow tensorflow-cpu intel-tensorflow && \
152-
# TODO: tensorflow 2.3.1 installs tenorboard 2.4.0 with problems, use 2.3.0
153-
pip install --no-cache-dir tensorflow-gpu==2.4.1 && \
143+
pip install --no-cache-dir tensorflow-gpu==2.5.0 && \
154144
# Install ONNX GPU Runtime
155-
# TODO: 1.4.x is latest with cuda 10.1 support
156145
pip uninstall -y onnxruntime && \
157-
pip install --no-cache-dir onnxruntime-gpu==1.7.0 && \
158-
# Install pytorch gpu
159-
# uninstall cpu only packages via conda
160-
conda remove --force -y pytorch cpuonly && \
161-
# https://pytorch.org/get-started/locally/
162-
conda install -y pytorch -c pytorch && \
146+
pip install --no-cache-dir onnxruntime-gpu==1.8.0 onnxruntime-training==1.8.0 && \
163147
# Install faiss gpu
164148
conda remove --force -y faiss-cpu && \
165149
conda install -y faiss-gpu -c pytorch && \
166150
# Update mxnet to gpu edition
167151
pip uninstall -y mxnet-mkl && \
168-
pip install --no-cache-dir mxnet-cu101mkl==1.6.0.post0 && \
152+
# cuda111 -> >= 11.1
153+
pip install --no-cache-dir mxnet-cu110 && \
169154
# install jax: https://github.com/google/jax#pip-installation
170-
pip install --upgrade jax jaxlib==0.1.62+cuda110 -f https://storage.googleapis.com/jax-releases/jax_releases.html && \
155+
pip install --upgrade jax[cuda111] -f https://storage.googleapis.com/jax-releases/jax_releases.html && \
171156
# Install pygpu - Required for theano: http://deeplearning.net/software/libgpuarray/
172157
conda install -y pygpu && \
173158
# Install lightgbm
@@ -182,19 +167,6 @@ RUN \
182167
# Cleanup
183168
clean-layer.sh
184169

185-
# TODO: nvdashboard does not work with relative paths
186-
# RUN \
187-
# # Install Jupyterlab GPU Plugin: https://github.com/rapidsai/jupyterlab-nvdashboard
188-
# pip install jupyterlab-nvdashboard && \
189-
# jupyter labextension install jupyterlab-nvdashboard && \
190-
# # Clean jupyter lab cache: https://github.com/jupyterlab/jupyterlab/issues/4930
191-
# jupyter lab clean && \
192-
# jlpm cache clean && \
193-
# # Remove build folder -> should be remove by lab clean as well?
194-
# rm -rf $CONDA_ROOT/share/jupyter/lab/staging && \
195-
# # Cleanup
196-
# clean-layer.sh
197-
198170
# TODO install DALI: https://docs.nvidia.com/deeplearning/dali/user-guide/docs/installation.html#dali-and-ngc
199171
# TODO: if > Ubuntu 19.04 -> install nvtop: https://github.com/Syllo/nvtop
200172
# TODO: Install ArrayFire: https://arrayfire.com/download/ pip install --no-cache-dir arrayfire && \

0 commit comments

Comments
 (0)