Skip to content

Commit 8a20862

Browse files
authored
Upgrade Base Image: colab_20250404-060113_RC00 (#1484)
This particular image had issues with UV installs; however, it does highlight a solution that will be included in the next image: googlecolab/colabtools#5237. The base image also removes Gensim due to SciPy 1.14.1; we included a fix to install both, since Gensim is a popular package with ~200 users per day. Updated mocks for GCS-related tests, as the latest version causes issues. Added a few packages back into requirements.txt that were removed due to issues that have since been resolved.
1 parent 3cdabdf commit 8a20862

7 files changed

+38
-66
lines changed

Dockerfile.tmpl

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ FROM gcr.io/kaggle-images/python-lightgbm-whl:${BASE_IMAGE_TAG}-${LIGHTGBM_VERSI
77
{{ end }}
88
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG}
99

10+
#b/415358342: UV reports missing requirements files https://github.com/googlecolab/colabtools/issues/5237
11+
ENV UV_CONSTRAINT= \
12+
UV_BUILD_CONSTRAINT=
13+
1014
ADD kaggle_requirements.txt /kaggle_requirements.txt
1115

1216
# Freeze existing requirements from base image for critical packages:
@@ -27,26 +31,19 @@ RUN uv pip install --system -r /requirements.txt
2731
RUN uv pip uninstall --system google-cloud-bigquery-storage
2832

2933
# b/394382016: sigstore (dependency of kagglehub) requires a prerelease packages, installing separate.
30-
RUN uv pip install --system --force-reinstall --prerelease=allow kagglehub[pandas-datasets,hf-datasets,signing]>=0.3.9
31-
32-
# b/408284143: google-cloud-automl 2.0.0 introduced incompatible API changes, need to pin to 1.0.1
33-
34-
# b/408284435: Keras 3.6 broke test_keras.py > test_train > keras.datasets.mnist.load_data()
35-
# See https://github.com/keras-team/keras/commit/dcefb139863505d166dd1325066f329b3033d45a
36-
# Colab base is on Keras 3.8, we have to install the package separately
37-
RUN uv pip install --system google-cloud-automl==1.0.1 google-cloud-aiplatform google-cloud-translate==3.12.1 \
38-
google-cloud-videointelligence google-cloud-vision google-genai "keras<3.6"
34+
# b/408284143: google-cloud-automl 2.0.0 introduced incompatible API changes, need to pin to 1.0.1,
35+
# installed outside of kaggle_requirements.txt due to requiring an incompatible version of protobuf.
36+
RUN uv pip install --system --force-reinstall --prerelease=allow kagglehub[pandas-datasets,hf-datasets,signing]>=0.3.12 \
37+
google-cloud-automl==1.0.1
3938

4039
# uv cannot install this in requirements.txt without --no-build-isolation
4140
# to avoid affecting the larger build, we'll post-install it.
4241
RUN uv pip install --no-build-isolation --system "git+https://github.com/Kaggle/learntools"
4342

4443
# b/408281617: Torch is adamant that it can not install cudnn 9.3.x, only 9.1.x, but Tensorflow can only support 9.3.x.
4544
# This conflict causes a number of package downgrades, which are handled in this command
46-
# b/302136621: Fix eli5 import for learntools
4745
RUN uv pip install --system --force-reinstall --extra-index-url https://pypi.nvidia.com "cuml-cu12==25.2.1" \
48-
"nvidia-cudnn-cu12==9.3.0.75" scipy tsfresh scikit-learn==1.2.2 category-encoders eli5
49-
46+
"nvidia-cudnn-cu12==9.3.0.75"
5047
RUN uv pip install --system --force-reinstall "pynvjitlink-cu12==0.5.2"
5148

5249
# b/385145217 Latest Colab lacks mkl numpy, install it.
@@ -56,10 +53,10 @@ RUN uv pip install --system --force-reinstall -i https://pypi.anaconda.org/intel
5653
RUN uv pip install --system "tbb>=2022" "libpysal==4.9.2"
5754

5855
# b/404590350: Ray and torchtune have conflicting tune cli, we will prioritize torchtune.
59-
RUN uv pip install --system --force-reinstall --no-deps torchtune
56+
# b/415358158: Gensim removed from Colab image to upgrade scipy
57+
RUN uv pip install --system --force-reinstall --no-deps torchtune gensim
6058

6159
# Adding non-package dependencies:
62-
6360
ADD clean-layer.sh /tmp/clean-layer.sh
6461
ADD patches/nbconvert-extensions.tpl /opt/kaggle/nbconvert-extensions.tpl
6562
ADD patches/template_conf.json /opt/kaggle/conf.json
@@ -171,13 +168,13 @@ RUN mkdir -p /root/.jupyter && touch /root/.jupyter/jupyter_nbconvert_config.py
171168
mkdir -p /etc/ipython/ && echo "c = get_config(); c.IPKernelApp.matplotlib = 'inline'" > /etc/ipython/ipython_config.py && \
172169
/tmp/clean-layer.sh
173170

174-
# Fix to import bq_helper library without downgrading setuptools
171+
# Fix to import bq_helper library without downgrading setuptools and upgrading protobuf
175172
RUN mkdir -p ~/src && git clone https://github.com/SohierDane/BigQuery_Helper ~/src/BigQuery_Helper && \
176173
mkdir -p ~/src/BigQuery_Helper/bq_helper && \
177174
mv ~/src/BigQuery_Helper/bq_helper.py ~/src/BigQuery_Helper/bq_helper/__init__.py && \
178175
mv ~/src/BigQuery_Helper/test_helper.py ~/src/BigQuery_Helper/bq_helper/ && \
179176
sed -i 's/)/packages=["bq_helper"])/g' ~/src/BigQuery_Helper/setup.py && \
180-
uv pip install --system -e ~/src/BigQuery_Helper && \
177+
uv pip install --system -e ~/src/BigQuery_Helper "protobuf<3.21"&& \
181178
/tmp/clean-layer.sh
182179

183180

config.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
BASE_IMAGE=us-docker.pkg.dev/colab-images/public/runtime
2-
BASE_IMAGE_TAG=release-colab_20250219-060225_RC01
2+
BASE_IMAGE_TAG=release-colab_20250404-060113_RC00
33
LIGHTGBM_VERSION=4.6.0
44
CUDA_MAJOR_VERSION=12
55
CUDA_MINOR_VERSION=5

kaggle_requirements.txt

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,20 @@ arrow
2020
bayesian-optimization
2121
boto3
2222
catboost
23+
category-encoders
2324
cesium
2425
comm
2526
cytoolz
2627
dask-expr
2728
# Older versions of datasets fail with "Loading a dataset cached in a LocalFileSystem is not supported"
2829
# https://stackoverflow.com/questions/77433096/notimplementederror-loading-a-dataset-cached-in-a-localfilesystem-is-not-suppor
2930
datasets>=2.14.6
30-
datashader
3131
deap
3232
dipy
3333
docker
3434
easyocr
35+
# b/302136621: Fix eli5 import for learntools
36+
eli5
3537
emoji
3638
fastcore>=1.7.20
3739
fasttext
@@ -42,6 +44,13 @@ fuzzywuzzy
4244
geojson
4345
# geopandas > v0.14.4 breaks learn tools
4446
geopandas==v0.14.4
47+
gensim
48+
google-cloud-aiplatform
49+
# b/315753846: Unpin translate package.
50+
google-cloud-translate==3.12.1
51+
google-cloud-videointelligence
52+
google-cloud-vision
53+
google-genai
4554
gpxpy
4655
h2o
4756
haversine
@@ -112,12 +121,15 @@ qtconsole
112121
ray
113122
rgf-python
114123
s3fs
124+
scikit-learn==1.2.2
115125
# Scikit-learn accelerated library for x86
116126
scikit-learn-intelex>=2023.0.1
117127
scikit-multilearn
118128
scikit-optimize
119129
scikit-plot
120130
scikit-surprise
131+
# b/415358158: Gensim removed from Colab image to upgrade scipy to 1.14.1
132+
scipy==1.15.1
121133
# Also pinning seaborn for learntools
122134
seaborn==0.12.2
123135
git+https://github.com/facebookresearch/segment-anything.git

tests/test_automl.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88

99
def _make_credentials():
1010
import google.auth.credentials
11-
return Mock(spec=google.auth.credentials.Credentials)
11+
credentials = Mock(spec=google.auth.credentials.Credentials)
12+
credentials.universe_domain = 'googleapis.com'
13+
return credentials
1214

1315
class TestAutoMl(unittest.TestCase):
1416

tests/test_datashader.py

Lines changed: 0 additions & 42 deletions
This file was deleted.

tests/test_gcs.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88

99
def _make_credentials():
1010
import google.auth.credentials
11-
return Mock(spec=google.auth.credentials.Credentials)
11+
credentials = Mock(spec=google.auth.credentials.Credentials)
12+
credentials.universe_domain = 'googleapis.com'
13+
return credentials
1214

1315
class TestStorage(unittest.TestCase):
1416

tests/test_keras.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,11 @@
99

1010
class TestKeras(unittest.TestCase):
1111
def test_train(self):
12-
# Load the data and split it between train and test sets
13-
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data(
14-
path='/input/tests/data/mnist.npz'
15-
)
12+
path = '/input/tests/data/mnist.npz'
13+
with np.load(path) as f:
14+
x_train, y_train = f['x_train'], f['y_train']
15+
x_test, y_test = f['x_test'], f['y_test']
16+
1617

1718
# Scale images to the [0, 1] range
1819
x_train = x_train.astype("float32") / 255

0 commit comments

Comments
 (0)