
Commit 1295320

reviewer feedback
1 parent 2479f1f commit 1295320

File tree: 4 files changed (+15, -58 lines)


Dockerfile.tmpl

Lines changed: 4 additions & 11 deletions
@@ -31,27 +31,20 @@ RUN uv pip install --system -r /requirements.txt
 RUN uv pip uninstall --system google-cloud-bigquery-storage
 
 # b/394382016: sigstore (dependency of kagglehub) requires a prerelease packages, installing separate.
-# b/408284143: google-cloud-automl 2.0.0 introduced incompatible API changes, need to pin to 1.0.1
+# b/408284143: google-cloud-automl 2.0.0 introduced incompatible API changes, need to pin to 1.0.1,
+# installed outside of kaggle_requirements.txt due to requiring an incompatibile version of protobuf.
 RUN uv pip install --system --force-reinstall --prerelease=allow kagglehub[pandas-datasets,hf-datasets,signing]>=0.3.12 \
     google-cloud-automl==1.0.1
 
-# b/408284435: Keras 3.6 broke test_keras.py > test_train > keras.datasets.mnist.load_data()
-# See https://github.com/keras-team/keras/commit/dcefb139863505d166dd1325066f329b3033d45a
-# Colab base is on Keras 3.8, we have to install the package separately
-RUN uv pip install --system "keras<3.6"
-
 # uv cannot install this in requirements.txt without --no-build-isolation
 # to avoid affecting the larger build, we'll post-install it.
 RUN uv pip install --no-build-isolation --system "git+https://github.com/Kaggle/learntools"
 
+# b/302136621: Fix eli5 import for learntools
 # b/408281617: Torch is adamant that it can not install cudnn 9.3.x, only 9.1.x, but Tensorflow can only support 9.3.x.
 # This conflict causes a number of package downgrades, which are handled in this command
-# b/302136621: Fix eli5 import for learntools
-# b/416137032: cuda 12.9.0 breaks datashader 1.18.0
 RUN uv pip install --system --force-reinstall --extra-index-url https://pypi.nvidia.com "cuml-cu12==25.2.1" \
-    "nvidia-cudnn-cu12==9.3.0.75" cuda-bindings==12.8.0 cuda-python==12.8.0 \
-    scipy tsfresh scikit-learn==1.2.2 category-encoders eli5
-
+    "nvidia-cudnn-cu12==9.3.0.75"
 RUN uv pip install --system --force-reinstall "pynvjitlink-cu12==0.5.2"
 
 # b/385145217 Latest Colab lacks mkl numpy, install it.
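The pins above only help if uv actually resolves them at build time; a minimal post-build smoke check, not part of this commit, could read the installed versions back with importlib.metadata. The script and its pin table below are illustrative assumptions, not files in this repo:

# Hypothetical smoke check (not in this commit): confirm the versions uv
# resolved match the pins declared in Dockerfile.tmpl.
from importlib.metadata import version, PackageNotFoundError

# Pin table copied from the RUN commands above; the dict itself is illustrative.
PINS = {
    "google-cloud-automl": "1.0.1",    # b/408284143: 2.0.0 broke the API
    "nvidia-cudnn-cu12": "9.3.0.75",   # b/408281617: Tensorflow needs cudnn 9.3.x
    "pynvjitlink-cu12": "0.5.2",
    "cuml-cu12": "25.2.1",
}

for name, expected in PINS.items():
    try:
        installed = version(name)
    except PackageNotFoundError:
        print(f"MISSING: {name} (expected {expected})")
        continue
    status = "OK" if installed == expected else "MISMATCH"
    print(f"{status}: {name} {installed} (expected {expected})")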

kaggle_requirements.txt

Lines changed: 6 additions & 1 deletion
@@ -20,18 +20,20 @@ arrow
 bayesian-optimization
 boto3
 catboost
+category-encoders
 cesium
 comm
 cytoolz
 dask-expr
 # Older versions of datasets fail with "Loading a dataset cached in a LocalFileSystem is not supported"
 # https://stackoverflow.com/questions/77433096/notimplementederror-loading-a-dataset-cached-in-a-localfilesystem-is-not-suppor
 datasets>=2.14.6
-datashader
 deap
 dipy
 docker
 easyocr
+# b/302136621: Fix eli5 import for learntools
+eli5
 emoji
 fastcore>=1.7.20
 fasttext

@@ -119,12 +121,15 @@ qtconsole
 ray
 rgf-python
 s3fs
+scikit-learn==1.2.2
 # Scikit-learn accelerated library for x86
 scikit-learn-intelex>=2023.0.1
 scikit-multilearn
 scikit-optimize
 scikit-plot
 scikit-surprise
+# b/415358158: Gensim removed from Colab image to upgrade scipy to 1.14.1
+scipy==1.15.1
 # Also pinning seaborn for learntools
 seaborn==0.12.2
 git+https://github.com/facebookresearch/segment-anything.git
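These entries move category-encoders, eli5, scikit-learn==1.2.2, and scipy out of the Dockerfile's force-reinstall command and into the declarative requirements list. As a rough sanity check for the b/302136621 pairing, an import smoke test might look like the sketch below (the script and its asserts are illustrative, not part of the commit):

# Hypothetical import check: eli5's sklearn integration is what learntools
# exercises (b/302136621), so import both against the pinned versions.
import scipy
import sklearn
import eli5  # must import cleanly alongside scikit-learn==1.2.2

assert sklearn.__version__ == "1.2.2", sklearn.__version__
assert scipy.__version__ == "1.15.1", scipy.__version__
print("eli5", eli5.__version__, "imports cleanly")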

tests/test_datashader.py

Lines changed: 0 additions & 42 deletions
This file was deleted.

tests/test_keras.py

Lines changed: 5 additions & 4 deletions
@@ -9,10 +9,11 @@
 
 class TestKeras(unittest.TestCase):
     def test_train(self):
-        # Load the data and split it between train and test sets
-        (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data(
-            path='/input/tests/data/mnist.npz'
-        )
+        path = '/input/tests/data/mnist.npz'
+        with np.load(path) as f:
+            x_train, y_train = f['x_train'], f['y_train']
+            x_test, y_test = f['x_test'], f['y_test']
+
 
         # Scale images to the [0, 1] range
         x_train = x_train.astype("float32") / 255
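The replacement sidesteps keras.datasets.mnist.load_data(), whose path argument is documented as a cache location relative to ~/.keras/datasets and whose loading behavior changed around Keras 3.6 (see the comment removed from Dockerfile.tmpl above); reading the .npz archive directly keeps the test independent of the Keras version. A standalone sketch of the pattern, with illustrative shape checks:

# Standalone sketch of the new loading pattern: np.load on a local mnist.npz
# returns an NpzFile; using it as a context manager closes the file handle.
import numpy as np

path = '/input/tests/data/mnist.npz'  # the image's test fixture
with np.load(path) as f:
    x_train, y_train = f['x_train'], f['y_train']
    x_test, y_test = f['x_test'], f['y_test']

# MNIST ships 60k training and 10k test images of 28x28 pixels.
print(x_train.shape, y_train.shape)  # (60000, 28, 28) (60000,)
print(x_test.shape, y_test.shape)    # (10000, 28, 28) (10000,)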
