FIX Add input array check to randomized_svd and randomized_range_finder (scikit-learn#30819)

clane9 · adrinjalali · jeremiedbb · web-flow · commit 9f3ca07560c5 · 2025-04-17T09:28:03.000Z
Co-authored-by: Adrin Jalali <adrin.jalali@gmail.com>
Co-authored-by: Jérémie du Boisberranger <jeremie@probabl.ai>
diff --git a/doc/whats_new/upcoming_changes/array-api/30819.feature.rst b/doc/whats_new/upcoming_changes/array-api/30819.feature.rst
@@ -0,0 +1,2 @@
+- :func:`sklearn.utils.extmath.randomized_svd` now supports Array API compatible inputs.
+  By :user:`Connor Lane <clane9>` and :user:`Jérémie du Boisberranger <jeremiedbb>`.
diff --git a/doc/whats_new/upcoming_changes/sklearn.utils/30819.fix.rst b/doc/whats_new/upcoming_changes/sklearn.utils/30819.fix.rst
@@ -0,0 +1,4 @@
+- :func:`utils.extmath.randomized_svd` and :func:`utils.extmath.randomized_range_finder`
+  now validate their input array to fail early with an informative error message on
+  invalid input.
+  By :user:`Connor Lane <clane9>`.
diff --git a/sklearn/cluster/_bicluster.py b/sklearn/cluster/_bicluster.py
@@ -14,7 +14,7 @@
 from ..base import BaseEstimator, BiclusterMixin, _fit_context
 from ..utils import check_random_state, check_scalar
 from ..utils._param_validation import Interval, StrOptions
-from ..utils.extmath import make_nonnegative, randomized_svd, safe_sparse_dot
+from ..utils.extmath import _randomized_svd, make_nonnegative, safe_sparse_dot
 from ..utils.validation import assert_all_finite, validate_data
 from ._kmeans import KMeans, MiniBatchKMeans
 
@@ -144,7 +144,7 @@ def _svd(self, array, n_components, n_discard):
             kwargs = {}
             if self.n_svd_vecs is not None:
                 kwargs["n_oversamples"] = self.n_svd_vecs
-            u, _, vt = randomized_svd(
+            u, _, vt = _randomized_svd(
                 array, n_components, random_state=self.random_state, **kwargs
             )
 
diff --git a/sklearn/decomposition/_dict_learning.py b/sklearn/decomposition/_dict_learning.py
@@ -21,7 +21,7 @@
 from ..linear_model import Lars, Lasso, LassoLars, orthogonal_mp_gram
 from ..utils import check_array, check_random_state, gen_batches, gen_even_slices
 from ..utils._param_validation import Interval, StrOptions, validate_params
-from ..utils.extmath import randomized_svd, row_norms, svd_flip
+from ..utils.extmath import _randomized_svd, row_norms, svd_flip
 from ..utils.parallel import Parallel, delayed
 from ..utils.validation import check_is_fitted, validate_data
 
@@ -2049,7 +2049,7 @@ def _initialize_dict(self, X, random_state):
             dictionary = self.dict_init
         else:
             # Init V with SVD of X
-            _, S, dictionary = randomized_svd(
+            _, S, dictionary = _randomized_svd(
                 X, self._n_components, random_state=random_state
             )
             dictionary = S[:, np.newaxis] * dictionary
diff --git a/sklearn/decomposition/_factor_analysis.py b/sklearn/decomposition/_factor_analysis.py
@@ -32,7 +32,7 @@
 from ..exceptions import ConvergenceWarning
 from ..utils import check_random_state
 from ..utils._param_validation import Interval, StrOptions
-from ..utils.extmath import fast_logdet, randomized_svd, squared_norm
+from ..utils.extmath import _randomized_svd, fast_logdet, squared_norm
 from ..utils.validation import check_is_fitted, validate_data
 
 
@@ -264,7 +264,7 @@ def my_svd(X):
             random_state = check_random_state(self.random_state)
 
             def my_svd(X):
-                _, s, Vt = randomized_svd(
+                _, s, Vt = _randomized_svd(
                     X,
                     n_components,
                     random_state=random_state,
diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
@@ -28,7 +28,7 @@
     StrOptions,
     validate_params,
 )
-from ..utils.extmath import randomized_svd, safe_sparse_dot, squared_norm
+from ..utils.extmath import _randomized_svd, safe_sparse_dot, squared_norm
 from ..utils.validation import (
     check_is_fitted,
     check_non_negative,
@@ -314,7 +314,7 @@ def _initialize_nmf(X, n_components, init=None, eps=1e-6, random_state=None):
         return W, H
 
     # NNDSVD initialization
-    U, S, V = randomized_svd(X, n_components, random_state=random_state)
+    U, S, V = _randomized_svd(X, n_components, random_state=random_state)
     W = np.zeros_like(U)
     H = np.zeros_like(V)
 
diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py
@@ -16,7 +16,7 @@
 from ..utils._arpack import _init_arpack_v0
 from ..utils._array_api import _convert_to_numpy, get_namespace
 from ..utils._param_validation import Interval, RealNotInt, StrOptions
-from ..utils.extmath import fast_logdet, randomized_svd, stable_cumsum, svd_flip
+from ..utils.extmath import _randomized_svd, fast_logdet, stable_cumsum, svd_flip
 from ..utils.sparsefuncs import _implicit_column_offset, mean_variance_axis
 from ..utils.validation import check_is_fitted, validate_data
 from ._base import _BasePCA
@@ -754,7 +754,7 @@ def _fit_truncated(self, X, n_components, xp):
 
         elif svd_solver == "randomized":
             # sign flipping is done inside
-            U, S, Vt = randomized_svd(
+            U, S, Vt = _randomized_svd(
                 X_centered,
                 n_components=n_components,
                 n_oversamples=self.n_oversamples,
diff --git a/sklearn/decomposition/_truncated_svd.py b/sklearn/decomposition/_truncated_svd.py
@@ -18,7 +18,7 @@
 from ..utils import check_array, check_random_state
 from ..utils._arpack import _init_arpack_v0
 from ..utils._param_validation import Interval, StrOptions
-from ..utils.extmath import randomized_svd, safe_sparse_dot, svd_flip
+from ..utils.extmath import _randomized_svd, safe_sparse_dot, svd_flip
 from ..utils.sparsefuncs import mean_variance_axis
 from ..utils.validation import check_is_fitted, validate_data
 
@@ -241,7 +241,7 @@ def fit_transform(self, X, y=None):
                     f"n_components({self.n_components}) must be <="
                     f" n_features({X.shape[1]})."
                 )
-            U, Sigma, VT = randomized_svd(
+            U, Sigma, VT = _randomized_svd(
                 X,
                 self.n_components,
                 n_iter=self.n_iter,
diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py
@@ -219,7 +219,7 @@ def randomized_range_finder(
 
     Parameters
     ----------
-    A : 2D array
+    A : {array-like, sparse matrix} of shape (n_samples, n_features)
         The input data matrix.
 
     size : int
@@ -246,9 +246,9 @@ def randomized_range_finder(
 
     Returns
     -------
-    Q : ndarray
-        A (size x size) projection matrix, the range of which
-        approximates well the range of the input matrix A.
+    Q : ndarray of shape (n_samples, size)
+        A projection matrix, the range of which approximates well the range of the
+        input matrix A.
 
     Notes
     -----
@@ -273,6 +273,21 @@ def randomized_range_finder(
            [-0.52...,  0.24...],
            [-0.82..., -0.38...]])
     """
+    A = check_array(A, accept_sparse=True)
+
+    return _randomized_range_finder(
+        A,
+        size=size,
+        n_iter=n_iter,
+        power_iteration_normalizer=power_iteration_normalizer,
+        random_state=random_state,
+    )
+
+
+def _randomized_range_finder(
+    A, *, size, n_iter, power_iteration_normalizer="auto", random_state=None
+):
+    """Body of randomized_range_finder without input validation."""
     xp, is_array_api_compliant = get_namespace(A)
     random_state = check_random_state(random_state)
 
@@ -344,7 +359,7 @@ def randomized_range_finder(
 
 @validate_params(
     {
-        "M": [np.ndarray, "sparse matrix"],
+        "M": ["array-like", "sparse matrix"],
         "n_components": [Interval(Integral, 1, None, closed="left")],
         "n_oversamples": [Interval(Integral, 0, None, closed="left")],
         "n_iter": [Interval(Integral, 0, None, closed="left"), StrOptions({"auto"})],
@@ -381,7 +396,7 @@ def randomized_svd(
 
     Parameters
     ----------
-    M : {ndarray, sparse matrix}
+    M : {array-like, sparse matrix} of shape (n_samples, n_features)
         Matrix to decompose.
 
     n_components : int
@@ -499,6 +514,35 @@ def randomized_svd(
     >>> U.shape, s.shape, Vh.shape
     ((3, 2), (2,), (2, 4))
     """
+    M = check_array(M, accept_sparse=True)
+    return _randomized_svd(
+        M,
+        n_components=n_components,
+        n_oversamples=n_oversamples,
+        n_iter=n_iter,
+        power_iteration_normalizer=power_iteration_normalizer,
+        transpose=transpose,
+        flip_sign=flip_sign,
+        random_state=random_state,
+        svd_lapack_driver=svd_lapack_driver,
+    )
+
+
+def _randomized_svd(
+    M,
+    n_components,
+    *,
+    n_oversamples=10,
+    n_iter="auto",
+    power_iteration_normalizer="auto",
+    transpose="auto",
+    flip_sign=True,
+    random_state=None,
+    svd_lapack_driver="gesdd",
+):
+    """Body of randomized_svd without input validation."""
+    xp, is_array_api_compliant = get_namespace(M)
+
     if sparse.issparse(M) and M.format in ("lil", "dok"):
         warnings.warn(
             "Calculating SVD of a {} is expensive. "
@@ -521,7 +565,7 @@ def randomized_svd(
         # this implementation is a bit faster with smaller shape[1]
         M = M.T
 
-    Q = randomized_range_finder(
+    Q = _randomized_range_finder(
         M,
         size=n_random,
         n_iter=n_iter,
@@ -533,7 +577,6 @@ def randomized_svd(
     B = Q.T @ M
 
     # compute the SVD on the thin matrix: (k + p) wide
-    xp, is_array_api_compliant = get_namespace(B)
     if is_array_api_compliant:
         Uhat, s, Vt = xp.linalg.svd(B, full_matrices=False)
     else:
diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py
@@ -9,10 +9,18 @@
 from scipy.linalg import eigh
 from scipy.sparse.linalg import eigsh
 
+from sklearn import config_context
 from sklearn.datasets import make_low_rank_matrix, make_sparse_spd_matrix
 from sklearn.utils import gen_batches
 from sklearn.utils._arpack import _init_arpack_v0
+from sklearn.utils._array_api import (
+    _convert_to_numpy,
+    _get_namespace_device_dtype_ids,
+    get_namespace,
+    yield_namespace_device_dtype_combinations,
+)
 from sklearn.utils._testing import (
+    _array_api_for_tests,
     assert_allclose,
     assert_allclose_dense_sparse,
     assert_almost_equal,
@@ -28,6 +36,7 @@
     _safe_accumulator_op,
     cartesian,
     density,
+    randomized_range_finder,
     randomized_svd,
     row_norms,
     safe_sparse_dot,
@@ -1060,3 +1069,53 @@ def test_approximate_mode():
     # 25% * 99.000 = 24.750
     # 25% *  1.000 =    250
     assert_array_equal(ret, [24750, 250])
+
+
+@pytest.mark.parametrize(
+    "array_namespace, device, dtype",
+    yield_namespace_device_dtype_combinations(),
+    ids=_get_namespace_device_dtype_ids,
+)
+def test_randomized_svd_array_api_compliance(array_namespace, device, dtype):
+    xp = _array_api_for_tests(array_namespace, device)
+
+    rng = np.random.RandomState(0)
+    X = rng.normal(size=(30, 10)).astype(dtype)
+    X_xp = xp.asarray(X, device=device)
+    n_components = 5
+    atol = 1e-5 if dtype == "float32" else 0
+
+    with config_context(array_api_dispatch=True):
+        u_np, s_np, vt_np = randomized_svd(X, n_components, random_state=0)
+        u_xp, s_xp, vt_xp = randomized_svd(X_xp, n_components, random_state=0)
+
+        assert get_namespace(u_xp)[0].__name__ == xp.__name__
+        assert get_namespace(s_xp)[0].__name__ == xp.__name__
+        assert get_namespace(vt_xp)[0].__name__ == xp.__name__
+
+        assert_allclose(_convert_to_numpy(u_xp, xp), u_np, atol=atol)
+        assert_allclose(_convert_to_numpy(s_xp, xp), s_np, atol=atol)
+        assert_allclose(_convert_to_numpy(vt_xp, xp), vt_np, atol=atol)
+
+
+@pytest.mark.parametrize(
+    "array_namespace, device, dtype",
+    yield_namespace_device_dtype_combinations(),
+    ids=_get_namespace_device_dtype_ids,
+)
+def test_randomized_range_finder_array_api_compliance(array_namespace, device, dtype):
+    xp = _array_api_for_tests(array_namespace, device)
+
+    rng = np.random.RandomState(0)
+    X = rng.normal(size=(30, 10)).astype(dtype)
+    X_xp = xp.asarray(X, device=device)
+    size = 5
+    n_iter = 10
+    atol = 1e-5 if dtype == "float32" else 0
+
+    with config_context(array_api_dispatch=True):
+        Q_np = randomized_range_finder(X, size=size, n_iter=n_iter, random_state=0)
+        Q_xp = randomized_range_finder(X_xp, size=size, n_iter=n_iter, random_state=0)
+
+        assert get_namespace(Q_xp)[0].__name__ == xp.__name__
+        assert_allclose(_convert_to_numpy(Q_xp, xp), Q_np, atol=atol)

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	+- :func:`sklearn.utils.extmath.randomized_svd` now supports Array API compatible inputs.
	`2`	+ By :user:`Connor Lane <clane9>` and :user:`Jérémie du Boisberranger <jeremiedbb>`.