Skip to content

Commit 7d0bec5

Browse files
adrinjalali, glemaitre, and Charlie-XIAO
authored
API move BaseEstimator._validate_data to utils.validation.validate_data (scikit-learn#29696)
Co-authored-by: Guillaume Lemaitre <guillaume@probabl.ai> Co-authored-by: Yao Xiao <108576690+Charlie-XIAO@users.noreply.github.com>
1 parent 5b6622b commit 7d0bec5

File tree

103 files changed

+956
-674
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

103 files changed

+956
-674
lines changed

doc/api_reference.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1183,6 +1183,7 @@ def _get_submodule(module_name, submodule_name):
11831183
"validation.check_symmetric",
11841184
"validation.column_or_1d",
11851185
"validation.has_fit_parameter",
1186+
"validation.validate_data",
11861187
],
11871188
},
11881189
{

doc/whats_new/v1.6.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,12 @@ Version 1.6.0
2525
Changes impacting many modules
2626
------------------------------
2727

28+
- |API| :func:`utils.validation.validate_data` is introduced and replaces previously
29+
private `base.BaseEstimator._validate_data` method. This is intended for third party
30+
estimator developers, who should use this function in most cases instead of
31+
:func:`utils.validation.check_array` and :func:`utils.validation.check_X_y`.
32+
:pr:`29696` by `Adrin Jalali`_.
33+
2834
- |Enhancement| `__sklearn_tags__` was introduced for setting tags in estimators.
2935
More details in :ref:`estimator_tags`.
3036
:pr:`22606` by `Thomas Fan`_ and :pr:`29677` by `Adrin Jalali`_.

sklearn/base.py

Lines changed: 0 additions & 261 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,10 @@
2424
from .utils.fixes import _IS_32BIT
2525
from .utils.validation import (
2626
_check_feature_names_in,
27-
_check_y,
2827
_generate_get_feature_names_out,
29-
_get_feature_names,
3028
_is_fitted,
31-
_num_features,
3229
check_array,
3330
check_is_fitted,
34-
check_X_y,
3531
)
3632

3733

@@ -386,262 +382,6 @@ def __setstate__(self, state):
386382
def __sklearn_tags__(self):
387383
return default_tags(self)
388384

389-
def _check_n_features(self, X, reset):
390-
"""Set the `n_features_in_` attribute, or check against it.
391-
392-
Parameters
393-
----------
394-
X : {ndarray, sparse matrix} of shape (n_samples, n_features)
395-
The input samples.
396-
reset : bool
397-
If True, the `n_features_in_` attribute is set to `X.shape[1]`.
398-
If False and the attribute exists, then check that it is equal to
399-
`X.shape[1]`. If False and the attribute does *not* exist, then
400-
the check is skipped.
401-
.. note::
402-
It is recommended to call reset=True in `fit` and in the first
403-
call to `partial_fit`. All other methods that validate `X`
404-
should set `reset=False`.
405-
"""
406-
try:
407-
n_features = _num_features(X)
408-
except TypeError as e:
409-
if not reset and hasattr(self, "n_features_in_"):
410-
raise ValueError(
411-
"X does not contain any features, but "
412-
f"{self.__class__.__name__} is expecting "
413-
f"{self.n_features_in_} features"
414-
) from e
415-
# If the number of features is not defined and reset=True,
416-
# then we skip this check
417-
return
418-
419-
if reset:
420-
self.n_features_in_ = n_features
421-
return
422-
423-
if not hasattr(self, "n_features_in_"):
424-
# Skip this check if the expected number of expected input features
425-
# was not recorded by calling fit first. This is typically the case
426-
# for stateless transformers.
427-
return
428-
429-
if n_features != self.n_features_in_:
430-
raise ValueError(
431-
f"X has {n_features} features, but {self.__class__.__name__} "
432-
f"is expecting {self.n_features_in_} features as input."
433-
)
434-
435-
def _check_feature_names(self, X, *, reset):
436-
"""Set or check the `feature_names_in_` attribute.
437-
438-
.. versionadded:: 1.0
439-
440-
Parameters
441-
----------
442-
X : {ndarray, dataframe} of shape (n_samples, n_features)
443-
The input samples.
444-
445-
reset : bool
446-
Whether to reset the `feature_names_in_` attribute.
447-
If False, the input will be checked for consistency with
448-
feature names of data provided when reset was last True.
449-
.. note::
450-
It is recommended to call `reset=True` in `fit` and in the first
451-
call to `partial_fit`. All other methods that validate `X`
452-
should set `reset=False`.
453-
"""
454-
455-
if reset:
456-
feature_names_in = _get_feature_names(X)
457-
if feature_names_in is not None:
458-
self.feature_names_in_ = feature_names_in
459-
elif hasattr(self, "feature_names_in_"):
460-
# Delete the attribute when the estimator is fitted on a new dataset
461-
# that has no feature names.
462-
delattr(self, "feature_names_in_")
463-
return
464-
465-
fitted_feature_names = getattr(self, "feature_names_in_", None)
466-
X_feature_names = _get_feature_names(X)
467-
468-
if fitted_feature_names is None and X_feature_names is None:
469-
# no feature names seen in fit and in X
470-
return
471-
472-
if X_feature_names is not None and fitted_feature_names is None:
473-
warnings.warn(
474-
f"X has feature names, but {self.__class__.__name__} was fitted without"
475-
" feature names"
476-
)
477-
return
478-
479-
if X_feature_names is None and fitted_feature_names is not None:
480-
warnings.warn(
481-
"X does not have valid feature names, but"
482-
f" {self.__class__.__name__} was fitted with feature names"
483-
)
484-
return
485-
486-
# validate the feature names against the `feature_names_in_` attribute
487-
if len(fitted_feature_names) != len(X_feature_names) or np.any(
488-
fitted_feature_names != X_feature_names
489-
):
490-
message = (
491-
"The feature names should match those that were passed during fit.\n"
492-
)
493-
fitted_feature_names_set = set(fitted_feature_names)
494-
X_feature_names_set = set(X_feature_names)
495-
496-
unexpected_names = sorted(X_feature_names_set - fitted_feature_names_set)
497-
missing_names = sorted(fitted_feature_names_set - X_feature_names_set)
498-
499-
def add_names(names):
500-
output = ""
501-
max_n_names = 5
502-
for i, name in enumerate(names):
503-
if i >= max_n_names:
504-
output += "- ...\n"
505-
break
506-
output += f"- {name}\n"
507-
return output
508-
509-
if unexpected_names:
510-
message += "Feature names unseen at fit time:\n"
511-
message += add_names(unexpected_names)
512-
513-
if missing_names:
514-
message += "Feature names seen at fit time, yet now missing:\n"
515-
message += add_names(missing_names)
516-
517-
if not missing_names and not unexpected_names:
518-
message += (
519-
"Feature names must be in the same order as they were in fit.\n"
520-
)
521-
522-
raise ValueError(message)
523-
524-
def _validate_data(
525-
self,
526-
X="no_validation",
527-
y="no_validation",
528-
reset=True,
529-
validate_separately=False,
530-
cast_to_ndarray=True,
531-
**check_params,
532-
):
533-
"""Validate input data and set or check the `n_features_in_` attribute.
534-
535-
Parameters
536-
----------
537-
X : {array-like, sparse matrix, dataframe} of shape \
538-
(n_samples, n_features), default='no validation'
539-
The input samples.
540-
If `'no_validation'`, no validation is performed on `X`. This is
541-
useful for meta-estimator which can delegate input validation to
542-
their underlying estimator(s). In that case `y` must be passed and
543-
the only accepted `check_params` are `multi_output` and
544-
`y_numeric`.
545-
546-
y : array-like of shape (n_samples,), default='no_validation'
547-
The targets.
548-
549-
- If `None`, `check_array` is called on `X`. If the estimator's
550-
requires_y tag is True, then an error will be raised.
551-
- If `'no_validation'`, `check_array` is called on `X` and the
552-
estimator's requires_y tag is ignored. This is a default
553-
placeholder and is never meant to be explicitly set. In that case
554-
`X` must be passed.
555-
- Otherwise, only `y` with `_check_y` or both `X` and `y` are
556-
checked with either `check_array` or `check_X_y` depending on
557-
`validate_separately`.
558-
559-
reset : bool, default=True
560-
Whether to reset the `n_features_in_` attribute.
561-
If False, the input will be checked for consistency with data
562-
provided when reset was last True.
563-
.. note::
564-
It is recommended to call reset=True in `fit` and in the first
565-
call to `partial_fit`. All other methods that validate `X`
566-
should set `reset=False`.
567-
568-
validate_separately : False or tuple of dicts, default=False
569-
Only used if y is not None.
570-
If False, call validate_X_y(). Else, it must be a tuple of kwargs
571-
to be used for calling check_array() on X and y respectively.
572-
573-
`estimator=self` is automatically added to these dicts to generate
574-
more informative error message in case of invalid input data.
575-
576-
cast_to_ndarray : bool, default=True
577-
Cast `X` and `y` to ndarray with checks in `check_params`. If
578-
`False`, `X` and `y` are unchanged and only `feature_names_in_` and
579-
`n_features_in_` are checked.
580-
581-
**check_params : kwargs
582-
Parameters passed to :func:`sklearn.utils.check_array` or
583-
:func:`sklearn.utils.check_X_y`. Ignored if validate_separately
584-
is not False.
585-
586-
`estimator=self` is automatically added to these params to generate
587-
more informative error message in case of invalid input data.
588-
589-
Returns
590-
-------
591-
out : {ndarray, sparse matrix} or tuple of these
592-
The validated input. A tuple is returned if both `X` and `y` are
593-
validated.
594-
"""
595-
self._check_feature_names(X, reset=reset)
596-
597-
if y is None and self.__sklearn_tags__().target_tags.required:
598-
raise ValueError(
599-
f"This {self.__class__.__name__} estimator "
600-
"requires y to be passed, but the target y is None."
601-
)
602-
603-
no_val_X = isinstance(X, str) and X == "no_validation"
604-
no_val_y = y is None or isinstance(y, str) and y == "no_validation"
605-
606-
if no_val_X and no_val_y:
607-
raise ValueError("Validation should be done on X, y or both.")
608-
609-
default_check_params = {"estimator": self}
610-
check_params = {**default_check_params, **check_params}
611-
612-
if not cast_to_ndarray:
613-
if not no_val_X and no_val_y:
614-
out = X
615-
elif no_val_X and not no_val_y:
616-
out = y
617-
else:
618-
out = X, y
619-
elif not no_val_X and no_val_y:
620-
out = check_array(X, input_name="X", **check_params)
621-
elif no_val_X and not no_val_y:
622-
out = _check_y(y, **check_params)
623-
else:
624-
if validate_separately:
625-
# We need this because some estimators validate X and y
626-
# separately, and in general, separately calling check_array()
627-
# on X and y isn't equivalent to just calling check_X_y()
628-
# :(
629-
check_X_params, check_y_params = validate_separately
630-
if "estimator" not in check_X_params:
631-
check_X_params = {**default_check_params, **check_X_params}
632-
X = check_array(X, input_name="X", **check_X_params)
633-
if "estimator" not in check_y_params:
634-
check_y_params = {**default_check_params, **check_y_params}
635-
y = check_array(y, input_name="y", **check_y_params)
636-
else:
637-
X, y = check_X_y(X, y, **check_params)
638-
out = X, y
639-
640-
if not no_val_X and check_params.get("ensure_2d", True):
641-
self._check_n_features(X, reset=reset)
642-
643-
return out
644-
645385
def _validate_params(self):
646386
"""Validate types and values of constructor parameters
647387
@@ -984,7 +724,6 @@ def get_submatrix(self, i, data):
984724
Works with sparse matrices. Only works if ``rows_`` and
985725
``columns_`` attributes exist.
986726
"""
987-
from .utils.validation import check_array
988727

989728
data = check_array(data, accept_sparse="csr")
990729
row_ind, col_ind = self.get_indices(i)

sklearn/cluster/_affinity_propagation.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from ..metrics import euclidean_distances, pairwise_distances_argmin
1515
from ..utils import check_random_state
1616
from ..utils._param_validation import Interval, StrOptions, validate_params
17-
from ..utils.validation import check_is_fitted
17+
from ..utils.validation import check_is_fitted, validate_data
1818

1919

2020
def _equal_similarities_and_preferences(S, preference):
@@ -504,10 +504,10 @@ def fit(self, X, y=None):
504504
Returns the instance itself.
505505
"""
506506
if self.affinity == "precomputed":
507-
X = self._validate_data(X, copy=self.copy, force_writeable=True)
507+
X = validate_data(self, X, copy=self.copy, force_writeable=True)
508508
self.affinity_matrix_ = X
509509
else: # self.affinity == "euclidean"
510-
X = self._validate_data(X, accept_sparse="csr")
510+
X = validate_data(self, X, accept_sparse="csr")
511511
self.affinity_matrix_ = -euclidean_distances(X, squared=True)
512512

513513
if self.affinity_matrix_.shape[0] != self.affinity_matrix_.shape[1]:
@@ -559,7 +559,7 @@ def predict(self, X):
559559
Cluster labels.
560560
"""
561561
check_is_fitted(self)
562-
X = self._validate_data(X, reset=False, accept_sparse="csr")
562+
X = validate_data(self, X, reset=False, accept_sparse="csr")
563563
if not hasattr(self, "cluster_centers_"):
564564
raise ValueError(
565565
"Predict method is not supported when affinity='precomputed'."

sklearn/cluster/_agglomerative.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
validate_params,
3939
)
4040
from ..utils.graph import _fix_connected_components
41-
from ..utils.validation import check_memory
41+
from ..utils.validation import check_memory, validate_data
4242

4343
# mypy error: Module 'sklearn.cluster' has no attribute '_hierarchical_fast'
4444
from . import _hierarchical_fast as _hierarchical # type: ignore
@@ -989,7 +989,7 @@ def fit(self, X, y=None):
989989
self : object
990990
Returns the fitted instance.
991991
"""
992-
X = self._validate_data(X, ensure_min_samples=2)
992+
X = validate_data(self, X, ensure_min_samples=2)
993993
return self._fit(X)
994994

995995
def _fit(self, X):
@@ -1338,7 +1338,7 @@ def fit(self, X, y=None):
13381338
self : object
13391339
Returns the transformer.
13401340
"""
1341-
X = self._validate_data(X, ensure_min_features=2)
1341+
X = validate_data(self, X, ensure_min_features=2)
13421342
super()._fit(X.T)
13431343
self._n_features_out = self.n_clusters_
13441344
return self

sklearn/cluster/_bicluster.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from ..utils import check_random_state, check_scalar
1616
from ..utils._param_validation import Interval, StrOptions
1717
from ..utils.extmath import make_nonnegative, randomized_svd, safe_sparse_dot
18-
from ..utils.validation import assert_all_finite
18+
from ..utils.validation import assert_all_finite, validate_data
1919
from ._kmeans import KMeans, MiniBatchKMeans
2020

2121
__all__ = ["SpectralCoclustering", "SpectralBiclustering"]
@@ -131,7 +131,7 @@ def fit(self, X, y=None):
131131
self : object
132132
SpectralBiclustering instance.
133133
"""
134-
X = self._validate_data(X, accept_sparse="csr", dtype=np.float64)
134+
X = validate_data(self, X, accept_sparse="csr", dtype=np.float64)
135135
self._check_parameters(X.shape[0])
136136
self._fit(X)
137137
return self

0 commit comments

Comments
 (0)