Commit 1ba0783

MAINT Fix typos found by codespell (scikit-learn#26852)
1 parent fa8cd70 commit 1ba0783

19 files changed: 24 additions, 24 deletions

doc/whats_new/older_versions.rst

Lines changed: 3 additions & 3 deletions
@@ -344,9 +344,9 @@ API changes summary
   Please use :class:`~covariance.EllipticEnvelope` instead.
 
 - ``NeighborsClassifier`` and ``NeighborsRegressor`` are gone in the module
-  :ref:`neighbors`. Use the classes :class:`~neighors.KNeighborsClassifier`,
-  :class:`~neighors.RadiusNeighborsClassifier`, :class:`~neighors.KNeighborsRegressor`
-  and/or :class:`~neighors.RadiusNeighborsRegressor` instead.
+  :ref:`neighbors`. Use the classes :class:`~neighbors.KNeighborsClassifier`,
+  :class:`~neighbors.RadiusNeighborsClassifier`, :class:`~neighbors.KNeighborsRegressor`
+  and/or :class:`~neighbors.RadiusNeighborsRegressor` instead.
 
 - Sparse classes in the :ref:`sgd` module are now deprecated.
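
For anyone updating old code against this changelog entry, a minimal migration sketch (the dataset and parameters are illustrative, not part of the changelog):

from sklearn.datasets import make_classification
from sklearn.neighbors import KNeighborsClassifier

# Hypothetical data, only to show the replacement class in use.
X, y = make_classification(n_samples=100, random_state=0)

# Before: clf = NeighborsClassifier(...)  # removed class
clf = KNeighborsClassifier(n_neighbors=5)
clf.fit(X, y)
print(clf.predict(X[:5]))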

examples/bicluster/plot_spectral_biclustering.py

Lines changed: 1 addition & 1 deletion
@@ -57,7 +57,7 @@
 
 # %%
 # We redefine the shuffled data and plot it. We observe that we lost the
-# strucuture of original data matrix.
+# structure of original data matrix.
 data = data[row_idx_shuffled][:, col_idx_shuffled]
 
 plt.matshow(data, cmap=plt.cm.Blues)
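
For context, row_idx_shuffled and col_idx_shuffled are permutation indices built earlier in the example; a self-contained sketch of the pattern, with a stand-in matrix invented here:

import numpy as np

rng = np.random.RandomState(0)
data = rng.rand(300, 300)  # stand-in for the example's data matrix
row_idx_shuffled = rng.permutation(data.shape[0])
col_idx_shuffled = rng.permutation(data.shape[1])
shuffled = data[row_idx_shuffled][:, col_idx_shuffled]  # shuffle rows, then columns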

examples/cluster/plot_hdbscan.py

Lines changed: 1 addition & 1 deletion
@@ -113,7 +113,7 @@ def plot(X, labels, probabilities=None, parameters=None, ground_truth=False, ax=
 # ----------------------
 # HDBSCAN is much more than scale invariant though -- it is capable of
 # multi-scale clustering, which accounts for clusters with varying density.
-# Traditional DBSCAN assumes that any potential clusters are homogenous in
+# Traditional DBSCAN assumes that any potential clusters are homogeneous in
 # density. HDBSCAN is free from such constraints. To demonstrate this we
 # consider the following dataset
 centers = [[-0.85, -0.85], [-0.85, 0.85], [3, 3], [3, -3]]
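
A hedged sketch of the contrast described in this comment, putting a single-eps DBSCAN next to HDBSCAN on blobs of unequal density (all parameter values are illustrative):

from sklearn.cluster import DBSCAN, HDBSCAN
from sklearn.datasets import make_blobs

X, _ = make_blobs(
    n_samples=500,
    centers=[[-0.85, -0.85], [-0.85, 0.85], [3, 3], [3, -3]],
    cluster_std=[0.2, 0.35, 1.35, 1.35],  # clusters of varying density
    random_state=0,
)
db_labels = DBSCAN(eps=0.3).fit_predict(X)  # one eps cannot suit every density
hdb_labels = HDBSCAN().fit_predict(X)       # adapts across density scales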

examples/ensemble/plot_forest_hist_grad_boosting_comparison.py

Lines changed: 2 additions & 2 deletions
@@ -11,7 +11,7 @@
 The comparison is made by varying the parameters that control the number of
 trees according to each estimator:
 
-- `n_estimators` controls the number of trees in the forest. It's a fixed numer.
+- `n_estimators` controls the number of trees in the forest. It's a fixed number.
 - `max_iter` is the the maximum number of iterations in a gradient boosting
   based model. The number of iterations corresponds to the number of trees for
   regression and binary classification problems. Furthermore, the actual number
@@ -202,7 +202,7 @@
 # makes fitting and scoring slower. The RF model reaches such plateau earlier
 # and can never reach the test score of the largest HGBDT model.
 #
-# Note that the results shown on the above plot can change sightly across runs
+# Note that the results shown on the above plot can change slightly across runs
 # and even more significantly when running on other machines: try to run this
 # example on your own local machine.
 #
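
A minimal sketch of the two knobs contrasted above, with invented values and synthetic data:

from sklearn.datasets import make_regression
from sklearn.ensemble import HistGradientBoostingRegressor, RandomForestRegressor

X, y = make_regression(n_samples=1000, random_state=0)

rf = RandomForestRegressor(n_estimators=100, random_state=0)  # fixed number of trees
hgbt = HistGradientBoostingRegressor(max_iter=100)  # cap on boosting iterations
rf.fit(X, y)
hgbt.fit(X, y)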

examples/linear_model/plot_ridge_coeffs.py

Lines changed: 1 addition & 1 deletion
@@ -162,7 +162,7 @@
 # `alpha` increases, the highest coefficients shrink more rapidly, rendering
 # their corresponding features less influential in the training process. This
 # can enhance a model's ability to generalize to unseen data (if there was a lot
-# of noise to capture), but it also poses the risk of loosing performance if the
+# of noise to capture), but it also poses the risk of losing performance if the
 # regularization becomes too strong compared to the amount of noise the data
 # contained (as in this example).
 #
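
To make the shrinkage claim concrete, a small sketch (the data and the alpha grid are invented): the coefficient norm decreases as alpha grows.

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge

X, y = make_regression(n_samples=100, n_features=10, noise=10.0, random_state=0)
for alpha in (0.01, 1.0, 100.0):
    coef = Ridge(alpha=alpha).fit(X, y).coef_
    print(f"alpha={alpha}: ||coef|| = {np.linalg.norm(coef):.1f}")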

examples/miscellaneous/plot_display_object_visualization.py

Lines changed: 1 addition & 1 deletion
@@ -39,7 +39,7 @@
 # Create :class:`ConfusionMatrixDisplay`
 ##############################################################################
 # With the fitted model, we compute the predictions of the model on the test
-# dataset. These predictions are used to compute the confustion matrix which
+# dataset. These predictions are used to compute the confusion matrix which
 # is plotted with the :class:`ConfusionMatrixDisplay`
 from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
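
A self-contained version of the step this comment describes; the model and data below are placeholders, not the example's own:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from sklearn.model_selection import train_test_split

X, y = make_classification(random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = LogisticRegression().fit(X_train, y_train)
y_pred = clf.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
ConfusionMatrixDisplay(confusion_matrix=cm).plot()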

examples/miscellaneous/plot_metadata_routing.py

Lines changed: 1 addition & 1 deletion
@@ -368,7 +368,7 @@ def predict(self, X, **predict_params):
 # In ``get_metadata_routing``, we add ``self`` to the routing using
 # ``add_self_request`` to indicate this estimator is consuming
 # ``sample_weight`` as well as being a router; which also adds a
-# ``$self_request`` key to the routing info as illustrated bellow. Now let's
+# ``$self_request`` key to the routing info as illustrated below. Now let's
 # look at some examples:
 
 # %%
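
For orientation, a hedged sketch of the pattern this comment refers to, using the public metadata-routing utilities (the meta-estimator class itself is hypothetical):

from sklearn.base import BaseEstimator
from sklearn.utils.metadata_routing import MetadataRouter, MethodMapping


class MyMetaEstimator(BaseEstimator):
    """Hypothetical consumer-and-router following the pattern above."""

    def __init__(self, estimator):
        self.estimator = estimator

    def get_metadata_routing(self):
        # add_self_request records this estimator's own metadata request
        # under the ``$self_request`` key; add() routes the sub-estimator.
        return (
            MetadataRouter(owner=self.__class__.__name__)
            .add_self_request(self)
            .add(
                estimator=self.estimator,
                method_mapping=MethodMapping().add(caller="fit", callee="fit"),
            )
        )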

sklearn/cluster/_hdbscan/_tree.pyx

Lines changed: 1 addition & 1 deletion
@@ -133,7 +133,7 @@ cpdef cnp.ndarray[CONDENSED_t, ndim=1, mode='c'] _condense_tree(
         A single linkage hierarchy in scipy.cluster.hierarchy format.
 
     min_cluster_size : int, optional (default 10)
-        The minimum size of clusters to consider. Clusters smaler than this
+        The minimum size of clusters to consider. Clusters smaller than this
         are pruned from the tree.
 
     Returns
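
The same knob is exposed on the public estimator; a hedged usage sketch with invented data:

from sklearn.cluster import HDBSCAN
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=200, random_state=0)
labels = HDBSCAN(min_cluster_size=10).fit_predict(X)  # smaller clusters are pruned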

sklearn/cluster/_hdbscan/hdbscan.py

Lines changed: 1 addition & 1 deletion
@@ -900,7 +900,7 @@ def _weighted_cluster_center(self, X):
         self.medoids_ = np.empty((n_clusters, X.shape[1]), dtype=np.float64)
 
         # Need to handle iteratively seen each cluster may have a different
-        # number of samples, hence we can't create a homogenous 3D array.
+        # number of samples, hence we can't create a homogeneous 3D array.
         for idx in range(n_clusters):
             mask = self.labels_ == idx
             data = X[mask]
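
A standalone illustration of the masking pattern in this loop; names and shapes are stand-ins:

import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(20, 3)
labels = rng.randint(0, 3, size=20)  # pretend cluster assignments

for idx in range(3):
    mask = labels == idx
    data = X[mask]  # per-cluster slice; sizes differ, so no single 3D array works
    print(idx, data.shape)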

sklearn/datasets/tests/test_openml.py

Lines changed: 1 addition & 1 deletion
@@ -1534,7 +1534,7 @@ def test_fetch_openml_verify_checksum(monkeypatch, as_frame, cache, tmpdir, pars
         modified_gzip.write(data)
 
         # Requests are already mocked by monkey_patch_webbased_functions.
-        # We want to re-use that mock for all requests except file download,
+        # We want to reuse that mock for all requests except file download,
         # hence creating a thin mock over the original mock
         mocked_openml_url = sklearn.datasets._openml.urlopen
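
The "thin mock over the original mock" idea, sketched independently of the test harness (the URL and names here are hypothetical):

from unittest.mock import MagicMock

original_mock = MagicMock(name="mocked_urlopen")

def thin_mock(request, *args, **kwargs):
    # Delegate to the original mock, except for the one download we tamper with.
    if getattr(request, "full_url", request) == "https://example.org/tampered":
        return b"corrupted-bytes"  # hypothetical modified payload
    return original_mock(request, *args, **kwargs)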
