Skip to content

Commit 512f34c

Browse files
authored
Merge branch 'scikit-learn:main' into submodulev2
2 parents 4840d4e + d2b9c80 commit 512f34c

File tree

20 files changed

+407
-64
lines changed

20 files changed

+407
-64
lines changed

build_tools/circle/doc_min_dependencies_environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ dependencies:
2323
- memory_profiler
2424
- compilers
2525
- sphinx=6.0.0 # min
26-
- sphinx-gallery=0.7.0 # min
26+
- sphinx-gallery=0.10.1 # min
2727
- sphinx-copybutton=0.5.2 # min
2828
- numpydoc=1.2.0 # min
2929
- sphinx-prompt=1.3.0 # min

build_tools/circle/doc_min_dependencies_linux-64_conda.lock

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Generated by conda-lock.
22
# platform: linux-64
3-
# input_hash: bd03405f744d4d5d0c59edd290d1e4245873b8593fcd3bbc3efdf8654a283161
3+
# input_hash: d465abb23248872aed411e2f2f7293a661f0b783f1a84420ffa5431229814cab
44
@EXPLICIT
55
https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81
66
https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.5.7-hbcca054_0.conda#f5c65075fc34438d5b456c7f3f5ab695
@@ -100,7 +100,7 @@ https://conda.anaconda.org/conda-forge/linux-64/markupsafe-2.1.3-py38h01eb140_0.
100100
https://conda.anaconda.org/conda-forge/noarch/networkx-3.1-pyhd8ed1ab_0.conda#254f787d5068bc89f578bf63893ce8b4
101101
https://conda.anaconda.org/conda-forge/noarch/packaging-23.1-pyhd8ed1ab_0.conda#91cda59e66e1e4afe9476f8ef98f5c30
102102
https://conda.anaconda.org/conda-forge/linux-64/pillow-9.4.0-py38hde6dc18_1.conda#3de5619d3f556f966189e5251a266125
103-
https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9
103+
https://conda.anaconda.org/conda-forge/noarch/pluggy-1.2.0-pyhd8ed1ab_0.conda#7263924c642d22e311d9e59b839f1b33
104104
https://conda.anaconda.org/conda-forge/linux-64/psutil-5.9.5-py38h1de0b5d_0.conda#92e899e7b0ed27c793014d1fa54f9b7b
105105
https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054
106106
https://conda.anaconda.org/conda-forge/noarch/pygments-2.15.1-pyhd8ed1ab_0.conda#d316679235612869eba305aa7d41d9bf
@@ -138,15 +138,15 @@ https://conda.anaconda.org/conda-forge/noarch/memory_profiler-0.61.0-pyhd8ed1ab_
138138
https://conda.anaconda.org/conda-forge/noarch/partd-1.4.0-pyhd8ed1ab_0.conda#721dab5803ea92ce02ddc4ee50aa0c48
139139
https://conda.anaconda.org/conda-forge/noarch/pip-23.1.2-pyhd8ed1ab_0.conda#7288da0d36821349cf1126e8670292df
140140
https://conda.anaconda.org/conda-forge/noarch/plotly-5.14.0-pyhd8ed1ab_0.conda#6a7bcc42ef58dd6cf3da9333ea102433
141-
https://conda.anaconda.org/conda-forge/noarch/pytest-7.3.2-pyhd8ed1ab_1.conda#f2465696f4396245eca4613f6e924796
141+
https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.0-pyhd8ed1ab_0.conda#3cfe9b9e958e7238a386933c75d190db
142142
https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984
143143
https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.6.3-hd8ed1ab_0.conda#3876f650ed7d0f95d70fa4b647621909
144144
https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.3-pyhd8ed1ab_0.conda#ae465d0fbf9f1979cb2d8d4043d885e2
145145
https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.14.5-h0935bb2_2.tar.bz2#eb125ee86480e00a4a1ed45a577c3311
146146
https://conda.anaconda.org/conda-forge/noarch/importlib_metadata-6.7.0-hd8ed1ab_0.conda#27a4cec373ec84d1c1aa02a1e37f8eaf
147147
https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.8.0-20_mkl.tar.bz2#8274dc30518af9df1de47f5d9e73165c
148148
https://conda.anaconda.org/conda-forge/linux-64/numpy-1.17.3-py38h95a1406_0.tar.bz2#bc0cbf611fe2f86eab29b98e51404f5e
149-
https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.6.0-pyhd8ed1ab_0.conda#741384b21c1b512617f4ee4ea8457c5d
149+
https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.8.0-pyhd8ed1ab_0.conda#3e4aca765371893ad848397794600632
150150
https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0
151151
https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b
152152
https://conda.anaconda.org/conda-forge/linux-64/blas-2.20-mkl.tar.bz2#e7d09a07f5413e53dca5282b8fa50bed
@@ -167,7 +167,7 @@ https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.12.3-py38ha8c2ead_3.tar.b
167167
https://conda.anaconda.org/conda-forge/linux-64/scikit-image-0.16.2-py38hb3f55d8_0.tar.bz2#468b398fefac8884cd6e6513af66549b
168168
https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.12.2-pyhd8ed1ab_0.conda#cf88f3a1c11536bc3c10c14ad00ccc42
169169
https://conda.anaconda.org/conda-forge/noarch/sphinx-copybutton-0.5.2-pyhd8ed1ab_0.conda#ac832cc43adc79118cf6e23f1f9b8995
170-
https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.7.0-py_0.tar.bz2#80bad3f857ecc86a4ab73f3e57addd13
170+
https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.10.1-pyhd8ed1ab_0.tar.bz2#4918585fe5e5341740f7e63c61743efb
171171
https://conda.anaconda.org/conda-forge/noarch/sphinx-prompt-1.3.0-py_0.tar.bz2#9363002e2a134a287af4e32ff0f26cdc
172172
https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.1.3-py38_0.tar.bz2#1992ab91bbff86ded8d99d1f488d8e8b
173173
https://conda.anaconda.org/conda-forge/linux-64/statsmodels-0.12.2-py38h5c078b8_0.tar.bz2#33787719ad03d33cffc4e2e3ea82bc9e

doc/conf.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,13 +72,15 @@
7272
import jupyterlite_sphinx # noqa: F401
7373

7474
extensions.append("jupyterlite_sphinx")
75+
with_jupyterlite = True
7576
except ImportError:
7677
# In some cases we don't want to require jupyterlite_sphinx to be installed,
7778
# e.g. the doc-min-dependencies build
7879
warnings.warn(
7980
"jupyterlite_sphinx is not installed, you need to install it "
8081
"if you want JupyterLite links to appear in each example"
8182
)
83+
with_jupyterlite = False
8284

8385
# Produce `plot::` directives for examples that contain `import matplotlib` or
8486
# `from matplotlib import`.
@@ -526,13 +528,16 @@ def reset_sklearn_config(gallery_conf, fname):
526528
"dependencies": "./binder/requirements.txt",
527529
"use_jupyter_lab": True,
528530
},
529-
"jupyterlite": {"notebook_modification_function": notebook_modification_function},
530531
# avoid generating too many cross links
531532
"inspect_global_variables": False,
532533
"remove_config_comments": True,
533534
"plot_gallery": "True",
534535
"reset_modules": ("matplotlib", "seaborn", reset_sklearn_config),
535536
}
537+
if with_jupyterlite:
538+
sphinx_gallery_conf["jupyterlite"] = {
539+
"notebook_modification_function": notebook_modification_function
540+
}
536541

537542

538543
# The following dictionary contains the information used to create the

doc/developers/advanced_installation.rst

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ feature, code or documentation improvement).
9090

9191
.. prompt:: bash $
9292

93-
pip install --verbose --no-use-pep517 --no-build-isolation --editable .
93+
pip install -v --no-use-pep517 --no-build-isolation -e .
9494

9595
#. Check that the installed scikit-learn has a version number ending with
9696
`.dev0`:
@@ -109,7 +109,9 @@ feature, code or documentation improvement).
109109
(ending in `.pyx` or `.pxd`). This can happen when you edit them or when you
110110
use certain git commands such as `git pull`. Use the ``--no-build-isolation`` flag
111111
to avoid compiling the whole project each time, only the files you have
112-
modified.
112+
modified. Include the ``--no-use-pep517`` flag because the ``--no-build-isolation``
113+
option might not work otherwise (this is due to a bug which will be fixed in the
114+
future).
113115

114116
Dependencies
115117
------------
@@ -242,7 +244,7 @@ Finally, build scikit-learn from this command prompt:
242244

243245
.. prompt:: bash $
244246

245-
pip install --verbose --no-use-pep517 --no-build-isolation --editable .
247+
pip install -v --no-use-pep517 --no-build-isolation -e .
246248

247249
.. _compiler_macos:
248250

@@ -284,7 +286,7 @@ scikit-learn from source:
284286
joblib threadpoolctl pytest compilers llvm-openmp
285287
conda activate sklearn-dev
286288
make clean
287-
pip install --verbose --no-use-pep517 --no-build-isolation --editable .
289+
pip install -v --no-use-pep517 --no-build-isolation -e .
288290

289291
.. note::
290292

@@ -364,7 +366,7 @@ Finally, build scikit-learn in verbose mode (to check for the presence of the
364366
.. prompt:: bash $
365367

366368
make clean
367-
pip install --verbose --no-use-pep517 --no-build-isolation --editable .
369+
pip install -v --no-use-pep517 --no-build-isolation -e .
368370

369371
.. _compiler_linux:
370372

@@ -424,7 +426,7 @@ in the user folder using conda:
424426
conda create -n sklearn-dev -c conda-forge python numpy scipy cython \
425427
joblib threadpoolctl pytest compilers
426428
conda activate sklearn-dev
427-
pip install --verbose --no-use-pep517 --no-build-isolation --editable .
429+
pip install -v --no-use-pep517 --no-build-isolation -e .
428430

429431
.. _compiler_freebsd:
430432

@@ -453,7 +455,7 @@ Finally, build the package using the standard command:
453455

454456
.. prompt:: bash $
455457

456-
pip install --verbose --no-use-pep517 --no-build-isolation --editable .
458+
pip install -v --no-use-pep517 --no-build-isolation -e .
457459

458460
For the upcoming FreeBSD 12.1 and 11.3 versions, OpenMP will be included in
459461
the base system and these steps will not be necessary.
@@ -514,7 +516,7 @@ and environment variable as follows before calling the ``pip install`` or
514516
``python setup.py build_ext`` commands::
515517

516518
export SKLEARN_BUILD_PARALLEL=3
517-
pip install --verbose --no-use-pep517 --no-build-isolation --editable .
519+
pip install -v --no-use-pep517 --no-build-isolation -e .
518520

519521
On a machine with 2 CPU cores, it can be beneficial to use a parallelism level
520522
of 3 to overlap IO bound tasks (reading and writing files on disk) with CPU

doc/whats_new/v1.3.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,11 @@ Changelog
231231
:user:`Jérémie du Boisberranger <jeremiedbb>`,
232232
:user:`Guillaume Lemaitre <glemaitre>`.
233233

234+
- |Fix| :class:`cluster.KMeans`, :class:`cluster.MiniBatchKMeans` and
235+
:func:`cluster.k_means` now correctly handle the combination of `n_init="auto"`
236+
and `init` being an array-like, running one initialization in that case.
237+
:pr:`26657` by :user:`Binesh Bannerjee <bnsh>`.
238+
234239
- |API| The `sample_weight` parameter in `predict` for
235240
:meth:`cluster.KMeans.predict` and :meth:`cluster.MiniBatchKMeans.predict`
236241
is now deprecated and will be removed in v1.5.
@@ -455,6 +460,11 @@ Changelog
455460
on linearly separable problems.
456461
:pr:`25214` by `Tom Dupre la Tour`_.
457462

463+
- |Fix| Fix a crash when calling `fit` on
464+
:class:`linear_model.LogisticRegression` with `solver="newton-cholesky"` and `max_iter=0`
465+
which failed to inspect the state of the model prior to the first parameter update.
466+
:pr:`26653` by :user:`Olivier Grisel <ogrisel>`.
467+
458468
- |API| Deprecates `n_iter` in favor of `max_iter` in
459469
:class:`linear_model.BayesianRidge` and :class:`linear_model.ARDRegression`.
460470
`n_iter` will be removed in scikit-learn 1.5. This change makes those

doc/whats_new/v1.4.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,13 @@ TODO: update at the time of the release.
5858
:meth:`base.OutlierMixin.fit_predict` now accept ``**kwargs`` which are
5959
passed to the ``fit`` method of the estimator. :pr:`26506` by `Adrin
6060
Jalali`_.
61+
62+
:mod:`sklearn.decomposition`
63+
............................
64+
65+
- |Enhancement| An "auto" option was added to the `n_components` parameter of
66+
:func:`decomposition.non_negative_factorization`, :class:`decomposition.NMF` and
67+
:class:`decomposition.MiniBatchNMF` to automatically infer the number of components from W or H shapes
68+
when using a custom initialization. The default value of this parameter will change
69+
from `None` to `"auto"` in version 1.6.
70+
:pr:`26634` by :user:`Alexandre Landeau <AlexL>` and :user:`Alexandre Vigny <avigny>`.

sklearn/_min_dependencies.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
"pyarrow": ("12.0.0", "tests"),
4545
"sphinx": ("6.0.0", "docs"),
4646
"sphinx-copybutton": ("0.5.2", "docs"),
47-
"sphinx-gallery": ("0.7.0", "docs"),
47+
"sphinx-gallery": ("0.10.1", "docs"),
4848
"numpydoc": ("1.2.0", "docs, tests"),
4949
"Pillow": ("7.1.2", "docs"),
5050
"pooch": ("1.6.0", "docs, examples, tests"),

sklearn/cluster/_kmeans.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,8 @@ def k_means(
354354
n_init consecutive runs in terms of inertia.
355355
356356
When `n_init='auto'`, the number of runs depends on the value of init:
357-
10 if using `init='random'`, 1 if using `init='k-means++'`.
357+
10 if using `init='random'` or `init` is a callable;
358+
1 if using `init='k-means++'` or `init` is an array-like.
358359
359360
.. versionadded:: 1.2
360361
Added 'auto' option for `n_init`.
@@ -884,10 +885,14 @@ def _check_params_vs_input(self, X, default_n_init=None):
884885
)
885886
self._n_init = default_n_init
886887
if self._n_init == "auto":
887-
if self.init == "k-means++":
888+
if isinstance(self.init, str) and self.init == "k-means++":
888889
self._n_init = 1
889-
else:
890+
elif isinstance(self.init, str) and self.init == "random":
891+
self._n_init = default_n_init
892+
elif callable(self.init):
890893
self._n_init = default_n_init
894+
else: # array-like
895+
self._n_init = 1
891896

892897
if _is_arraylike_not_scalar(self.init) and self._n_init != 1:
893898
warnings.warn(
@@ -1241,7 +1246,8 @@ class KMeans(_BaseKMeans):
12411246
high-dimensional problems (see :ref:`kmeans_sparse_high_dim`).
12421247
12431248
When `n_init='auto'`, the number of runs depends on the value of init:
1244-
10 if using `init='random'`, 1 if using `init='k-means++'`.
1249+
10 if using `init='random'` or `init` is a callable;
1250+
1 if using `init='k-means++'` or `init` is an array-like.
12451251
12461252
.. versionadded:: 1.2
12471253
Added 'auto' option for `n_init`.
@@ -1777,7 +1783,8 @@ class MiniBatchKMeans(_BaseKMeans):
17771783
:ref:`kmeans_sparse_high_dim`).
17781784
17791785
When `n_init='auto'`, the number of runs depends on the value of init:
1780-
3 if using `init='random'`, 1 if using `init='k-means++'`.
1786+
3 if using `init='random'` or `init` is a callable;
1787+
1 if using `init='k-means++'` or `init` is an array-like.
17811788
17821789
.. versionadded:: 1.2
17831790
Added 'auto' option for `n_init`.

sklearn/cluster/tests/test_k_means.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,37 @@ def test_minibatch_kmeans_partial_fit_init(init):
348348
_check_fitted_model(km)
349349

350350

351+
@pytest.mark.parametrize(
352+
"init, expected_n_init",
353+
[
354+
("k-means++", 1),
355+
("random", "default"),
356+
(
357+
lambda X, n_clusters, random_state: random_state.uniform(
358+
size=(n_clusters, X.shape[1])
359+
),
360+
"default",
361+
),
362+
("array-like", 1),
363+
],
364+
)
365+
@pytest.mark.parametrize("Estimator", [KMeans, MiniBatchKMeans])
366+
def test_kmeans_init_auto_with_initial_centroids(Estimator, init, expected_n_init):
367+
"""Check that `n_init="auto"` chooses the right number of initializations.
368+
Non-regression test for #26657:
369+
https://github.com/scikit-learn/scikit-learn/pull/26657
370+
"""
371+
n_sample, n_features, n_clusters = 100, 10, 5
372+
X = np.random.randn(n_sample, n_features)
373+
if init == "array-like":
374+
init = np.random.randn(n_clusters, n_features)
375+
if expected_n_init == "default":
376+
expected_n_init = 3 if Estimator is MiniBatchKMeans else 10
377+
378+
kmeans = Estimator(n_clusters=n_clusters, init=init, n_init="auto").fit(X)
379+
assert kmeans._n_init == expected_n_init
380+
381+
351382
@pytest.mark.parametrize("Estimator", [KMeans, MiniBatchKMeans])
352383
def test_fortran_aligned_data(Estimator, global_random_seed):
353384
# Check that KMeans works with fortran-aligned data.

0 commit comments

Comments
 (0)