# ruff: noqa: CPY001
"""
=======================================
Release Highlights for scikit-learn 1.7
=======================================

.. currentmodule:: sklearn

We are pleased to announce the release of scikit-learn 1.7! Many bug fixes
and improvements were added, as well as some key new features. Below we
detail the highlights of this release. **For an exhaustive list of
all the changes**, please refer to the :ref:`release notes <release_notes_1_7>`.

To install the latest version (with pip)::

    pip install --upgrade scikit-learn

or with conda::

    conda install -c conda-forge scikit-learn

"""

# %%
# Improved estimator's HTML representation
# ----------------------------------------
# The HTML representation of estimators now includes a section containing the list of
# parameters and their values. Non-default parameters are highlighted in orange. A copy
# button is also available to copy the "fully-qualified" parameter name without the
# need to call the `get_params` method. This is particularly useful when defining a
# parameter grid for a grid-search or a randomized-search with a complex pipeline.
#
# See the example below and click on the different estimators' blocks to see the
# improved HTML representation.

from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

model = make_pipeline(StandardScaler(with_std=False), LogisticRegression(C=2.0))
model

# %%
# Custom validation set for histogram-based Gradient Boosting estimators
# ----------------------------------------------------------------------
# The :class:`ensemble.HistGradientBoostingClassifier` and
# :class:`ensemble.HistGradientBoostingRegressor` now support directly passing a custom
# validation set for early stopping to the `fit` method, using the `X_val`, `y_val`, and
# `sample_weight_val` parameters.
# In a :class:`pipeline.Pipeline`, the validation set `X_val` can be transformed along
# with `X` using the `transform_input` parameter.

import sklearn
from sklearn.datasets import make_classification
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

sklearn.set_config(enable_metadata_routing=True)

X, y = make_classification(random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=0)

# Enable early stopping and request that the validation set be routed to `fit`.
clf = HistGradientBoostingClassifier(early_stopping=True)
clf.set_fit_request(X_val=True, y_val=True)

model = Pipeline([("sc", StandardScaler()), ("clf", clf)], transform_input=["X_val"])
model.fit(X_train, y_train, X_val=X_val, y_val=y_val)

# %%
# Plotting ROC curves from cross-validation results
# -------------------------------------------------
# The class :class:`metrics.RocCurveDisplay` has a new class method `from_cv_results`
# that makes it easy to plot multiple ROC curves from the results of
# :func:`model_selection.cross_validate`.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import RocCurveDisplay
from sklearn.model_selection import cross_validate

X, y = make_classification(n_samples=150, random_state=0)
clf = LogisticRegression(random_state=0)
cv_results = cross_validate(clf, X, y, cv=5, return_estimator=True, return_indices=True)
_ = RocCurveDisplay.from_cv_results(cv_results, X, y)

# %%
# Array API support
# -----------------
# Several functions have been updated to support array API compatible inputs since
# version 1.6, especially metrics from the :mod:`sklearn.metrics` module.
#
# In addition, it is no longer required to install the `array-api-compat` package to use
# the experimental array API support in scikit-learn.
#
# Please refer to the :ref:`array API support<array_api>` page for instructions to use
# scikit-learn with array API compatible libraries such as PyTorch or CuPy.

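# %%
# For illustration, here is a minimal sketch of calling a metric on array API
# compatible inputs. It assumes PyTorch is installed; CuPy or other array API
# compatible libraries can be used in the same way.

import torch

import sklearn
from sklearn.metrics import accuracy_score

# Opt in to the experimental array API dispatch.
sklearn.set_config(array_api_dispatch=True)

y_true = torch.asarray([0, 1, 0, 1, 1, 0])
y_pred = torch.asarray([0, 1, 1, 1, 0, 0])
# The metric is computed using the array library of the inputs (here PyTorch).
accuracy_score(y_true, y_pred)

sklearn.set_config(array_api_dispatch=False)
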
# %%
# Improved API consistency of Multi-layer Perceptron
# --------------------------------------------------
# The :class:`neural_network.MLPRegressor` has a new parameter `loss` and now supports
# the "poisson" loss in addition to the default "squared_error" loss.
# Moreover, the :class:`neural_network.MLPClassifier` and
# :class:`neural_network.MLPRegressor` estimators now support sample weights.
# These changes improve the consistency of the multi-layer perceptron estimators with
# the rest of the estimators in scikit-learn.

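# %%
# As a minimal sketch of the two points above, the snippet below fits an
# :class:`neural_network.MLPRegressor` with the "poisson" loss and per-sample weights
# passed to `fit`, on arbitrary toy data used only for illustration.

import numpy as np

from sklearn.neural_network import MLPRegressor

rng = np.random.RandomState(0)
X_toy = rng.uniform(size=(100, 3))
# The "poisson" loss expects non-negative targets, e.g. counts.
y_toy = rng.poisson(lam=2.0, size=100).astype(float)
sample_weight = rng.uniform(0.5, 1.5, size=100)

reg = MLPRegressor(
    loss="poisson", hidden_layer_sizes=(16,), max_iter=1000, random_state=0
)
reg.fit(X_toy, y_toy, sample_weight=sample_weight)
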
# %%
# Migration toward sparse arrays
# ------------------------------
# To prepare for the `SciPy migration from sparse matrices to sparse arrays <https://docs.scipy.org/doc/scipy/reference/sparse.migration_to_sparray.html>`_,
# all scikit-learn estimators that accept sparse matrices as input now also accept
# sparse arrays.
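
# %%
# For illustration, a minimal sketch of fitting an estimator on a SciPy sparse array
# (`csr_array`) rather than the legacy sparse matrix type; the data is arbitrary.

from scipy.sparse import csr_array

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X_dense, y = make_classification(random_state=0)
X_sparse = csr_array(X_dense)  # a sparse array, not a sparse matrix
LogisticRegression().fit(X_sparse, y)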