@@ -191,8 +191,7 @@ def scale(X, *, axis=0, with_mean=True, with_std=True, copy=True):
     affect model performance.
 
     For a comparison of the different scalers, transformers, and normalizers,
-    see :ref:`examples/preprocessing/plot_all_scaling.py
-    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
+    see: :ref:`sphx_glr_auto_examples_preprocessing_plot_all_scaling.py`.
 
     .. warning:: Risk of data leak
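
A minimal sketch of the leak-free pattern the warning above refers to: fit the scaling statistics on the training split only, then reuse them on held-out data (illustrative only, made-up data, not part of the patch).

```python
import numpy as np
from sklearn.preprocessing import StandardScaler

rng = np.random.RandomState(0)
X_train, X_test = rng.normal(size=(80, 3)), rng.normal(size=(20, 3))

scaler = StandardScaler().fit(X_train)    # statistics come from train only
X_test_scaled = scaler.transform(X_test)  # no test-set information leaks in
```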
@@ -294,6 +293,12 @@ class MinMaxScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
     This transformation is often used as an alternative to zero mean,
     unit variance scaling.
 
+    `MinMaxScaler` doesn't reduce the effect of outliers, but it linearly
+    scales them down into a fixed range, where the largest occurring data
+    point corresponds to the maximum value and the smallest one corresponds to
+    the minimum value. For an example visualization, refer to :ref:`Compare
+    MinMaxScaler with other scalers <plot_all_scaling_minmax_scaler_section>`.
+
     Read more in the :ref:`User Guide <preprocessing_scaler>`.
 
     Parameters
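
A quick sketch of the behavior the added paragraph describes (illustrative, made-up data): a single large value becomes the new maximum and squeezes the inliers toward 0.

```python
import numpy as np
from sklearn.preprocessing import MinMaxScaler

X = np.array([[1.0], [2.0], [3.0], [100.0]])  # 100.0 acts as an outlier
print(MinMaxScaler().fit_transform(X).ravel())
# [0.         0.01010101 0.02020202 1.        ]  inliers squeezed near 0
```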
@@ -367,10 +372,6 @@ class MinMaxScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
     NaNs are treated as missing values: disregarded in fit, and maintained in
     transform.
 
-    For a comparison of the different scalers, transformers, and normalizers,
-    see :ref:`examples/preprocessing/plot_all_scaling.py
-    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
-
     Examples
     --------
     >>> from sklearn.preprocessing import MinMaxScaler
@@ -641,8 +642,7 @@ def minmax_scale(X, feature_range=(0, 1), *, axis=0, copy=True):
     Notes
     -----
     For a comparison of the different scalers, transformers, and normalizers,
-    see :ref:`examples/preprocessing/plot_all_scaling.py
-    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
+    see: :ref:`sphx_glr_auto_examples_preprocessing_plot_all_scaling.py`.
     """
     # Unlike the scaler object, this function allows 1d input.
     # If copy is required, it will be done inside the scaler object.
@@ -695,6 +695,11 @@ class StandardScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
     than others, it might dominate the objective function and make the
     estimator unable to learn from other features correctly as expected.
 
+    `StandardScaler` is sensitive to outliers, and the features may scale
+    differently from each other in the presence of outliers. For an example
+    visualization, refer to :ref:`Compare StandardScaler with other scalers
+    <plot_all_scaling_standard_scaler_section>`.
+
     This scaler can also be applied to sparse CSR or CSC matrices by passing
     `with_mean=False` to avoid breaking the sparsity structure of the data.
 
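
To make the outlier sensitivity concrete, a minimal sketch with made-up data: one extreme value inflates both the learned mean and standard deviation, so the inliers end up nearly indistinguishable after scaling.

```python
import numpy as np
from sklearn.preprocessing import StandardScaler

X = np.array([[1.0], [2.0], [3.0], [1000.0]])  # one extreme value
scaler = StandardScaler().fit(X)
print(scaler.mean_, scaler.scale_)  # ~[251.5] ~[432.2], both outlier-driven
print(scaler.transform(X).ravel())  # the three inliers collapse near -0.58
```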
@@ -776,10 +781,6 @@ class StandardScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
     `numpy.std(x, ddof=0)`. Note that the choice of `ddof` is unlikely to
     affect model performance.
 
-    For a comparison of the different scalers, transformers, and normalizers,
-    see :ref:`examples/preprocessing/plot_all_scaling.py
-    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
-
     Examples
     --------
     >>> from sklearn.preprocessing import StandardScaler
@@ -1093,6 +1094,10 @@ class MaxAbsScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
 
     This scaler can also be applied to sparse CSR or CSC matrices.
 
+    `MaxAbsScaler` doesn't reduce the effect of outliers; it only linearly
+    scales them down. For an example visualization, refer to :ref:`Compare
+    MaxAbsScaler with other scalers <plot_all_scaling_max_abs_scaler_section>`.
+
     .. versionadded:: 0.17
 
     Parameters
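
A small sketch of the "only linearly scales them down" point (illustrative, made-up data): the transform is a plain division by the per-feature maximum absolute value.

```python
import numpy as np
from sklearn.preprocessing import MaxAbsScaler

X = np.array([[-2.0], [1.0], [50.0]])  # 50.0 has the largest absolute value
print(MaxAbsScaler().fit_transform(X).ravel())
# [-0.04  0.02  1.  ]  a plain division by max(|x|); signs are preserved
```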
@@ -1136,10 +1141,6 @@ class MaxAbsScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
     NaNs are treated as missing values: disregarded in fit, and maintained in
     transform.
 
-    For a comparison of the different scalers, transformers, and normalizers,
-    see :ref:`examples/preprocessing/plot_all_scaling.py
-    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
-
     Examples
     --------
     >>> from sklearn.preprocessing import MaxAbsScaler
@@ -1367,8 +1368,7 @@ def maxabs_scale(X, *, axis=0, copy=True):
     and maintained during the data transformation.
 
     For a comparison of the different scalers, transformers, and normalizers,
-    see :ref:`examples/preprocessing/plot_all_scaling.py
-    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
+    see: :ref:`sphx_glr_auto_examples_preprocessing_plot_all_scaling.py`.
     """
     # Unlike the scaler object, this function allows 1d input.
     # If copy is required, it will be done inside the scaler object.
@@ -1411,11 +1411,13 @@ class RobustScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
     set. Median and interquartile range are then stored to be used on
     later data using the :meth:`transform` method.
 
-    Standardization of a dataset is a common requirement for many
-    machine learning estimators. Typically this is done by removing the mean
-    and scaling to unit variance. However, outliers can often influence the
-    sample mean / variance in a negative way. In such cases, the median and
-    the interquartile range often give better results.
+    Standardization of a dataset is a common preprocessing step for many
+    machine learning estimators. Typically this is done by removing the mean
+    and scaling to unit variance. However, outliers can often influence the
+    sample mean / variance in a negative way. In such cases, using the median
+    and the interquartile range often gives better results. For an example
+    visualization and comparison to other scalers, refer to :ref:`Compare
+    RobustScaler with other scalers <plot_all_scaling_robust_scaler_section>`.
 
     .. versionadded:: 0.17
 
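
A minimal sketch of the median/IQR robustness (illustrative, made-up data): the fitted center and scale ignore the extreme value entirely.

```python
import numpy as np
from sklearn.preprocessing import RobustScaler

X = np.array([[1.0], [2.0], [3.0], [4.0], [1000.0]])
scaler = RobustScaler().fit(X)        # centers on the median, scales by IQR
print(scaler.center_, scaler.scale_)  # [3.] [2.], untouched by the 1000.0
print(scaler.transform(X).ravel())    # [-1.  -0.5  0.   0.5  498.5]
```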
@@ -1486,9 +1488,6 @@ class RobustScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
 
     Notes
     -----
-    For a comparison of the different scalers, transformers, and normalizers,
-    see :ref:`examples/preprocessing/plot_all_scaling.py
-    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
 
     https://en.wikipedia.org/wiki/Median
     https://en.wikipedia.org/wiki/Interquartile_range
@@ -1751,8 +1750,7 @@ def robust_scale(
     To avoid memory copy the caller should pass a CSR matrix.
 
     For a comparison of the different scalers, transformers, and normalizers,
-    see :ref:`examples/preprocessing/plot_all_scaling.py
-    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
+    see: :ref:`sphx_glr_auto_examples_preprocessing_plot_all_scaling.py`.
 
     .. warning:: Risk of data leak
@@ -1853,8 +1851,7 @@ def normalize(X, norm="l2", *, axis=1, copy=True, return_norm=False):
     Notes
     -----
     For a comparison of the different scalers, transformers, and normalizers,
-    see :ref:`examples/preprocessing/plot_all_scaling.py
-    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
+    see: :ref:`sphx_glr_auto_examples_preprocessing_plot_all_scaling.py`.
     """
     if axis == 0:
         sparse_format = "csc"
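
The `axis == 0` branch in the context above selects a column-oriented sparse format because `axis=0` normalizes columns rather than rows. A tiny sketch of the two axes (illustrative, made-up data):

```python
import numpy as np
from sklearn.preprocessing import normalize

X = np.array([[3.0, 4.0],
              [1.0, 0.0]])
print(normalize(X, norm="l2", axis=1))  # rows -> [[0.6 0.8], [1. 0.]]
print(normalize(X, norm="l2", axis=0))  # columns rescaled to unit norm instead
```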
@@ -1924,6 +1921,9 @@ class Normalizer(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
     of the vectors and is the base similarity metric for the Vector
     Space Model commonly used by the Information Retrieval community.
 
+    For an example visualization, refer to :ref:`Compare Normalizer with other
+    scalers <plot_all_scaling_normalizer_section>`.
+
     Read more in the :ref:`User Guide <preprocessing_normalization>`.
 
     Parameters
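
A short sketch of the cosine-similarity connection mentioned in the context above (illustrative, made-up data): after L2 normalization, the dot product of two rows is exactly their cosine similarity.

```python
import numpy as np
from sklearn.preprocessing import Normalizer

X = np.array([[3.0, 4.0], [6.0, 8.0]])  # second row is a scaled copy
Xn = Normalizer(norm="l2").fit_transform(X)
print(Xn)             # both rows become [0.6 0.8]: only direction survives
print(Xn[0] @ Xn[1])  # 1.0, dot product of unit rows = cosine similarity
```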
@@ -1962,10 +1962,6 @@ class Normalizer(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
     :meth:`transform`, as parameter validation is only performed in
     :meth:`fit`.
 
-    For a comparison of the different scalers, transformers, and normalizers,
-    see :ref:`examples/preprocessing/plot_all_scaling.py
-    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
-
     Examples
     --------
     >>> from sklearn.preprocessing import Normalizer
@@ -2459,6 +2455,9 @@ class QuantileTransformer(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
     correlations between variables measured at the same scale but renders
     variables measured at different scales more directly comparable.
 
+    For example visualizations, refer to :ref:`Compare QuantileTransformer with
+    other scalers <plot_all_scaling_quantile_transformer_section>`.
+
     Read more in the :ref:`User Guide <preprocessing_transformer>`.
 
     .. versionadded:: 0.19
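
A minimal sketch of the rank-based mapping (illustrative, synthetic data): a skewed input is pushed through its empirical CDF into a bounded uniform output.

```python
import numpy as np
from sklearn.preprocessing import QuantileTransformer

rng = np.random.RandomState(0)
X = rng.lognormal(size=(1000, 1))  # heavily right-skewed input
qt = QuantileTransformer(n_quantiles=100, output_distribution="uniform")
Xu = qt.fit_transform(X)
print(Xu.min(), Xu.max())  # ~0.0 and ~1.0: ranks survive, magnitudes do not
```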
@@ -2536,10 +2535,6 @@ class QuantileTransformer(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
     NaNs are treated as missing values: disregarded in fit, and maintained in
     transform.
 
-    For a comparison of the different scalers, transformers, and normalizers,
-    see :ref:`examples/preprocessing/plot_all_scaling.py
-    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
-
     Examples
     --------
     >>> import numpy as np
@@ -2988,8 +2983,7 @@ def quantile_transform(
     LogisticRegression())`.
 
     For a comparison of the different scalers, transformers, and normalizers,
-    see :ref:`examples/preprocessing/plot_all_scaling.py
-    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
+    see: :ref:`sphx_glr_auto_examples_preprocessing_plot_all_scaling.py`.
 
     Examples
     --------
@@ -3033,6 +3027,12 @@ class PowerTransformer(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
     By default, zero-mean, unit-variance normalization is applied to the
     transformed data.
 
+    For an example visualization, refer to :ref:`Compare PowerTransformer with
+    other scalers <plot_all_scaling_power_transformer_section>`. To see the
+    effect of Box-Cox and Yeo-Johnson transformations on different
+    distributions, see:
+    :ref:`sphx_glr_auto_examples_preprocessing_plot_map_data_to_normal.py`.
+
     Read more in the :ref:`User Guide <preprocessing_transformer>`.
 
     .. versionadded:: 0.20
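
A minimal sketch of the Box-Cox behavior referenced above (illustrative, synthetic data): on log-normal input the fitted lambda comes out near 0, i.e. close to a plain log transform, and the default standardization leaves the output roughly zero-mean, unit-variance.

```python
import numpy as np
from sklearn.preprocessing import PowerTransformer

rng = np.random.RandomState(0)
X = rng.lognormal(size=(1000, 1))        # log-normal, strongly skewed
pt = PowerTransformer(method="box-cox")  # Box-Cox requires positive data
Xt = pt.fit_transform(X)
print(pt.lambdas_)          # fitted lambda near 0, i.e. close to log(x)
print(Xt.mean(), Xt.std())  # ~0 and ~1 from the default standardization
```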
@@ -3080,10 +3080,6 @@ class PowerTransformer(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
     NaNs are treated as missing values: disregarded in ``fit``, and maintained
     in ``transform``.
 
-    For a comparison of the different scalers, transformers, and normalizers,
-    see :ref:`examples/preprocessing/plot_all_scaling.py
-    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
-
     References
     ----------
@@ -3500,8 +3496,7 @@ def power_transform(X, method="yeo-johnson", *, standardize=True, copy=True):
     in ``transform``.
 
     For a comparison of the different scalers, transformers, and normalizers,
-    see :ref:`examples/preprocessing/plot_all_scaling.py
-    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
+    see: :ref:`sphx_glr_auto_examples_preprocessing_plot_all_scaling.py`.
 
     References
     ----------