@@ -725,7 +725,7 @@ def predict_quantiles(self, X, quantiles=0.5, method="nearest"):
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Input data.
-        quantiles : float, optional
+        quantiles : float or array-like, optional
            The quantiles at which to evaluate, by default 0.5 (median).
        method : str, optional
            The method to interpolate, by default 'nearest'. Can be any keyword
@@ -746,7 +746,7 @@ def predict_quantiles(self, X, quantiles=0.5, method="nearest"):
        X = self._validate_X_predict(X)

        if not isinstance(quantiles, (np.ndarray, list)):
-            quantiles = np.array([quantiles])
+            quantiles = np.atleast_1d(np.array(quantiles))

        # if we trained a binning tree, then we should re-bin the data
        # XXX: this is inefficient and should be improved to be in line with what
@@ -777,15 +777,15 @@ def predict_quantiles(self, X, quantiles=0.5, method="nearest"):

            # (n_total_leaf_samples, n_outputs)
            leaf_node_samples = np.vstack(
-                (
+                [
                    est.leaf_nodes_samples_[leaf_nodes[jdx]]
                    for jdx, est in enumerate(self.estimators_)
-                )
+                ]
            )

            # get quantiles across all leaf node samples
            y_hat[idx, ...] = np.quantile(
-                leaf_node_samples, quantiles, axis=0, interpolation=method
+                leaf_node_samples, quantiles, axis=0, method=method
            )

        if is_classifier(self):
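Both changes above track NumPy API updates: the stack functions (`np.vstack` here) no longer accept generators, so the comprehension is materialized into a list, and `np.quantile` renamed its `interpolation` keyword to `method` in NumPy 1.22. A minimal standalone sketch of the resulting quantile path, assuming NumPy >= 1.22 and stubbing `leaf_node_samples` with random data:

    import numpy as np

    rng = np.random.default_rng(0)
    # Stand-in for the stacked per-tree leaf samples: (n_total_leaf_samples, n_outputs).
    leaf_node_samples = rng.normal(size=(100, 2))

    quantiles = 0.5  # callers may pass a scalar, a list, or an ndarray
    if not isinstance(quantiles, (np.ndarray, list)):
        # np.array(0.5) is 0-d; np.atleast_1d promotes it to shape (1,) so the
        # result below always keeps a leading quantile axis.
        quantiles = np.atleast_1d(np.array(quantiles))

    # `method` replaces the `interpolation` keyword deprecated in NumPy 1.22.
    y_hat = np.quantile(leaf_node_samples, quantiles, axis=0, method="nearest")
    print(y_hat.shape)  # (1, 2) == (n_quantiles, n_outputs)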
@@ -1550,6 +1550,17 @@ class RandomForestClassifier(ForestClassifier):

        .. versionadded:: 1.4

+    categorical : array-like or str
+        Array of feature indices, boolean array of length n_features,
+        ``'all'`` or ``None``. Indicates which features should be
+        considered as categorical rather than ordinal. For decision trees,
+        the maximum number of categories is 64. In practice, the limit will
+        often be lower because the process of searching for the best possible
+        split grows exponentially with the number of categories. However, a
+        shortcut due to Breiman (1984) is used when fitting data with binary
+        labels using the ``Gini`` or ``Entropy`` criteria. In this case,
+        the runtime is linear in the number of categories.
+
    Attributes
    ----------
    estimator_ : :class:`~sklearn.tree.DecisionTreeClassifier`
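The linear-runtime claim in this docstring is the classical Breiman (1984) result: for binary labels under Gini or entropy, the optimal subset split over k categories lies among the k - 1 contiguous partitions obtained after sorting categories by their mean response, so the 2^(k-1) - 1 subset enumeration is unnecessary. A minimal illustrative sketch of that idea in pure NumPy (the real splitter lives in the tree code; `best_categorical_split` is a hypothetical helper name, not part of this PR):

    import numpy as np

    def best_categorical_split(codes, y):
        """Gini-optimal left/right partition of integer category codes for binary y.

        Sorts categories by mean(y) and scans only contiguous partitions, which
        Breiman (1984) showed must contain the optimal subset split.
        """
        cats = np.unique(codes)
        order = np.argsort([y[codes == c].mean() for c in cats])
        best_gini, best_left = np.inf, None
        for i in range(1, len(cats)):
            left = set(cats[order[:i]].tolist())
            mask = np.isin(codes, list(left))
            # Weighted Gini impurity of the two children (binary labels).
            gini = sum(
                len(side) / len(y) * 2 * side.mean() * (1 - side.mean())
                for side in (y[mask], y[~mask])
            )
            if gini < best_gini:
                best_gini, best_left = gini, left
        return best_left, best_gini

    codes = np.array([0, 0, 1, 1, 2, 2, 3, 3])
    y = np.array([0, 0, 1, 1, 0, 1, 1, 1])
    print(best_categorical_split(codes, y))  # ({0, 2}, 0.1875)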
@@ -1693,6 +1704,7 @@ def __init__(
        max_bins=None,
        store_leaf_values=False,
        monotonic_cst=None,
+        categorical=None,
    ):
        super().__init__(
            estimator=DecisionTreeClassifier(),
1697
1709
super ().__init__ (
1698
1710
estimator = DecisionTreeClassifier (),
@@ -1710,6 +1722,7 @@ def __init__(
                "ccp_alpha",
                "store_leaf_values",
                "monotonic_cst",
+                "categorical",
            ),
            bootstrap=bootstrap,
            oob_score=oob_score,
@@ -1733,6 +1746,7 @@ def __init__(
        self.min_impurity_decrease = min_impurity_decrease
        self.monotonic_cst = monotonic_cst
        self.ccp_alpha = ccp_alpha
+        self.categorical = categorical


class RandomForestRegressor(ForestRegressor):
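For reviewers who want to exercise the new keyword end to end, a minimal usage sketch (this assumes the branch from this PR is installed; `categorical` does not exist in upstream scikit-learn's RandomForestClassifier):

    import numpy as np
    from sklearn.ensemble import RandomForestClassifier

    rng = np.random.default_rng(0)
    # Feature 0 is ordinal; feature 1 holds integer category codes (< 64 levels).
    X = np.column_stack([rng.normal(size=200), rng.integers(0, 5, size=200)])
    y = (X[:, 1] >= 3).astype(int)

    # Mark feature 1 as categorical; per the docstring, a boolean mask
    # [False, True] or the string 'all' would also be accepted.
    clf = RandomForestClassifier(n_estimators=10, categorical=[1], random_state=0)
    clf.fit(X, y)
    print(clf.predict(X[:5]))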
@@ -1935,6 +1949,17 @@ class RandomForestRegressor(ForestRegressor):

        .. versionadded:: 1.4

+    categorical : array-like or str
+        Array of feature indices, boolean array of length n_features,
+        ``'all'`` or ``None``. Indicates which features should be
+        considered as categorical rather than ordinal. For decision trees,
+        the maximum number of categories is 64. In practice, the limit will
+        often be lower because the process of searching for the best possible
+        split grows exponentially with the number of categories. However, a
+        shortcut due to Breiman (1984) is used when fitting data with binary
+        labels using the ``Gini`` or ``Entropy`` criteria. In this case,
+        the runtime is linear in the number of categories.
+
    Attributes
    ----------
    estimator_ : :class:`~sklearn.tree.DecisionTreeRegressor`
@@ -2065,6 +2090,7 @@ def __init__(
        max_bins=None,
        store_leaf_values=False,
        monotonic_cst=None,
+        categorical=None,
    ):
        super().__init__(
            estimator=DecisionTreeRegressor(),
@@ -2082,6 +2108,7 @@ def __init__(
                "ccp_alpha",
                "store_leaf_values",
                "monotonic_cst",
+                "categorical",
            ),
            bootstrap=bootstrap,
            oob_score=oob_score,
@@ -2104,6 +2131,7 @@ def __init__(
        self.min_impurity_decrease = min_impurity_decrease
        self.ccp_alpha = ccp_alpha
        self.monotonic_cst = monotonic_cst
+        self.categorical = categorical


class ExtraTreesClassifier(ForestClassifier):
@@ -2316,24 +2344,16 @@ class ExtraTreesClassifier(ForestClassifier):

        .. versionadded:: 1.4

-    monotonic_cst : array-like of int of shape (n_features), default=None
-        Indicates the monotonicity constraint to enforce on each feature.
-          - 1: monotonically increasing
-          - 0: no constraint
-          - -1: monotonically decreasing
-
-        If monotonic_cst is None, no constraints are applied.
-
-        Monotonicity constraints are not supported for:
-          - multiclass classifications (i.e. when `n_classes > 2`),
-          - multioutput classifications (i.e. when `n_outputs_ > 1`),
-          - classifications trained on data with missing values.
-
-        The constraints hold over the probability of the positive class.
-
-        Read more in the :ref:`User Guide <monotonic_cst_gbdt>`.
-
-        .. versionadded:: 1.4
+    categorical : array-like or str
+        Array of feature indices, boolean array of length n_features,
+        ``'all'`` or ``None``. Indicates which features should be
+        considered as categorical rather than ordinal. For decision trees,
+        the maximum number of categories is 64. In practice, the limit will
+        often be lower because the process of searching for the best possible
+        split grows exponentially with the number of categories. However, a
+        shortcut due to Breiman (1984) is used when fitting data with binary
+        labels using the ``Gini`` or ``Entropy`` criteria. In this case,
+        the runtime is linear in the number of categories.

    Attributes
    ----------
@@ -2467,6 +2487,7 @@ def __init__(
        max_bins=None,
        store_leaf_values=False,
        monotonic_cst=None,
+        categorical=None,
    ):
        super().__init__(
            estimator=ExtraTreeClassifier(),
@@ -2484,6 +2505,7 @@ def __init__(
                "ccp_alpha",
                "store_leaf_values",
                "monotonic_cst",
+                "categorical",
            ),
            bootstrap=bootstrap,
            oob_score=oob_score,
@@ -2507,6 +2529,7 @@ def __init__(
        self.min_impurity_decrease = min_impurity_decrease
        self.ccp_alpha = ccp_alpha
        self.monotonic_cst = monotonic_cst
+        self.categorical = categorical


class ExtraTreesRegressor(ForestRegressor):
@@ -2704,6 +2727,17 @@ class ExtraTreesRegressor(ForestRegressor):

        .. versionadded:: 1.4

+    categorical : array-like or str
+        Array of feature indices, boolean array of length n_features,
+        ``'all'`` or ``None``. Indicates which features should be
+        considered as categorical rather than ordinal. For decision trees,
+        the maximum number of categories is 64. In practice, the limit will
+        often be lower because the process of searching for the best possible
+        split grows exponentially with the number of categories. However, a
+        shortcut due to Breiman (1984) is used when fitting data with binary
+        labels using the ``Gini`` or ``Entropy`` criteria. In this case,
+        the runtime is linear in the number of categories.
+
    Attributes
    ----------
    estimator_ : :class:`~sklearn.tree.ExtraTreeRegressor`
@@ -2819,6 +2853,7 @@ def __init__(
        max_bins=None,
        store_leaf_values=False,
        monotonic_cst=None,
+        categorical=None,
    ):
        super().__init__(
            estimator=ExtraTreeRegressor(),
@@ -2836,6 +2871,7 @@ def __init__(
                "ccp_alpha",
                "store_leaf_values",
                "monotonic_cst",
+                "categorical",
            ),
            bootstrap=bootstrap,
            oob_score=oob_score,
@@ -2858,6 +2894,7 @@ def __init__(
        self.min_impurity_decrease = min_impurity_decrease
        self.ccp_alpha = ccp_alpha
        self.monotonic_cst = monotonic_cst
+        self.categorical = categorical


class RandomTreesEmbedding(TransformerMixin, BaseForest):
@@ -2969,6 +3006,17 @@ class RandomTreesEmbedding(TransformerMixin, BaseForest):
        new forest. See :term:`Glossary <warm_start>` and
        :ref:`gradient_boosting_warm_start` for details.

+    categorical : array-like or str
+        Array of feature indices, boolean array of length n_features,
+        ``'all'`` or ``None``. Indicates which features should be
+        considered as categorical rather than ordinal. For decision trees,
+        the maximum number of categories is 64. In practice, the limit will
+        often be lower because the process of searching for the best possible
+        split grows exponentially with the number of categories. However, a
+        shortcut due to Breiman (1984) is used when fitting data with binary
+        labels using the ``Gini`` or ``Entropy`` criteria. In this case,
+        the runtime is linear in the number of categories.
+
    Attributes
    ----------
    estimator_ : :class:`~sklearn.tree.ExtraTreeRegressor` instance
@@ -3073,6 +3121,7 @@ def __init__(
        verbose=0,
        warm_start=False,
        store_leaf_values=False,
+        categorical=None,
    ):
        super().__init__(
            estimator=ExtraTreeRegressor(),
@@ -3088,6 +3137,7 @@ def __init__(
                "min_impurity_decrease",
                "random_state",
                "store_leaf_values",
+                "categorical",
            ),
            bootstrap=False,
            oob_score=False,
@@ -3106,6 +3156,7 @@ def __init__(
        self.max_leaf_nodes = max_leaf_nodes
        self.min_impurity_decrease = min_impurity_decrease
        self.sparse_output = sparse_output
+        self.categorical = categorical

    def _set_oob_score_and_attributes(self, X, y, scoring_function=None):
        raise NotImplementedError("OOB score not supported by tree embedding")