
Commit cc035d0

Fix regression error
Signed-off-by: Adam Li <adam2392@gmail.com>
1 parent 4bc651d commit cc035d0

4 files changed (+20, -11 lines)

sklearn/ensemble/_forest.py

Lines changed: 9 additions & 9 deletions
@@ -40,27 +40,28 @@ class calls the ``fit`` method of each sub-estimator on random samples
 # License: BSD 3 clause


-from time import time
 import threading
 from abc import ABCMeta, abstractmethod
 from numbers import Integral, Real
+from time import time
 from warnings import catch_warnings, simplefilter, warn

 import numpy as np
 from scipy.sparse import hstack as sparse_hstack
 from scipy.sparse import issparse

-from sklearn.base import is_classifier, _fit_context
 from sklearn.base import (
     ClassifierMixin,
     MultiOutputMixin,
     RegressorMixin,
     TransformerMixin,
+    _fit_context,
+    is_classifier,
 )
-
-from sklearn.metrics import accuracy_score, r2_score
-from sklearn.preprocessing import OneHotEncoder
+from sklearn.ensemble._base import BaseEnsemble, _partition_estimators
+from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper
 from sklearn.exceptions import DataConversionWarning
+from sklearn.metrics import accuracy_score, r2_score
 from sklearn.preprocessing import OneHotEncoder
 from sklearn.tree import (
     BaseDecisionTree,
@@ -69,8 +70,8 @@ class calls the ``fit`` method of each sub-estimator on random samples
     ExtraTreeClassifier,
     ExtraTreeRegressor,
 )
-from ..tree._tree import DOUBLE, DTYPE
 from sklearn.utils import check_random_state, compute_sample_weight
+from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
 from sklearn.utils._param_validation import Interval, RealNotInt, StrOptions
 from sklearn.utils.multiclass import check_classification_targets, type_of_target
 from sklearn.utils.parallel import Parallel, delayed
@@ -80,9 +81,8 @@ class calls the ``fit`` method of each sub-estimator on random samples
     _num_samples,
     check_is_fitted,
 )
-from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper
-from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
-from sklearn.ensemble._base import BaseEnsemble, _partition_estimators
+
+from ..tree._tree import DOUBLE, DTYPE

 __all__ = [
     "RandomForestClassifier",

sklearn/tree/_criterion.pyx

Lines changed: 2 additions & 1 deletion
@@ -1496,10 +1496,11 @@ cdef class Poisson(RegressionCriterion):
     """Half Poisson deviance as impurity criterion.

     Poisson deviance = 2/n * sum(y_true * log(y_true/y_pred) + y_pred - y_true)
-
+
     Note that the deviance is >= 0, and since we have `y_pred = mean(y_true)`
     at the leaves, one always has `sum(y_pred - y_true) = 0`. It remains the
     implemented impurity (factor 2 is skipped):
+
     1/n * sum(y_true * log(y_true/y_pred)
     """
     # FIXME in 1.0:
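
As a side note on the docstring touched here, a small NumPy sketch (not part of the commit, values chosen arbitrarily) of the identity it states: when the leaf prediction is the mean of the targets, the `y_pred - y_true` term sums to zero, so the implemented impurity reduces to the log term with the factor 2 skipped.

import numpy as np

y_true = np.array([1.0, 2.0, 3.0, 6.0])          # positive targets, as Poisson requires
y_pred = np.full_like(y_true, y_true.mean())     # leaf prediction = mean(y_true)

half_deviance = np.mean(y_true * np.log(y_true / y_pred) + y_pred - y_true)
impurity = np.mean(y_true * np.log(y_true / y_pred))   # factor 2 skipped

print(np.isclose(half_deviance, impurity))       # True: sum(y_pred - y_true) == 0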

sklearn/tree/_splitter.pyx

Lines changed: 6 additions & 0 deletions
@@ -507,6 +507,12 @@ cdef inline int node_split_best(
                 current_split.pos = p

                 # Reject if min_samples_leaf is not guaranteed
+                if missing_go_to_left:
+                    n_left = current_split.pos - splitter.start + n_missing
+                    n_right = end_non_missing - current_split.pos
+                else:
+                    n_left = current_split.pos - splitter.start
+                    n_right = end_non_missing - current_split.pos + n_missing
                 if splitter.check_presplit_conditions(current_split, n_missing, missing_go_to_left) == 1:
                     continue
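
To make the restored bookkeeping concrete, here is a plain-Python sketch (illustrative numbers only, not the Cython splitter itself): the samples with missing values sit after the non-missing block of the node, so they are counted toward the left child when `missing_go_to_left` is true and toward the right child otherwise, before the `min_samples_leaf` check runs.

# Hypothetical node layout, purely for illustration.
start = 0              # first sample index of the node (splitter.start)
end_non_missing = 8    # end of the non-missing block; missing samples follow it
n_missing = 3          # samples whose split feature is missing
pos = 5                # candidate split position (current_split.pos)

for missing_go_to_left in (True, False):
    if missing_go_to_left:
        n_left = pos - start + n_missing
        n_right = end_non_missing - pos
    else:
        n_left = pos - start
        n_right = end_non_missing - pos + n_missing
    # Left and right together always cover every sample in the node.
    assert n_left + n_right == (end_non_missing - start) + n_missing
    print(missing_go_to_left, n_left, n_right)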

sklearn/tree/tests/test_tree.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2426,7 +2426,9 @@ def test_missing_values_on_equal_nodes_no_missing(criterion):
24262426
X = np.array([[0, 1, 2, 3, 8, 9, 11, 12, 15]]).T
24272427
y = np.array([0.1, 0.2, 0.3, 0.2, 1.4, 1.4, 1.5, 1.6, 2.6])
24282428

2429-
dtc = DecisionTreeRegressor(random_state=42, max_depth=1, criterion=criterion, store_leaf_values=True)
2429+
dtc = DecisionTreeRegressor(
2430+
random_state=42, max_depth=1, criterion=criterion, store_leaf_values=True
2431+
)
24302432
dtc.fit(X, y)
24312433

24322434
# Goes to right node because it has the most data points
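
For reference, a hedged usage sketch of the reformatted construction (standard scikit-learn API only; the `criterion` value and the `tree_` inspection are illustrative choices, and the fork-specific `store_leaf_values` flag is omitted): fitting the depth-1 regressor on the same data and reading the root split shows how many training samples land in each child.

import numpy as np
from sklearn.tree import DecisionTreeRegressor

X = np.array([[0, 1, 2, 3, 8, 9, 11, 12, 15]]).T
y = np.array([0.1, 0.2, 0.3, 0.2, 1.4, 1.4, 1.5, 1.6, 2.6])

dtc = DecisionTreeRegressor(random_state=42, max_depth=1, criterion="squared_error")
dtc.fit(X, y)

tree = dtc.tree_
print("root threshold:", tree.threshold[0])
print("samples per child:",
      tree.n_node_samples[tree.children_left[0]],
      tree.n_node_samples[tree.children_right[0]])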
