@@ -455,6 +455,10 @@ def _prep_data(
         )


+    # The existing implementation of _fit was almost nothing but data prep and
+    # state initialization, followed by a call to _build_tree. This made it
+    # impossible to tweak _fit ever so slightly without duplicating a lot of
+    # code. So we've modularized it a bit.
     def _fit(
         self,
         X,
@@ -473,6 +477,11 @@ def _fit(
             classes=classes
         )

+        # Criterion can't be created until we do the class distribution analysis
+        # in _prep_data, so we have to create it here, and best to do it as a
+        # factory which can be overridden if necessary. This used to be in
+        # _build_tree, but that is the wrong place to commit to a particular
+        # implementation; it should be passed in as a parameter.
         criterion = BaseDecisionTree._create_criterion(
             self,
             n_outputs=bta.y.shape[1],
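
The block deleted from _build_tree in the next hunk is the criterion-selection
logic this factory call replaces. A minimal sketch of what _create_criterion
plausibly contains, assuming the removed branching moved over essentially
unchanged; BaseCriterion, CRITERIA_CLF, and CRITERIA_REG are the module-level
names already referenced in the diff, and the keyword signature is an
assumption based on the truncated call above:

    import copy

    from sklearn.base import is_classifier

    def _create_criterion(self, n_outputs, n_samples=None, n_classes=None):
        # Hypothetical factory body: the same selection that _build_tree used
        # to hard-code, isolated in one overridable place. BaseCriterion,
        # CRITERIA_CLF, and CRITERIA_REG are the surrounding module's globals.
        criterion = self.criterion
        if not isinstance(criterion, BaseCriterion):
            if is_classifier(self):
                criterion = CRITERIA_CLF[self.criterion](n_outputs, n_classes)
            else:
                criterion = CRITERIA_REG[self.criterion](n_outputs, n_samples)
        else:
            # Deepcopy in case the criterion has mutable attributes that might
            # be shared and modified concurrently during parallel fitting.
            criterion = copy.deepcopy(criterion)
        return criterion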
@@ -559,20 +568,6 @@ def _build_tree(
         """
         n_samples = X.shape[0]

-        # Build tree
-        # criterion = self.criterion
-        # if not isinstance(criterion, BaseCriterion):
-        #     if is_classifier(self):
-        #         criterion = CRITERIA_CLF[self.criterion](
-        #             self.n_outputs_, self.n_classes_
-        #         )
-        #     else:
-        #         criterion = CRITERIA_REG[self.criterion](self.n_outputs_, n_samples)
-        # else:
-        #     # Make a deepcopy in case the criterion has mutable attributes that
-        #     # might be shared and modified concurrently during parallel fitting
-        #     criterion = copy.deepcopy(criterion)
-
         SPLITTERS = SPARSE_SPLITTERS if issparse(X) else DENSE_SPLITTERS

         if self.monotonic_cst is None:
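
Because the selection now goes through a factory, a subclass can substitute its
own criterion without re-implementing _build_tree. A hypothetical usage sketch,
assuming the factory signature above and a call site that dispatches through
the subclass (the hunk above calls BaseDecisionTree._create_criterion
explicitly); EntropyOnlyTree is illustrative and not part of this PR:

    from sklearn.tree import DecisionTreeClassifier

    class EntropyOnlyTree(DecisionTreeClassifier):
        # Hypothetical override: always build an entropy criterion, whatever
        # string the estimator was constructed with. CRITERIA_CLF is the
        # module-level registry referenced in the removed block above.
        def _create_criterion(self, n_outputs, n_samples=None, n_classes=None):
            return CRITERIA_CLF["entropy"](n_outputs, n_classes)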