Commit 15f7cfb

CLN Renames missing_values_in_feature_mask (scikit-learn#26580)
1 parent 03ab632 commit 15f7cfb

5 files changed: +44 -37 lines changed

sklearn/tree/_classes.py

Lines changed: 15 additions & 8 deletions

@@ -181,7 +181,7 @@ def get_n_leaves(self):
     def _support_missing_values(self, X):
         return not issparse(X) and self._get_tags()["allow_nan"]

-    def _compute_feature_has_missing(self, X):
+    def _compute_missing_values_in_feature_mask(self, X):
         """Return boolean mask denoting if there are missing values for each feature.

         This method also ensures that X is finite.

@@ -193,7 +193,7 @@ def _compute_feature_has_missing(self, X):

         Returns
         -------
-        feature_has_missing : ndarray of shape (n_features,), or None
+        missing_values_in_feature_mask : ndarray of shape (n_features,), or None
             Missing value mask. If missing values are not supported or there
             are no missing values, return None.
         """

@@ -214,11 +214,16 @@ def _compute_feature_has_missing(self, X):
         if not np.isnan(overall_sum):
             return None

-        feature_has_missing = _any_isnan_axis0(X)
-        return feature_has_missing
+        missing_values_in_feature_mask = _any_isnan_axis0(X)
+        return missing_values_in_feature_mask

     def _fit(
-        self, X, y, sample_weight=None, check_input=True, feature_has_missing=None
+        self,
+        X,
+        y,
+        sample_weight=None,
+        check_input=True,
+        missing_values_in_feature_mask=None,
     ):
         random_state = check_random_state(self.random_state)

@@ -227,7 +232,7 @@ def _fit(
             # We can't pass multi_output=True because that would allow y to be
             # csr.

-            # _compute_feature_has_missing will check for finite values and
+            # _compute_missing_values_in_feature_mask will check for finite values and
             # compute the missing mask if the tree supports missing values
             check_X_params = dict(
                 dtype=DTYPE, accept_sparse="csc", force_all_finite=False

@@ -237,7 +242,9 @@ def _fit(
                 X, y, validate_separately=(check_X_params, check_y_params)
             )

-            feature_has_missing = self._compute_feature_has_missing(X)
+            missing_values_in_feature_mask = (
+                self._compute_missing_values_in_feature_mask(X)
+            )
             if issparse(X):
                 X.sort_indices()

@@ -432,7 +439,7 @@ def _fit(
                 self.min_impurity_decrease,
             )

-        builder.build(self.tree_, X, y, sample_weight, feature_has_missing)
+        builder.build(self.tree_, X, y, sample_weight, missing_values_in_feature_mask)

         if self.n_outputs_ == 1 and is_classifier(self):
             self.n_classes_ = self.n_classes_[0]
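
Note on the renamed helper: the change above is a pure rename, and the behavior documented in the docstring is unchanged. As a rough illustration of that documented contract, here is a minimal NumPy sketch; the function name is a hypothetical stand-in, not scikit-learn's private method, and it only covers the dense-array case.

import numpy as np

def compute_missing_values_in_feature_mask(X):
    # Hypothetical stand-in mirroring the documented contract: return None
    # when the dense array contains no NaN at all, otherwise a boolean mask
    # with one entry per feature (column) flagging columns that contain NaN.
    if not np.isnan(X.sum()):
        # A non-NaN overall sum means there is no NaN anywhere,
        # since NaN propagates through summation.
        return None
    return np.isnan(X).any(axis=0)

X = np.array([[1.0, np.nan, 3.0],
              [4.0, 5.0, 6.0]])
print(compute_missing_values_in_feature_mask(X))  # [False  True False]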

sklearn/tree/_splitter.pxd

Lines changed: 1 addition & 1 deletion

@@ -81,7 +81,7 @@ cdef class Splitter:
         object X,
         const DOUBLE_t[:, ::1] y,
         const DOUBLE_t[:] sample_weight,
-        const unsigned char[::1] feature_has_missing,
+        const unsigned char[::1] missing_values_in_feature_mask,
     ) except -1

     cdef int node_reset(

sklearn/tree/_splitter.pyx

Lines changed: 22 additions & 22 deletions

@@ -107,7 +107,7 @@ cdef class Splitter:
         object X,
         const DOUBLE_t[:, ::1] y,
         const DOUBLE_t[:] sample_weight,
-        const unsigned char[::1] feature_has_missing,
+        const unsigned char[::1] missing_values_in_feature_mask,
     ) except -1:
         """Initialize the splitter.

@@ -172,7 +172,7 @@ cdef class Splitter:
         self.y = y

         self.sample_weight = sample_weight
-        if feature_has_missing is not None:
+        if missing_values_in_feature_mask is not None:
             self.criterion.init_sum_missing()
         return 0

@@ -808,19 +808,19 @@ cdef class DensePartitioner:
     cdef SIZE_t start
     cdef SIZE_t end
     cdef SIZE_t n_missing
-    cdef const unsigned char[::1] feature_has_missing
+    cdef const unsigned char[::1] missing_values_in_feature_mask

     def __init__(
         self,
         const DTYPE_t[:, :] X,
         SIZE_t[::1] samples,
         DTYPE_t[::1] feature_values,
-        const unsigned char[::1] feature_has_missing,
+        const unsigned char[::1] missing_values_in_feature_mask,
     ):
         self.X = X
         self.samples = samples
         self.feature_values = feature_values
-        self.feature_has_missing = feature_has_missing
+        self.missing_values_in_feature_mask = missing_values_in_feature_mask

     cdef inline void init_node_split(self, SIZE_t start, SIZE_t end) noexcept nogil:
         """Initialize splitter at the beginning of node_split."""

@@ -843,13 +843,13 @@ cdef class DensePartitioner:
            const DTYPE_t[:, :] X = self.X
            SIZE_t[::1] samples = self.samples
            SIZE_t n_missing = 0
-           const unsigned char[::1] feature_has_missing = self.feature_has_missing
+           const unsigned char[::1] missing_values_in_feature_mask = self.missing_values_in_feature_mask

        # Sort samples along that feature; by
        # copying the values into an array and
        # sorting the array in a manner which utilizes the cache more
        # effectively.
-       if feature_has_missing is not None and feature_has_missing[current_feature]:
+       if missing_values_in_feature_mask is not None and missing_values_in_feature_mask[current_feature]:
            i, current_end = self.start, self.end - 1
            # Missing values are placed at the end and do not participate in the sorting.
            while i <= current_end:

@@ -1018,7 +1018,7 @@ cdef class SparsePartitioner:
     cdef SIZE_t start
     cdef SIZE_t end
     cdef SIZE_t n_missing
-    cdef const unsigned char[::1] feature_has_missing
+    cdef const unsigned char[::1] missing_values_in_feature_mask

     cdef const DTYPE_t[::1] X_data
     cdef const INT32_t[::1] X_indices

@@ -1039,7 +1039,7 @@ cdef class SparsePartitioner:
         SIZE_t[::1] samples,
         SIZE_t n_samples,
         DTYPE_t[::1] feature_values,
-        const unsigned char[::1] feature_has_missing,
+        const unsigned char[::1] missing_values_in_feature_mask,
     ):
         if not isspmatrix_csc(X):
             raise ValueError("X should be in csc format")

@@ -1063,7 +1063,7 @@ cdef class SparsePartitioner:
         for p in range(n_samples):
             self.index_to_samples[samples[p]] = p

-        self.feature_has_missing = feature_has_missing
+        self.missing_values_in_feature_mask = missing_values_in_feature_mask

     cdef inline void init_node_split(self, SIZE_t start, SIZE_t end) noexcept nogil:
         """Initialize splitter at the beginning of node_split."""

@@ -1434,11 +1434,11 @@ cdef class BestSplitter(Splitter):
         object X,
         const DOUBLE_t[:, ::1] y,
         const DOUBLE_t[:] sample_weight,
-        const unsigned char[::1] feature_has_missing,
+        const unsigned char[::1] missing_values_in_feature_mask,
     ) except -1:
-        Splitter.init(self, X, y, sample_weight, feature_has_missing)
+        Splitter.init(self, X, y, sample_weight, missing_values_in_feature_mask)
         self.partitioner = DensePartitioner(
-            X, self.samples, self.feature_values, feature_has_missing
+            X, self.samples, self.feature_values, missing_values_in_feature_mask
         )

     cdef int node_split(self, double impurity, SplitRecord* split,

@@ -1460,11 +1460,11 @@ cdef class BestSparseSplitter(Splitter):
         object X,
         const DOUBLE_t[:, ::1] y,
         const DOUBLE_t[:] sample_weight,
-        const unsigned char[::1] feature_has_missing,
+        const unsigned char[::1] missing_values_in_feature_mask,
     ) except -1:
-        Splitter.init(self, X, y, sample_weight, feature_has_missing)
+        Splitter.init(self, X, y, sample_weight, missing_values_in_feature_mask)
         self.partitioner = SparsePartitioner(
-            X, self.samples, self.n_samples, self.feature_values, feature_has_missing
+            X, self.samples, self.n_samples, self.feature_values, missing_values_in_feature_mask
         )

     cdef int node_split(self, double impurity, SplitRecord* split,

@@ -1486,11 +1486,11 @@ cdef class RandomSplitter(Splitter):
         object X,
         const DOUBLE_t[:, ::1] y,
         const DOUBLE_t[:] sample_weight,
-        const unsigned char[::1] feature_has_missing,
+        const unsigned char[::1] missing_values_in_feature_mask,
     ) except -1:
-        Splitter.init(self, X, y, sample_weight, feature_has_missing)
+        Splitter.init(self, X, y, sample_weight, missing_values_in_feature_mask)
         self.partitioner = DensePartitioner(
-            X, self.samples, self.feature_values, feature_has_missing
+            X, self.samples, self.feature_values, missing_values_in_feature_mask
        )

     cdef int node_split(self, double impurity, SplitRecord* split,

@@ -1512,11 +1512,11 @@ cdef class RandomSparseSplitter(Splitter):
         object X,
         const DOUBLE_t[:, ::1] y,
         const DOUBLE_t[:] sample_weight,
-        const unsigned char[::1] feature_has_missing,
+        const unsigned char[::1] missing_values_in_feature_mask,
     ) except -1:
-        Splitter.init(self, X, y, sample_weight, feature_has_missing)
+        Splitter.init(self, X, y, sample_weight, missing_values_in_feature_mask)
         self.partitioner = SparsePartitioner(
-            X, self.samples, self.n_samples, self.feature_values, feature_has_missing
+            X, self.samples, self.n_samples, self.feature_values, missing_values_in_feature_mask
         )

     cdef int node_split(self, double impurity, SplitRecord* split,
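
Aside on the DensePartitioner hunk above ("Missing values are placed at the end and do not participate in the sorting"): the Cython code does this in place with index swaps, guarded by missing_values_in_feature_mask[current_feature]. A rough NumPy sketch of the same idea, using a hypothetical helper name rather than the actual implementation:

import numpy as np

def sort_samples_put_missing_at_end(X_col, samples):
    # Hypothetical pure-Python analogue: move sample indices whose value for
    # the current feature is NaN to the end of the range, and sort only the
    # non-missing part by feature value.
    values = X_col[samples]
    missing = np.isnan(values)
    non_missing_sorted = samples[~missing][np.argsort(values[~missing])]
    return np.concatenate([non_missing_sorted, samples[missing]]), int(missing.sum())

X_col = np.array([3.0, np.nan, 1.0, 2.0, np.nan])
samples = np.arange(5)
order, n_missing = sort_samples_put_missing_at_end(X_col, samples)
print(order, n_missing)  # [2 3 0 1 4] 2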

sklearn/tree/_tree.pxd

Lines changed: 1 addition & 1 deletion

@@ -107,7 +107,7 @@ cdef class TreeBuilder:
         object X,
         const DOUBLE_t[:, ::1] y,
         const DOUBLE_t[:] sample_weight=*,
-        const unsigned char[::1] feature_has_missing=*,
+        const unsigned char[::1] missing_values_in_feature_mask=*,
     )

     cdef _check_input(

sklearn/tree/_tree.pyx

Lines changed: 5 additions & 5 deletions

@@ -94,7 +94,7 @@ cdef class TreeBuilder:
         object X,
         const DOUBLE_t[:, ::1] y,
         const DOUBLE_t[:] sample_weight=None,
-        const unsigned char[::1] feature_has_missing=None,
+        const unsigned char[::1] missing_values_in_feature_mask=None,
     ):
         """Build a decision tree from the training set (X, y)."""
         pass

@@ -168,7 +168,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder):
         object X,
         const DOUBLE_t[:, ::1] y,
         const DOUBLE_t[:] sample_weight=None,
-        const unsigned char[::1] feature_has_missing=None,
+        const unsigned char[::1] missing_values_in_feature_mask=None,
     ):
         """Build a decision tree from the training set (X, y)."""

@@ -194,7 +194,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder):
         cdef double min_impurity_decrease = self.min_impurity_decrease

         # Recursive partition (without actual recursion)
-        splitter.init(X, y, sample_weight, feature_has_missing)
+        splitter.init(X, y, sample_weight, missing_values_in_feature_mask)

         cdef SIZE_t start
         cdef SIZE_t end

@@ -366,7 +366,7 @@ cdef class BestFirstTreeBuilder(TreeBuilder):
         object X,
         const DOUBLE_t[:, ::1] y,
         const DOUBLE_t[:] sample_weight=None,
-        const unsigned char[::1] feature_has_missing=None,
+        const unsigned char[::1] missing_values_in_feature_mask=None,
     ):
         """Build a decision tree from the training set (X, y)."""

@@ -378,7 +378,7 @@ cdef class BestFirstTreeBuilder(TreeBuilder):
         cdef SIZE_t max_leaf_nodes = self.max_leaf_nodes

         # Recursive partition (without actual recursion)
-        splitter.init(X, y, sample_weight, feature_has_missing)
+        splitter.init(X, y, sample_weight, missing_values_in_feature_mask)

         cdef vector[FrontierRecord] frontier
         cdef FrontierRecord record
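
For context beyond the rename itself: the mask threaded through splitter.init and builder.build here is part of the missing-value support for decision trees. A short usage sketch, assuming scikit-learn >= 1.3 with dense input and the default splitter, where DecisionTreeClassifier accepts NaN:

import numpy as np
from sklearn.tree import DecisionTreeClassifier

# Training data with NaN entries; the tree decides at fit time which child
# the missing values of each split feature are sent to.
X = np.array([[0.0, 1.0],
              [np.nan, 2.0],
              [3.0, np.nan],
              [4.0, 5.0]])
y = np.array([0, 0, 1, 1])

clf = DecisionTreeClassifier(random_state=0).fit(X, y)
print(clf.predict(X))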
