Skip to content

Commit 6fd2f83

Browse files
authored
MNT convert unsigned char to uint8_t (scikit-learn#29510)
Signed-off-by: Adam Li <adam2392@gmail.com>
1 parent a4e2bfb commit 6fd2f83

File tree

6 files changed

35 additions and 36 deletions

sklearn/tree/_splitter.pxd

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
from ._criterion cimport Criterion
66
from ._tree cimport ParentInfo
77

8-
from ..utils._typedefs cimport float32_t, float64_t, intp_t, int8_t, int32_t, uint32_t
8+
from ..utils._typedefs cimport float32_t, float64_t, intp_t, int8_t, int32_t, uint8_t, uint32_t
99

1010

1111
cdef struct SplitRecord:
@@ -20,7 +20,7 @@ cdef struct SplitRecord:
2020
float64_t impurity_right # Impurity of the right split.
2121
float64_t lower_bound # Lower bound on value of both children for monotonicity
2222
float64_t upper_bound # Upper bound on value of both children for monotonicity
23-
unsigned char missing_go_to_left # Controls if missing values go to the left node.
23+
uint8_t missing_go_to_left # Controls if missing values go to the left node.
2424
intp_t n_missing # Number of missing values for the feature being split on
2525

2626
cdef class Splitter:
@@ -81,7 +81,7 @@ cdef class Splitter:
8181
object X,
8282
const float64_t[:, ::1] y,
8383
const float64_t[:] sample_weight,
84-
const unsigned char[::1] missing_values_in_feature_mask,
84+
const uint8_t[::1] missing_values_in_feature_mask,
8585
) except -1
8686

8787
cdef int node_reset(

sklearn/tree/_splitter.pyx

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -113,7 +113,7 @@ cdef class Splitter:
113113
object X,
114114
const float64_t[:, ::1] y,
115115
const float64_t[:] sample_weight,
116-
const unsigned char[::1] missing_values_in_feature_mask,
116+
const uint8_t[::1] missing_values_in_feature_mask,
117117
) except -1:
118118
"""Initialize the splitter.
119119
@@ -932,14 +932,14 @@ cdef class DensePartitioner:
932932
cdef intp_t start
933933
cdef intp_t end
934934
cdef intp_t n_missing
935-
cdef const unsigned char[::1] missing_values_in_feature_mask
935+
cdef const uint8_t[::1] missing_values_in_feature_mask
936936

937937
def __init__(
938938
self,
939939
const float32_t[:, :] X,
940940
intp_t[::1] samples,
941941
float32_t[::1] feature_values,
942-
const unsigned char[::1] missing_values_in_feature_mask,
942+
const uint8_t[::1] missing_values_in_feature_mask,
943943
):
944944
self.X = X
945945
self.samples = samples
@@ -967,7 +967,7 @@ cdef class DensePartitioner:
967967
const float32_t[:, :] X = self.X
968968
intp_t[::1] samples = self.samples
969969
intp_t n_missing = 0
970-
const unsigned char[::1] missing_values_in_feature_mask = self.missing_values_in_feature_mask
970+
const uint8_t[::1] missing_values_in_feature_mask = self.missing_values_in_feature_mask
971971

972972
# Sort samples along that feature; by
973973
# copying the values into an array and
@@ -1022,7 +1022,7 @@ cdef class DensePartitioner:
10221022
float32_t max_feature_value = -INFINITY_32t
10231023
float32_t[::1] feature_values = self.feature_values
10241024
intp_t n_missing = 0
1025-
const unsigned char[::1] missing_values_in_feature_mask = self.missing_values_in_feature_mask
1025+
const uint8_t[::1] missing_values_in_feature_mask = self.missing_values_in_feature_mask
10261026

10271027
# We are copying the values into an array and
10281028
# finding min/max of the array in a manner which utilizes the cache more
@@ -1184,7 +1184,7 @@ cdef class SparsePartitioner:
11841184
cdef intp_t start
11851185
cdef intp_t end
11861186
cdef intp_t n_missing
1187-
cdef const unsigned char[::1] missing_values_in_feature_mask
1187+
cdef const uint8_t[::1] missing_values_in_feature_mask
11881188

11891189
cdef const float32_t[::1] X_data
11901190
cdef const int32_t[::1] X_indices
@@ -1205,7 +1205,7 @@ cdef class SparsePartitioner:
12051205
intp_t[::1] samples,
12061206
intp_t n_samples,
12071207
float32_t[::1] feature_values,
1208-
const unsigned char[::1] missing_values_in_feature_mask,
1208+
const uint8_t[::1] missing_values_in_feature_mask,
12091209
):
12101210
if not (issparse(X) and X.format == "csc"):
12111211
raise ValueError("X should be in csc format")
@@ -1607,7 +1607,7 @@ cdef class BestSplitter(Splitter):
16071607
object X,
16081608
const float64_t[:, ::1] y,
16091609
const float64_t[:] sample_weight,
1610-
const unsigned char[::1] missing_values_in_feature_mask,
1610+
const uint8_t[::1] missing_values_in_feature_mask,
16111611
) except -1:
16121612
Splitter.init(self, X, y, sample_weight, missing_values_in_feature_mask)
16131613
self.partitioner = DensePartitioner(
@@ -1635,7 +1635,7 @@ cdef class BestSparseSplitter(Splitter):
16351635
object X,
16361636
const float64_t[:, ::1] y,
16371637
const float64_t[:] sample_weight,
1638-
const unsigned char[::1] missing_values_in_feature_mask,
1638+
const uint8_t[::1] missing_values_in_feature_mask,
16391639
) except -1:
16401640
Splitter.init(self, X, y, sample_weight, missing_values_in_feature_mask)
16411641
self.partitioner = SparsePartitioner(
@@ -1663,7 +1663,7 @@ cdef class RandomSplitter(Splitter):
16631663
object X,
16641664
const float64_t[:, ::1] y,
16651665
const float64_t[:] sample_weight,
1666-
const unsigned char[::1] missing_values_in_feature_mask,
1666+
const uint8_t[::1] missing_values_in_feature_mask,
16671667
) except -1:
16681668
Splitter.init(self, X, y, sample_weight, missing_values_in_feature_mask)
16691669
self.partitioner = DensePartitioner(
@@ -1691,7 +1691,7 @@ cdef class RandomSparseSplitter(Splitter):
16911691
object X,
16921692
const float64_t[:, ::1] y,
16931693
const float64_t[:] sample_weight,
1694-
const unsigned char[::1] missing_values_in_feature_mask,
1694+
const uint8_t[::1] missing_values_in_feature_mask,
16951695
) except -1:
16961696
Splitter.init(self, X, y, sample_weight, missing_values_in_feature_mask)
16971697
self.partitioner = SparsePartitioner(

sklearn/tree/_tree.pxd

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
import numpy as np
77
cimport numpy as cnp
88

9-
from ..utils._typedefs cimport float32_t, float64_t, intp_t, int32_t, uint32_t
9+
from ..utils._typedefs cimport float32_t, float64_t, intp_t, int32_t, uint8_t, uint32_t
1010

1111
from ._splitter cimport Splitter
1212
from ._splitter cimport SplitRecord
@@ -21,7 +21,7 @@ cdef struct Node:
2121
float64_t impurity # Impurity of the node (i.e., the value of the criterion)
2222
intp_t n_node_samples # Number of samples at the node
2323
float64_t weighted_n_node_samples # Weighted number of samples at the node
24-
unsigned char missing_go_to_left # Whether features have missing values
24+
uint8_t missing_go_to_left # Whether features have missing values
2525

2626

2727
cdef struct ParentInfo:
@@ -58,7 +58,7 @@ cdef class Tree:
5858
intp_t feature, float64_t threshold, float64_t impurity,
5959
intp_t n_node_samples,
6060
float64_t weighted_n_node_samples,
61-
unsigned char missing_go_to_left) except -1 nogil
61+
uint8_t missing_go_to_left) except -1 nogil
6262
cdef int _resize(self, intp_t capacity) except -1 nogil
6363
cdef int _resize_c(self, intp_t capacity=*) except -1 nogil
6464

@@ -105,7 +105,7 @@ cdef class TreeBuilder:
105105
object X,
106106
const float64_t[:, ::1] y,
107107
const float64_t[:] sample_weight=*,
108-
const unsigned char[::1] missing_values_in_feature_mask=*,
108+
const uint8_t[::1] missing_values_in_feature_mask=*,
109109
)
110110

111111
cdef _check_input(

sklearn/tree/_tree.pyx

Lines changed: 14 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,7 @@ cdef class TreeBuilder:
8080
object X,
8181
const float64_t[:, ::1] y,
8282
const float64_t[:] sample_weight=None,
83-
const unsigned char[::1] missing_values_in_feature_mask=None,
83+
const uint8_t[::1] missing_values_in_feature_mask=None,
8484
):
8585
"""Build a decision tree from the training set (X, y)."""
8686
pass
@@ -156,7 +156,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder):
156156
object X,
157157
const float64_t[:, ::1] y,
158158
const float64_t[:] sample_weight=None,
159-
const unsigned char[::1] missing_values_in_feature_mask=None,
159+
const uint8_t[::1] missing_values_in_feature_mask=None,
160160
):
161161
"""Build a decision tree from the training set (X, y)."""
162162

@@ -411,7 +411,7 @@ cdef class BestFirstTreeBuilder(TreeBuilder):
411411
object X,
412412
const float64_t[:, ::1] y,
413413
const float64_t[:] sample_weight=None,
414-
const unsigned char[::1] missing_values_in_feature_mask=None,
414+
const uint8_t[::1] missing_values_in_feature_mask=None,
415415
):
416416
"""Build a decision tree from the training set (X, y)."""
417417

@@ -910,7 +910,7 @@ cdef class Tree:
910910
intp_t feature, float64_t threshold, float64_t impurity,
911911
intp_t n_node_samples,
912912
float64_t weighted_n_node_samples,
913-
unsigned char missing_go_to_left) except -1 nogil:
913+
uint8_t missing_go_to_left) except -1 nogil:
914914
"""Add a node to the tree.
915915
916916
The new node registers itself as the child of its parent.
@@ -1578,7 +1578,7 @@ cdef class _CCPPruneController:
15781578
"""Save metrics when pruning"""
15791579
pass
15801580

1581-
cdef void after_pruning(self, unsigned char[:] in_subtree) noexcept nogil:
1581+
cdef void after_pruning(self, uint8_t[:] in_subtree) noexcept nogil:
15821582
"""Called after pruning"""
15831583
pass
15841584

@@ -1597,7 +1597,7 @@ cdef class _AlphaPruner(_CCPPruneController):
15971597
# less than or equal to self.ccp_alpha
15981598
return self.ccp_alpha < effective_alpha
15991599

1600-
cdef void after_pruning(self, unsigned char[:] in_subtree) noexcept nogil:
1600+
cdef void after_pruning(self, uint8_t[:] in_subtree) noexcept nogil:
16011601
"""Updates the number of leaves in subtree"""
16021602
for i in range(in_subtree.shape[0]):
16031603
if in_subtree[i]:
@@ -1627,7 +1627,7 @@ cdef struct CostComplexityPruningRecord:
16271627
intp_t node_idx
16281628
intp_t parent
16291629

1630-
cdef _cost_complexity_prune(unsigned char[:] leaves_in_subtree, # OUT
1630+
cdef _cost_complexity_prune(uint8_t[:] leaves_in_subtree, # OUT
16311631
Tree orig_tree,
16321632
_CCPPruneController controller):
16331633
"""Perform cost complexity pruning.
@@ -1640,7 +1640,7 @@ cdef _cost_complexity_prune(unsigned char[:] leaves_in_subtree, # OUT
16401640
16411641
Parameters
16421642
----------
1643-
leaves_in_subtree : unsigned char[:]
1643+
leaves_in_subtree : uint8_t[:]
16441644
Output for leaves of subtree
16451645
orig_tree : Tree
16461646
Original tree
@@ -1674,10 +1674,9 @@ cdef _cost_complexity_prune(unsigned char[:] leaves_in_subtree, # OUT
16741674
intp_t parent_idx
16751675

16761676
# candidate nodes that can be pruned
1677-
unsigned char[:] candidate_nodes = np.zeros(shape=n_nodes,
1678-
dtype=np.uint8)
1677+
uint8_t[:] candidate_nodes = np.zeros(shape=n_nodes, dtype=np.uint8)
16791678
# nodes in subtree
1680-
unsigned char[:] in_subtree = np.ones(shape=n_nodes, dtype=np.uint8)
1679+
uint8_t[:] in_subtree = np.ones(shape=n_nodes, dtype=np.uint8)
16811680
intp_t pruned_branch_node_idx
16821681
float64_t subtree_alpha
16831682
float64_t effective_alpha
@@ -1811,7 +1810,7 @@ def _build_pruned_tree_ccp(
18111810

18121811
cdef:
18131812
intp_t n_nodes = orig_tree.node_count
1814-
unsigned char[:] leaves_in_subtree = np.zeros(
1813+
uint8_t[:] leaves_in_subtree = np.zeros(
18151814
shape=n_nodes, dtype=np.uint8)
18161815

18171816
pruning_controller = _AlphaPruner(ccp_alpha=ccp_alpha)
@@ -1843,7 +1842,7 @@ def ccp_pruning_path(Tree orig_tree):
18431842
corresponding alpha value in ``ccp_alphas``.
18441843
"""
18451844
cdef:
1846-
unsigned char[:] leaves_in_subtree = np.zeros(
1845+
uint8_t[:] leaves_in_subtree = np.zeros(
18471846
shape=orig_tree.node_count, dtype=np.uint8)
18481847

18491848
path_finder = _PathFinder(orig_tree.node_count)
@@ -1876,7 +1875,7 @@ cdef struct BuildPrunedRecord:
18761875
cdef _build_pruned_tree(
18771876
Tree tree, # OUT
18781877
Tree orig_tree,
1879-
const unsigned char[:] leaves_in_subtree,
1878+
const uint8_t[:] leaves_in_subtree,
18801879
intp_t capacity
18811880
):
18821881
"""Build a pruned tree.
@@ -1890,7 +1889,7 @@ cdef _build_pruned_tree(
18901889
Location to place the pruned tree
18911890
orig_tree : Tree
18921891
Original tree
1893-
leaves_in_subtree : unsigned char memoryview, shape=(node_count, )
1892+
leaves_in_subtree : uint8_t memoryview, shape=(node_count, )
18941893
Boolean mask for leaves to include in subtree
18951894
capacity : intp_t
18961895
Number of nodes to initially allocate in pruned tree

sklearn/tree/_utils.pxd

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
cimport numpy as cnp
77
from ._tree cimport Node
88
from ..neighbors._quad_tree cimport Cell
9-
from ..utils._typedefs cimport float32_t, float64_t, intp_t, int32_t, uint32_t
9+
from ..utils._typedefs cimport float32_t, float64_t, intp_t, uint8_t, int32_t, uint32_t
1010

1111
cdef enum:
1212
# Max value for our rand_r replacement (near the bottom).
@@ -26,7 +26,7 @@ ctypedef fused realloc_ptr:
2626
# Add pointer types here as needed.
2727
(float32_t*)
2828
(intp_t*)
29-
(unsigned char*)
29+
(uint8_t*)
3030
(WeightedPQueueRecord*)
3131
(float64_t*)
3232
(float64_t**)

sklearn/tree/_utils.pyx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -447,7 +447,7 @@ def _any_isnan_axis0(const float32_t[:, :] X):
447447
intp_t i, j
448448
intp_t n_samples = X.shape[0]
449449
intp_t n_features = X.shape[1]
450-
unsigned char[::1] isnan_out = np.zeros(X.shape[1], dtype=np.bool_)
450+
uint8_t[::1] isnan_out = np.zeros(X.shape[1], dtype=np.bool_)
451451

452452
with nogil:
453453
for i in range(n_samples):

0 commit comments

Comments
 (0)