diff --git a/asv_benchmarks/benchmarks/ensemble.py b/asv_benchmarks/benchmarks/ensemble.py
index c336d1e5f8805..877fcdb09fe68 100644
--- a/asv_benchmarks/benchmarks/ensemble.py
+++ b/asv_benchmarks/benchmarks/ensemble.py
@@ -2,6 +2,7 @@
     GradientBoostingClassifier,
     HistGradientBoostingClassifier,
     RandomForestClassifier,
+    RandomForestRegressor,
 )
 
 from .common import Benchmark, Estimator, Predictor
@@ -9,8 +10,50 @@
     _20newsgroups_highdim_dataset,
     _20newsgroups_lowdim_dataset,
     _synth_classification_dataset,
+    _synth_regression_dataset,
+    _synth_regression_sparse_dataset,
 )
-from .utils import make_gen_classif_scorers
+from .utils import make_gen_classif_scorers, make_gen_reg_scorers
+
+
+class RandomForestRegressorBenchmark(Predictor, Estimator, Benchmark):
+    """
+    Benchmarks for RandomForestRegressor.
+    """
+
+    param_names = ["representation", "n_jobs"]
+    params = (["dense", "sparse"], Benchmark.n_jobs_vals)
+
+    def setup_cache(self):
+        super().setup_cache()
+
+    def make_data(self, params):
+        representation, n_jobs = params
+
+        if representation == "sparse":
+            data = _synth_regression_sparse_dataset()
+        else:
+            data = _synth_regression_dataset()
+
+        return data
+
+    def make_estimator(self, params):
+        representation, n_jobs = params
+
+        n_estimators = 500 if Benchmark.data_size == "large" else 100
+
+        estimator = RandomForestRegressor(
+            n_estimators=n_estimators,
+            min_samples_split=10,
+            max_features="log2",
+            n_jobs=n_jobs,
+            random_state=0,
+        )
+
+        return estimator
+
+    def make_scorers(self):
+        make_gen_reg_scorers(self)
 
 
 class RandomForestClassifierBenchmark(Predictor, Estimator, Benchmark):
diff --git a/sklearn/tree/_partitioner.pxd b/sklearn/tree/_partitioner.pxd
new file mode 100644
index 0000000000000..fd4e7c721424b
--- /dev/null
+++ b/sklearn/tree/_partitioner.pxd
@@ -0,0 +1,114 @@
+from ..utils._typedefs cimport float32_t, float64_t, intp_t, int8_t, int32_t, uint32_t
+
+# Constant used to switch between the two non-zero value extraction algorithms
+# in SparsePartitioner
+cdef float32_t EXTRACT_NNZ_SWITCH = 0.1
+
+
+# A fused type was previously used to share the split implementation between the
+# dense and sparse cases in node_split_best and node_split_random. Virtual method
+# lookups under inheritance-based polymorphism would have cost ~10% of overall
+# tree fitting performance, so the Partitioner base class below dispatches
+# through explicit function pointers; the old fused type is kept for reference.
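To make the dispatch scheme described in the comment above concrete, here is a minimal pure-Python sketch of the same pattern (illustrative only; the names mirror the diff but everything else is simplified): each subclass stores plain function references once at construction time, and the base class forwards every call through them, so no per-call virtual lookup is needed.

```python
# Sketch of the Partitioner function-pointer dispatch (hypothetical, simplified).

def dense_init_node_split(partitioner, start, end):
    # Free function specialized for the dense case.
    partitioner.start, partitioner.end = start, end

def sparse_init_node_split(partitioner, start, end):
    # The sparse case additionally resets its sorted-samples flag.
    partitioner.start, partitioner.end = start, end
    partitioner.is_samples_sorted = False

class Partitioner:
    def init_node_split(self, start, end):
        # Single concrete entry point: forward through the function
        # reference the subclass selected at construction time.
        self._init_node_split(self, start, end)

class DensePartitioner(Partitioner):
    def __init__(self):
        self._init_node_split = dense_init_node_split

class SparsePartitioner(Partitioner):
    def __init__(self):
        self._init_node_split = sparse_init_node_split

p = SparsePartitioner()
p.init_node_split(0, 10)
assert (p.start, p.end, p.is_samples_sorted) == (0, 10, False)
```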
+#ctypedef fused Partitioner: +# DensePartitioner +# SparsePartitioner + + +ctypedef void (*InitNodeSplitFunction)( + Partitioner partitioner, intp_t start, intp_t end +) noexcept nogil + +ctypedef void (*SortSamplesAndFeatureValuesFunction)( + Partitioner partitioner, intp_t current_feature +) noexcept nogil + +ctypedef void (*FindMinMaxFunction)( + Partitioner partitioner, + intp_t current_feature, + float32_t* min_feature_value_out, + float32_t* max_feature_value_out, +) noexcept nogil + +ctypedef void (*NextPFunction)( + Partitioner partitioner, intp_t* p_prev, intp_t* p +) noexcept nogil + +ctypedef intp_t (*PartitionSamplesFunction)( + Partitioner partitioner, float64_t current_threshold +) noexcept nogil + +ctypedef void (*PartitionSamplesFinalFunction)( + Partitioner partitioner, + intp_t best_pos, + float64_t best_threshold, + intp_t best_feature, + intp_t best_n_missing, +) noexcept nogil + + +cdef class Partitioner: + cdef: + intp_t[::1] samples + float32_t[::1] feature_values + intp_t start + intp_t end + intp_t n_missing + const unsigned char[::1] missing_values_in_feature_mask + + inline void init_node_split(self, intp_t start, intp_t end) noexcept nogil + inline void sort_samples_and_feature_values( + self, + intp_t current_feature + ) noexcept nogil + inline void find_min_max( + self, + intp_t current_feature, + float32_t* min_feature_value_out, + float32_t* max_feature_value_out, + ) noexcept nogil + inline void next_p(self, intp_t* p_prev, intp_t* p) noexcept nogil + inline intp_t partition_samples(self, float64_t current_threshold) noexcept nogil + inline void partition_samples_final( + self, + intp_t best_pos, + float64_t best_threshold, + intp_t best_feature, + intp_t best_n_missing, + ) noexcept nogil + + InitNodeSplitFunction _init_node_split + SortSamplesAndFeatureValuesFunction _sort_samples_and_feature_values + FindMinMaxFunction _find_min_max + NextPFunction _next_p + PartitionSamplesFunction _partition_samples + PartitionSamplesFinalFunction _partition_samples_final + + +cdef class DensePartitioner(Partitioner): + """Partitioner specialized for dense data. + + Note that this partitioner is agnostic to the splitting strategy (best vs. random). + """ + cdef: + const float32_t[:, :] X + + +cdef class SparsePartitioner(Partitioner): + """Partitioner specialized for sparse CSC data. + + Note that this partitioner is agnostic to the splitting strategy (best vs. random). 
+ """ + cdef: + const float32_t[::1] X_data + const int32_t[::1] X_indices + const int32_t[::1] X_indptr + + intp_t n_total_samples + + intp_t[::1] index_to_samples + intp_t[::1] sorted_samples + + intp_t start_positive + intp_t end_negative + bint is_samples_sorted diff --git a/sklearn/tree/_partitioner.pyx b/sklearn/tree/_partitioner.pyx new file mode 100644 index 0000000000000..7f21e716272f4 --- /dev/null +++ b/sklearn/tree/_partitioner.pyx @@ -0,0 +1,698 @@ +from cython cimport final +from libc.math cimport isnan, log +from libc.stdlib cimport qsort +from libc.string cimport memcpy +from scipy.sparse import issparse + +import numpy as np + +from ._sort cimport sort, sparse_swap, swap, FEATURE_THRESHOLD + + +cdef class Partitioner: + cdef: + inline void init_node_split(self, intp_t start, intp_t end) noexcept nogil: + self._init_node_split(self, start, end) + + inline void sort_samples_and_feature_values( + self, + intp_t current_feature + ) noexcept nogil: + self._sort_samples_and_feature_values(self, current_feature) + + inline void find_min_max( + self, + intp_t current_feature, + float32_t* min_feature_value_out, + float32_t* max_feature_value_out, + ) noexcept nogil: + self._find_min_max(self, current_feature, min_feature_value_out, max_feature_value_out) + + inline void next_p(self, intp_t* p_prev, intp_t* p) noexcept nogil: + self._next_p(self, p_prev, p) + + inline intp_t partition_samples(self, float64_t current_threshold) noexcept nogil: + return self._partition_samples(self, current_threshold) + + inline void partition_samples_final( + self, + intp_t best_pos, + float64_t best_threshold, + intp_t best_feature, + intp_t best_n_missing, + ) noexcept nogil: + self._partition_samples_final(self, best_pos, best_threshold, best_feature, best_n_missing) + + +@final +cdef class DensePartitioner(Partitioner): + """Partitioner specialized for dense data. + + Note that this partitioner is agnostic to the splitting strategy (best vs. random). + """ + def __init__( + self, + const float32_t[:, :] X, + intp_t[::1] samples, + float32_t[::1] feature_values, + const unsigned char[::1] missing_values_in_feature_mask, + ): + self.X = X + self.samples = samples + self.feature_values = feature_values + self.missing_values_in_feature_mask = missing_values_in_feature_mask + + self._init_node_split = dense_init_node_split + self._sort_samples_and_feature_values = dense_sort_samples_and_feature_values + self._find_min_max = dense_find_min_max + self._next_p = dense_next_p + self._partition_samples = dense_partition_samples + self._partition_samples_final = dense_partition_samples_final + + +cdef inline void dense_init_node_split( + Partitioner self, intp_t start, intp_t end +) noexcept nogil: + """Initialize splitter at the beginning of node_split.""" + self.start = start + self.end = end + self.n_missing = 0 + +cdef inline void dense_sort_samples_and_feature_values( + Partitioner self, intp_t current_feature +) noexcept nogil: + """Simultaneously sort based on the feature_values. + + Missing values are stored at the end of feature_values. + The number of missing values observed in feature_values is stored + in self.n_missing. 
+    """
+    cdef:
+        intp_t i, current_end
+        float32_t[::1] feature_values = self.feature_values
+        const float32_t[:, :] X = (<DensePartitioner>self).X
+        intp_t[::1] samples = self.samples
+        intp_t n_missing = 0
+        const unsigned char[::1] missing_values_in_feature_mask = self.missing_values_in_feature_mask
+
+    # Sort samples along that feature by
+    # copying the values into an array and
+    # sorting the array in a manner which utilizes the cache more
+    # effectively.
+    if missing_values_in_feature_mask is not None and missing_values_in_feature_mask[current_feature]:
+        i, current_end = self.start, self.end - 1
+        # Missing values are placed at the end and do not participate in the sorting.
+        while i <= current_end:
+            # Finds the right-most value that is not missing so that
+            # it can be swapped with missing values at its left.
+            if isnan(X[samples[current_end], current_feature]):
+                n_missing += 1
+                current_end -= 1
+                continue
+
+            # X[samples[current_end], current_feature] is a non-missing value
+            if isnan(X[samples[i], current_feature]):
+                samples[i], samples[current_end] = samples[current_end], samples[i]
+                n_missing += 1
+                current_end -= 1
+
+            feature_values[i] = X[samples[i], current_feature]
+            i += 1
+    else:
+        # When there are no missing values, we only need to copy the data into
+        # feature_values
+        for i in range(self.start, self.end):
+            feature_values[i] = X[samples[i], current_feature]
+
+    sort(&feature_values[self.start], &samples[self.start], self.end - self.start - n_missing)
+    self.n_missing = n_missing
+
+cdef inline void dense_find_min_max(
+    Partitioner self,
+    intp_t current_feature,
+    float32_t* min_feature_value_out,
+    float32_t* max_feature_value_out,
+) noexcept nogil:
+    """Find the minimum and maximum value for current_feature."""
+    cdef:
+        intp_t p
+        float32_t current_feature_value
+        const float32_t[:, :] X = (<DensePartitioner>self).X
+        intp_t[::1] samples = self.samples
+        float32_t min_feature_value = X[samples[self.start], current_feature]
+        float32_t max_feature_value = min_feature_value
+        float32_t[::1] feature_values = self.feature_values
+
+    feature_values[self.start] = min_feature_value
+
+    for p in range(self.start + 1, self.end):
+        current_feature_value = X[samples[p], current_feature]
+        feature_values[p] = current_feature_value
+
+        if current_feature_value < min_feature_value:
+            min_feature_value = current_feature_value
+        elif current_feature_value > max_feature_value:
+            max_feature_value = current_feature_value
+
+    min_feature_value_out[0] = min_feature_value
+    max_feature_value_out[0] = max_feature_value
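The dense_next_p function that follows steps over runs of effectively equal values: it advances p past any neighbor within FEATURE_THRESHOLD of the current value, so candidate thresholds are only placed between values that are actually distinguishable. A pure-Python sketch of that scan (FEATURE_THRESHOLD comes from _sort.pxd in this diff; the rest is illustrative):

```python
FEATURE_THRESHOLD = 1e-7  # from _sort.pxd

def next_p(feature_values, p, end_non_missing):
    """Advance past near-duplicate values; return (p_prev, p).

    Mirrors dense_next_p: p_prev lands on the last value of the current
    run, and p on the first strictly larger value (or past the end).
    """
    while (p + 1 < end_non_missing
           and feature_values[p + 1] <= feature_values[p] + FEATURE_THRESHOLD):
        p += 1
    return p, p + 1

vals = [0.0, 0.0, 1e-8, 0.5, 0.5, 1.0]
p_prev, p = next_p(vals, 0, len(vals))
assert (p_prev, p) == (2, 3)  # the three ~0.0 values form a single run
```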
+cdef inline void dense_next_p(
+    Partitioner self, intp_t* p_prev, intp_t* p
+) noexcept nogil:
+    """Compute the next p_prev and p for iterating over feature values.
+
+    The missing values are not included when iterating through the feature values.
+    """
+    cdef:
+        float32_t[::1] feature_values = self.feature_values
+        intp_t end_non_missing = self.end - self.n_missing
+
+    while (
+        p[0] + 1 < end_non_missing and
+        feature_values[p[0] + 1] <= feature_values[p[0]] + FEATURE_THRESHOLD
+    ):
+        p[0] += 1
+
+    p_prev[0] = p[0]
+
+    # By adding 1, we have
+    # (feature_values[p] >= end) or (feature_values[p] > feature_values[p - 1])
+    p[0] += 1
+
+cdef inline intp_t dense_partition_samples(
+    Partitioner self, float64_t current_threshold
+) noexcept nogil:
+    """Partition samples for feature_values at the current_threshold."""
+    cdef:
+        intp_t p = self.start
+        intp_t partition_end = self.end
+        intp_t[::1] samples = self.samples
+        float32_t[::1] feature_values = self.feature_values
+
+    while p < partition_end:
+        if feature_values[p] <= current_threshold:
+            p += 1
+        else:
+            partition_end -= 1
+
+            feature_values[p], feature_values[partition_end] = (
+                feature_values[partition_end], feature_values[p]
+            )
+            samples[p], samples[partition_end] = samples[partition_end], samples[p]
+
+    return partition_end
+
+cdef inline void dense_partition_samples_final(
+    Partitioner self,
+    intp_t best_pos,
+    float64_t best_threshold,
+    intp_t best_feature,
+    intp_t best_n_missing,
+) noexcept nogil:
+    """Partition samples for X at the best_threshold and best_feature.
+
+    If missing values are present, this method partitions `samples`
+    so that the `best_n_missing` missing values' indices are in the
+    right-most end of `samples`, that is `samples[end_non_missing:end]`.
+    """
+    cdef:
+        # Local invariant: start <= p <= partition_end <= end
+        intp_t start = self.start
+        intp_t p = start
+        intp_t end = self.end - 1
+        intp_t partition_end = end - best_n_missing
+        intp_t[::1] samples = self.samples
+        const float32_t[:, :] X = (<DensePartitioner>self).X
+        float32_t current_value
+
+    if best_n_missing != 0:
+        # Move samples with missing values to the end while partitioning the
+        # non-missing samples
+        while p < partition_end:
+            # Keep samples with missing values at the end
+            if isnan(X[samples[end], best_feature]):
+                end -= 1
+                continue
+
+            # Swap sample with missing values with the sample at the end
+            current_value = X[samples[p], best_feature]
+            if isnan(current_value):
+                samples[p], samples[end] = samples[end], samples[p]
+                end -= 1
+
+                # The swapped sample at the end is always a non-missing value, so
+                # we can continue the algorithm without checking for missingness.
+                current_value = X[samples[p], best_feature]
+
+            # Partition the non-missing samples
+            if current_value <= best_threshold:
+                p += 1
+            else:
+                samples[p], samples[partition_end] = samples[partition_end], samples[p]
+                partition_end -= 1
+    else:
+        # Partitioning routine when there are no missing values
+        while p < partition_end:
+            if X[samples[p], best_feature] <= best_threshold:
+                p += 1
+            else:
+                samples[p], samples[partition_end] = samples[partition_end], samples[p]
+                partition_end -= 1
+
+
+@final
+cdef class SparsePartitioner(Partitioner):
+    """Partitioner specialized for sparse CSC data.
+
+    Note that this partitioner is agnostic to the splitting strategy (best vs. random).
+    """
+    def __init__(
+        self,
+        object X,
+        intp_t[::1] samples,
+        intp_t n_samples,
+        float32_t[::1] feature_values,
+        const unsigned char[::1] missing_values_in_feature_mask,
+    ):
+        if not (issparse(X) and X.format == "csc"):
+            raise ValueError("X should be in csc format")
+
+        self.samples = samples
+        self.feature_values = feature_values
+
+        # Initialize X
+        cdef intp_t n_total_samples = X.shape[0]
+
+        self.X_data = X.data
+        self.X_indices = X.indices
+        self.X_indptr = X.indptr
+        self.n_total_samples = n_total_samples
+
+        # Initialize auxiliary array used to perform split
+        self.index_to_samples = np.full(n_total_samples, fill_value=-1, dtype=np.intp)
+        self.sorted_samples = np.empty(n_samples, dtype=np.intp)
+
+        cdef intp_t p
+        for p in range(n_samples):
+            self.index_to_samples[samples[p]] = p
+
+        self.missing_values_in_feature_mask = missing_values_in_feature_mask
+
+        self._init_node_split = sparse_init_node_split
+        self._sort_samples_and_feature_values = sparse_sort_samples_and_feature_values
+        self._find_min_max = sparse_find_min_max
+        self._next_p = sparse_next_p
+        self._partition_samples = sparse_partition_samples
+        self._partition_samples_final = sparse_partition_samples_final
+
+
+cdef inline void sparse_init_node_split(Partitioner self, intp_t start, intp_t end) noexcept nogil:
+    """Initialize splitter at the beginning of node_split."""
+    self.start = start
+    self.end = end
+    (<SparsePartitioner>self).is_samples_sorted = 0
+    self.n_missing = 0
+
+
+cdef inline void sparse_sort_samples_and_feature_values(
+    Partitioner self, intp_t current_feature
+) noexcept nogil:
+    _sparse_sort_samples_and_feature_values(<SparsePartitioner>self, current_feature)
+
+
+cdef inline void _sparse_sort_samples_and_feature_values(
+    SparsePartitioner self, intp_t current_feature
+) noexcept nogil:
+    """Simultaneously sort based on the feature_values."""
+    cdef:
+        float32_t[::1] feature_values = self.feature_values
+        intp_t[::1] index_to_samples = self.index_to_samples
+        intp_t[::1] samples = self.samples
+
+    sparse_extract_nnz(self, current_feature)
+    # Sort the positive and negative parts of `feature_values`
+    sort(&feature_values[self.start], &samples[self.start], self.end_negative - self.start)
+    if self.start_positive < self.end:
+        sort(
+            &feature_values[self.start_positive],
+            &samples[self.start_positive],
+            self.end - self.start_positive
+        )
+
+    # Update index_to_samples to take into account the sort
+    for p in range(self.start, self.end_negative):
+        index_to_samples[samples[p]] = p
+    for p in range(self.start_positive, self.end):
+        index_to_samples[samples[p]] = p
+
+    # Add one or two zeros in feature_values, if there is any
+    if self.end_negative < self.start_positive:
+        self.start_positive -= 1
+        feature_values[self.start_positive] = 0.
+
+        if self.end_negative != self.start_positive:
+            feature_values[self.end_negative] = 0.
+            self.end_negative += 1
+
+    # XXX: When sparse supports missing values, this should be set to the
+    # number of missing values for current_feature
+    self.n_missing = 0
+
+
+cdef inline void sparse_find_min_max(
+    Partitioner self,
+    intp_t current_feature,
+    float32_t* min_feature_value_out,
+    float32_t* max_feature_value_out,
+) noexcept nogil:
+    _sparse_find_min_max(
+        <SparsePartitioner>self,
+        current_feature,
+        min_feature_value_out,
+        max_feature_value_out
+    )
+
+cdef inline void _sparse_find_min_max(
+    SparsePartitioner self,
+    intp_t current_feature,
+    float32_t* min_feature_value_out,
+    float32_t* max_feature_value_out,
+) noexcept nogil:
+    """Find the minimum and maximum value for current_feature."""
+    cdef:
+        intp_t p
+        float32_t current_feature_value, min_feature_value, max_feature_value
+        float32_t[::1] feature_values = self.feature_values
+
+    sparse_extract_nnz(self, current_feature)
+
+    if self.end_negative != self.start_positive:
+        # There is a zero
+        min_feature_value = 0
+        max_feature_value = 0
+    else:
+        min_feature_value = feature_values[self.start]
+        max_feature_value = min_feature_value
+
+    # Find min, max in feature_values[start:end_negative]
+    for p in range(self.start, self.end_negative):
+        current_feature_value = feature_values[p]
+
+        if current_feature_value < min_feature_value:
+            min_feature_value = current_feature_value
+        elif current_feature_value > max_feature_value:
+            max_feature_value = current_feature_value
+
+    # Update min, max given feature_values[start_positive:end]
+    for p in range(self.start_positive, self.end):
+        current_feature_value = feature_values[p]
+
+        if current_feature_value < min_feature_value:
+            min_feature_value = current_feature_value
+        elif current_feature_value > max_feature_value:
+            max_feature_value = current_feature_value
+
+    min_feature_value_out[0] = min_feature_value
+    max_feature_value_out[0] = max_feature_value
+
+
+cdef inline void sparse_next_p(Partitioner self, intp_t* p_prev, intp_t* p) noexcept nogil:
+    _sparse_next_p(<SparsePartitioner>self, p_prev, p)
+
+
+cdef inline void _sparse_next_p(SparsePartitioner self, intp_t* p_prev, intp_t* p) noexcept nogil:
+    """Compute the next p_prev and p for iterating over feature values."""
+    cdef:
+        intp_t p_next
+        float32_t[::1] feature_values = self.feature_values
+
+    if p[0] + 1 != self.end_negative:
+        p_next = p[0] + 1
+    else:
+        p_next = self.start_positive
+
+    while (p_next < self.end and
+            feature_values[p_next] <= feature_values[p[0]] + FEATURE_THRESHOLD):
+        p[0] = p_next
+        if p[0] + 1 != self.end_negative:
+            p_next = p[0] + 1
+        else:
+            p_next = self.start_positive
+
+    p_prev[0] = p[0]
+    p[0] = p_next
+
+
+cdef inline intp_t sparse_partition_samples(
+    Partitioner self, float64_t current_threshold
+) noexcept nogil:
+    """Partition samples for feature_values at the current_threshold."""
+    return sparse_partition(
+        <SparsePartitioner>self, current_threshold, (<SparsePartitioner>self).start_positive
+    )
+
+
+cdef inline void sparse_partition_samples_final(
+    Partitioner self,
+    intp_t best_pos,
+    float64_t best_threshold,
+    intp_t best_feature,
+    intp_t n_missing,
+) noexcept nogil:
+    """Partition samples for X at the best_threshold and best_feature."""
+    sparse_extract_nnz(<SparsePartitioner>self, best_feature)
+    sparse_partition(<SparsePartitioner>self, best_threshold, best_pos)
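The sparse_partition helper that follows exploits the [negatives | zeros | positives] layout that sparse_extract_nnz leaves in feature_values: only the block on the same side as the threshold ever needs reordering, and a zero threshold means the zeros already separate the two children. A small hedged Python sketch of that region selection (names and layout are illustrative, not the sklearn API):

```python
def sparse_partition_region(threshold, start, end_negative, start_positive, end, zero_pos):
    """Pick the slice of feature_values a sparse partition must scan.

    Mirrors sparse_partition: a negative threshold only reorders the
    negative block, a positive one only the positive block, and a zero
    threshold requires no moves because the zeros split the node already.
    """
    if threshold < 0.0:
        return start, end_negative
    if threshold > 0.0:
        return start_positive, end
    return zero_pos, zero_pos  # nothing to move

# Hypothetical node layout: 3 negatives, 2 zeros, 4 positives.
assert sparse_partition_region(-0.5, 0, 3, 5, 9, 3) == (0, 3)
assert sparse_partition_region(0.2, 0, 3, 5, 9, 5) == (5, 9)
```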
+cdef inline intp_t sparse_partition(SparsePartitioner self, float64_t threshold, intp_t zero_pos) noexcept nogil:
+    """Partition samples[start:end] based on threshold."""
+    cdef:
+        intp_t p, partition_end
+        intp_t[::1] index_to_samples = self.index_to_samples
+        float32_t[::1] feature_values = self.feature_values
+        intp_t[::1] samples = self.samples
+
+    if threshold < 0.:
+        p = self.start
+        partition_end = self.end_negative
+    elif threshold > 0.:
+        p = self.start_positive
+        partition_end = self.end
+    else:
+        # Data are already split
+        return zero_pos
+
+    while p < partition_end:
+        if feature_values[p] <= threshold:
+            p += 1
+
+        else:
+            partition_end -= 1
+
+            feature_values[p], feature_values[partition_end] = (
+                feature_values[partition_end], feature_values[p]
+            )
+            sparse_swap(index_to_samples, samples, p, partition_end)
+
+    return partition_end
+
+
+cdef inline void sparse_extract_nnz(SparsePartitioner self, intp_t feature) noexcept nogil:
+    """Extract and partition values for a given feature.
+
+    The extracted values are partitioned between negative values
+    feature_values[start:end_negative[0]] and positive values
+    feature_values[start_positive[0]:end].
+    The samples and index_to_samples are modified according to this
+    partition.
+
+    The extraction corresponds to the intersection between the arrays
+    X_indices[indptr_start:indptr_end] and samples[start:end].
+    This is done efficiently using either an index_to_samples based approach
+    or binary search based approach.
+
+    Parameters
+    ----------
+    feature : intp_t,
+        Index of the feature we want to extract non zero value.
+    """
+    cdef intp_t[::1] samples = self.samples
+    cdef float32_t[::1] feature_values = self.feature_values
+    cdef intp_t indptr_start = self.X_indptr[feature],
+    cdef intp_t indptr_end = self.X_indptr[feature + 1]
+    cdef intp_t n_indices = <intp_t>(indptr_end - indptr_start)
+    cdef intp_t n_samples = self.end - self.start
+    cdef intp_t[::1] index_to_samples = self.index_to_samples
+    cdef intp_t[::1] sorted_samples = self.sorted_samples
+    cdef const int32_t[::1] X_indices = self.X_indices
+    cdef const float32_t[::1] X_data = self.X_data
+
+    # Use binary search if n_samples * log(n_indices) <
+    # n_indices and index_to_samples approach otherwise.
+    # O(n_samples * log(n_indices)) is the running time of binary
+    # search and O(n_indices) is the running time of index_to_samples
+    # approach.
+    if ((1 - self.is_samples_sorted) * n_samples * log(n_samples) +
+            n_samples * log(n_indices) < EXTRACT_NNZ_SWITCH * n_indices):
+        extract_nnz_binary_search(X_indices, X_data,
+                                  indptr_start, indptr_end,
+                                  samples, self.start, self.end,
+                                  index_to_samples,
+                                  feature_values,
+                                  &self.end_negative, &self.start_positive,
+                                  sorted_samples, &self.is_samples_sorted)
+
+    # Using an index to samples technique to extract non zero values
+    # index_to_samples is a mapping from X_indices to samples
+    else:
+        extract_nnz_index_to_samples(X_indices, X_data,
+                                     indptr_start, indptr_end,
+                                     samples, self.start, self.end,
+                                     index_to_samples,
+                                     feature_values,
+                                     &self.end_negative, &self.start_positive)
+
+
+cdef int compare_SIZE_t(const void* a, const void* b) noexcept nogil:
+    """Comparison function for sort.
+
+    This must return an `int` as it is used by stdlib's qsort, which expects
+    an `int` return value.
+    """
+    return <int>((<intp_t*>a)[0] - (<intp_t*>b)[0])
+
+
+cdef inline void binary_search(const int32_t[::1] sorted_array,
+                               int32_t start, int32_t end,
+                               intp_t value, intp_t* index,
+                               int32_t* new_start) noexcept nogil:
+    """Return the index of value in the sorted array.
+
+    If not found, return -1.
new_start is the last pivot + 1 + """ + cdef int32_t pivot + index[0] = -1 + while start < end: + pivot = start + (end - start) / 2 + + if sorted_array[pivot] == value: + index[0] = pivot + start = pivot + 1 + break + + if sorted_array[pivot] < value: + start = pivot + 1 + else: + end = pivot + new_start[0] = start + + +cdef inline void extract_nnz_index_to_samples(const int32_t[::1] X_indices, + const float32_t[::1] X_data, + int32_t indptr_start, + int32_t indptr_end, + intp_t[::1] samples, + intp_t start, + intp_t end, + intp_t[::1] index_to_samples, + float32_t[::1] feature_values, + intp_t* end_negative, + intp_t* start_positive) noexcept nogil: + """Extract and partition values for a feature using index_to_samples. + + Complexity is O(indptr_end - indptr_start). + """ + cdef int32_t k + cdef intp_t index + cdef intp_t end_negative_ = start + cdef intp_t start_positive_ = end + + for k in range(indptr_start, indptr_end): + if start <= index_to_samples[X_indices[k]] < end: + if X_data[k] > 0: + start_positive_ -= 1 + feature_values[start_positive_] = X_data[k] + index = index_to_samples[X_indices[k]] + sparse_swap(index_to_samples, samples, index, start_positive_) + + elif X_data[k] < 0: + feature_values[end_negative_] = X_data[k] + index = index_to_samples[X_indices[k]] + sparse_swap(index_to_samples, samples, index, end_negative_) + end_negative_ += 1 + + # Returned values + end_negative[0] = end_negative_ + start_positive[0] = start_positive_ + + +cdef inline void extract_nnz_binary_search(const int32_t[::1] X_indices, + const float32_t[::1] X_data, + int32_t indptr_start, + int32_t indptr_end, + intp_t[::1] samples, + intp_t start, + intp_t end, + intp_t[::1] index_to_samples, + float32_t[::1] feature_values, + intp_t* end_negative, + intp_t* start_positive, + intp_t[::1] sorted_samples, + bint* is_samples_sorted) noexcept nogil: + """Extract and partition values for a given feature using binary search. + + If n_samples = end - start and n_indices = indptr_end - indptr_start, + the complexity is + + O((1 - is_samples_sorted[0]) * n_samples * log(n_samples) + + n_samples * log(n_indices)). 
+    """
+    cdef intp_t n_samples
+
+    if not is_samples_sorted[0]:
+        n_samples = end - start
+        memcpy(&sorted_samples[start], &samples[start],
+               n_samples * sizeof(intp_t))
+        qsort(&sorted_samples[start], n_samples, sizeof(intp_t),
+              compare_SIZE_t)
+        is_samples_sorted[0] = 1
+
+    while (indptr_start < indptr_end and
+           sorted_samples[start] > X_indices[indptr_start]):
+        indptr_start += 1
+
+    while (indptr_start < indptr_end and
+           sorted_samples[end - 1] < X_indices[indptr_end - 1]):
+        indptr_end -= 1
+
+    cdef intp_t p = start
+    cdef intp_t index
+    cdef intp_t k
+    cdef intp_t end_negative_ = start
+    cdef intp_t start_positive_ = end
+
+    while (p < end and indptr_start < indptr_end):
+        # Find index of sorted_samples[p] in X_indices
+        binary_search(X_indices, indptr_start, indptr_end,
+                      sorted_samples[p], &k, &indptr_start)
+
+        if k != -1:
+            # If k != -1, we have found a non zero value
+
+            if X_data[k] > 0:
+                start_positive_ -= 1
+                feature_values[start_positive_] = X_data[k]
+                index = index_to_samples[X_indices[k]]
+                sparse_swap(index_to_samples, samples, index, start_positive_)
+
+            elif X_data[k] < 0:
+                feature_values[end_negative_] = X_data[k]
+                index = index_to_samples[X_indices[k]]
+                sparse_swap(index_to_samples, samples, index, end_negative_)
+                end_negative_ += 1
+        p += 1
+
+    # Returned values
+    end_negative[0] = end_negative_
+    start_positive[0] = start_positive_
diff --git a/sklearn/tree/_sort.pxd b/sklearn/tree/_sort.pxd
new file mode 100644
index 0000000000000..5a0b3d20d0f35
--- /dev/null
+++ b/sklearn/tree/_sort.pxd
@@ -0,0 +1,13 @@
+from ..utils._typedefs cimport float32_t, float64_t, intp_t, int8_t, int32_t, uint32_t
+
+
+# Mitigate precision differences between 32 bit and 64 bit
+cdef float32_t FEATURE_THRESHOLD = 1e-7
+
+# Sort n-element arrays pointed to by feature_values and samples, simultaneously,
+# by the values in feature_values. Algorithm: Introsort (Musser, SP&E, 1997).
+cdef void sort(float32_t* feature_values, intp_t* samples, intp_t n) noexcept nogil
+
+cdef void swap(float32_t* feature_values, intp_t* samples, intp_t i, intp_t j) noexcept nogil
+cdef void sparse_swap(intp_t[::1] index_to_samples, intp_t[::1] samples,
+                      intp_t pos_1, intp_t pos_2) noexcept nogil
diff --git a/sklearn/tree/_sort.pyx b/sklearn/tree/_sort.pyx
new file mode 100644
index 0000000000000..9a9db6edf6e00
--- /dev/null
+++ b/sklearn/tree/_sort.pyx
@@ -0,0 +1,123 @@
+from ._utils cimport log
+
+
+cdef inline void sparse_swap(intp_t[::1] index_to_samples, intp_t[::1] samples,
+                             intp_t pos_1, intp_t pos_2) noexcept nogil:
+    """Swap sample pos_1 and pos_2 preserving sparse invariant."""
+    samples[pos_1], samples[pos_2] = samples[pos_2], samples[pos_1]
+    index_to_samples[samples[pos_1]] = pos_1
+    index_to_samples[samples[pos_2]] = pos_2
+
+
+# Sort n-element arrays pointed to by feature_values and samples, simultaneously,
+# by the values in feature_values. Algorithm: Introsort (Musser, SP&E, 1997).
+cdef inline void sort(float32_t* feature_values, intp_t* samples, intp_t n) noexcept nogil:
+    if n == 0:
+        return
+    cdef intp_t maxd = 2 * <intp_t>log(n)
+    introsort(feature_values, samples, n, maxd)
+
+
+# Introsort with median of 3 pivot selection and 3-way partition function
+# (robust to repeated elements, e.g. lots of zero features).
+cdef void introsort(float32_t* feature_values, intp_t *samples, + intp_t n, intp_t maxd) noexcept nogil: + cdef float32_t pivot + cdef intp_t i, l, r + + while n > 1: + if maxd <= 0: # max depth limit exceeded ("gone quadratic") + heapsort(feature_values, samples, n) + return + maxd -= 1 + + pivot = median3(feature_values, n) + + # Three-way partition. + i = l = 0 + r = n + while i < r: + if feature_values[i] < pivot: + swap(feature_values, samples, i, l) + i += 1 + l += 1 + elif feature_values[i] > pivot: + r -= 1 + swap(feature_values, samples, i, r) + else: + i += 1 + + introsort(feature_values, samples, l, maxd) + feature_values += r + samples += r + n -= r + + +cdef void heapsort(float32_t* feature_values, intp_t* samples, intp_t n) noexcept nogil: + cdef intp_t start, end + + # heapify + start = (n - 2) / 2 + end = n + while True: + sift_down(feature_values, samples, start, end) + if start == 0: + break + start -= 1 + + # sort by shrinking the heap, putting the max element immediately after it + end = n - 1 + while end > 0: + swap(feature_values, samples, 0, end) + sift_down(feature_values, samples, 0, end) + end = end - 1 + + +cdef inline float32_t median3(float32_t* feature_values, intp_t n) noexcept nogil: + # Median of three pivot selection, after Bentley and McIlroy (1993). + # Engineering a sort function. SP&E. Requires 8/3 comparisons on average. + cdef float32_t a = feature_values[0], b = feature_values[n / 2], c = feature_values[n - 1] + if a < b: + if b < c: + return b + elif a < c: + return c + else: + return a + elif b < c: + if a < c: + return a + else: + return c + else: + return b + + +cdef inline void swap(float32_t* feature_values, intp_t* samples, + intp_t i, intp_t j) noexcept nogil: + # Helper for sort + feature_values[i], feature_values[j] = feature_values[j], feature_values[i] + samples[i], samples[j] = samples[j], samples[i] + + +cdef inline void sift_down(float32_t* feature_values, intp_t* samples, + intp_t start, intp_t end) noexcept nogil: + # Restore heap order in feature_values[start:end] by moving the max element to start. + cdef intp_t child, maxind, root + + root = start + while True: + child = root * 2 + 1 + + # find max of root, left child, right child + maxind = root + if child < end and feature_values[maxind] < feature_values[child]: + maxind = child + if child + 1 < end and feature_values[maxind] < feature_values[child + 1]: + maxind = child + 1 + + if maxind == root: + break + else: + swap(feature_values, samples, root, maxind) + root = maxind diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd index b630252b329f2..a55cf2786cbef 100644 --- a/sklearn/tree/_splitter.pxd +++ b/sklearn/tree/_splitter.pxd @@ -4,6 +4,7 @@ # See _splitter.pyx for details. 
 from libcpp.vector cimport vector
 
+from ._partitioner cimport Partitioner, DensePartitioner, SparsePartitioner
 from ._criterion cimport BaseCriterion, Criterion
 from ._tree cimport ParentInfo
 
 from ..utils._typedefs cimport float32_t, float64_t, intp_t, int8_t, int32_t, uint32_t
diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index 8bf71765355b3..eb08ec34ea2a2 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -1,30 +1,20 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-from cython cimport final
-from libc.math cimport isnan
-from libc.stdlib cimport qsort
 from libc.string cimport memcpy
 
 from ._criterion cimport Criterion
-from ._utils cimport log
+from ._sort cimport FEATURE_THRESHOLD
 from ._utils cimport rand_int
 from ._utils cimport rand_uniform
 from ._utils cimport RAND_R_MAX
 from ..utils._typedefs cimport int8_t
 
 import numpy as np
-from scipy.sparse import issparse
 
 cdef float64_t INFINITY = np.inf
 
-# Mitigate precision differences between 32 bit and 64 bit
-cdef float32_t FEATURE_THRESHOLD = 1e-7
-
-# Constant to switch between algorithm non zero value extract algorithm
-# in SparsePartitioner
-cdef float32_t EXTRACT_NNZ_SWITCH = 0.1
 
 cdef inline void _init_split(SplitRecord* self, intp_t start_pos) noexcept nogil:
     self.impurity_left = INFINITY
@@ -405,15 +395,6 @@ cdef inline void shift_missing_values_to_left_if_required(
     best.pos += best.n_missing
 
 
-# Introduce a fused-class to make it possible to share the split implementation
-# between the dense and sparse cases in the node_split_best and node_split_random
-# functions. The alternative would have been to use inheritance-based polymorphism
-# but it would have resulted in a ~10% overall tree fitting performance
-# degradation caused by the overhead frequent virtual method lookups.
-ctypedef fused Partitioner:
-    DensePartitioner
-    SparsePartitioner
-
 cdef inline intp_t node_split_best(
     Splitter splitter,
     Partitioner partitioner,
@@ -682,119 +663,6 @@ cdef inline intp_t node_split_best(
     return 0
 
 
-# Sort n-element arrays pointed to by feature_values and samples, simultaneously,
-# by the values in feature_values. Algorithm: Introsort (Musser, SP&E, 1997).
-cdef inline void sort(float32_t* feature_values, intp_t* samples, intp_t n) noexcept nogil:
-    if n == 0:
-        return
-    cdef intp_t maxd = 2 * <intp_t>log(n)
-    introsort(feature_values, samples, n, maxd)
-
-
-cdef inline void swap(float32_t* feature_values, intp_t* samples,
-        intp_t i, intp_t j) noexcept nogil:
-    # Helper for sort
-    feature_values[i], feature_values[j] = feature_values[j], feature_values[i]
-    samples[i], samples[j] = samples[j], samples[i]
-
-
-cdef inline float32_t median3(float32_t* feature_values, intp_t n) noexcept nogil:
-    # Median of three pivot selection, after Bentley and McIlroy (1993).
-    # Engineering a sort function. SP&E. Requires 8/3 comparisons on average.
-    cdef float32_t a = feature_values[0], b = feature_values[n / 2], c = feature_values[n - 1]
-    if a < b:
-        if b < c:
-            return b
-        elif a < c:
-            return c
-        else:
-            return a
-    elif b < c:
-        if a < c:
-            return a
-        else:
-            return c
-    else:
-        return b
-
-
-# Introsort with median of 3 pivot selection and 3-way partition function
-# (robust to repeated elements, e.g. lots of zero features).
-cdef void introsort(float32_t* feature_values, intp_t *samples, - intp_t n, intp_t maxd) noexcept nogil: - cdef float32_t pivot - cdef intp_t i, l, r - - while n > 1: - if maxd <= 0: # max depth limit exceeded ("gone quadratic") - heapsort(feature_values, samples, n) - return - maxd -= 1 - - pivot = median3(feature_values, n) - - # Three-way partition. - i = l = 0 - r = n - while i < r: - if feature_values[i] < pivot: - swap(feature_values, samples, i, l) - i += 1 - l += 1 - elif feature_values[i] > pivot: - r -= 1 - swap(feature_values, samples, i, r) - else: - i += 1 - - introsort(feature_values, samples, l, maxd) - feature_values += r - samples += r - n -= r - - -cdef inline void sift_down(float32_t* feature_values, intp_t* samples, - intp_t start, intp_t end) noexcept nogil: - # Restore heap order in feature_values[start:end] by moving the max element to start. - cdef intp_t child, maxind, root - - root = start - while True: - child = root * 2 + 1 - - # find max of root, left child, right child - maxind = root - if child < end and feature_values[maxind] < feature_values[child]: - maxind = child - if child + 1 < end and feature_values[maxind] < feature_values[child + 1]: - maxind = child + 1 - - if maxind == root: - break - else: - swap(feature_values, samples, root, maxind) - root = maxind - - -cdef void heapsort(float32_t* feature_values, intp_t* samples, intp_t n) noexcept nogil: - cdef intp_t start, end - - # heapify - start = (n - 2) / 2 - end = n - while True: - sift_down(feature_values, samples, start, end) - if start == 0: - break - start -= 1 - - # sort by shrinking the heap, putting the max element immediately after it - end = n - 1 - while end > 0: - swap(feature_values, samples, 0, end) - sift_down(feature_values, samples, 0, end) - end = end - 1 - cdef inline int node_split_random( Splitter splitter, Partitioner partitioner, @@ -982,641 +850,6 @@ cdef inline int node_split_random( return 0 -@final -cdef class DensePartitioner: - """Partitioner specialized for dense data. - - Note that this partitioner is agnostic to the splitting strategy (best vs. random). - """ - cdef: - const float32_t[:, :] X - cdef intp_t[::1] samples - cdef float32_t[::1] feature_values - cdef intp_t start - cdef intp_t end - cdef intp_t n_missing - cdef const unsigned char[::1] missing_values_in_feature_mask - - def __init__( - self, - const float32_t[:, :] X, - intp_t[::1] samples, - float32_t[::1] feature_values, - const unsigned char[::1] missing_values_in_feature_mask, - ): - self.X = X - self.samples = samples - self.feature_values = feature_values - self.missing_values_in_feature_mask = missing_values_in_feature_mask - - cdef inline void init_node_split(self, intp_t start, intp_t end) noexcept nogil: - """Initialize splitter at the beginning of node_split.""" - self.start = start - self.end = end - self.n_missing = 0 - - cdef inline void sort_samples_and_feature_values( - self, intp_t current_feature - ) noexcept nogil: - """Simultaneously sort based on the feature_values. - - Missing values are stored at the end of feature_values. - The number of missing values observed in feature_values is stored - in self.n_missing. 
- """ - cdef: - intp_t i, current_end - float32_t[::1] feature_values = self.feature_values - const float32_t[:, :] X = self.X - intp_t[::1] samples = self.samples - intp_t n_missing = 0 - const unsigned char[::1] missing_values_in_feature_mask = self.missing_values_in_feature_mask - - # Sort samples along that feature; by - # copying the values into an array and - # sorting the array in a manner which utilizes the cache more - # effectively. - if missing_values_in_feature_mask is not None and missing_values_in_feature_mask[current_feature]: - i, current_end = self.start, self.end - 1 - # Missing values are placed at the end and do not participate in the sorting. - while i <= current_end: - # Finds the right-most value that is not missing so that - # it can be swapped with missing values at its left. - if isnan(X[samples[current_end], current_feature]): - n_missing += 1 - current_end -= 1 - continue - - # X[samples[current_end], current_feature] is a non-missing value - if isnan(X[samples[i], current_feature]): - samples[i], samples[current_end] = samples[current_end], samples[i] - n_missing += 1 - current_end -= 1 - - feature_values[i] = X[samples[i], current_feature] - i += 1 - else: - # When there are no missing values, we only need to copy the data into - # feature_values - for i in range(self.start, self.end): - feature_values[i] = X[samples[i], current_feature] - - sort(&feature_values[self.start], &samples[self.start], self.end - self.start - n_missing) - self.n_missing = n_missing - - cdef inline void find_min_max( - self, - intp_t current_feature, - float32_t* min_feature_value_out, - float32_t* max_feature_value_out, - ) noexcept nogil: - """Find the minimum and maximum value for current_feature.""" - cdef: - intp_t p - float32_t current_feature_value - const float32_t[:, :] X = self.X - intp_t[::1] samples = self.samples - float32_t min_feature_value = X[samples[self.start], current_feature] - float32_t max_feature_value = min_feature_value - float32_t[::1] feature_values = self.feature_values - - feature_values[self.start] = min_feature_value - - for p in range(self.start + 1, self.end): - current_feature_value = X[samples[p], current_feature] - feature_values[p] = current_feature_value - - if current_feature_value < min_feature_value: - min_feature_value = current_feature_value - elif current_feature_value > max_feature_value: - max_feature_value = current_feature_value - - min_feature_value_out[0] = min_feature_value - max_feature_value_out[0] = max_feature_value - - cdef inline void next_p(self, intp_t* p_prev, intp_t* p) noexcept nogil: - """Compute the next p_prev and p for iteratiing over feature values. - - The missing values are not included when iterating through the feature values. 
- """ - cdef: - float32_t[::1] feature_values = self.feature_values - intp_t end_non_missing = self.end - self.n_missing - - while ( - p[0] + 1 < end_non_missing and - feature_values[p[0] + 1] <= feature_values[p[0]] + FEATURE_THRESHOLD - ): - p[0] += 1 - - p_prev[0] = p[0] - - # By adding 1, we have - # (feature_values[p] >= end) or (feature_values[p] > feature_values[p - 1]) - p[0] += 1 - - cdef inline intp_t partition_samples(self, float64_t current_threshold) noexcept nogil: - """Partition samples for feature_values at the current_threshold.""" - cdef: - intp_t p = self.start - intp_t partition_end = self.end - intp_t[::1] samples = self.samples - float32_t[::1] feature_values = self.feature_values - - while p < partition_end: - if feature_values[p] <= current_threshold: - p += 1 - else: - partition_end -= 1 - - feature_values[p], feature_values[partition_end] = ( - feature_values[partition_end], feature_values[p] - ) - samples[p], samples[partition_end] = samples[partition_end], samples[p] - - return partition_end - - cdef inline void partition_samples_final( - self, - intp_t best_pos, - float64_t best_threshold, - intp_t best_feature, - intp_t best_n_missing, - ) noexcept nogil: - """Partition samples for X at the best_threshold and best_feature. - - If missing values are present, this method partitions `samples` - so that the `best_n_missing` missing values' indices are in the - right-most end of `samples`, that is `samples[end_non_missing:end]`. - """ - cdef: - # Local invariance: start <= p <= partition_end <= end - intp_t start = self.start - intp_t p = start - intp_t end = self.end - 1 - intp_t partition_end = end - best_n_missing - intp_t[::1] samples = self.samples - const float32_t[:, :] X = self.X - float32_t current_value - - if best_n_missing != 0: - # Move samples with missing values to the end while partitioning the - # non-missing samples - while p < partition_end: - # Keep samples with missing values at the end - if isnan(X[samples[end], best_feature]): - end -= 1 - continue - - # Swap sample with missing values with the sample at the end - current_value = X[samples[p], best_feature] - if isnan(current_value): - samples[p], samples[end] = samples[end], samples[p] - end -= 1 - - # The swapped sample at the end is always a non-missing value, so - # we can continue the algorithm without checking for missingness. - current_value = X[samples[p], best_feature] - - # Partition the non-missing samples - if current_value <= best_threshold: - p += 1 - else: - samples[p], samples[partition_end] = samples[partition_end], samples[p] - partition_end -= 1 - else: - # Partitioning routine when there are no missing values - while p < partition_end: - if X[samples[p], best_feature] <= best_threshold: - p += 1 - else: - samples[p], samples[partition_end] = samples[partition_end], samples[p] - partition_end -= 1 - - -@final -cdef class SparsePartitioner: - """Partitioner specialized for sparse CSC data. - - Note that this partitioner is agnostic to the splitting strategy (best vs. random). 
- """ - cdef intp_t[::1] samples - cdef float32_t[::1] feature_values - cdef intp_t start - cdef intp_t end - cdef intp_t n_missing - cdef const unsigned char[::1] missing_values_in_feature_mask - - cdef const float32_t[::1] X_data - cdef const int32_t[::1] X_indices - cdef const int32_t[::1] X_indptr - - cdef intp_t n_total_samples - - cdef intp_t[::1] index_to_samples - cdef intp_t[::1] sorted_samples - - cdef intp_t start_positive - cdef intp_t end_negative - cdef bint is_samples_sorted - - def __init__( - self, - object X, - intp_t[::1] samples, - intp_t n_samples, - float32_t[::1] feature_values, - const unsigned char[::1] missing_values_in_feature_mask, - ): - if not (issparse(X) and X.format == "csc"): - raise ValueError("X should be in csc format") - - self.samples = samples - self.feature_values = feature_values - - # Initialize X - cdef intp_t n_total_samples = X.shape[0] - - self.X_data = X.data - self.X_indices = X.indices - self.X_indptr = X.indptr - self.n_total_samples = n_total_samples - - # Initialize auxiliary array used to perform split - self.index_to_samples = np.full(n_total_samples, fill_value=-1, dtype=np.intp) - self.sorted_samples = np.empty(n_samples, dtype=np.intp) - - cdef intp_t p - for p in range(n_samples): - self.index_to_samples[samples[p]] = p - - self.missing_values_in_feature_mask = missing_values_in_feature_mask - - cdef inline void init_node_split(self, intp_t start, intp_t end) noexcept nogil: - """Initialize splitter at the beginning of node_split.""" - self.start = start - self.end = end - self.is_samples_sorted = 0 - self.n_missing = 0 - - cdef inline void sort_samples_and_feature_values( - self, intp_t current_feature - ) noexcept nogil: - """Simultaneously sort based on the feature_values.""" - cdef: - float32_t[::1] feature_values = self.feature_values - intp_t[::1] index_to_samples = self.index_to_samples - intp_t[::1] samples = self.samples - - self.extract_nnz(current_feature) - # Sort the positive and negative parts of `feature_values` - sort(&feature_values[self.start], &samples[self.start], self.end_negative - self.start) - if self.start_positive < self.end: - sort( - &feature_values[self.start_positive], - &samples[self.start_positive], - self.end - self.start_positive - ) - - # Update index_to_samples to take into account the sort - for p in range(self.start, self.end_negative): - index_to_samples[samples[p]] = p - for p in range(self.start_positive, self.end): - index_to_samples[samples[p]] = p - - # Add one or two zeros in feature_values, if there is any - if self.end_negative < self.start_positive: - self.start_positive -= 1 - feature_values[self.start_positive] = 0. - - if self.end_negative != self.start_positive: - feature_values[self.end_negative] = 0. 
- self.end_negative += 1 - - # XXX: When sparse supports missing values, this should be set to the - # number of missing values for current_feature - self.n_missing = 0 - - cdef inline void find_min_max( - self, - intp_t current_feature, - float32_t* min_feature_value_out, - float32_t* max_feature_value_out, - ) noexcept nogil: - """Find the minimum and maximum value for current_feature.""" - cdef: - intp_t p - float32_t current_feature_value, min_feature_value, max_feature_value - float32_t[::1] feature_values = self.feature_values - - self.extract_nnz(current_feature) - - if self.end_negative != self.start_positive: - # There is a zero - min_feature_value = 0 - max_feature_value = 0 - else: - min_feature_value = feature_values[self.start] - max_feature_value = min_feature_value - - # Find min, max in feature_values[start:end_negative] - for p in range(self.start, self.end_negative): - current_feature_value = feature_values[p] - - if current_feature_value < min_feature_value: - min_feature_value = current_feature_value - elif current_feature_value > max_feature_value: - max_feature_value = current_feature_value - - # Update min, max given feature_values[start_positive:end] - for p in range(self.start_positive, self.end): - current_feature_value = feature_values[p] - - if current_feature_value < min_feature_value: - min_feature_value = current_feature_value - elif current_feature_value > max_feature_value: - max_feature_value = current_feature_value - - min_feature_value_out[0] = min_feature_value - max_feature_value_out[0] = max_feature_value - - cdef inline void next_p(self, intp_t* p_prev, intp_t* p) noexcept nogil: - """Compute the next p_prev and p for iteratiing over feature values.""" - cdef: - intp_t p_next - float32_t[::1] feature_values = self.feature_values - - if p[0] + 1 != self.end_negative: - p_next = p[0] + 1 - else: - p_next = self.start_positive - - while (p_next < self.end and - feature_values[p_next] <= feature_values[p[0]] + FEATURE_THRESHOLD): - p[0] = p_next - if p[0] + 1 != self.end_negative: - p_next = p[0] + 1 - else: - p_next = self.start_positive - - p_prev[0] = p[0] - p[0] = p_next - - cdef inline intp_t partition_samples(self, float64_t current_threshold) noexcept nogil: - """Partition samples for feature_values at the current_threshold.""" - return self._partition(current_threshold, self.start_positive) - - cdef inline void partition_samples_final( - self, - intp_t best_pos, - float64_t best_threshold, - intp_t best_feature, - intp_t n_missing, - ) noexcept nogil: - """Partition samples for X at the best_threshold and best_feature.""" - self.extract_nnz(best_feature) - self._partition(best_threshold, best_pos) - - cdef inline intp_t _partition(self, float64_t threshold, intp_t zero_pos) noexcept nogil: - """Partition samples[start:end] based on threshold.""" - cdef: - intp_t p, partition_end - intp_t[::1] index_to_samples = self.index_to_samples - float32_t[::1] feature_values = self.feature_values - intp_t[::1] samples = self.samples - - if threshold < 0.: - p = self.start - partition_end = self.end_negative - elif threshold > 0.: - p = self.start_positive - partition_end = self.end - else: - # Data are already split - return zero_pos - - while p < partition_end: - if feature_values[p] <= threshold: - p += 1 - - else: - partition_end -= 1 - - feature_values[p], feature_values[partition_end] = ( - feature_values[partition_end], feature_values[p] - ) - sparse_swap(index_to_samples, samples, p, partition_end) - - return partition_end - - cdef inline void 
extract_nnz(self, intp_t feature) noexcept nogil:
-        """Extract and partition values for a given feature.
-
-        The extracted values are partitioned between negative values
-        feature_values[start:end_negative[0]] and positive values
-        feature_values[start_positive[0]:end].
-        The samples and index_to_samples are modified according to this
-        partition.
-
-        The extraction corresponds to the intersection between the arrays
-        X_indices[indptr_start:indptr_end] and samples[start:end].
-        This is done efficiently using either an index_to_samples based approach
-        or binary search based approach.
-
-        Parameters
-        ----------
-        feature : intp_t,
-            Index of the feature we want to extract non zero value.
-        """
-        cdef intp_t[::1] samples = self.samples
-        cdef float32_t[::1] feature_values = self.feature_values
-        cdef intp_t indptr_start = self.X_indptr[feature],
-        cdef intp_t indptr_end = self.X_indptr[feature + 1]
-        cdef intp_t n_indices = <intp_t>(indptr_end - indptr_start)
-        cdef intp_t n_samples = self.end - self.start
-        cdef intp_t[::1] index_to_samples = self.index_to_samples
-        cdef intp_t[::1] sorted_samples = self.sorted_samples
-        cdef const int32_t[::1] X_indices = self.X_indices
-        cdef const float32_t[::1] X_data = self.X_data
-
-        # Use binary search if n_samples * log(n_indices) <
-        # n_indices and index_to_samples approach otherwise.
-        # O(n_samples * log(n_indices)) is the running time of binary
-        # search and O(n_indices) is the running time of index_to_samples
-        # approach.
-        if ((1 - self.is_samples_sorted) * n_samples * log(n_samples) +
-                n_samples * log(n_indices) < EXTRACT_NNZ_SWITCH * n_indices):
-            extract_nnz_binary_search(X_indices, X_data,
-                                      indptr_start, indptr_end,
-                                      samples, self.start, self.end,
-                                      index_to_samples,
-                                      feature_values,
-                                      &self.end_negative, &self.start_positive,
-                                      sorted_samples, &self.is_samples_sorted)
-
-        # Using an index to samples technique to extract non zero values
-        # index_to_samples is a mapping from X_indices to samples
-        else:
-            extract_nnz_index_to_samples(X_indices, X_data,
-                                         indptr_start, indptr_end,
-                                         samples, self.start, self.end,
-                                         index_to_samples,
-                                         feature_values,
-                                         &self.end_negative, &self.start_positive)
-
-
-cdef int compare_SIZE_t(const void* a, const void* b) noexcept nogil:
-    """Comparison function for sort.
-
-    This must return an `int` as it is used by stdlib's qsort, which expects
-    an `int` return value.
-    """
-    return <int>((<intp_t*>a)[0] - (<intp_t*>b)[0])
-
-
-cdef inline void binary_search(const int32_t[::1] sorted_array,
-                               int32_t start, int32_t end,
-                               intp_t value, intp_t* index,
-                               int32_t* new_start) noexcept nogil:
-    """Return the index of value in the sorted array.
-
-    If not found, return -1. new_start is the last pivot + 1
-    """
-    cdef int32_t pivot
-    index[0] = -1
-    while start < end:
-        pivot = start + (end - start) / 2
-
-        if sorted_array[pivot] == value:
-            index[0] = pivot
-            start = pivot + 1
-            break
-
-        if sorted_array[pivot] < value:
-            start = pivot + 1
-        else:
-            end = pivot
-    new_start[0] = start
-
-
-cdef inline void extract_nnz_index_to_samples(const int32_t[::1] X_indices,
-                                              const float32_t[::1] X_data,
-                                              int32_t indptr_start,
-                                              int32_t indptr_end,
-                                              intp_t[::1] samples,
-                                              intp_t start,
-                                              intp_t end,
-                                              intp_t[::1] index_to_samples,
-                                              float32_t[::1] feature_values,
-                                              intp_t* end_negative,
-                                              intp_t* start_positive) noexcept nogil:
-    """Extract and partition values for a feature using index_to_samples.
-
-    Complexity is O(indptr_end - indptr_start).
- """ - cdef int32_t k - cdef intp_t index - cdef intp_t end_negative_ = start - cdef intp_t start_positive_ = end - - for k in range(indptr_start, indptr_end): - if start <= index_to_samples[X_indices[k]] < end: - if X_data[k] > 0: - start_positive_ -= 1 - feature_values[start_positive_] = X_data[k] - index = index_to_samples[X_indices[k]] - sparse_swap(index_to_samples, samples, index, start_positive_) - - elif X_data[k] < 0: - feature_values[end_negative_] = X_data[k] - index = index_to_samples[X_indices[k]] - sparse_swap(index_to_samples, samples, index, end_negative_) - end_negative_ += 1 - - # Returned values - end_negative[0] = end_negative_ - start_positive[0] = start_positive_ - - -cdef inline void extract_nnz_binary_search(const int32_t[::1] X_indices, - const float32_t[::1] X_data, - int32_t indptr_start, - int32_t indptr_end, - intp_t[::1] samples, - intp_t start, - intp_t end, - intp_t[::1] index_to_samples, - float32_t[::1] feature_values, - intp_t* end_negative, - intp_t* start_positive, - intp_t[::1] sorted_samples, - bint* is_samples_sorted) noexcept nogil: - """Extract and partition values for a given feature using binary search. - - If n_samples = end - start and n_indices = indptr_end - indptr_start, - the complexity is - - O((1 - is_samples_sorted[0]) * n_samples * log(n_samples) + - n_samples * log(n_indices)). - """ - cdef intp_t n_samples - - if not is_samples_sorted[0]: - n_samples = end - start - memcpy(&sorted_samples[start], &samples[start], - n_samples * sizeof(intp_t)) - qsort(&sorted_samples[start], n_samples, sizeof(intp_t), - compare_SIZE_t) - is_samples_sorted[0] = 1 - - while (indptr_start < indptr_end and - sorted_samples[start] > X_indices[indptr_start]): - indptr_start += 1 - - while (indptr_start < indptr_end and - sorted_samples[end - 1] < X_indices[indptr_end - 1]): - indptr_end -= 1 - - cdef intp_t p = start - cdef intp_t index - cdef intp_t k - cdef intp_t end_negative_ = start - cdef intp_t start_positive_ = end - - while (p < end and indptr_start < indptr_end): - # Find index of sorted_samples[p] in X_indices - binary_search(X_indices, indptr_start, indptr_end, - sorted_samples[p], &k, &indptr_start) - - if k != -1: - # If k != -1, we have found a non zero value - - if X_data[k] > 0: - start_positive_ -= 1 - feature_values[start_positive_] = X_data[k] - index = index_to_samples[X_indices[k]] - sparse_swap(index_to_samples, samples, index, start_positive_) - - elif X_data[k] < 0: - feature_values[end_negative_] = X_data[k] - index = index_to_samples[X_indices[k]] - sparse_swap(index_to_samples, samples, index, end_negative_) - end_negative_ += 1 - p += 1 - - # Returned values - end_negative[0] = end_negative_ - start_positive[0] = start_positive_ - - -cdef inline void sparse_swap(intp_t[::1] index_to_samples, intp_t[::1] samples, - intp_t pos_1, intp_t pos_2) noexcept nogil: - """Swap sample pos_1 and pos_2 preserving sparse invariant.""" - samples[pos_1], samples[pos_2] = samples[pos_2], samples[pos_1] - index_to_samples[samples[pos_1]] = pos_1 - index_to_samples[samples[pos_2]] = pos_2 - - cdef class BestSplitter(Splitter): """Splitter for finding the best split on dense data.""" cdef DensePartitioner partitioner diff --git a/sklearn/tree/meson.build b/sklearn/tree/meson.build index 0fff299e32205..8ed696cd2481e 100644 --- a/sklearn/tree/meson.build +++ b/sklearn/tree/meson.build @@ -2,9 +2,15 @@ tree_extension_metadata = { '_tree': {'sources': ['_tree.pyx'], 'override_options': ['cython_language=cpp', 'optimization=3']}, + '_sort': + 
{'sources': ['_sort.pyx'], + 'override_options': ['cython_language=cpp', 'optimization=3']}, '_splitter': {'sources': ['_splitter.pyx'], 'override_options': ['cython_language=cpp', 'optimization=3']}, + '_partitioner': + {'sources': ['_partitioner.pyx'], + 'override_options': ['cython_language=cpp', 'optimization=3']}, '_criterion': {'sources': ['_criterion.pyx'], 'override_options': ['cython_language=cpp', 'optimization=3']},
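As a closing illustration of the extraction heuristic carried over into _partitioner.pyx, here is a rough pure-Python rendering of the cost model behind sparse_extract_nnz (EXTRACT_NNZ_SWITCH = 0.1 comes from the diff; the function name and everything else is an illustrative sketch, not the sklearn API):

```python
from math import log

EXTRACT_NNZ_SWITCH = 0.1  # from _partitioner.pxd in this diff

def choose_extraction(n_samples, n_indices, is_samples_sorted):
    """Pick binary search when O(n_samples * log n_indices) work, plus a
    one-off sort of the node's samples if needed, beats the O(n_indices)
    linear scan over the feature's non-zeros."""
    sort_cost = 0.0 if is_samples_sorted else n_samples * log(n_samples)
    search_cost = n_samples * log(n_indices)
    if sort_cost + search_cost < EXTRACT_NNZ_SWITCH * n_indices:
        return "binary_search"
    return "index_to_samples"

# Many node samples relative to the column's non-zeros: linear scan wins.
assert choose_extraction(1000, 2000, False) == "index_to_samples"
# Few node samples against a very dense column: binary search wins.
assert choose_extraction(8, 100000, True) == "binary_search"
```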