Skip to content

Commit 8b7089f

Browse files
committed
pr feedback
1 parent 53926c5 commit 8b7089f

File tree

1 file changed

+42
-72
lines changed

1 file changed

+42
-72
lines changed

datafusion/common/src/pruning.rs

Lines changed: 42 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ use std::collections::HashSet;
2222
use std::sync::Arc;
2323

2424
use crate::stats::Precision;
25-
use crate::ScalarValue;
2625
use crate::{Column, Statistics};
26+
use crate::{ColumnStatistics, ScalarValue};
2727

2828
/// A source of runtime statistical information to [`PruningPredicate`]s.
2929
///
@@ -171,15 +171,15 @@ impl PartitionPruningStatistics {
171171
) -> Self {
172172
let num_containers = partition_values.len();
173173
let partition_schema = Arc::new(Schema::new(partition_fields));
174-
let mut partition_valeus_by_column =
174+
let mut partition_values_by_column =
175175
vec![vec![]; partition_schema.fields().len()];
176176
for partition_value in partition_values {
177177
for (i, value) in partition_value.into_iter().enumerate() {
178-
partition_valeus_by_column[i].push(value);
178+
partition_values_by_column[i].push(value);
179179
}
180180
}
181181
Self {
182-
partition_values: partition_valeus_by_column,
182+
partition_values: partition_values_by_column,
183183
num_containers,
184184
partition_schema,
185185
}
@@ -225,16 +225,12 @@ impl PruningStatistics for PartitionPruningStatistics {
225225
) -> Option<BooleanArray> {
226226
let index = self.partition_schema.index_of(column.name()).ok()?;
227227
let partition_values = self.partition_values.get(index)?;
228-
let mut contained = Vec::with_capacity(self.partition_values.len());
229-
for partition_value in partition_values {
230-
let contained_value = if values.contains(partition_value) {
231-
Some(true)
232-
} else {
233-
Some(false)
234-
};
235-
contained.push(contained_value);
236-
}
237-
let array = BooleanArray::from(contained);
228+
let array = BooleanArray::from(
229+
partition_values
230+
.iter()
231+
.map(|pv| Some(values.contains(pv)))
232+
.collect::<Vec<_>>(),
233+
);
238234
Some(array)
239235
}
240236
}
@@ -258,73 +254,47 @@ impl PrunableStatistics {
258254
pub fn new(statistics: Vec<Arc<Statistics>>, schema: SchemaRef) -> Self {
259255
Self { statistics, schema }
260256
}
261-
}
262257

263-
impl PruningStatistics for PrunableStatistics {
264-
fn min_values(&self, column: &Column) -> Option<ArrayRef> {
258+
fn get_exact_column_statistics(
259+
&self,
260+
column: &Column,
261+
get_stat: impl Fn(&ColumnStatistics) -> &Precision<ScalarValue>,
262+
) -> Option<ArrayRef> {
265263
let index = self.schema.index_of(column.name()).ok()?;
266-
if self.statistics.iter().any(|s| {
264+
let mut has_value = false;
265+
match ScalarValue::iter_to_array(self.statistics.iter().map(|s| {
267266
s.column_statistics
268267
.get(index)
269-
.is_some_and(|stat| stat.min_value.is_exact().unwrap_or(false))
270-
}) {
271-
match ScalarValue::iter_to_array(self.statistics.iter().map(|s| {
272-
s.column_statistics
273-
.get(index)
274-
.and_then(|stat| {
275-
if let Precision::Exact(min) = &stat.min_value {
276-
Some(min.clone())
277-
} else {
278-
None
279-
}
280-
})
281-
.unwrap_or(ScalarValue::Null)
282-
})) {
283-
Ok(array) => Some(array),
284-
Err(_) => {
285-
log::warn!(
286-
"Failed to convert min values to array for column {}",
287-
column.name()
288-
);
289-
None
290-
}
268+
.and_then(|stat| {
269+
if let Precision::Exact(min) = get_stat(&stat) {
270+
has_value = true;
271+
Some(min.clone())
272+
} else {
273+
None
274+
}
275+
})
276+
.unwrap_or(ScalarValue::Null)
277+
})) {
278+
// If there is any non-null value and no errors, return the array
279+
Ok(array) => has_value.then_some(array),
280+
Err(_) => {
281+
log::warn!(
282+
"Failed to convert min values to array for column {}",
283+
column.name()
284+
);
285+
None
291286
}
292-
} else {
293-
None
294287
}
295288
}
289+
}
290+
291+
impl PruningStatistics for PrunableStatistics {
292+
fn min_values(&self, column: &Column) -> Option<ArrayRef> {
293+
self.get_exact_column_statistics(column, |stat| &stat.min_value)
294+
}
296295

297296
fn max_values(&self, column: &Column) -> Option<ArrayRef> {
298-
let index = self.schema.index_of(column.name()).ok()?;
299-
if self.statistics.iter().any(|s| {
300-
s.column_statistics
301-
.get(index)
302-
.is_some_and(|stat| stat.max_value.is_exact().unwrap_or(false))
303-
}) {
304-
match ScalarValue::iter_to_array(self.statistics.iter().map(|s| {
305-
s.column_statistics
306-
.get(index)
307-
.and_then(|stat| {
308-
if let Precision::Exact(max) = &stat.max_value {
309-
Some(max.clone())
310-
} else {
311-
None
312-
}
313-
})
314-
.unwrap_or(ScalarValue::Null)
315-
})) {
316-
Ok(array) => Some(array),
317-
Err(_) => {
318-
log::warn!(
319-
"Failed to convert max values to array for column {}",
320-
column.name()
321-
);
322-
None
323-
}
324-
}
325-
} else {
326-
None
327-
}
297+
self.get_exact_column_statistics(column, |stat| &stat.max_value)
328298
}
329299

330300
fn num_containers(&self) -> usize {

0 commit comments

Comments
 (0)