@@ -22,8 +22,8 @@ use std::collections::HashSet;
22
22
use std:: sync:: Arc ;
23
23
24
24
use crate :: stats:: Precision ;
25
- use crate :: ScalarValue ;
26
25
use crate :: { Column , Statistics } ;
26
+ use crate :: { ColumnStatistics , ScalarValue } ;
27
27
28
28
/// A source of runtime statistical information to [`PruningPredicate`]s.
29
29
///
@@ -171,15 +171,15 @@ impl PartitionPruningStatistics {
171
171
) -> Self {
172
172
let num_containers = partition_values. len ( ) ;
173
173
let partition_schema = Arc :: new ( Schema :: new ( partition_fields) ) ;
174
- let mut partition_valeus_by_column =
174
+ let mut partition_values_by_column =
175
175
vec ! [ vec![ ] ; partition_schema. fields( ) . len( ) ] ;
176
176
for partition_value in partition_values {
177
177
for ( i, value) in partition_value. into_iter ( ) . enumerate ( ) {
178
- partition_valeus_by_column [ i] . push ( value) ;
178
+ partition_values_by_column [ i] . push ( value) ;
179
179
}
180
180
}
181
181
Self {
182
- partition_values : partition_valeus_by_column ,
182
+ partition_values : partition_values_by_column ,
183
183
num_containers,
184
184
partition_schema,
185
185
}
@@ -225,16 +225,12 @@ impl PruningStatistics for PartitionPruningStatistics {
225
225
) -> Option < BooleanArray > {
226
226
let index = self . partition_schema . index_of ( column. name ( ) ) . ok ( ) ?;
227
227
let partition_values = self . partition_values . get ( index) ?;
228
- let mut contained = Vec :: with_capacity ( self . partition_values . len ( ) ) ;
229
- for partition_value in partition_values {
230
- let contained_value = if values. contains ( partition_value) {
231
- Some ( true )
232
- } else {
233
- Some ( false )
234
- } ;
235
- contained. push ( contained_value) ;
236
- }
237
- let array = BooleanArray :: from ( contained) ;
228
+ let array = BooleanArray :: from (
229
+ partition_values
230
+ . iter ( )
231
+ . map ( |pv| Some ( values. contains ( pv) ) )
232
+ . collect :: < Vec < _ > > ( ) ,
233
+ ) ;
238
234
Some ( array)
239
235
}
240
236
}
@@ -258,73 +254,47 @@ impl PrunableStatistics {
258
254
pub fn new ( statistics : Vec < Arc < Statistics > > , schema : SchemaRef ) -> Self {
259
255
Self { statistics, schema }
260
256
}
261
- }
262
257
263
- impl PruningStatistics for PrunableStatistics {
264
- fn min_values ( & self , column : & Column ) -> Option < ArrayRef > {
258
+ fn get_exact_column_statistics (
259
+ & self ,
260
+ column : & Column ,
261
+ get_stat : impl Fn ( & ColumnStatistics ) -> & Precision < ScalarValue > ,
262
+ ) -> Option < ArrayRef > {
265
263
let index = self . schema . index_of ( column. name ( ) ) . ok ( ) ?;
266
- if self . statistics . iter ( ) . any ( |s| {
264
+ let mut has_value = false ;
265
+ match ScalarValue :: iter_to_array ( self . statistics . iter ( ) . map ( |s| {
267
266
s. column_statistics
268
267
. get ( index)
269
- . is_some_and ( |stat| stat. min_value . is_exact ( ) . unwrap_or ( false ) )
270
- } ) {
271
- match ScalarValue :: iter_to_array ( self . statistics . iter ( ) . map ( |s| {
272
- s. column_statistics
273
- . get ( index)
274
- . and_then ( |stat| {
275
- if let Precision :: Exact ( min) = & stat. min_value {
276
- Some ( min. clone ( ) )
277
- } else {
278
- None
279
- }
280
- } )
281
- . unwrap_or ( ScalarValue :: Null )
282
- } ) ) {
283
- Ok ( array) => Some ( array) ,
284
- Err ( _) => {
285
- log:: warn!(
286
- "Failed to convert min values to array for column {}" ,
287
- column. name( )
288
- ) ;
289
- None
290
- }
268
+ . and_then ( |stat| {
269
+ if let Precision :: Exact ( min) = get_stat ( & stat) {
270
+ has_value = true ;
271
+ Some ( min. clone ( ) )
272
+ } else {
273
+ None
274
+ }
275
+ } )
276
+ . unwrap_or ( ScalarValue :: Null )
277
+ } ) ) {
278
+ // If there is any non-null value and no errors, return the array
279
+ Ok ( array) => has_value. then_some ( array) ,
280
+ Err ( _) => {
281
+ log:: warn!(
282
+ "Failed to convert min values to array for column {}" ,
283
+ column. name( )
284
+ ) ;
285
+ None
291
286
}
292
- } else {
293
- None
294
287
}
295
288
}
289
+ }
290
+
291
+ impl PruningStatistics for PrunableStatistics {
292
+ fn min_values ( & self , column : & Column ) -> Option < ArrayRef > {
293
+ self . get_exact_column_statistics ( column, |stat| & stat. min_value )
294
+ }
296
295
297
296
/// Returns the exact maximum of `column` for each container, delegating to
/// `get_exact_column_statistics` with the `max_value` statistic selected.
fn max_values(&self, column: &Column) -> Option<ArrayRef> {
    self.get_exact_column_statistics(column, |col_stats| &col_stats.max_value)
}
329
299
330
300
fn num_containers ( & self ) -> usize {
0 commit comments