15
15
// specific language governing permissions and limitations
16
16
// under the License.
17
17
18
- use arrow:: array:: { Array , UInt64Array } ;
18
+ use arrow:: array:: { Array , NullArray , UInt64Array } ;
19
19
use arrow:: array:: { ArrayRef , BooleanArray } ;
20
20
use arrow:: datatypes:: { FieldRef , Schema , SchemaRef } ;
21
21
use std:: collections:: HashSet ;
@@ -173,7 +173,10 @@ impl PartitionPruningStatistics {
173
173
let num_containers = partition_values. len ( ) ;
174
174
let partition_schema = Arc :: new ( Schema :: new ( partition_fields) ) ;
175
175
let mut partition_values_by_column =
176
- vec ! [ vec![ ] ; partition_schema. fields( ) . len( ) ] ;
176
+ vec ! [
177
+ Vec :: with_capacity( partition_values. len( ) ) ;
178
+ partition_schema. fields( ) . len( )
179
+ ] ;
177
180
for partition_value in partition_values {
178
181
for ( i, value) in partition_value. into_iter ( ) . enumerate ( ) {
179
182
partition_values_by_column[ i] . push ( value) ;
@@ -182,7 +185,13 @@ impl PartitionPruningStatistics {
182
185
Ok ( Self {
183
186
partition_values : partition_values_by_column
184
187
. into_iter ( )
185
- . map ( |v| ScalarValue :: iter_to_array ( v) )
188
+ . map ( |v| {
189
+ if v. is_empty ( ) {
190
+ Ok ( Arc :: new ( NullArray :: new ( 0 ) ) as ArrayRef )
191
+ } else {
192
+ ScalarValue :: iter_to_array ( v)
193
+ }
194
+ } )
186
195
. collect :: < Result < Vec < _ > , _ > > ( ) ?,
187
196
num_containers,
188
197
partition_schema,
@@ -193,7 +202,18 @@ impl PartitionPruningStatistics {
193
202
impl PruningStatistics for PartitionPruningStatistics {
194
203
fn min_values ( & self , column : & Column ) -> Option < ArrayRef > {
195
204
let index = self . partition_schema . index_of ( column. name ( ) ) . ok ( ) ?;
196
- self . partition_values . get ( index) . map ( |v| Arc :: clone ( v) )
205
+ self . partition_values
206
+ . get ( index)
207
+ . map ( |v| {
208
+ if v. is_empty ( ) || v. null_count ( ) == v. len ( ) {
209
+ // If the array is empty or all nulls, return None
210
+ None
211
+ } else {
212
+ // Otherwise, return the array as is
213
+ Some ( Arc :: clone ( v) )
214
+ }
215
+ } )
216
+ . flatten ( )
197
217
}
198
218
199
219
fn max_values ( & self , column : & Column ) -> Option < ArrayRef > {
@@ -219,10 +239,20 @@ impl PruningStatistics for PartitionPruningStatistics {
219
239
) -> Option < BooleanArray > {
220
240
let index = self . partition_schema . index_of ( column. name ( ) ) . ok ( ) ?;
221
241
let array = self . partition_values . get ( index) ?;
222
- let values_array = ScalarValue :: iter_to_array ( values. iter ( ) . cloned ( ) ) . ok ( ) ?;
223
- let boolean_array =
224
- arrow:: compute:: kernels:: cmp:: eq ( array, & values_array) . ok ( ) ?;
225
- if boolean_array. null_count ( ) == boolean_array. len ( ) {
242
+ let boolean_arrays = values
243
+ . iter ( )
244
+ . map ( |v| {
245
+ let arrow_value = v. to_scalar ( ) ?;
246
+ arrow:: compute:: kernels:: cmp:: eq ( array, & arrow_value)
247
+ } )
248
+ . collect :: < Result < Vec < _ > , _ > > ( )
249
+ . ok ( ) ?;
250
+ let boolean_array = boolean_arrays. into_iter ( ) . reduce ( |acc, arr| {
251
+ arrow:: compute:: kernels:: boolean:: and ( & acc, & arr)
252
+ . expect ( "arrays are known to have equal lengths" )
253
+ } ) ?;
254
+ // If the boolean array is empty or all null values, return None
255
+ if boolean_array. is_empty ( ) || boolean_array. null_count ( ) == boolean_array. len ( ) {
226
256
None
227
257
} else {
228
258
Some ( boolean_array)
@@ -549,9 +579,7 @@ mod tests {
549
579
550
580
// Contained values are all empty
551
581
let values = HashSet :: from ( [ ScalarValue :: from ( 1i32 ) ] ) ;
552
- let contained_a = partition_stats. contained ( & column_a, & values) ;
553
- let expected_contained_a = BooleanArray :: from ( Vec :: < Option < bool > > :: new ( ) ) ;
554
- assert_eq ! ( contained_a, Some ( expected_contained_a) ) ;
582
+ assert ! ( partition_stats. contained( & column_a, & values) . is_none( ) ) ;
555
583
}
556
584
557
585
#[ test]
0 commit comments