@@ -51,25 +51,29 @@ impl ArrayFunctions {
51
51
} else {
52
52
let values = array. values ( ) ;
53
53
let values = values. as_any ( ) . downcast_ref :: < PrimitiveArray < T > > ( ) . unwrap ( ) ;
54
- let values = values. value_slice (
55
- array. value_offset ( i) as usize ,
56
- array. value_length ( i) as usize ,
57
- ) . to_vec ( ) ;
54
+ let values = values
55
+ . value_slice (
56
+ array. value_offset ( i) as usize ,
57
+ array. value_length ( i) as usize ,
58
+ )
59
+ . to_vec ( ) ;
58
60
let u = values. unique ( ) ;
59
61
// TODO check how nulls are treated here
60
62
u. iter ( ) . for_each ( |x| b. values ( ) . append_value ( * x) . unwrap ( ) ) ;
61
63
}
62
64
}
63
65
Ok ( b. finish ( ) )
64
66
}
65
- pub fn array_except < T > ( a : & ListArray , b : & ListArray ) -> Result < ListArray , ArrowError >
67
+ pub fn array_except < T > ( a : & ListArray , b : & ListArray ) -> Result < ListArray , ArrowError >
66
68
where
67
69
T : ArrowPrimitiveType + ArrowNumericType ,
68
70
T :: Native : std:: cmp:: PartialEq < T :: Native > + std:: cmp:: Ord ,
69
71
{
70
72
// check that lengths of both arrays are equal
71
73
if a. len ( ) != b. len ( ) {
72
- return Err ( ArrowError :: ComputeError ( "Expected array a and b to have the same length" . to_string ( ) ) )
74
+ return Err ( ArrowError :: ComputeError (
75
+ "Expected array a and b to have the same length" . to_string ( ) ,
76
+ ) ) ;
73
77
}
74
78
let values_builder: PrimitiveBuilder < T > = PrimitiveBuilder :: new ( a. values ( ) . len ( ) ) ;
75
79
let mut c = ListBuilder :: new ( values_builder) ;
@@ -80,17 +84,21 @@ impl ArrayFunctions {
80
84
c. append ( true ) ?
81
85
} else {
82
86
let a_values = a. values ( ) ;
83
- let a_values = a_values. as_any ( ) . downcast_ref :: < PrimitiveArray < T > > ( ) . unwrap ( ) ;
84
- let a_values = a_values. value_slice (
85
- a. value_offset ( i) as usize ,
86
- a. value_length ( i) as usize ,
87
- ) . to_vec ( ) ;
87
+ let a_values = a_values
88
+ . as_any ( )
89
+ . downcast_ref :: < PrimitiveArray < T > > ( )
90
+ . unwrap ( ) ;
91
+ let a_values = a_values
92
+ . value_slice ( a. value_offset ( i) as usize , a. value_length ( i) as usize )
93
+ . to_vec ( ) ;
88
94
let b_values = b. values ( ) ;
89
- let b_values = b_values. as_any ( ) . downcast_ref :: < PrimitiveArray < T > > ( ) . unwrap ( ) ;
90
- let b_values = b_values. value_slice (
91
- b. value_offset ( i) as usize ,
92
- b. value_length ( i) as usize ,
93
- ) . to_vec ( ) ;
95
+ let b_values = b_values
96
+ . as_any ( )
97
+ . downcast_ref :: < PrimitiveArray < T > > ( )
98
+ . unwrap ( ) ;
99
+ let b_values = b_values
100
+ . value_slice ( b. value_offset ( i) as usize , b. value_length ( i) as usize )
101
+ . to_vec ( ) ;
94
102
95
103
let u = a_values. uniq ( b_values) ;
96
104
// TODO check how nulls are treated here
@@ -100,14 +108,16 @@ impl ArrayFunctions {
100
108
}
101
109
Ok ( c. finish ( ) )
102
110
}
103
- pub fn array_intersect < T > ( a : & ListArray , b : & ListArray ) -> Result < ListArray , ArrowError >
111
+ pub fn array_intersect < T > ( a : & ListArray , b : & ListArray ) -> Result < ListArray , ArrowError >
104
112
where
105
113
T : ArrowPrimitiveType + ArrowNumericType ,
106
114
T :: Native : std:: cmp:: PartialEq < T :: Native > + std:: cmp:: Ord ,
107
115
{
108
116
// check that lengths of both arrays are equal
109
117
if a. len ( ) != b. len ( ) {
110
- return Err ( ArrowError :: ComputeError ( "Expected array a and b to have the same length" . to_string ( ) ) )
118
+ return Err ( ArrowError :: ComputeError (
119
+ "Expected array a and b to have the same length" . to_string ( ) ,
120
+ ) ) ;
111
121
}
112
122
let values_builder: PrimitiveBuilder < T > = PrimitiveBuilder :: new ( a. values ( ) . len ( ) ) ;
113
123
let mut c = ListBuilder :: new ( values_builder) ;
@@ -118,17 +128,21 @@ impl ArrayFunctions {
118
128
c. append ( true ) ?
119
129
} else {
120
130
let a_values = a. values ( ) ;
121
- let a_values = a_values. as_any ( ) . downcast_ref :: < PrimitiveArray < T > > ( ) . unwrap ( ) ;
122
- let a_values = a_values. value_slice (
123
- a. value_offset ( i) as usize ,
124
- a. value_length ( i) as usize ,
125
- ) . to_vec ( ) ;
131
+ let a_values = a_values
132
+ . as_any ( )
133
+ . downcast_ref :: < PrimitiveArray < T > > ( )
134
+ . unwrap ( ) ;
135
+ let a_values = a_values
136
+ . value_slice ( a. value_offset ( i) as usize , a. value_length ( i) as usize )
137
+ . to_vec ( ) ;
126
138
let b_values = b. values ( ) ;
127
- let b_values = b_values. as_any ( ) . downcast_ref :: < PrimitiveArray < T > > ( ) . unwrap ( ) ;
128
- let b_values = b_values. value_slice (
129
- b. value_offset ( i) as usize ,
130
- b. value_length ( i) as usize ,
131
- ) . to_vec ( ) ;
139
+ let b_values = b_values
140
+ . as_any ( )
141
+ . downcast_ref :: < PrimitiveArray < T > > ( )
142
+ . unwrap ( ) ;
143
+ let b_values = b_values
144
+ . value_slice ( b. value_offset ( i) as usize , b. value_length ( i) as usize )
145
+ . to_vec ( ) ;
132
146
133
147
let u = a_values. intersect ( b_values) ;
134
148
// TODO check how nulls are treated here
@@ -250,7 +264,7 @@ impl ArrayFunctions {
250
264
}
251
265
252
266
/// TODO: extract the repetitive code and share it with the other array functions that use the `array_tool` crate
253
- pub fn array_repeat < T > ( array : & ListArray , count : i32 ) -> Result < ListArray , ArrowError >
267
+ pub fn array_repeat < T > ( array : & ListArray , count : i32 ) -> Result < ListArray , ArrowError >
254
268
where
255
269
T : ArrowPrimitiveType + ArrowNumericType ,
256
270
T :: Native : std:: cmp:: PartialEq < T :: Native > + std:: cmp:: Ord ,
@@ -265,10 +279,12 @@ impl ArrayFunctions {
265
279
} else {
266
280
let values = array. values ( ) ;
267
281
let values = values. as_any ( ) . downcast_ref :: < PrimitiveArray < T > > ( ) . unwrap ( ) ;
268
- let values = values. value_slice (
269
- array. value_offset ( i) as usize ,
270
- array. value_length ( i) as usize ,
271
- ) . to_vec ( ) ;
282
+ let values = values
283
+ . value_slice (
284
+ array. value_offset ( i) as usize ,
285
+ array. value_length ( i) as usize ,
286
+ )
287
+ . to_vec ( ) ;
272
288
273
289
let u = values. times ( count) ;
274
290
// TODO check how nulls are treated here
@@ -279,8 +295,8 @@ impl ArrayFunctions {
279
295
Ok ( c. finish ( ) )
280
296
}
281
297
282
- /// Sorts the input array in ascending order.
283
- ///
298
+ /// Sorts the input array in ascending order.
299
+ ///
284
300
/// TODO: document null treatment, and make it behave like Spark does.
285
301
fn array_sort < T > ( array : & ListArray ) -> Result < ListArray , ArrowError >
286
302
where
@@ -310,14 +326,16 @@ impl ArrayFunctions {
310
326
}
311
327
Ok ( b. finish ( ) )
312
328
}
313
- pub fn array_union < T > ( a : & ListArray , b : & ListArray ) -> Result < ListArray , ArrowError >
329
+ pub fn array_union < T > ( a : & ListArray , b : & ListArray ) -> Result < ListArray , ArrowError >
314
330
where
315
331
T : ArrowPrimitiveType + ArrowNumericType ,
316
332
T :: Native : std:: cmp:: PartialEq < T :: Native > + std:: cmp:: Ord ,
317
333
{
318
334
// check that lengths of both arrays are equal
319
335
if a. len ( ) != b. len ( ) {
320
- return Err ( ArrowError :: ComputeError ( "Expected array a and b to have the same length" . to_string ( ) ) )
336
+ return Err ( ArrowError :: ComputeError (
337
+ "Expected array a and b to have the same length" . to_string ( ) ,
338
+ ) ) ;
321
339
}
322
340
let values_builder: PrimitiveBuilder < T > = PrimitiveBuilder :: new ( a. values ( ) . len ( ) ) ;
323
341
let mut c = ListBuilder :: new ( values_builder) ;
@@ -328,17 +346,21 @@ impl ArrayFunctions {
328
346
c. append ( true ) ?
329
347
} else {
330
348
let a_values = a. values ( ) ;
331
- let a_values = a_values. as_any ( ) . downcast_ref :: < PrimitiveArray < T > > ( ) . unwrap ( ) ;
332
- let a_values = a_values. value_slice (
333
- a. value_offset ( i) as usize ,
334
- a. value_length ( i) as usize ,
335
- ) . to_vec ( ) ;
349
+ let a_values = a_values
350
+ . as_any ( )
351
+ . downcast_ref :: < PrimitiveArray < T > > ( )
352
+ . unwrap ( ) ;
353
+ let a_values = a_values
354
+ . value_slice ( a. value_offset ( i) as usize , a. value_length ( i) as usize )
355
+ . to_vec ( ) ;
336
356
let b_values = b. values ( ) ;
337
- let b_values = b_values. as_any ( ) . downcast_ref :: < PrimitiveArray < T > > ( ) . unwrap ( ) ;
338
- let b_values = b_values. value_slice (
339
- b. value_offset ( i) as usize ,
340
- b. value_length ( i) as usize ,
341
- ) . to_vec ( ) ;
357
+ let b_values = b_values
358
+ . as_any ( )
359
+ . downcast_ref :: < PrimitiveArray < T > > ( )
360
+ . unwrap ( ) ;
361
+ let b_values = b_values
362
+ . value_slice ( b. value_offset ( i) as usize , b. value_length ( i) as usize )
363
+ . to_vec ( ) ;
342
364
343
365
let u = a_values. union ( b_values) ;
344
366
// TODO check how nulls are treated here
@@ -370,6 +392,7 @@ mod tests {
370
392
use arrow:: array_data:: * ;
371
393
use arrow:: buffer:: Buffer ;
372
394
use arrow:: datatypes:: * ;
395
+ use std:: sync:: Arc ;
373
396
374
397
#[ test]
375
398
fn test_array_contains_i32s ( ) {
@@ -500,7 +523,10 @@ mod tests {
500
523
501
524
let b = ArrayFunctions :: array_remove :: < Int64Type > ( & list_array, 2 ) . unwrap ( ) ;
502
525
let values = b. values ( ) ;
503
- let values = values. as_any ( ) . downcast_ref :: < PrimitiveArray < Int64Type > > ( ) . unwrap ( ) ;
526
+ let values = values
527
+ . as_any ( )
528
+ . downcast_ref :: < PrimitiveArray < Int64Type > > ( )
529
+ . unwrap ( ) ;
504
530
505
531
assert_eq ! ( 6 , b. len( ) ) ;
506
532
assert_eq ! ( 13 , values. len( ) ) ;
@@ -531,7 +557,56 @@ mod tests {
531
557
532
558
let b = ArrayFunctions :: array_sort :: < Int64Type > ( & list_array) . unwrap ( ) ;
533
559
let values = b. values ( ) ;
534
- let values = values. as_any ( ) . downcast_ref :: < PrimitiveArray < Int64Type > > ( ) . unwrap ( ) ;
560
+ let values = values
561
+ . as_any ( )
562
+ . downcast_ref :: < PrimitiveArray < Int64Type > > ( )
563
+ . unwrap ( ) ;
564
+
565
+ assert_eq ! ( 6 , b. len( ) ) ;
566
+ assert_eq ! ( 16 , values. len( ) ) ;
567
+ assert_eq ! ( 0 , b. value_offset( 0 ) ) ;
568
+ assert_eq ! ( 3 , b. value_offset( 1 ) ) ;
569
+ assert_eq ! ( 6 , b. value_offset( 2 ) ) ;
570
+ assert_eq ! ( 8 , b. value_offset( 3 ) ) ;
571
+ assert_eq ! ( 12 , b. value_offset( 4 ) ) ;
572
+ assert_eq ! ( 14 , b. value_offset( 5 ) ) ;
573
+
574
+ let expected = Int64Array :: from ( vec ! [ 0 , 0 , 0 , 1 , 1 , 2 , 3 , 4 , 1 , 2 , 3 , 5 , 2 , 3 , 3 , 8 ] ) ;
575
+ for i in 0 ..b. len ( ) {
576
+ let x = values. value_slice ( b. value_offset ( i) as usize , b. value_length ( i) as usize ) ;
577
+ let d = expected. value_slice ( b. value_offset ( i) as usize , b. value_length ( i) as usize ) ;
578
+ assert_eq ! ( x, d) ;
579
+ }
580
+ }
581
+
582
+ #[ test]
583
+ fn test_array_union ( ) {
584
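+ // Construct a value array. NOTE(review): the identical value_data / value_offsets pair is built twice below; the second `let` simply shadows the first.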
585
+ let value_data =
586
+ Int64Array :: from ( vec ! [ 0 , 0 , 0 , 1 , 2 , 1 , 3 , 4 , 5 , 1 , 3 , 2 , 3 , 2 , 8 , 3 ] ) . data ( ) ;
587
+
588
+ let value_offsets = Buffer :: from ( & [ 0 , 3 , 6 , 8 , 12 , 14 , 16 ] . to_byte_slice ( ) ) ;
589
+
590
+ let value_data =
591
+ Int64Array :: from ( vec ! [ 0 , 0 , 0 , 1 , 2 , 1 , 3 , 4 , 5 , 1 , 3 , 2 , 3 , 2 , 8 , 3 ] ) . data ( ) ;
592
+
593
+ let value_offsets = Buffer :: from ( & [ 0 , 3 , 6 , 8 , 12 , 14 , 16 ] . to_byte_slice ( ) ) ;
594
+
595
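+ // Construct a list array from the above two. NOTE(review): this test is named test_array_union but the call below is array_sort, not array_union; confirm which function is meant to be exercised.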
596
+ let list_data_type = DataType :: List ( Box :: new ( DataType :: Int64 ) ) ;
597
+ let list_data = ArrayData :: builder ( list_data_type. clone ( ) )
598
+ . len ( 6 )
599
+ . add_buffer ( value_offsets. clone ( ) )
600
+ . add_child_data ( value_data. clone ( ) )
601
+ . build ( ) ;
602
+ let list_array = ListArray :: from ( list_data) ;
603
+
604
+ let b = ArrayFunctions :: array_sort :: < Int64Type > ( & list_array) . unwrap ( ) ;
605
+ let values = b. values ( ) ;
606
+ let values = values
607
+ . as_any ( )
608
+ . downcast_ref :: < PrimitiveArray < Int64Type > > ( )
609
+ . unwrap ( ) ;
535
610
536
611
assert_eq ! ( 6 , b. len( ) ) ;
537
612
assert_eq ! ( 16 , values. len( ) ) ;
@@ -544,14 +619,8 @@ mod tests {
544
619
545
620
let expected = Int64Array :: from ( vec ! [ 0 , 0 , 0 , 1 , 1 , 2 , 3 , 4 , 1 , 2 , 3 , 5 , 2 , 3 , 3 , 8 ] ) ;
546
621
for i in 0 ..b. len ( ) {
547
- let x = values. value_slice (
548
- b. value_offset ( i) as usize ,
549
- b. value_length ( i) as usize
550
- ) ;
551
- let d = expected. value_slice (
552
- b. value_offset ( i) as usize ,
553
- b. value_length ( i) as usize
554
- ) ;
622
+ let x = values. value_slice ( b. value_offset ( i) as usize , b. value_length ( i) as usize ) ;
623
+ let d = expected. value_slice ( b. value_offset ( i) as usize , b. value_length ( i) as usize ) ;
555
624
assert_eq ! ( x, d) ;
556
625
}
557
626
}
0 commit comments