Skip to content
This repository was archived by the owner on Dec 29, 2021. It is now read-only.

Commit c3980af

Browse files
committed
update to more recent branch, with temporal support
1 parent 70159b2 commit c3980af

File tree

5 files changed

+141
-70
lines changed

5 files changed

+141
-70
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ authors = ["Neville Dipale <nevilledips@gmail.com>"]
55
edition = "2018"
66

77
[dependencies]
8-
arrow = { git = "https://github.com/nevi-me/arrow", branch="rust/json-reader"}
8+
arrow = { git = "https://github.com/nevi-me/arrow", branch="arrow-4386"}
99
# arrow = { path = "../../arrow/rust/arrow"}
1010
num = "0.2"
1111
num-traits = "0.2"

src/functions/aggregate.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use arrow::array::Array;
22
use arrow::array::{Int64Array, PrimitiveArray};
3-
use arrow::array_ops;
3+
use arrow::compute;
44
use arrow::datatypes::ArrowNumericType;
55
use arrow::datatypes::ArrowPrimitiveType;
66
use arrow::datatypes::Int64Type;
@@ -16,7 +16,7 @@ impl AggregateFunctions {
1616
{
1717
arrays
1818
.iter()
19-
.map(|array| array_ops::max(array).unwrap())
19+
.map(|array| compute::max(array).unwrap())
2020
.max()
2121
}
2222
pub fn min<T>(arrays: Vec<&PrimitiveArray<T>>) -> Option<T::Native>
@@ -26,14 +26,14 @@ impl AggregateFunctions {
2626
{
2727
arrays
2828
.iter()
29-
.map(|array| array_ops::max(array).unwrap())
29+
.map(|array| compute::max(array).unwrap())
3030
.max()
3131
}
3232
// pub fn avg<T>(array: &PrimitiveArray<T>) -> Option<f64>
3333
// where
3434
// T: ArrowNumericType
3535
// {
36-
// let sum = array_ops::sum(array);
36+
// let sum = compute::sum(array);
3737
// match sum {
3838
// None => None,
3939
// Some(sum) => {
@@ -67,7 +67,7 @@ impl AggregateFunctions {
6767
let mut sum = T::default_value();
6868
arrays
6969
.iter()
70-
.for_each(|array| sum = sum + array_ops::sum(array).unwrap_or(T::default_value()));
70+
.for_each(|array| sum = sum + compute::sum(array).unwrap_or(T::default_value()));
7171

7272
Some(sum)
7373
}

src/functions/array.rs

Lines changed: 126 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -51,25 +51,29 @@ impl ArrayFunctions {
5151
} else {
5252
let values = array.values();
5353
let values = values.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
54-
let values = values.value_slice(
55-
array.value_offset(i) as usize,
56-
array.value_length(i) as usize,
57-
).to_vec();
54+
let values = values
55+
.value_slice(
56+
array.value_offset(i) as usize,
57+
array.value_length(i) as usize,
58+
)
59+
.to_vec();
5860
let u = values.unique();
5961
// TODO check how nulls are treated here
6062
u.iter().for_each(|x| b.values().append_value(*x).unwrap());
6163
}
6264
}
6365
Ok(b.finish())
6466
}
65-
pub fn array_except<T>(a: &ListArray, b: &ListArray) -> Result<ListArray, ArrowError>
67+
pub fn array_except<T>(a: &ListArray, b: &ListArray) -> Result<ListArray, ArrowError>
6668
where
6769
T: ArrowPrimitiveType + ArrowNumericType,
6870
T::Native: std::cmp::PartialEq<T::Native> + std::cmp::Ord,
6971
{
7072
// check that lengths of both arrays are equal
7173
if a.len() != b.len() {
72-
return Err(ArrowError::ComputeError("Expected array a and b to have the same length".to_string()))
74+
return Err(ArrowError::ComputeError(
75+
"Expected array a and b to have the same length".to_string(),
76+
));
7377
}
7478
let values_builder: PrimitiveBuilder<T> = PrimitiveBuilder::new(a.values().len());
7579
let mut c = ListBuilder::new(values_builder);
@@ -80,17 +84,21 @@ impl ArrayFunctions {
8084
c.append(true)?
8185
} else {
8286
let a_values = a.values();
83-
let a_values = a_values.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
84-
let a_values = a_values.value_slice(
85-
a.value_offset(i) as usize,
86-
a.value_length(i) as usize,
87-
).to_vec();
87+
let a_values = a_values
88+
.as_any()
89+
.downcast_ref::<PrimitiveArray<T>>()
90+
.unwrap();
91+
let a_values = a_values
92+
.value_slice(a.value_offset(i) as usize, a.value_length(i) as usize)
93+
.to_vec();
8894
let b_values = b.values();
89-
let b_values = b_values.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
90-
let b_values = b_values.value_slice(
91-
b.value_offset(i) as usize,
92-
b.value_length(i) as usize,
93-
).to_vec();
95+
let b_values = b_values
96+
.as_any()
97+
.downcast_ref::<PrimitiveArray<T>>()
98+
.unwrap();
99+
let b_values = b_values
100+
.value_slice(b.value_offset(i) as usize, b.value_length(i) as usize)
101+
.to_vec();
94102

95103
let u = a_values.uniq(b_values);
96104
// TODO check how nulls are treated here
@@ -100,14 +108,16 @@ impl ArrayFunctions {
100108
}
101109
Ok(c.finish())
102110
}
103-
pub fn array_intersect<T>(a: &ListArray, b: &ListArray) -> Result<ListArray, ArrowError>
111+
pub fn array_intersect<T>(a: &ListArray, b: &ListArray) -> Result<ListArray, ArrowError>
104112
where
105113
T: ArrowPrimitiveType + ArrowNumericType,
106114
T::Native: std::cmp::PartialEq<T::Native> + std::cmp::Ord,
107115
{
108116
// check that lengths of both arrays are equal
109117
if a.len() != b.len() {
110-
return Err(ArrowError::ComputeError("Expected array a and b to have the same length".to_string()))
118+
return Err(ArrowError::ComputeError(
119+
"Expected array a and b to have the same length".to_string(),
120+
));
111121
}
112122
let values_builder: PrimitiveBuilder<T> = PrimitiveBuilder::new(a.values().len());
113123
let mut c = ListBuilder::new(values_builder);
@@ -118,17 +128,21 @@ impl ArrayFunctions {
118128
c.append(true)?
119129
} else {
120130
let a_values = a.values();
121-
let a_values = a_values.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
122-
let a_values = a_values.value_slice(
123-
a.value_offset(i) as usize,
124-
a.value_length(i) as usize,
125-
).to_vec();
131+
let a_values = a_values
132+
.as_any()
133+
.downcast_ref::<PrimitiveArray<T>>()
134+
.unwrap();
135+
let a_values = a_values
136+
.value_slice(a.value_offset(i) as usize, a.value_length(i) as usize)
137+
.to_vec();
126138
let b_values = b.values();
127-
let b_values = b_values.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
128-
let b_values = b_values.value_slice(
129-
b.value_offset(i) as usize,
130-
b.value_length(i) as usize,
131-
).to_vec();
139+
let b_values = b_values
140+
.as_any()
141+
.downcast_ref::<PrimitiveArray<T>>()
142+
.unwrap();
143+
let b_values = b_values
144+
.value_slice(b.value_offset(i) as usize, b.value_length(i) as usize)
145+
.to_vec();
132146

133147
let u = a_values.intersect(b_values);
134148
// TODO check how nulls are treated here
@@ -250,7 +264,7 @@ impl ArrayFunctions {
250264
}
251265

252266
/// TODO: extract repetitive code and share with other array fns that use `array_tool` crate
253-
pub fn array_repeat<T>(array: &ListArray, count: i32) -> Result<ListArray, ArrowError>
267+
pub fn array_repeat<T>(array: &ListArray, count: i32) -> Result<ListArray, ArrowError>
254268
where
255269
T: ArrowPrimitiveType + ArrowNumericType,
256270
T::Native: std::cmp::PartialEq<T::Native> + std::cmp::Ord,
@@ -265,10 +279,12 @@ impl ArrayFunctions {
265279
} else {
266280
let values = array.values();
267281
let values = values.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
268-
let values = values.value_slice(
269-
array.value_offset(i) as usize,
270-
array.value_length(i) as usize,
271-
).to_vec();
282+
let values = values
283+
.value_slice(
284+
array.value_offset(i) as usize,
285+
array.value_length(i) as usize,
286+
)
287+
.to_vec();
272288

273289
let u = values.times(count);
274290
// TODO check how nulls are treated here
@@ -279,8 +295,8 @@ impl ArrayFunctions {
279295
Ok(c.finish())
280296
}
281297

282-
/// Sorts the input array in ascending order.
283-
///
298+
/// Sorts the input array in ascending order.
299+
///
284300
/// TODO: document null treatment, and make it behave like Spark does.
285301
fn array_sort<T>(array: &ListArray) -> Result<ListArray, ArrowError>
286302
where
@@ -310,14 +326,16 @@ impl ArrayFunctions {
310326
}
311327
Ok(b.finish())
312328
}
313-
pub fn array_union<T>(a: &ListArray, b: &ListArray) -> Result<ListArray, ArrowError>
329+
pub fn array_union<T>(a: &ListArray, b: &ListArray) -> Result<ListArray, ArrowError>
314330
where
315331
T: ArrowPrimitiveType + ArrowNumericType,
316332
T::Native: std::cmp::PartialEq<T::Native> + std::cmp::Ord,
317333
{
318334
// check that lengths of both arrays are equal
319335
if a.len() != b.len() {
320-
return Err(ArrowError::ComputeError("Expected array a and b to have the same length".to_string()))
336+
return Err(ArrowError::ComputeError(
337+
"Expected array a and b to have the same length".to_string(),
338+
));
321339
}
322340
let values_builder: PrimitiveBuilder<T> = PrimitiveBuilder::new(a.values().len());
323341
let mut c = ListBuilder::new(values_builder);
@@ -328,17 +346,21 @@ impl ArrayFunctions {
328346
c.append(true)?
329347
} else {
330348
let a_values = a.values();
331-
let a_values = a_values.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
332-
let a_values = a_values.value_slice(
333-
a.value_offset(i) as usize,
334-
a.value_length(i) as usize,
335-
).to_vec();
349+
let a_values = a_values
350+
.as_any()
351+
.downcast_ref::<PrimitiveArray<T>>()
352+
.unwrap();
353+
let a_values = a_values
354+
.value_slice(a.value_offset(i) as usize, a.value_length(i) as usize)
355+
.to_vec();
336356
let b_values = b.values();
337-
let b_values = b_values.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
338-
let b_values = b_values.value_slice(
339-
b.value_offset(i) as usize,
340-
b.value_length(i) as usize,
341-
).to_vec();
357+
let b_values = b_values
358+
.as_any()
359+
.downcast_ref::<PrimitiveArray<T>>()
360+
.unwrap();
361+
let b_values = b_values
362+
.value_slice(b.value_offset(i) as usize, b.value_length(i) as usize)
363+
.to_vec();
342364

343365
let u = a_values.union(b_values);
344366
// TODO check how nulls are treated here
@@ -370,6 +392,7 @@ mod tests {
370392
use arrow::array_data::*;
371393
use arrow::buffer::Buffer;
372394
use arrow::datatypes::*;
395+
use std::sync::Arc;
373396

374397
#[test]
375398
fn test_array_contains_i32s() {
@@ -500,7 +523,10 @@ mod tests {
500523

501524
let b = ArrayFunctions::array_remove::<Int64Type>(&list_array, 2).unwrap();
502525
let values = b.values();
503-
let values = values.as_any().downcast_ref::<PrimitiveArray<Int64Type>>().unwrap();
526+
let values = values
527+
.as_any()
528+
.downcast_ref::<PrimitiveArray<Int64Type>>()
529+
.unwrap();
504530

505531
assert_eq!(6, b.len());
506532
assert_eq!(13, values.len());
@@ -531,7 +557,56 @@ mod tests {
531557

532558
let b = ArrayFunctions::array_sort::<Int64Type>(&list_array).unwrap();
533559
let values = b.values();
534-
let values = values.as_any().downcast_ref::<PrimitiveArray<Int64Type>>().unwrap();
560+
let values = values
561+
.as_any()
562+
.downcast_ref::<PrimitiveArray<Int64Type>>()
563+
.unwrap();
564+
565+
assert_eq!(6, b.len());
566+
assert_eq!(16, values.len());
567+
assert_eq!(0, b.value_offset(0));
568+
assert_eq!(3, b.value_offset(1));
569+
assert_eq!(6, b.value_offset(2));
570+
assert_eq!(8, b.value_offset(3));
571+
assert_eq!(12, b.value_offset(4));
572+
assert_eq!(14, b.value_offset(5));
573+
574+
let expected = Int64Array::from(vec![0, 0, 0, 1, 1, 2, 3, 4, 1, 2, 3, 5, 2, 3, 3, 8]);
575+
for i in 0..b.len() {
576+
let x = values.value_slice(b.value_offset(i) as usize, b.value_length(i) as usize);
577+
let d = expected.value_slice(b.value_offset(i) as usize, b.value_length(i) as usize);
578+
assert_eq!(x, d);
579+
}
580+
}
581+
582+
#[test]
583+
fn test_array_union() {
584+
// Construct a value array
585+
let value_data =
586+
Int64Array::from(vec![0, 0, 0, 1, 2, 1, 3, 4, 5, 1, 3, 2, 3, 2, 8, 3]).data();
587+
588+
let value_offsets = Buffer::from(&[0, 3, 6, 8, 12, 14, 16].to_byte_slice());
589+
590+
let value_data =
591+
Int64Array::from(vec![0, 0, 0, 1, 2, 1, 3, 4, 5, 1, 3, 2, 3, 2, 8, 3]).data();
592+
593+
let value_offsets = Buffer::from(&[0, 3, 6, 8, 12, 14, 16].to_byte_slice());
594+
595+
// Construct a list array from the above two
596+
let list_data_type = DataType::List(Box::new(DataType::Int64));
597+
let list_data = ArrayData::builder(list_data_type.clone())
598+
.len(6)
599+
.add_buffer(value_offsets.clone())
600+
.add_child_data(value_data.clone())
601+
.build();
602+
let list_array = ListArray::from(list_data);
603+
604+
let b = ArrayFunctions::array_sort::<Int64Type>(&list_array).unwrap();
605+
let values = b.values();
606+
let values = values
607+
.as_any()
608+
.downcast_ref::<PrimitiveArray<Int64Type>>()
609+
.unwrap();
535610

536611
assert_eq!(6, b.len());
537612
assert_eq!(16, values.len());
@@ -544,14 +619,8 @@ mod tests {
544619

545620
let expected = Int64Array::from(vec![0, 0, 0, 1, 1, 2, 3, 4, 1, 2, 3, 5, 2, 3, 3, 8]);
546621
for i in 0..b.len() {
547-
let x = values.value_slice(
548-
b.value_offset(i) as usize,
549-
b.value_length(i) as usize
550-
);
551-
let d = expected.value_slice(
552-
b.value_offset(i) as usize,
553-
b.value_length(i) as usize
554-
);
622+
let x = values.value_slice(b.value_offset(i) as usize, b.value_length(i) as usize);
623+
let d = expected.value_slice(b.value_offset(i) as usize, b.value_length(i) as usize);
555624
assert_eq!(x, d);
556625
}
557626
}

src/functions/scalar.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use arrow::array::*;
2-
use arrow::array_ops;
2+
use arrow::compute;
33
use arrow::builder::*;
44
use arrow::datatypes::*;
55
use arrow::error::ArrowError;
@@ -29,7 +29,7 @@ impl ScalarFunctions {
2929
{
3030
left.iter()
3131
.zip(right.iter())
32-
.map(|(a, b)| array_ops::add(a, b).into())
32+
.map(|(a, b)| compute::add(a, b).into())
3333
.collect()
3434
}
3535
/// Subtract two columns of `PrimitiveArray` type together
@@ -47,7 +47,7 @@ impl ScalarFunctions {
4747
{
4848
left.iter()
4949
.zip(right.iter())
50-
.map(|(a, b)| array_ops::subtract(a, b).into())
50+
.map(|(a, b)| compute::subtract(a, b).into())
5151
.collect()
5252
}
5353
pub fn divide<T>(
@@ -64,7 +64,7 @@ impl ScalarFunctions {
6464
{
6565
left.iter()
6666
.zip(right.iter())
67-
.map(|(a, b)| array_ops::divide(a, b).into())
67+
.map(|(a, b)| compute::divide(a, b).into())
6868
.collect()
6969
}
7070
pub fn multiply<T>(
@@ -81,7 +81,7 @@ impl ScalarFunctions {
8181
{
8282
left.iter()
8383
.zip(right.iter())
84-
.map(|(a, b)| array_ops::multiply(a, b).into())
84+
.map(|(a, b)| compute::multiply(a, b).into())
8585
.collect()
8686
}
8787

0 commit comments

Comments
 (0)