Skip to content

Commit 7a5155c

Browse files
authored
Add support for Utf8View in arrow_string::length (#6345)
* Add support for Utf8View in arrow_string::length #6305
* Run cargo fmt.
1 parent b368437 commit 7a5155c

File tree

1 file changed

+32
-3
lines changed

1 file changed

+32
-3
lines changed

arrow-string/src/length.rs

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ fn bit_length_impl<P: ArrowPrimitiveType>(
4848
/// For list array, length is the number of elements in each list.
4949
/// For string array and binary array, length is the number of bytes of each value.
5050
///
51-
/// * this only accepts ListArray/LargeListArray, StringArray/LargeStringArray, BinaryArray/LargeBinaryArray, and FixedSizeListArray,
51+
/// * this only accepts ListArray/LargeListArray, StringArray/LargeStringArray/StringViewArray, BinaryArray/LargeBinaryArray, and FixedSizeListArray,
5252
/// or DictionaryArray with above Arrays as values
5353
/// * length of null is null.
5454
pub fn length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
@@ -74,6 +74,14 @@ pub fn length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
7474
let list = array.as_string::<i64>();
7575
Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
7676
}
77+
DataType::Utf8View => {
78+
let list = array.as_string_view();
79+
let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
80+
Ok(Arc::new(PrimitiveArray::<Int32Type>::new(
81+
v.into(),
82+
list.nulls().cloned(),
83+
)))
84+
}
7785
DataType::Binary => {
7886
let list = array.as_binary::<i32>();
7987
Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
@@ -147,9 +155,15 @@ mod tests {
147155

148156
fn length_cases_string() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
149157
// a large array
150-
let values = ["one", "on", "o", ""];
158+
let values = [
159+
"one",
160+
"on",
161+
"o",
162+
"",
163+
"this is a longer string to test string array with",
164+
];
151165
let values = values.into_iter().cycle().take(4096).collect();
152-
let expected = [3, 2, 1, 0].into_iter().cycle().take(4096).collect();
166+
let expected = [3, 2, 1, 0, 49].into_iter().cycle().take(4096).collect();
153167

154168
vec![
155169
(vec!["hello", " ", "world"], 3, vec![5, 1, 5]),
@@ -210,6 +224,21 @@ mod tests {
210224
})
211225
}
212226

227+
// Exercises `length` with `StringViewArray` input. For every
// `(input, len, expected)` case returned by `length_cases_string`, the result
// must contain `len` rows and each row must equal the matching `expected` value.
#[test]
228+
fn length_test_string_view() {
229+
length_cases_string()
230+
.into_iter()
231+
.for_each(|(input, len, expected)| {
232+
let array = StringViewArray::from(input);
233+
let result = length(&array).unwrap();
234+
assert_eq!(len, result.len());
235+
// The `unwrap` panics (failing the test) if the result is not an `Int32Array`.
let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
236+
// Compare element by element against the expected lengths.
expected.iter().enumerate().for_each(|(i, value)| {
237+
assert_eq!(*value, result.value(i));
238+
});
239+
})
240+
}
241+
213242
#[test]
214243
fn length_test_binary() {
215244
let value: Vec<&[u8]> = vec![b"zero", b"one", &[0xff, 0xf8]];

0 commit comments

Comments
 (0)