|
36 | 36 | crate::schema::{
|
37 | 37 | ARROW_EXT_TYPE_BITMAP, ARROW_EXT_TYPE_EMPTY_ARRAY, ARROW_EXT_TYPE_EMPTY_MAP,
|
38 | 38 | ARROW_EXT_TYPE_GEOGRAPHY, ARROW_EXT_TYPE_GEOMETRY, ARROW_EXT_TYPE_INTERVAL,
|
39 |
| - ARROW_EXT_TYPE_VARIANT, EXTENSION_KEY, |
| 39 | + ARROW_EXT_TYPE_VARIANT, ARROW_EXT_TYPE_VECTOR, EXTENSION_KEY, |
40 | 40 | },
|
41 | 41 | arrow_array::{
|
42 | 42 | Array as ArrowArray, BinaryArray, BooleanArray, Date32Array, Decimal128Array,
|
@@ -93,6 +93,7 @@ pub enum Value {
|
93 | 93 | Geometry(String),
|
94 | 94 | Geography(String),
|
95 | 95 | Interval(String),
|
| 96 | + Vector(Vec<f32>), |
96 | 97 | }
|
97 | 98 |
|
98 | 99 | impl Value {
|
@@ -145,6 +146,7 @@ impl Value {
|
145 | 146 | Self::Variant(_) => DataType::Variant,
|
146 | 147 | Self::Geometry(_) => DataType::Geometry,
|
147 | 148 | Self::Geography(_) => DataType::Geography,
|
| 149 | + Self::Vector(v) => DataType::Vector(v.len() as u64), |
148 | 150 | }
|
149 | 151 | }
|
150 | 152 | }
|
@@ -229,7 +231,7 @@ impl TryFrom<(&DataType, String)> for Value {
|
229 | 231 | DataType::Geometry => Ok(Self::Geometry(v)),
|
230 | 232 | DataType::Geography => Ok(Self::Geography(v)),
|
231 | 233 | DataType::Interval => Ok(Self::Interval(v)),
|
232 |
| - DataType::Array(_) | DataType::Map(_) | DataType::Tuple(_) => { |
| 234 | + DataType::Array(_) | DataType::Map(_) | DataType::Tuple(_) | DataType::Vector(_) => { |
233 | 235 | let mut reader = Cursor::new(v.as_str());
|
234 | 236 | let decoder = ValueDecoder {};
|
235 | 237 | decoder.read_field(t, &mut reader)
|
@@ -329,6 +331,50 @@ impl TryFrom<(&ArrowField, &Arc<dyn ArrowArray>, usize)> for Value {
|
329 | 331 | None => Err(ConvertError::new("geography", format!("{array:?}")).into()),
|
330 | 332 | }
|
331 | 333 | }
|
| 334 | + ARROW_EXT_TYPE_VECTOR => { |
| 335 | + if field.is_nullable() && array.is_null(seq) { |
| 336 | + return Ok(Value::Null); |
| 337 | + } |
| 338 | + match field.data_type() { |
| 339 | + ArrowDataType::FixedSizeList(_, dimension) => { |
| 340 | + match array |
| 341 | + .as_any() |
| 342 | + .downcast_ref::<arrow_array::FixedSizeListArray>() |
| 343 | + { |
| 344 | + Some(inner_array) => { |
| 345 | + match inner_array |
| 346 | + .value(seq) |
| 347 | + .as_any() |
| 348 | + .downcast_ref::<Float32Array>() |
| 349 | + { |
| 350 | + Some(inner_array) => { |
| 351 | + let dimension = *dimension as usize; |
| 352 | + let mut values = Vec::with_capacity(dimension); |
| 353 | + for i in 0..dimension { |
| 354 | + let value = inner_array.value(i); |
| 355 | + values.push(value); |
| 356 | + } |
| 357 | + Ok(Value::Vector(values)) |
| 358 | + } |
| 359 | + None => Err(ConvertError::new( |
| 360 | + "vector float32", |
| 361 | + format!("{inner_array:?}"), |
| 362 | + ) |
| 363 | + .into()), |
| 364 | + } |
| 365 | + } |
| 366 | + None => { |
| 367 | + Err(ConvertError::new("vector", format!("{array:?}")).into()) |
| 368 | + } |
| 369 | + } |
| 370 | + } |
| 371 | + arrow_type => Err(ConvertError::new( |
| 372 | + "vector", |
| 373 | + format!("Unsupported Arrow type: {arrow_type:?}"), |
| 374 | + ) |
| 375 | + .into()), |
| 376 | + } |
| 377 | + } |
332 | 378 | _ => Err(ConvertError::new(
|
333 | 379 | "extension",
|
334 | 380 | format!("Unsupported extension datatype for arrow field: {field:?}"),
|
@@ -890,6 +936,17 @@ fn encode_value(f: &mut std::fmt::Formatter<'_>, val: &Value, raw: bool) -> std:
|
890 | 936 | write!(f, ")")?;
|
891 | 937 | Ok(())
|
892 | 938 | }
|
| 939 | + Value::Vector(vals) => { |
| 940 | + write!(f, "[")?; |
| 941 | + for (i, val) in vals.iter().enumerate() { |
| 942 | + if i > 0 { |
| 943 | + write!(f, ",")?; |
| 944 | + } |
| 945 | + write!(f, "{val}")?; |
| 946 | + } |
| 947 | + write!(f, "]")?; |
| 948 | + Ok(()) |
| 949 | + } |
893 | 950 | }
|
894 | 951 | }
|
895 | 952 |
|
@@ -1608,6 +1665,7 @@ impl ValueDecoder {
|
1608 | 1665 | DataType::Array(inner_ty) => self.read_array(inner_ty.as_ref(), reader),
|
1609 | 1666 | DataType::Map(inner_ty) => self.read_map(inner_ty.as_ref(), reader),
|
1610 | 1667 | DataType::Tuple(inner_tys) => self.read_tuple(inner_tys.as_ref(), reader),
|
| 1668 | + DataType::Vector(dimension) => self.read_vector(*dimension as usize, reader), |
1611 | 1669 | DataType::Nullable(inner_ty) => self.read_nullable(inner_ty.as_ref(), reader),
|
1612 | 1670 | }
|
1613 | 1671 | }
|
@@ -1812,6 +1870,26 @@ impl ValueDecoder {
|
1812 | 1870 | Ok(Value::Array(vals))
|
1813 | 1871 | }
|
1814 | 1872 |
|
| 1873 | + fn read_vector<R: AsRef<[u8]>>( |
| 1874 | + &self, |
| 1875 | + dimension: usize, |
| 1876 | + reader: &mut Cursor<R>, |
| 1877 | + ) -> Result<Value> { |
| 1878 | + let mut vals = Vec::with_capacity(dimension); |
| 1879 | + reader.must_ignore_byte(b'[')?; |
| 1880 | + for idx in 0..dimension { |
| 1881 | + let _ = reader.ignore_white_spaces(); |
| 1882 | + if idx > 0 { |
| 1883 | + reader.must_ignore_byte(b',')?; |
| 1884 | + } |
| 1885 | + let _ = reader.ignore_white_spaces(); |
| 1886 | + let val: f32 = reader.read_float_text()?; |
| 1887 | + vals.push(val); |
| 1888 | + } |
| 1889 | + reader.must_ignore_byte(b']')?; |
| 1890 | + Ok(Value::Vector(vals)) |
| 1891 | + } |
| 1892 | + |
1815 | 1893 | fn read_map<R: AsRef<[u8]>>(&self, ty: &DataType, reader: &mut Cursor<R>) -> Result<Value> {
|
1816 | 1894 | const KEY: usize = 0;
|
1817 | 1895 | const VALUE: usize = 1;
|
|
0 commit comments