Skip to content

Commit fbc3716

Browse files
authored
fix: add support for Decimal and Uuid datum conversion (#1346)
## Which issue does this PR close?

<!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123. -->

N/A

## What changes are included in this PR?

<!-- Provide a summary of the modifications in this PR. List the main changes such as new features, bug fixes, refactoring, or any other updates. -->

This PR adds support for converting Iceberg datums to Arrow datums for the `Decimal` and `Uuid` types.

## Are these changes tested?

<!-- Specify what test covers (unit test, integration test, etc.). If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? -->

Yes
1 parent 05969b3 commit fbc3716

File tree

1 file changed

+121
-7
lines changed

1 file changed

+121
-7
lines changed

crates/iceberg/src/arrow/schema.rs

Lines changed: 121 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -20,12 +20,11 @@
2020
use std::collections::HashMap;
2121
use std::sync::Arc;
2222

23-
use arrow_array::types::{
24-
Decimal128Type, TimestampMicrosecondType, validate_decimal_precision_and_scale,
25-
};
23+
use arrow_array::types::{Decimal128Type, validate_decimal_precision_and_scale};
2624
use arrow_array::{
27-
BooleanArray, Date32Array, Datum as ArrowDatum, Float32Array, Float64Array, Int32Array,
28-
Int64Array, PrimitiveArray, Scalar, StringArray, TimestampMicrosecondArray,
25+
BooleanArray, Date32Array, Datum as ArrowDatum, Decimal128Array, FixedSizeBinaryArray,
26+
Float32Array, Float64Array, Int32Array, Int64Array, Scalar, StringArray,
27+
TimestampMicrosecondArray,
2928
};
3029
use arrow_schema::{DataType, Field, Fields, Schema as ArrowSchema, TimeUnit};
3130
use num_bigint::BigInt;
@@ -677,9 +676,19 @@ pub(crate) fn get_arrow_datum(datum: &Datum) -> Result<Arc<dyn ArrowDatum + Send
677676
Ok(Arc::new(TimestampMicrosecondArray::new_scalar(*value)))
678677
}
679678
(PrimitiveType::Timestamptz, PrimitiveLiteral::Long(value)) => Ok(Arc::new(Scalar::new(
680-
PrimitiveArray::<TimestampMicrosecondType>::new(vec![*value; 1].into(), None)
681-
.with_timezone("UTC"),
679+
TimestampMicrosecondArray::new(vec![*value; 1].into(), None).with_timezone_utc(),
682680
))),
681+
(PrimitiveType::Decimal { precision, scale }, PrimitiveLiteral::Int128(value)) => {
682+
let array = Decimal128Array::from_value(*value, 1)
683+
.with_precision_and_scale(*precision as _, *scale as _)
684+
.unwrap();
685+
Ok(Arc::new(Scalar::new(array)))
686+
}
687+
(PrimitiveType::Uuid, PrimitiveLiteral::UInt128(value)) => {
688+
let bytes = Uuid::from_u128(*value).into_bytes();
689+
let array = FixedSizeBinaryArray::try_from_iter(vec![bytes].into_iter()).unwrap();
690+
Ok(Arc::new(Scalar::new(array)))
691+
}
683692

684693
(primitive_type, _) => Err(Error::new(
685694
ErrorKind::FeatureUnsupported,
@@ -1007,6 +1016,7 @@ mod tests {
10071016
use std::sync::Arc;
10081017

10091018
use arrow_schema::{DataType, Field, Schema as ArrowSchema, TimeUnit};
1019+
use rust_decimal::Decimal;
10101020

10111021
use super::*;
10121022
use crate::spec::{Literal, Schema};
@@ -1706,4 +1716,108 @@ mod tests {
17061716
assert_eq!(iceberg_type, arrow_type_to_type(&arrow_type).unwrap());
17071717
}
17081718
}
1719+
1720+
// Exercises `get_arrow_datum` with one literal of each kind listed below.
// Every case follows the same steps: build a `Datum`, convert it, downcast the
// returned array to the expected Arrow array type, assert that the result is
// flagged as a scalar, and assert the value stored at index 0.
#[test]
1721+
fn test_datum_conversion() {
1722+
// Boolean: expects a `BooleanArray` holding `true`.
{
1723+
let datum = Datum::bool(true);
1724+
let arrow_datum = get_arrow_datum(&datum).unwrap();
1725+
let (array, is_scalar) = arrow_datum.get();
1726+
let array = array.as_any().downcast_ref::<BooleanArray>().unwrap();
1727+
assert!(is_scalar);
1728+
assert!(array.value(0));
1729+
}
1730+
// Int: expects an `Int32Array` holding 42.
{
1731+
let datum = Datum::int(42);
1732+
let arrow_datum = get_arrow_datum(&datum).unwrap();
1733+
let (array, is_scalar) = arrow_datum.get();
1734+
let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
1735+
assert!(is_scalar);
1736+
assert_eq!(array.value(0), 42);
1737+
}
1738+
// Long: expects an `Int64Array` holding 42.
{
1739+
let datum = Datum::long(42);
1740+
let arrow_datum = get_arrow_datum(&datum).unwrap();
1741+
let (array, is_scalar) = arrow_datum.get();
1742+
let array = array.as_any().downcast_ref::<Int64Array>().unwrap();
1743+
assert!(is_scalar);
1744+
assert_eq!(array.value(0), 42);
1745+
}
1746+
// Float: expects a `Float32Array`; the stored value is compared for exact
// equality against the same literal that was used to build the datum.
{
1747+
let datum = Datum::float(42.42);
1748+
let arrow_datum = get_arrow_datum(&datum).unwrap();
1749+
let (array, is_scalar) = arrow_datum.get();
1750+
let array = array.as_any().downcast_ref::<Float32Array>().unwrap();
1751+
assert!(is_scalar);
1752+
assert_eq!(array.value(0), 42.42);
1753+
}
1754+
// Double: expects a `Float64Array`, again compared for exact equality.
{
1755+
let datum = Datum::double(42.42);
1756+
let arrow_datum = get_arrow_datum(&datum).unwrap();
1757+
let (array, is_scalar) = arrow_datum.get();
1758+
let array = array.as_any().downcast_ref::<Float64Array>().unwrap();
1759+
assert!(is_scalar);
1760+
assert_eq!(array.value(0), 42.42);
1761+
}
1762+
// String: expects a `StringArray` holding "abc".
{
1763+
let datum = Datum::string("abc");
1764+
let arrow_datum = get_arrow_datum(&datum).unwrap();
1765+
let (array, is_scalar) = arrow_datum.get();
1766+
let array = array.as_any().downcast_ref::<StringArray>().unwrap();
1767+
assert!(is_scalar);
1768+
assert_eq!(array.value(0), "abc");
1769+
}
1770+
// Date: expects a `Date32Array` holding the same raw value 42.
{
1771+
let datum = Datum::date(42);
1772+
let arrow_datum = get_arrow_datum(&datum).unwrap();
1773+
let (array, is_scalar) = arrow_datum.get();
1774+
let array = array.as_any().downcast_ref::<Date32Array>().unwrap();
1775+
assert!(is_scalar);
1776+
assert_eq!(array.value(0), 42);
1777+
}
1778+
// Timestamp (microseconds): expects a `TimestampMicrosecondArray` holding 42.
{
1779+
let datum = Datum::timestamp_micros(42);
1780+
let arrow_datum = get_arrow_datum(&datum).unwrap();
1781+
let (array, is_scalar) = arrow_datum.get();
1782+
let array = array
1783+
.as_any()
1784+
.downcast_ref::<TimestampMicrosecondArray>()
1785+
.unwrap();
1786+
assert!(is_scalar);
1787+
assert_eq!(array.value(0), 42);
1788+
}
1789+
// Timestamptz (microseconds): same array type as the plain timestamp case,
// but the array must additionally report its timezone as "+00:00".
{
1790+
let datum = Datum::timestamptz_micros(42);
1791+
let arrow_datum = get_arrow_datum(&datum).unwrap();
1792+
let (array, is_scalar) = arrow_datum.get();
1793+
let array = array
1794+
.as_any()
1795+
.downcast_ref::<TimestampMicrosecondArray>()
1796+
.unwrap();
1797+
assert!(is_scalar);
1798+
assert_eq!(array.timezone(), Some("+00:00"));
1799+
assert_eq!(array.value(0), 42);
1800+
}
1801+
// Decimal: the datum is built from `Decimal::new(123, 2)` with precision 30.
// Expects a `Decimal128Array` reporting precision 30 and scale 2 whose stored
// value at index 0 is 123.
{
1802+
let datum = Datum::decimal_with_precision(Decimal::new(123, 2), 30).unwrap();
1803+
let arrow_datum = get_arrow_datum(&datum).unwrap();
1804+
let (array, is_scalar) = arrow_datum.get();
1805+
let array = array.as_any().downcast_ref::<Decimal128Array>().unwrap();
1806+
assert!(is_scalar);
1807+
assert_eq!(array.precision(), 30);
1808+
assert_eq!(array.scale(), 2);
1809+
assert_eq!(array.value(0), 123);
1810+
}
1811+
// Uuid: expects a `FixedSizeBinaryArray`. Every byte of this UUID is 0x42
// (66 in decimal), so the expected 16-byte value is `[66u8; 16]`.
{
1812+
let datum = Datum::uuid_from_str("42424242-4242-4242-4242-424242424242").unwrap();
1813+
let arrow_datum = get_arrow_datum(&datum).unwrap();
1814+
let (array, is_scalar) = arrow_datum.get();
1815+
let array = array
1816+
.as_any()
1817+
.downcast_ref::<FixedSizeBinaryArray>()
1818+
.unwrap();
1819+
assert!(is_scalar);
1820+
assert_eq!(array.value(0), [66u8; 16]);
1821+
}
1822+
}
17091823
}

0 commit comments

Comments
 (0)