From f25b499f81150d624c03afc162cf16a47345a714 Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Thu, 2 Oct 2025 22:15:45 -0400 Subject: [PATCH 01/26] [Variant] Support variant to `Decimal32/64/128/256` --- .../src/type_conversion.rs | 52 ++++++++++- parquet-variant-compute/src/variant_get.rs | 88 ++++++++++++++++++- .../src/variant_to_arrow.rs | 70 ++++++++++++++- 3 files changed, 201 insertions(+), 9 deletions(-) diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index 5dda1855297a..a2e09e065f1c 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -18,7 +18,7 @@ //! Module for transforming a typed arrow `Array` to `VariantArray`. use arrow::datatypes::{self, ArrowPrimitiveType}; -use parquet_variant::Variant; +use parquet_variant::{Variant, VariantDecimal4}; /// Options for controlling the behavior of `cast_to_variant_with_options`. #[derive(Debug, Clone, PartialEq, Eq)] @@ -61,6 +61,50 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16); impl_primitive_from_variant!(datatypes::Float32Type, as_f32); impl_primitive_from_variant!(datatypes::Float64Type, as_f64); +pub(crate) fn scale_variant_decimal4(variant: &VariantDecimal4, output_scale: i8) -> Option { + let input_scale = variant.scale() as i8; + if input_scale == output_scale { + Some(variant.integer()) + } else if input_scale < output_scale { + scale_up_variant_decimal4(variant, output_scale) + } else { + scale_down_variant_decimal4(variant, output_scale) + } +} + +fn scale_up_variant_decimal4(variant: &VariantDecimal4, output_scale: i8) -> Option { + // scale_up means output has more fractional digits than input + // multiply integer by 10^(output_scale - input_scale) + let input_scale = variant.scale() as i8; + let delta_scale = output_scale - input_scale; + let mul = 10i32.checked_pow(delta_scale as u32)?; + variant.integer().checked_mul(mul) +} + +fn scale_down_variant_decimal4(variant: &VariantDecimal4, output_scale: i8) -> Option { + // scale_down means output has fewer fractional digits than input + // divide by 10^(input_scale - output_scale) with rounding + let input_scale = variant.scale() as i8; + let delta_scale = input_scale - output_scale; + let div = 10i32.checked_pow(delta_scale as u32)?; + + let v = variant.integer(); + let d = v.checked_div(div)?; + let r = v % div; + + // rounding in the same way as convert_to_smaller_scale_decimal in arrow-cast + let half = div.checked_div(2)?; + let half_neg = half.checked_neg()?; + + let adjusted = match v >= 0 { + true if r >= half => d.checked_add(1)?, + false if r <= half_neg => d.checked_sub(1)?, + _ => d, + }; + + Some(adjusted) +} + /// Convert the value at a specific index in the given array into a `Variant`. macro_rules! non_generic_conversion_single_value { ($array:expr, $cast_fn:expr, $index:expr) => {{ @@ -109,8 +153,10 @@ macro_rules! decimal_to_variant_decimal { let (v, scale) = if *$scale < 0 { // For negative scale, we need to multiply the value by 10^|scale| // For example: 123 with scale -2 becomes 12300 with scale 0 - let multiplier = <$value_type>::pow(10, (-*$scale) as u32); - (<$value_type>::checked_mul($v, multiplier), 0u8) + let v = (10 as $value_type) + .checked_pow((-*$scale) as u32) + .and_then(|m| m.checked_mul($v)); + (v, 0u8) } else { (Some($v), *$scale as u8) }; diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 977507af42a3..1faad017978f 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -300,15 +300,15 @@ mod test { use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder}; use crate::VariantArray; use arrow::array::{ - Array, ArrayRef, AsArray, BinaryViewArray, Date32Array, Float32Array, Float64Array, - Int16Array, Int32Array, Int64Array, Int8Array, StringArray, StructArray, + Array, ArrayRef, AsArray, BinaryViewArray, Date32Array, Decimal32Array, Float32Array, + Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, StringArray, StructArray, }; use arrow::buffer::NullBuffer; use arrow::compute::CastOptions; use arrow::datatypes::DataType::{Int16, Int32, Int64}; use arrow_schema::{DataType, Field, FieldRef, Fields}; use chrono::DateTime; - use parquet_variant::{Variant, VariantPath, EMPTY_VARIANT_METADATA_BYTES}; + use parquet_variant::{Variant, VariantDecimal4, VariantPath, EMPTY_VARIANT_METADATA_BYTES}; fn single_variant_get_test(input_json: &str, path: VariantPath, expected_json: &str) { // Create input array from JSON string @@ -2674,4 +2674,86 @@ mod test { Arc::new(struct_array) } + + #[test] + fn get_decimal32_unshredded_var_scales_to_scale2() { + // Build unshredded variant values with different scales + let mut builder = crate::VariantArrayBuilder::new(4); + builder.append_variant(Variant::from(VariantDecimal4::try_new(1234, 2).unwrap())); // 12.34 + builder.append_variant(Variant::from(VariantDecimal4::try_new(1234, 3).unwrap())); // 1.234 + builder.append_variant(Variant::from(VariantDecimal4::try_new(1234, 0).unwrap())); // 1234 + builder.append_null(); + let variant_array: ArrayRef = ArrayRef::from(builder.build()); + + let field = Field::new("result", DataType::Decimal32(9, 2), true); + let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); + let result = variant_get(&variant_array, options).unwrap(); + let result = result.as_any().downcast_ref::().unwrap(); + + assert_eq!(result.precision(), 9); + assert_eq!(result.scale(), 2); + assert_eq!(result.value(0), 1234); + assert_eq!(result.value(1), 123); + assert_eq!(result.value(2), 123400); + assert!(result.is_null(3)); + } + + #[test] + fn get_decimal32_unshredded_scale_down_rounding() { + let mut builder = crate::VariantArrayBuilder::new(2); + builder.append_variant(Variant::from(VariantDecimal4::try_new(1235, 3).unwrap())); + builder.append_variant(Variant::from(VariantDecimal4::try_new(1245, 3).unwrap())); + builder.append_variant(Variant::from(VariantDecimal4::try_new(-1235, 3).unwrap())); + builder.append_variant(Variant::from(VariantDecimal4::try_new(-1245, 3).unwrap())); + let variant_array: ArrayRef = ArrayRef::from(builder.build()); + + let field = Field::new("result", DataType::Decimal32(9, 2), true); + let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); + let result = variant_get(&variant_array, options).unwrap(); + let result = result.as_any().downcast_ref::().unwrap(); + + assert_eq!(result.value(0), 124); + assert_eq!(result.value(1), 125); + assert_eq!(result.value(2), -124); + assert_eq!(result.value(3), -125); + } + + #[test] + fn get_decimal32_precision_overflow_safe() { + // Exceed Decimal32 max precision (9) after scaling + let mut builder = crate::VariantArrayBuilder::new(1); + // 999,999,999 (9 digits) at scale 0 -> to scale 2 becomes 99,999,999,900 (11 digits) overflows + builder.append_variant(Variant::from( + VariantDecimal4::try_new(999_999_999, 0).unwrap(), + )); + let variant_array: ArrayRef = ArrayRef::from(builder.build()); + + let field = Field::new("result", DataType::Decimal32(9, 2), true); + let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); + let result = variant_get(&variant_array, options).unwrap(); + let result = result.as_any().downcast_ref::().unwrap(); + + assert!(result.is_null(0)); + } + + #[test] + fn get_decimal32_precision_overflow_unsafe_errors() { + let mut builder = crate::VariantArrayBuilder::new(1); + builder.append_variant(Variant::from( + VariantDecimal4::try_new(999_999_999, 0).unwrap(), + )); + let variant_array: ArrayRef = ArrayRef::from(builder.build()); + + let field = Field::new("result", DataType::Decimal32(9, 2), true); + let cast_options = CastOptions { + safe: false, + ..Default::default() + }; + let options = GetOptions::new() + .with_as_type(Some(FieldRef::from(field))) + .with_cast_options(cast_options); + let err = variant_get(&variant_array, options).unwrap_err(); + + assert!(err.to_string().contains("Failed to cast to Decimal32(precision=9, scale=2) from variant Decimal4")); + } } diff --git a/parquet-variant-compute/src/variant_to_arrow.rs b/parquet-variant-compute/src/variant_to_arrow.rs index 50249aa63d20..959747880a6c 100644 --- a/parquet-variant-compute/src/variant_to_arrow.rs +++ b/parquet-variant-compute/src/variant_to_arrow.rs @@ -17,11 +17,13 @@ use arrow::array::{ArrayRef, BinaryViewArray, NullBufferBuilder, PrimitiveBuilder}; use arrow::compute::CastOptions; -use arrow::datatypes::{self, ArrowPrimitiveType, DataType}; +use arrow::datatypes::{ + self, is_validate_decimal32_precision, ArrowPrimitiveType, DataType, Decimal32Type, +}; use arrow::error::{ArrowError, Result}; use parquet_variant::{Variant, VariantPath}; -use crate::type_conversion::PrimitiveFromVariant; +use crate::type_conversion::{scale_variant_decimal4, PrimitiveFromVariant}; use crate::{VariantArray, VariantValueArrayBuilder}; use std::sync::Arc; @@ -49,6 +51,7 @@ pub(crate) enum PrimitiveVariantToArrowRowBuilder<'a> { /// with casting of leaf values to specific types. pub(crate) enum VariantToArrowRowBuilder<'a> { Primitive(PrimitiveVariantToArrowRowBuilder<'a>), + Decimal32(VariantToDecimal32ArrowRowBuilder<'a>), BinaryVariant(VariantToBinaryVariantArrowRowBuilder), // Path extraction wrapper - contains a boxed enum for any of the above @@ -113,6 +116,7 @@ impl<'a> VariantToArrowRowBuilder<'a> { use VariantToArrowRowBuilder::*; match self { Primitive(b) => b.append_null(), + Decimal32(b) => b.append_null(), BinaryVariant(b) => b.append_null(), WithPath(path_builder) => path_builder.append_null(), } @@ -122,6 +126,7 @@ impl<'a> VariantToArrowRowBuilder<'a> { use VariantToArrowRowBuilder::*; match self { Primitive(b) => b.append_value(&value), + Decimal32(b) => b.append_value(&value), BinaryVariant(b) => b.append_value(value), WithPath(path_builder) => path_builder.append_value(value), } @@ -131,6 +136,7 @@ impl<'a> VariantToArrowRowBuilder<'a> { use VariantToArrowRowBuilder::*; match self { Primitive(b) => b.finish(), + Decimal32(b) => b.finish(), BinaryVariant(b) => b.finish(), WithPath(path_builder) => path_builder.finish(), } @@ -235,6 +241,9 @@ pub(crate) fn make_variant_to_arrow_row_builder<'a>( "Converting unshredded variant arrays to arrow lists".to_string(), )); } + Some(DataType::Decimal32(precision, scale)) => Decimal32( + VariantToDecimal32ArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?, + ), Some(data_type) => { let builder = make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?; @@ -299,7 +308,7 @@ fn get_type_name() -> &'static str { /// Builder for converting variant values to primitive values pub(crate) struct VariantToPrimitiveArrowRowBuilder<'a, T: PrimitiveFromVariant> { - builder: arrow::array::PrimitiveBuilder, + builder: PrimitiveBuilder, cast_options: &'a CastOptions<'a>, } @@ -342,6 +351,61 @@ impl<'a, T: PrimitiveFromVariant> VariantToPrimitiveArrowRowBuilder<'a, T> { } } +/// Builder for converting variant values to Decimal32 values +pub(crate) struct VariantToDecimal32ArrowRowBuilder<'a> { + builder: PrimitiveBuilder, + cast_options: &'a CastOptions<'a>, + precision: u8, + scale: i8, +} + +impl<'a> VariantToDecimal32ArrowRowBuilder<'a> { + fn new( + cast_options: &'a CastOptions<'a>, + capacity: usize, + precision: u8, + scale: i8, + ) -> Result { + let builder = PrimitiveBuilder::::with_capacity(capacity) + .with_precision_and_scale(precision, scale)?; + Ok(Self { + builder, + cast_options, + precision, + scale, + }) + } + + fn append_null(&mut self) -> Result<()> { + self.builder.append_null(); + Ok(()) + } + + fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { + let maybe_scaled = value + .as_decimal4() + .and_then(|decimal| scale_variant_decimal4(&decimal, self.scale)) + .filter(|scaled| is_validate_decimal32_precision(*scaled, self.precision)); + + if let Some(scaled) = maybe_scaled { + self.builder.append_value(scaled); + Ok(true) + } else if self.cast_options.safe { + self.builder.append_null(); + Ok(false) + } else { + Err(ArrowError::CastError(format!( + "Failed to cast to Decimal32(precision={}, scale={}) from variant {:?}", + self.precision, self.scale, value + ))) + } + } + + fn finish(mut self) -> Result { + Ok(Arc::new(self.builder.finish())) + } +} + /// Builder for creating VariantArray output (for path extraction without type conversion) pub(crate) struct VariantToBinaryVariantArrowRowBuilder { metadata: BinaryViewArray, From 7a32191fe61d1adfe60fcf8948dc9d3fa60cedf5 Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Sat, 4 Oct 2025 10:16:02 -0400 Subject: [PATCH 02/26] Simplify logic --- .../src/type_conversion.rs | 71 +++++++++---------- .../src/variant_to_arrow.rs | 9 +-- 2 files changed, 37 insertions(+), 43 deletions(-) diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index a2e09e065f1c..0e9fb8d0f7d4 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -17,7 +17,7 @@ //! Module for transforming a typed arrow `Array` to `VariantArray`. -use arrow::datatypes::{self, ArrowPrimitiveType}; +use arrow::datatypes::{self, is_validate_decimal32_precision, ArrowPrimitiveType}; use parquet_variant::{Variant, VariantDecimal4}; /// Options for controlling the behavior of `cast_to_variant_with_options`. @@ -61,48 +61,45 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16); impl_primitive_from_variant!(datatypes::Float32Type, as_f32); impl_primitive_from_variant!(datatypes::Float64Type, as_f64); -pub(crate) fn scale_variant_decimal4(variant: &VariantDecimal4, output_scale: i8) -> Option { +pub(crate) fn scale_variant_decimal( + variant: &VariantDecimal4, + output_scale: i8, + precision: u8, +) -> Option { let input_scale = variant.scale() as i8; - if input_scale == output_scale { + let scaled = if input_scale == output_scale { Some(variant.integer()) } else if input_scale < output_scale { - scale_up_variant_decimal4(variant, output_scale) + // scale_up means output has more fractional digits than input + // multiply integer by 10^(output_scale - input_scale) + let input_scale = variant.scale() as i8; + let delta_scale = output_scale - input_scale; + let mul = 10i32.checked_pow(delta_scale as u32)?; + variant.integer().checked_mul(mul) } else { - scale_down_variant_decimal4(variant, output_scale) - } -} - -fn scale_up_variant_decimal4(variant: &VariantDecimal4, output_scale: i8) -> Option { - // scale_up means output has more fractional digits than input - // multiply integer by 10^(output_scale - input_scale) - let input_scale = variant.scale() as i8; - let delta_scale = output_scale - input_scale; - let mul = 10i32.checked_pow(delta_scale as u32)?; - variant.integer().checked_mul(mul) -} - -fn scale_down_variant_decimal4(variant: &VariantDecimal4, output_scale: i8) -> Option { - // scale_down means output has fewer fractional digits than input - // divide by 10^(input_scale - output_scale) with rounding - let input_scale = variant.scale() as i8; - let delta_scale = input_scale - output_scale; - let div = 10i32.checked_pow(delta_scale as u32)?; - - let v = variant.integer(); - let d = v.checked_div(div)?; - let r = v % div; - - // rounding in the same way as convert_to_smaller_scale_decimal in arrow-cast - let half = div.checked_div(2)?; - let half_neg = half.checked_neg()?; - - let adjusted = match v >= 0 { - true if r >= half => d.checked_add(1)?, - false if r <= half_neg => d.checked_sub(1)?, - _ => d, + // scale_down means output has fewer fractional digits than input + // divide by 10^(input_scale - output_scale) with rounding + let input_scale = variant.scale() as i8; + let delta_scale = input_scale - output_scale; + let div = 10i32.checked_pow(delta_scale as u32)?; + + let v = variant.integer(); + let d = v.checked_div(div)?; + let r = v % div; + + // rounding in the same way as convert_to_smaller_scale_decimal in arrow-cast + let half = div.checked_div(2)?; + let half_neg = half.checked_neg()?; + + let adjusted = match v >= 0 { + true if r >= half => d.checked_add(1)?, + false if r <= half_neg => d.checked_sub(1)?, + _ => d, + }; + Some(adjusted) }; - Some(adjusted) + scaled.filter(|v| is_validate_decimal32_precision(*v, precision)) } /// Convert the value at a specific index in the given array into a `Variant`. diff --git a/parquet-variant-compute/src/variant_to_arrow.rs b/parquet-variant-compute/src/variant_to_arrow.rs index 959747880a6c..6bc2a6535958 100644 --- a/parquet-variant-compute/src/variant_to_arrow.rs +++ b/parquet-variant-compute/src/variant_to_arrow.rs @@ -17,13 +17,11 @@ use arrow::array::{ArrayRef, BinaryViewArray, NullBufferBuilder, PrimitiveBuilder}; use arrow::compute::CastOptions; -use arrow::datatypes::{ - self, is_validate_decimal32_precision, ArrowPrimitiveType, DataType, Decimal32Type, -}; +use arrow::datatypes::{self, ArrowPrimitiveType, DataType, Decimal32Type}; use arrow::error::{ArrowError, Result}; use parquet_variant::{Variant, VariantPath}; -use crate::type_conversion::{scale_variant_decimal4, PrimitiveFromVariant}; +use crate::type_conversion::{scale_variant_decimal, PrimitiveFromVariant}; use crate::{VariantArray, VariantValueArrayBuilder}; use std::sync::Arc; @@ -384,8 +382,7 @@ impl<'a> VariantToDecimal32ArrowRowBuilder<'a> { fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { let maybe_scaled = value .as_decimal4() - .and_then(|decimal| scale_variant_decimal4(&decimal, self.scale)) - .filter(|scaled| is_validate_decimal32_precision(*scaled, self.precision)); + .and_then(|decimal| scale_variant_decimal(&decimal, self.scale, self.precision)); if let Some(scaled) = maybe_scaled { self.builder.append_value(scaled); From 02d29dec58d7f00ada0e06ee55bb23056e8970e9 Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Sat, 4 Oct 2025 11:08:42 -0400 Subject: [PATCH 03/26] Using macro to generalize --- .../src/type_conversion.rs | 82 +++++++++---------- .../src/variant_to_arrow.rs | 15 +++- 2 files changed, 52 insertions(+), 45 deletions(-) diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index 0e9fb8d0f7d4..62360328d54a 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -17,8 +17,8 @@ //! Module for transforming a typed arrow `Array` to `VariantArray`. -use arrow::datatypes::{self, is_validate_decimal32_precision, ArrowPrimitiveType}; -use parquet_variant::{Variant, VariantDecimal4}; +use arrow::datatypes::{self, ArrowPrimitiveType}; +use parquet_variant::Variant; /// Options for controlling the behavior of `cast_to_variant_with_options`. #[derive(Debug, Clone, PartialEq, Eq)] @@ -61,46 +61,46 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16); impl_primitive_from_variant!(datatypes::Float32Type, as_f32); impl_primitive_from_variant!(datatypes::Float64Type, as_f64); -pub(crate) fn scale_variant_decimal( - variant: &VariantDecimal4, - output_scale: i8, - precision: u8, -) -> Option { - let input_scale = variant.scale() as i8; - let scaled = if input_scale == output_scale { - Some(variant.integer()) - } else if input_scale < output_scale { - // scale_up means output has more fractional digits than input - // multiply integer by 10^(output_scale - input_scale) - let input_scale = variant.scale() as i8; - let delta_scale = output_scale - input_scale; - let mul = 10i32.checked_pow(delta_scale as u32)?; - variant.integer().checked_mul(mul) - } else { - // scale_down means output has fewer fractional digits than input - // divide by 10^(input_scale - output_scale) with rounding - let input_scale = variant.scale() as i8; - let delta_scale = input_scale - output_scale; - let div = 10i32.checked_pow(delta_scale as u32)?; - - let v = variant.integer(); - let d = v.checked_div(div)?; - let r = v % div; - - // rounding in the same way as convert_to_smaller_scale_decimal in arrow-cast - let half = div.checked_div(2)?; - let half_neg = half.checked_neg()?; - - let adjusted = match v >= 0 { - true if r >= half => d.checked_add(1)?, - false if r <= half_neg => d.checked_sub(1)?, - _ => d, - }; - Some(adjusted) - }; - - scaled.filter(|v| is_validate_decimal32_precision(*v, precision)) +macro_rules! scale_variant_decimal { + ($variant:expr, $variant_method:ident, $output_scale:expr, $int_ty:ty, $validate:path, $precision:expr) => {{ + (|| -> Option<$int_ty> { + let variant = $variant.$variant_method()?; + let input_scale = variant.scale() as i8; + let ten: $int_ty = 10 as $int_ty; + + let scaled: Option<$int_ty> = if input_scale == $output_scale { + Some(variant.integer()) + } else if input_scale < $output_scale { + // scale_up means output has more fractional digits than input + // multiply integer by 10^(output_scale - input_scale) + let delta = ($output_scale - input_scale) as u32; + let mul = ten.checked_pow(delta)?; + variant.integer().checked_mul(mul) + } else { + // scale_down means output has fewer fractional digits than input + // divide by 10^(input_scale - output_scale) with rounding + let delta = (input_scale - $output_scale) as u32; + let div = ten.checked_pow(delta)?; + let v = variant.integer(); + let d = v.checked_div(div)?; + let r = v % div; + + let half = div.checked_div(2)?; + let half_neg = half.checked_neg()?; + + let adjusted = match v >= 0 { + true if r >= half => d.checked_add(1)?, + false if r <= half_neg => d.checked_sub(1)?, + _ => d, + }; + Some(adjusted) + }; + + scaled.filter(|v| $validate(*v, $precision)) + })() + }}; } +pub(crate) use scale_variant_decimal; /// Convert the value at a specific index in the given array into a `Variant`. macro_rules! non_generic_conversion_single_value { diff --git a/parquet-variant-compute/src/variant_to_arrow.rs b/parquet-variant-compute/src/variant_to_arrow.rs index 6bc2a6535958..01522a6f9c77 100644 --- a/parquet-variant-compute/src/variant_to_arrow.rs +++ b/parquet-variant-compute/src/variant_to_arrow.rs @@ -17,7 +17,9 @@ use arrow::array::{ArrayRef, BinaryViewArray, NullBufferBuilder, PrimitiveBuilder}; use arrow::compute::CastOptions; -use arrow::datatypes::{self, ArrowPrimitiveType, DataType, Decimal32Type}; +use arrow::datatypes::{ + self, is_validate_decimal32_precision, ArrowPrimitiveType, DataType, Decimal32Type, +}; use arrow::error::{ArrowError, Result}; use parquet_variant::{Variant, VariantPath}; @@ -380,9 +382,14 @@ impl<'a> VariantToDecimal32ArrowRowBuilder<'a> { } fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { - let maybe_scaled = value - .as_decimal4() - .and_then(|decimal| scale_variant_decimal(&decimal, self.scale, self.precision)); + let maybe_scaled = scale_variant_decimal!( + value, + as_decimal4, + self.scale, + i32, + is_validate_decimal32_precision, + self.precision + ); if let Some(scaled) = maybe_scaled { self.builder.append_value(scaled); From f498db56dbde1f2ab297f6bc78d82709e939fbd9 Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Sat, 4 Oct 2025 12:19:41 -0400 Subject: [PATCH 04/26] Support i256 and Decimal256 --- .../src/type_conversion.rs | 23 +- parquet-variant-compute/src/variant_get.rs | 305 +++++++++++++++++- .../src/variant_to_arrow.rs | 225 ++++++++++++- 3 files changed, 523 insertions(+), 30 deletions(-) diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index 62360328d54a..d329a0439394 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -62,35 +62,36 @@ impl_primitive_from_variant!(datatypes::Float32Type, as_f32); impl_primitive_from_variant!(datatypes::Float64Type, as_f64); macro_rules! scale_variant_decimal { - ($variant:expr, $variant_method:ident, $output_scale:expr, $int_ty:ty, $validate:path, $precision:expr) => {{ - (|| -> Option<$int_ty> { + ($variant:expr, $variant_method:ident, $to_int_ty:expr, $output_scale:expr, $precision:expr, $validate:path) => {{ + (|| -> Option<_> { let variant = $variant.$variant_method()?; let input_scale = variant.scale() as i8; - let ten: $int_ty = 10 as $int_ty; + let variant = $to_int_ty(variant.integer()); + let ten = $to_int_ty(10); - let scaled: Option<$int_ty> = if input_scale == $output_scale { - Some(variant.integer()) + let scaled = if input_scale == $output_scale { + Some(variant) } else if input_scale < $output_scale { // scale_up means output has more fractional digits than input // multiply integer by 10^(output_scale - input_scale) let delta = ($output_scale - input_scale) as u32; let mul = ten.checked_pow(delta)?; - variant.integer().checked_mul(mul) + variant.checked_mul(mul) } else { // scale_down means output has fewer fractional digits than input // divide by 10^(input_scale - output_scale) with rounding let delta = (input_scale - $output_scale) as u32; let div = ten.checked_pow(delta)?; - let v = variant.integer(); + let v = variant; let d = v.checked_div(div)?; let r = v % div; - let half = div.checked_div(2)?; + let half = div.checked_div($to_int_ty(2))?; let half_neg = half.checked_neg()?; - let adjusted = match v >= 0 { - true if r >= half => d.checked_add(1)?, - false if r <= half_neg => d.checked_sub(1)?, + let adjusted = match v >= $to_int_ty(0) { + true if r >= half => d.checked_add($to_int_ty(1))?, + false if r <= half_neg => d.checked_sub($to_int_ty(1))?, _ => d, }; Some(adjusted) diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 1faad017978f..91d04f5f5cd0 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -300,15 +300,23 @@ mod test { use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder}; use crate::VariantArray; use arrow::array::{ - Array, ArrayRef, AsArray, BinaryViewArray, Date32Array, Decimal32Array, Float32Array, - Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, StringArray, StructArray, + Array, ArrayRef, AsArray, BinaryViewArray, Date32Array, Decimal128Array, Decimal256Array, + Decimal32Array, Decimal64Array, Float32Array, Float64Array, Int16Array, Int32Array, + Int64Array, Int8Array, StringArray, StructArray, }; use arrow::buffer::NullBuffer; use arrow::compute::CastOptions; + use arrow::datatypes::i256; use arrow::datatypes::DataType::{Int16, Int32, Int64}; - use arrow_schema::{DataType, Field, FieldRef, Fields}; + use arrow_schema::{ + DataType, Field, FieldRef, Fields, DECIMAL128_MAX_PRECISION, DECIMAL32_MAX_PRECISION, + DECIMAL64_MAX_PRECISION, + }; use chrono::DateTime; - use parquet_variant::{Variant, VariantDecimal4, VariantPath, EMPTY_VARIANT_METADATA_BYTES}; + use parquet_variant::{ + Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8, VariantPath, + EMPTY_VARIANT_METADATA_BYTES, + }; fn single_variant_get_test(input_json: &str, path: VariantPath, expected_json: &str) { // Create input array from JSON string @@ -2675,6 +2683,18 @@ mod test { Arc::new(struct_array) } + macro_rules! max_unscaled_value { + (32, $precision:expr) => { + (u32::pow(10, $precision as u32) - 1) as i32 + }; + (64, $precision:expr) => { + (u64::pow(10, $precision as u32) - 1) as i64 + }; + (128, $precision:expr) => { + (u128::pow(10, $precision as u32) - 1) as i128 + }; + } + #[test] fn get_decimal32_unshredded_var_scales_to_scale2() { // Build unshredded variant values with different scales @@ -2722,9 +2742,8 @@ mod test { fn get_decimal32_precision_overflow_safe() { // Exceed Decimal32 max precision (9) after scaling let mut builder = crate::VariantArrayBuilder::new(1); - // 999,999,999 (9 digits) at scale 0 -> to scale 2 becomes 99,999,999,900 (11 digits) overflows builder.append_variant(Variant::from( - VariantDecimal4::try_new(999_999_999, 0).unwrap(), + VariantDecimal4::try_new(max_unscaled_value!(32, DECIMAL32_MAX_PRECISION), 0).unwrap(), )); let variant_array: ArrayRef = ArrayRef::from(builder.build()); @@ -2740,7 +2759,7 @@ mod test { fn get_decimal32_precision_overflow_unsafe_errors() { let mut builder = crate::VariantArrayBuilder::new(1); builder.append_variant(Variant::from( - VariantDecimal4::try_new(999_999_999, 0).unwrap(), + VariantDecimal4::try_new(max_unscaled_value!(32, DECIMAL32_MAX_PRECISION), 0).unwrap(), )); let variant_array: ArrayRef = ArrayRef::from(builder.build()); @@ -2754,6 +2773,276 @@ mod test { .with_cast_options(cast_options); let err = variant_get(&variant_array, options).unwrap_err(); - assert!(err.to_string().contains("Failed to cast to Decimal32(precision=9, scale=2) from variant Decimal4")); + assert!(err + .to_string() + .contains("Failed to cast to Decimal32(precision=9, scale=2) from variant Decimal4")); + } + + #[test] + fn get_decimal64_unshredded_var_scales_to_scale2() { + let mut builder = crate::VariantArrayBuilder::new(4); + builder.append_variant(Variant::from(VariantDecimal8::try_new(1234, 2).unwrap())); // 12.34 + builder.append_variant(Variant::from(VariantDecimal8::try_new(1234, 3).unwrap())); // 1.234 + builder.append_variant(Variant::from(VariantDecimal8::try_new(1234, 0).unwrap())); // 1234 + builder.append_null(); + let variant_array: ArrayRef = ArrayRef::from(builder.build()); + + let field = Field::new("result", DataType::Decimal64(18, 2), true); + let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); + let result = variant_get(&variant_array, options).unwrap(); + let result = result.as_any().downcast_ref::().unwrap(); + + assert_eq!(result.precision(), 18); + assert_eq!(result.scale(), 2); + assert_eq!(result.value(0), 1234); + assert_eq!(result.value(1), 123); + assert_eq!(result.value(2), 123400); + assert!(result.is_null(3)); + } + + #[test] + fn get_decimal64_unshredded_scale_down_rounding() { + let mut builder = crate::VariantArrayBuilder::new(2); + builder.append_variant(Variant::from(VariantDecimal8::try_new(1235, 3).unwrap())); + builder.append_variant(Variant::from(VariantDecimal8::try_new(1245, 3).unwrap())); + builder.append_variant(Variant::from(VariantDecimal8::try_new(-1235, 3).unwrap())); + builder.append_variant(Variant::from(VariantDecimal8::try_new(-1245, 3).unwrap())); + let variant_array: ArrayRef = ArrayRef::from(builder.build()); + + let field = Field::new("result", DataType::Decimal64(18, 2), true); + let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); + let result = variant_get(&variant_array, options).unwrap(); + let result = result.as_any().downcast_ref::().unwrap(); + + assert_eq!(result.value(0), 124); + assert_eq!(result.value(1), 125); + assert_eq!(result.value(2), -124); + assert_eq!(result.value(3), -125); + } + + #[test] + fn get_decimal64_precision_overflow_safe() { + // Exceed Decimal64 max precision (18) after scaling + let mut builder = crate::VariantArrayBuilder::new(1); + builder.append_variant(Variant::from( + VariantDecimal8::try_new(max_unscaled_value!(64, DECIMAL64_MAX_PRECISION), 0).unwrap(), + )); + let variant_array: ArrayRef = ArrayRef::from(builder.build()); + + let field = Field::new("result", DataType::Decimal64(18, 2), true); + let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); + let result = variant_get(&variant_array, options).unwrap(); + let result = result.as_any().downcast_ref::().unwrap(); + + assert!(result.is_null(0)); + } + + #[test] + fn get_decimal64_precision_overflow_unsafe_errors() { + let mut builder = crate::VariantArrayBuilder::new(1); + builder.append_variant(Variant::from( + VariantDecimal8::try_new(max_unscaled_value!(64, DECIMAL64_MAX_PRECISION), 0).unwrap(), + )); + let variant_array: ArrayRef = ArrayRef::from(builder.build()); + + let field = Field::new("result", DataType::Decimal64(18, 2), true); + let cast_options = CastOptions { + safe: false, + ..Default::default() + }; + let options = GetOptions::new() + .with_as_type(Some(FieldRef::from(field))) + .with_cast_options(cast_options); + let err = variant_get(&variant_array, options).unwrap_err(); + + assert!(err + .to_string() + .contains("Failed to cast to Decimal64(precision=18, scale=2) from variant Decimal8")); + } + + #[test] + fn get_decimal128_unshredded_var_scales_to_scale2() { + let mut builder = crate::VariantArrayBuilder::new(4); + builder.append_variant(Variant::from(VariantDecimal16::try_new(1234, 2).unwrap())); + builder.append_variant(Variant::from(VariantDecimal16::try_new(1234, 3).unwrap())); + builder.append_variant(Variant::from(VariantDecimal16::try_new(1234, 0).unwrap())); + builder.append_null(); + let variant_array: ArrayRef = ArrayRef::from(builder.build()); + + let field = Field::new("result", DataType::Decimal128(38, 2), true); + let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); + let result = variant_get(&variant_array, options).unwrap(); + let result = result.as_any().downcast_ref::().unwrap(); + + assert_eq!(result.precision(), 38); + assert_eq!(result.scale(), 2); + assert_eq!(result.value(0), 1234); + assert_eq!(result.value(1), 123); + assert_eq!(result.value(2), 123400); + assert!(result.is_null(3)); + } + + #[test] + fn get_decimal128_unshredded_scale_down_rounding() { + let mut builder = crate::VariantArrayBuilder::new(2); + builder.append_variant(Variant::from(VariantDecimal16::try_new(1235, 3).unwrap())); + builder.append_variant(Variant::from(VariantDecimal16::try_new(1245, 3).unwrap())); + builder.append_variant(Variant::from(VariantDecimal16::try_new(-1235, 3).unwrap())); + builder.append_variant(Variant::from(VariantDecimal16::try_new(-1245, 3).unwrap())); + let variant_array: ArrayRef = ArrayRef::from(builder.build()); + + let field = Field::new("result", DataType::Decimal128(38, 2), true); + let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); + let result = variant_get(&variant_array, options).unwrap(); + let result = result.as_any().downcast_ref::().unwrap(); + + assert_eq!(result.value(0), 124); + assert_eq!(result.value(1), 125); + assert_eq!(result.value(2), -124); + assert_eq!(result.value(3), -125); + } + + #[test] + fn get_decimal128_precision_overflow_safe() { + // Exceed Decimal128 max precision (38) after scaling + let mut builder = crate::VariantArrayBuilder::new(1); + builder.append_variant(Variant::from( + VariantDecimal16::try_new(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION), 0) + .unwrap(), + )); + let variant_array: ArrayRef = ArrayRef::from(builder.build()); + + let field = Field::new("result", DataType::Decimal128(38, 2), true); + let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); + let result = variant_get(&variant_array, options).unwrap(); + let result = result.as_any().downcast_ref::().unwrap(); + + assert!(result.is_null(0)); + } + + #[test] + fn get_decimal128_precision_overflow_unsafe_errors() { + let mut builder = crate::VariantArrayBuilder::new(1); + builder.append_variant(Variant::from( + VariantDecimal16::try_new(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION), 0) + .unwrap(), + )); + let variant_array: ArrayRef = ArrayRef::from(builder.build()); + + let field = Field::new("result", DataType::Decimal128(38, 2), true); + let cast_options = CastOptions { + safe: false, + ..Default::default() + }; + let options = GetOptions::new() + .with_as_type(Some(FieldRef::from(field))) + .with_cast_options(cast_options); + let err = variant_get(&variant_array, options).unwrap_err(); + + assert!(err.to_string().contains( + "Failed to cast to Decimal128(precision=38, scale=2) from variant Decimal16" + )); + } + + #[test] + fn get_decimal256_unshredded_var_scales_to_scale2_from_decimal16() { + // Build unshredded variant values with different scales using Decimal16 source + let mut builder = crate::VariantArrayBuilder::new(4); + builder.append_variant(Variant::from(VariantDecimal16::try_new(1234, 2).unwrap())); // 12.34 + builder.append_variant(Variant::from(VariantDecimal16::try_new(1234, 3).unwrap())); // 1.234 + builder.append_variant(Variant::from(VariantDecimal16::try_new(1234, 0).unwrap())); // 1234 + builder.append_null(); + let variant_array: ArrayRef = ArrayRef::from(builder.build()); + + let field = Field::new("result", DataType::Decimal256(76, 2), true); + let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); + let result = variant_get(&variant_array, options).unwrap(); + let result = result.as_any().downcast_ref::().unwrap(); + + assert_eq!(result.precision(), 76); + assert_eq!(result.scale(), 2); + assert_eq!(result.value(0), i256::from_i128(1234)); + assert_eq!(result.value(1), i256::from_i128(123)); + assert_eq!(result.value(2), i256::from_i128(123400)); + assert!(result.is_null(3)); + } + + #[test] + fn get_decimal256_unshredded_scale_down_rounding() { + let mut builder = crate::VariantArrayBuilder::new(2); + builder.append_variant(Variant::from(VariantDecimal16::try_new(1235, 3).unwrap())); + builder.append_variant(Variant::from(VariantDecimal16::try_new(1245, 3).unwrap())); + builder.append_variant(Variant::from(VariantDecimal16::try_new(-1235, 3).unwrap())); + builder.append_variant(Variant::from(VariantDecimal16::try_new(-1245, 3).unwrap())); + let variant_array: ArrayRef = ArrayRef::from(builder.build()); + + let field = Field::new("result", DataType::Decimal256(76, 2), true); + let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); + let result = variant_get(&variant_array, options).unwrap(); + let result = result.as_any().downcast_ref::().unwrap(); + + assert_eq!(result.value(0), i256::from_i128(124)); + assert_eq!(result.value(1), i256::from_i128(125)); + assert_eq!(result.value(2), i256::from_i128(-124)); + assert_eq!(result.value(3), i256::from_i128(-125)); + } + + #[test] + fn get_decimal256_precision_overflow_safe() { + // Exceed Decimal128 max precision (38) after scaling + let mut builder = crate::VariantArrayBuilder::new(2); + builder.append_variant(Variant::from( + VariantDecimal16::try_new(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION), 1) + .unwrap(), + )); + builder.append_variant(Variant::from( + VariantDecimal16::try_new(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION), 0) + .unwrap(), + )); + let variant_array: ArrayRef = ArrayRef::from(builder.build()); + + let field = Field::new("result", DataType::Decimal256(76, 39), true); + let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); + let result = variant_get(&variant_array, options).unwrap(); + let result = result.as_any().downcast_ref::().unwrap(); + + // Input is Decimal16 with integer = 10^38-1 and scale = 1, target scale = 39 + // So expected integer is (10^38-1) * 10^(39-1) = (10^38-1) * 10^38 + let base = i256::from_i128(10); + let factor = base.checked_pow(38).unwrap(); + let expected = i256::from_i128(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION)) + .checked_mul(factor) + .unwrap(); + assert_eq!(result.value(0), expected); + assert!(result.is_null(1)); + } + + #[test] + fn get_decimal256_precision_overflow_unsafe_errors() { + // Exceed Decimal128 max precision (38) after scaling + let mut builder = crate::VariantArrayBuilder::new(2); + builder.append_variant(Variant::from( + VariantDecimal16::try_new(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION), 1) + .unwrap(), + )); + builder.append_variant(Variant::from( + VariantDecimal16::try_new(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION), 0) + .unwrap(), + )); + let variant_array: ArrayRef = ArrayRef::from(builder.build()); + + let field = Field::new("result", DataType::Decimal256(76, 39), true); + let cast_options = CastOptions { + safe: false, + ..Default::default() + }; + let options = GetOptions::new() + .with_as_type(Some(FieldRef::from(field))) + .with_cast_options(cast_options); + let err = variant_get(&variant_array, options).unwrap_err(); + + assert!(err.to_string().contains( + "Failed to cast to Decimal256(precision=76, scale=39) from variant Decimal16" + )); } } diff --git a/parquet-variant-compute/src/variant_to_arrow.rs b/parquet-variant-compute/src/variant_to_arrow.rs index 01522a6f9c77..d4e619fa7c79 100644 --- a/parquet-variant-compute/src/variant_to_arrow.rs +++ b/parquet-variant-compute/src/variant_to_arrow.rs @@ -18,7 +18,9 @@ use arrow::array::{ArrayRef, BinaryViewArray, NullBufferBuilder, PrimitiveBuilder}; use arrow::compute::CastOptions; use arrow::datatypes::{ - self, is_validate_decimal32_precision, ArrowPrimitiveType, DataType, Decimal32Type, + self, i256, is_validate_decimal256_precision, is_validate_decimal32_precision, + is_validate_decimal64_precision, is_validate_decimal_precision, ArrowPrimitiveType, DataType, + Decimal128Type, Decimal256Type, Decimal32Type, Decimal64Type, }; use arrow::error::{ArrowError, Result}; use parquet_variant::{Variant, VariantPath}; @@ -43,6 +45,10 @@ pub(crate) enum PrimitiveVariantToArrowRowBuilder<'a> { Float16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float16Type>), Float32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float32Type>), Float64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float64Type>), + Decimal32(VariantToDecimal32ArrowRowBuilder<'a>), + Decimal64(VariantToDecimal64ArrowRowBuilder<'a>), + Decimal128(VariantToDecimal128ArrowRowBuilder<'a>), + Decimal256(VariantToDecimal256ArrowRowBuilder<'a>), } /// Builder for converting variant values into strongly typed Arrow arrays. @@ -51,7 +57,6 @@ pub(crate) enum PrimitiveVariantToArrowRowBuilder<'a> { /// with casting of leaf values to specific types. pub(crate) enum VariantToArrowRowBuilder<'a> { Primitive(PrimitiveVariantToArrowRowBuilder<'a>), - Decimal32(VariantToDecimal32ArrowRowBuilder<'a>), BinaryVariant(VariantToBinaryVariantArrowRowBuilder), // Path extraction wrapper - contains a boxed enum for any of the above @@ -73,6 +78,10 @@ impl<'a> PrimitiveVariantToArrowRowBuilder<'a> { Float16(b) => b.append_null(), Float32(b) => b.append_null(), Float64(b) => b.append_null(), + Decimal32(b) => b.append_null(), + Decimal64(b) => b.append_null(), + Decimal128(b) => b.append_null(), + Decimal256(b) => b.append_null(), } } @@ -90,6 +99,10 @@ impl<'a> PrimitiveVariantToArrowRowBuilder<'a> { Float16(b) => b.append_value(value), Float32(b) => b.append_value(value), Float64(b) => b.append_value(value), + Decimal32(b) => b.append_value(value), + Decimal64(b) => b.append_value(value), + Decimal128(b) => b.append_value(value), + Decimal256(b) => b.append_value(value), } } @@ -107,6 +120,10 @@ impl<'a> PrimitiveVariantToArrowRowBuilder<'a> { Float16(b) => b.finish(), Float32(b) => b.finish(), Float64(b) => b.finish(), + Decimal32(b) => b.finish(), + Decimal64(b) => b.finish(), + Decimal128(b) => b.finish(), + Decimal256(b) => b.finish(), } } } @@ -116,7 +133,6 @@ impl<'a> VariantToArrowRowBuilder<'a> { use VariantToArrowRowBuilder::*; match self { Primitive(b) => b.append_null(), - Decimal32(b) => b.append_null(), BinaryVariant(b) => b.append_null(), WithPath(path_builder) => path_builder.append_null(), } @@ -126,7 +142,6 @@ impl<'a> VariantToArrowRowBuilder<'a> { use VariantToArrowRowBuilder::*; match self { Primitive(b) => b.append_value(&value), - Decimal32(b) => b.append_value(&value), BinaryVariant(b) => b.append_value(value), WithPath(path_builder) => path_builder.append_value(value), } @@ -136,7 +151,6 @@ impl<'a> VariantToArrowRowBuilder<'a> { use VariantToArrowRowBuilder::*; match self { Primitive(b) => b.finish(), - Decimal32(b) => b.finish(), BinaryVariant(b) => b.finish(), WithPath(path_builder) => path_builder.finish(), } @@ -196,6 +210,24 @@ pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>( cast_options, capacity, )), + DataType::Decimal32(precision, scale) => Decimal32(VariantToDecimal32ArrowRowBuilder::new( + cast_options, + capacity, + *precision, + *scale, + )?), + DataType::Decimal64(precision, scale) => Decimal64(VariantToDecimal64ArrowRowBuilder::new( + cast_options, + capacity, + *precision, + *scale, + )?), + DataType::Decimal128(precision, scale) => Decimal128( + VariantToDecimal128ArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?, + ), + DataType::Decimal256(precision, scale) => Decimal256( + VariantToDecimal256ArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?, + ), _ if data_type.is_primitive() => { return Err(ArrowError::NotYetImplemented(format!( "Primitive data_type {data_type:?} not yet implemented" @@ -241,9 +273,6 @@ pub(crate) fn make_variant_to_arrow_row_builder<'a>( "Converting unshredded variant arrays to arrow lists".to_string(), )); } - Some(DataType::Decimal32(precision, scale)) => Decimal32( - VariantToDecimal32ArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?, - ), Some(data_type) => { let builder = make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?; @@ -385,10 +414,10 @@ impl<'a> VariantToDecimal32ArrowRowBuilder<'a> { let maybe_scaled = scale_variant_decimal!( value, as_decimal4, + |x: i32| x, self.scale, - i32, - is_validate_decimal32_precision, - self.precision + self.precision, + is_validate_decimal32_precision ); if let Some(scaled) = maybe_scaled { @@ -410,6 +439,180 @@ impl<'a> VariantToDecimal32ArrowRowBuilder<'a> { } } +pub(crate) struct VariantToDecimal64ArrowRowBuilder<'a> { + builder: PrimitiveBuilder, + cast_options: &'a CastOptions<'a>, + precision: u8, + scale: i8, +} + +impl<'a> VariantToDecimal64ArrowRowBuilder<'a> { + fn new( + cast_options: &'a CastOptions<'a>, + capacity: usize, + precision: u8, + scale: i8, + ) -> Result { + let builder = PrimitiveBuilder::::with_capacity(capacity) + .with_precision_and_scale(precision, scale)?; + Ok(Self { + builder, + cast_options, + precision, + scale, + }) + } + + fn append_null(&mut self) -> Result<()> { + self.builder.append_null(); + Ok(()) + } + + fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { + let maybe_scaled = scale_variant_decimal!( + value, + as_decimal8, + |x: i64| x, + self.scale, + self.precision, + is_validate_decimal64_precision + ); + + if let Some(scaled) = maybe_scaled { + self.builder.append_value(scaled); + Ok(true) + } else if self.cast_options.safe { + self.builder.append_null(); + Ok(false) + } else { + Err(ArrowError::CastError(format!( + "Failed to cast to Decimal64(precision={}, scale={}) from variant {:?}", + self.precision, self.scale, value + ))) + } + } + + fn finish(mut self) -> Result { + Ok(Arc::new(self.builder.finish())) + } +} + +pub(crate) struct VariantToDecimal128ArrowRowBuilder<'a> { + builder: PrimitiveBuilder, + cast_options: &'a CastOptions<'a>, + precision: u8, + scale: i8, +} + +impl<'a> VariantToDecimal128ArrowRowBuilder<'a> { + fn new( + cast_options: &'a CastOptions<'a>, + capacity: usize, + precision: u8, + scale: i8, + ) -> Result { + let builder = PrimitiveBuilder::::with_capacity(capacity) + .with_precision_and_scale(precision, scale)?; + Ok(Self { + builder, + cast_options, + precision, + scale, + }) + } + + fn append_null(&mut self) -> Result<()> { + self.builder.append_null(); + Ok(()) + } + + fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { + let maybe_scaled = scale_variant_decimal!( + value, + as_decimal16, + |x: i128| x, + self.scale, + self.precision, + is_validate_decimal_precision + ); + + if let Some(scaled) = maybe_scaled { + self.builder.append_value(scaled); + Ok(true) + } else if self.cast_options.safe { + self.builder.append_null(); + Ok(false) + } else { + Err(ArrowError::CastError(format!( + "Failed to cast to Decimal128(precision={}, scale={}) from variant {:?}", + self.precision, self.scale, value + ))) + } + } + + fn finish(mut self) -> Result { + Ok(Arc::new(self.builder.finish())) + } +} + +pub(crate) struct VariantToDecimal256ArrowRowBuilder<'a> { + builder: PrimitiveBuilder, + cast_options: &'a CastOptions<'a>, + precision: u8, + scale: i8, +} + +impl<'a> VariantToDecimal256ArrowRowBuilder<'a> { + fn new( + cast_options: &'a CastOptions<'a>, + capacity: usize, + precision: u8, + scale: i8, + ) -> Result { + let builder = PrimitiveBuilder::::with_capacity(capacity) + .with_precision_and_scale(precision, scale)?; + Ok(Self { + builder, + cast_options, + precision, + scale, + }) + } + + fn append_null(&mut self) -> Result<()> { + self.builder.append_null(); + Ok(()) + } + + fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { + let maybe_scaled = scale_variant_decimal!( + value, + as_decimal16, + i256::from_i128, + self.scale, + self.precision, + is_validate_decimal256_precision + ); + + if let Some(scaled) = maybe_scaled { + self.builder.append_value(scaled); + Ok(true) + } else if self.cast_options.safe { + self.builder.append_null(); + Ok(false) + } else { + Err(ArrowError::CastError(format!( + "Failed to cast to Decimal256(precision={}, scale={}) from variant {:?}", + self.precision, self.scale, value + ))) + } + } + + fn finish(mut self) -> Result { + Ok(Arc::new(self.builder.finish())) + } +} + /// Builder for creating VariantArray output (for path extraction without type conversion) pub(crate) struct VariantToBinaryVariantArrowRowBuilder { metadata: BinaryViewArray, From 964e45a15c76689da8bbe017d3cbb5a1134fdaac Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Sat, 4 Oct 2025 12:57:37 -0400 Subject: [PATCH 05/26] Simplify decimal builders --- .../src/variant_to_arrow.rs | 397 ++++++------------ 1 file changed, 135 insertions(+), 262 deletions(-) diff --git a/parquet-variant-compute/src/variant_to_arrow.rs b/parquet-variant-compute/src/variant_to_arrow.rs index d4e619fa7c79..07034abcd788 100644 --- a/parquet-variant-compute/src/variant_to_arrow.rs +++ b/parquet-variant-compute/src/variant_to_arrow.rs @@ -45,10 +45,10 @@ pub(crate) enum PrimitiveVariantToArrowRowBuilder<'a> { Float16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float16Type>), Float32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float32Type>), Float64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float64Type>), - Decimal32(VariantToDecimal32ArrowRowBuilder<'a>), - Decimal64(VariantToDecimal64ArrowRowBuilder<'a>), - Decimal128(VariantToDecimal128ArrowRowBuilder<'a>), - Decimal256(VariantToDecimal256ArrowRowBuilder<'a>), + Decimal32(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal32Type>), + Decimal64(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal64Type>), + Decimal128(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal128Type>), + Decimal256(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal256Type>), } /// Builder for converting variant values into strongly typed Arrow arrays. @@ -165,80 +165,75 @@ pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>( ) -> Result> { use PrimitiveVariantToArrowRowBuilder::*; - let builder = match data_type { - DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new( - cast_options, - capacity, - )), - DataType::Decimal32(precision, scale) => Decimal32(VariantToDecimal32ArrowRowBuilder::new( - cast_options, - capacity, - *precision, - *scale, - )?), - DataType::Decimal64(precision, scale) => Decimal64(VariantToDecimal64ArrowRowBuilder::new( - cast_options, - capacity, - *precision, - *scale, - )?), - DataType::Decimal128(precision, scale) => Decimal128( - VariantToDecimal128ArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?, - ), - DataType::Decimal256(precision, scale) => Decimal256( - VariantToDecimal256ArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?, - ), - _ if data_type.is_primitive() => { - return Err(ArrowError::NotYetImplemented(format!( - "Primitive data_type {data_type:?} not yet implemented" - ))); - } - _ => { - return Err(ArrowError::InvalidArgumentError(format!( - "Not a primitive type: {data_type:?}" - ))); - } - }; + let builder = + match data_type { + DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + )), + DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + )), + DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + )), + DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + )), + DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + )), + DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + )), + DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + )), + DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + )), + DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + )), + DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + )), + DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new( + cast_options, + capacity, + )), + DataType::Decimal32(precision, scale) => Decimal32( + VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?, + ), + DataType::Decimal64(precision, scale) => Decimal64( + VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?, + ), + DataType::Decimal128(precision, scale) => Decimal128( + VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?, + ), + DataType::Decimal256(precision, scale) => Decimal256( + VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?, + ), + _ if data_type.is_primitive() => { + return Err(ArrowError::NotYetImplemented(format!( + "Primitive data_type {data_type:?} not yet implemented" + ))); + } + _ => { + return Err(ArrowError::InvalidArgumentError(format!( + "Not a primitive type: {data_type:?}" + ))); + } + }; Ok(builder) } @@ -380,196 +375,80 @@ impl<'a, T: PrimitiveFromVariant> VariantToPrimitiveArrowRowBuilder<'a, T> { } } -/// Builder for converting variant values to Decimal32 values -pub(crate) struct VariantToDecimal32ArrowRowBuilder<'a> { - builder: PrimitiveBuilder, - cast_options: &'a CastOptions<'a>, - precision: u8, - scale: i8, -} - -impl<'a> VariantToDecimal32ArrowRowBuilder<'a> { - fn new( - cast_options: &'a CastOptions<'a>, - capacity: usize, - precision: u8, +// Minimal per-decimal hook: just wraps scale_variant_decimal! with correct parameters +pub(crate) trait VariantDecimalScaler: datatypes::DecimalType { + fn scale_from_variant( + value: &Variant<'_, '_>, scale: i8, - ) -> Result { - let builder = PrimitiveBuilder::::with_capacity(capacity) - .with_precision_and_scale(precision, scale)?; - Ok(Self { - builder, - cast_options, - precision, - scale, - }) - } - - fn append_null(&mut self) -> Result<()> { - self.builder.append_null(); - Ok(()) - } - - fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { - let maybe_scaled = scale_variant_decimal!( - value, - as_decimal4, - |x: i32| x, - self.scale, - self.precision, - is_validate_decimal32_precision - ); - - if let Some(scaled) = maybe_scaled { - self.builder.append_value(scaled); - Ok(true) - } else if self.cast_options.safe { - self.builder.append_null(); - Ok(false) - } else { - Err(ArrowError::CastError(format!( - "Failed to cast to Decimal32(precision={}, scale={}) from variant {:?}", - self.precision, self.scale, value - ))) - } - } - - fn finish(mut self) -> Result { - Ok(Arc::new(self.builder.finish())) - } -} - -pub(crate) struct VariantToDecimal64ArrowRowBuilder<'a> { - builder: PrimitiveBuilder, - cast_options: &'a CastOptions<'a>, - precision: u8, - scale: i8, -} - -impl<'a> VariantToDecimal64ArrowRowBuilder<'a> { - fn new( - cast_options: &'a CastOptions<'a>, - capacity: usize, precision: u8, - scale: i8, - ) -> Result { - let builder = PrimitiveBuilder::::with_capacity(capacity) - .with_precision_and_scale(precision, scale)?; - Ok(Self { - builder, - cast_options, - precision, - scale, - }) - } - - fn append_null(&mut self) -> Result<()> { - self.builder.append_null(); - Ok(()) - } - - fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { - let maybe_scaled = scale_variant_decimal!( - value, - as_decimal8, - |x: i64| x, - self.scale, - self.precision, - is_validate_decimal64_precision - ); - - if let Some(scaled) = maybe_scaled { - self.builder.append_value(scaled); - Ok(true) - } else if self.cast_options.safe { - self.builder.append_null(); - Ok(false) - } else { - Err(ArrowError::CastError(format!( - "Failed to cast to Decimal64(precision={}, scale={}) from variant {:?}", - self.precision, self.scale, value - ))) - } - } - - fn finish(mut self) -> Result { - Ok(Arc::new(self.builder.finish())) - } + ) -> Option<::Native>; } -pub(crate) struct VariantToDecimal128ArrowRowBuilder<'a> { - builder: PrimitiveBuilder, - cast_options: &'a CastOptions<'a>, - precision: u8, - scale: i8, -} - -impl<'a> VariantToDecimal128ArrowRowBuilder<'a> { - fn new( - cast_options: &'a CastOptions<'a>, - capacity: usize, - precision: u8, - scale: i8, - ) -> Result { - let builder = PrimitiveBuilder::::with_capacity(capacity) - .with_precision_and_scale(precision, scale)?; - Ok(Self { - builder, - cast_options, - precision, - scale, - }) - } - - fn append_null(&mut self) -> Result<()> { - self.builder.append_null(); - Ok(()) - } - - fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { - let maybe_scaled = scale_variant_decimal!( - value, - as_decimal16, - |x: i128| x, - self.scale, - self.precision, - is_validate_decimal_precision - ); - - if let Some(scaled) = maybe_scaled { - self.builder.append_value(scaled); - Ok(true) - } else if self.cast_options.safe { - self.builder.append_null(); - Ok(false) - } else { - Err(ArrowError::CastError(format!( - "Failed to cast to Decimal128(precision={}, scale={}) from variant {:?}", - self.precision, self.scale, value - ))) +macro_rules! impl_variant_decimal_scaler { + ($t:ty, $variant_method:ident, $to_native:expr, $validate:path) => { + impl VariantDecimalScaler for $t { + fn scale_from_variant( + value: &Variant<'_, '_>, + scale: i8, + precision: u8, + ) -> Option<::Native> { + scale_variant_decimal!( + value, + $variant_method, + $to_native, + scale, + precision, + $validate + ) + } } - } - - fn finish(mut self) -> Result { - Ok(Arc::new(self.builder.finish())) - } + }; } -pub(crate) struct VariantToDecimal256ArrowRowBuilder<'a> { - builder: PrimitiveBuilder, +impl_variant_decimal_scaler!( + Decimal32Type, + as_decimal4, + |x: i32| x, + is_validate_decimal32_precision +); +impl_variant_decimal_scaler!( + Decimal64Type, + as_decimal8, + |x: i64| x, + is_validate_decimal64_precision +); +impl_variant_decimal_scaler!( + Decimal128Type, + as_decimal16, + |x: i128| x, + is_validate_decimal_precision +); +impl_variant_decimal_scaler!( + Decimal256Type, + as_decimal16, + i256::from_i128, + is_validate_decimal256_precision +); + +/// Builder for converting variant values to arrow Decimal values +pub(crate) struct VariantToDecimalArrowRowBuilder< + 'a, + T: datatypes::DecimalType + VariantDecimalScaler, +> { + builder: PrimitiveBuilder, cast_options: &'a CastOptions<'a>, precision: u8, scale: i8, } -impl<'a> VariantToDecimal256ArrowRowBuilder<'a> { +impl<'a, T: datatypes::DecimalType + VariantDecimalScaler> VariantToDecimalArrowRowBuilder<'a, T> { fn new( cast_options: &'a CastOptions<'a>, capacity: usize, precision: u8, scale: i8, ) -> Result { - let builder = PrimitiveBuilder::::with_capacity(capacity) + let builder = PrimitiveBuilder::::with_capacity(capacity) .with_precision_and_scale(precision, scale)?; Ok(Self { builder, @@ -585,16 +464,7 @@ impl<'a> VariantToDecimal256ArrowRowBuilder<'a> { } fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { - let maybe_scaled = scale_variant_decimal!( - value, - as_decimal16, - i256::from_i128, - self.scale, - self.precision, - is_validate_decimal256_precision - ); - - if let Some(scaled) = maybe_scaled { + if let Some(scaled) = T::scale_from_variant(value, self.scale, self.precision) { self.builder.append_value(scaled); Ok(true) } else if self.cast_options.safe { @@ -602,8 +472,11 @@ impl<'a> VariantToDecimal256ArrowRowBuilder<'a> { Ok(false) } else { Err(ArrowError::CastError(format!( - "Failed to cast to Decimal256(precision={}, scale={}) from variant {:?}", - self.precision, self.scale, value + "Failed to cast to {}(precision={}, scale={}) from variant {:?}", + T::PREFIX, + self.precision, + self.scale, + value ))) } } From 6f39a2a4df8199d8c0754c4ab8ad7012b0e0ac71 Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Sat, 4 Oct 2025 13:17:39 -0400 Subject: [PATCH 06/26] fmt --- parquet-variant-compute/src/variant_get.rs | 22 +++++++++---------- .../src/variant_to_arrow.rs | 8 +++---- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index c94c670ffa41..3e218623b706 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -23,9 +23,9 @@ use arrow::{ use arrow_schema::{ArrowError, DataType, FieldRef}; use parquet_variant::{VariantPath, VariantPathElement}; +use crate::VariantArray; use crate::variant_array::BorrowedShreddingState; use crate::variant_to_arrow::make_variant_to_arrow_row_builder; -use crate::VariantArray; use arrow::array::AsArray; use std::sync::Arc; @@ -295,27 +295,27 @@ impl<'a> GetOptions<'a> { mod test { use std::sync::Arc; - use super::{variant_get, GetOptions}; + use super::{GetOptions, variant_get}; + use crate::VariantArray; use crate::json_to_variant; use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder}; - use crate::VariantArray; use arrow::array::{ - Array, ArrayRef, AsArray, BinaryViewArray, Date32Array, Decimal128Array, Decimal256Array, - Decimal32Array, Decimal64Array, Float32Array, Float64Array, Int16Array, Int32Array, - Int64Array, Int8Array, StringArray, StructArray, + Array, ArrayRef, AsArray, BinaryViewArray, Date32Array, Decimal32Array, Decimal64Array, + Decimal128Array, Decimal256Array, Float32Array, Float64Array, Int8Array, Int16Array, + Int32Array, Int64Array, StringArray, StructArray, }; use arrow::buffer::NullBuffer; use arrow::compute::CastOptions; - use arrow::datatypes::i256; use arrow::datatypes::DataType::{Int16, Int32, Int64}; + use arrow::datatypes::i256; use arrow_schema::{ - DataType, Field, FieldRef, Fields, - DECIMAL128_MAX_PRECISION, DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION, + DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION, DECIMAL128_MAX_PRECISION, DataType, + Field, FieldRef, Fields, }; use chrono::DateTime; use parquet_variant::{ - Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8, VariantPath, - EMPTY_VARIANT_METADATA_BYTES, + EMPTY_VARIANT_METADATA_BYTES, Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16, + VariantPath, }; fn single_variant_get_test(input_json: &str, path: VariantPath, expected_json: &str) { diff --git a/parquet-variant-compute/src/variant_to_arrow.rs b/parquet-variant-compute/src/variant_to_arrow.rs index 07034abcd788..1760bdf41892 100644 --- a/parquet-variant-compute/src/variant_to_arrow.rs +++ b/parquet-variant-compute/src/variant_to_arrow.rs @@ -18,14 +18,14 @@ use arrow::array::{ArrayRef, BinaryViewArray, NullBufferBuilder, PrimitiveBuilder}; use arrow::compute::CastOptions; use arrow::datatypes::{ - self, i256, is_validate_decimal256_precision, is_validate_decimal32_precision, - is_validate_decimal64_precision, is_validate_decimal_precision, ArrowPrimitiveType, DataType, - Decimal128Type, Decimal256Type, Decimal32Type, Decimal64Type, + self, ArrowPrimitiveType, DataType, Decimal32Type, Decimal64Type, Decimal128Type, + Decimal256Type, i256, is_validate_decimal_precision, is_validate_decimal32_precision, + is_validate_decimal64_precision, is_validate_decimal256_precision, }; use arrow::error::{ArrowError, Result}; use parquet_variant::{Variant, VariantPath}; -use crate::type_conversion::{scale_variant_decimal, PrimitiveFromVariant}; +use crate::type_conversion::{PrimitiveFromVariant, scale_variant_decimal}; use crate::{VariantArray, VariantValueArrayBuilder}; use std::sync::Arc; From 8f0f53ca9699d8beae8639454abf633205f99608 Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Sat, 4 Oct 2025 13:26:29 -0400 Subject: [PATCH 07/26] Add comment --- parquet-variant-compute/src/type_conversion.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index d329a0439394..fbc668c7897b 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -86,9 +86,9 @@ macro_rules! scale_variant_decimal { let d = v.checked_div(div)?; let r = v % div; + // rounding in the same way as convert_to_smaller_scale_decimal in arrow-cast let half = div.checked_div($to_int_ty(2))?; let half_neg = half.checked_neg()?; - let adjusted = match v >= $to_int_ty(0) { true if r >= half => d.checked_add($to_int_ty(1))?, false if r <= half_neg => d.checked_sub($to_int_ty(1))?, From d88fd7fc52500202743f50dcfd72cb11e8a1ab13 Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Mon, 6 Oct 2025 13:55:30 -0400 Subject: [PATCH 08/26] assert precision and scale in tests --- parquet-variant-compute/src/variant_get.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 3e218623b706..b9f2a34d50b4 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -2737,6 +2737,8 @@ mod test { let result = variant_get(&variant_array, options).unwrap(); let result = result.as_any().downcast_ref::().unwrap(); + assert_eq!(result.precision(), 9); + assert_eq!(result.scale(), 2); assert_eq!(result.value(0), 124); assert_eq!(result.value(1), 125); assert_eq!(result.value(2), -124); @@ -2821,6 +2823,8 @@ mod test { let result = variant_get(&variant_array, options).unwrap(); let result = result.as_any().downcast_ref::().unwrap(); + assert_eq!(result.precision(), 18); + assert_eq!(result.scale(), 2); assert_eq!(result.value(0), 124); assert_eq!(result.value(1), 125); assert_eq!(result.value(2), -124); @@ -2905,6 +2909,8 @@ mod test { let result = variant_get(&variant_array, options).unwrap(); let result = result.as_any().downcast_ref::().unwrap(); + assert_eq!(result.precision(), 38); + assert_eq!(result.scale(), 2); assert_eq!(result.value(0), 124); assert_eq!(result.value(1), 125); assert_eq!(result.value(2), -124); @@ -2990,6 +2996,8 @@ mod test { let result = variant_get(&variant_array, options).unwrap(); let result = result.as_any().downcast_ref::().unwrap(); + assert_eq!(result.precision(), 76); + assert_eq!(result.scale(), 2); assert_eq!(result.value(0), i256::from_i128(124)); assert_eq!(result.value(1), i256::from_i128(125)); assert_eq!(result.value(2), i256::from_i128(-124)); From 9b6d0e1ef6039c67847ea614dc0e4d7175295943 Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Mon, 6 Oct 2025 19:26:56 -0400 Subject: [PATCH 09/26] address comments --- .../src/type_conversion.rs | 70 +++++++++---------- parquet-variant-compute/src/variant_get.rs | 45 ++++-------- .../src/variant_to_arrow.rs | 47 ++++++------- parquet-variant/src/variant/decimal.rs | 6 +- 4 files changed, 70 insertions(+), 98 deletions(-) diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index fbc668c7897b..6b45b38c428f 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -63,42 +63,39 @@ impl_primitive_from_variant!(datatypes::Float64Type, as_f64); macro_rules! scale_variant_decimal { ($variant:expr, $variant_method:ident, $to_int_ty:expr, $output_scale:expr, $precision:expr, $validate:path) => {{ - (|| -> Option<_> { - let variant = $variant.$variant_method()?; - let input_scale = variant.scale() as i8; - let variant = $to_int_ty(variant.integer()); - let ten = $to_int_ty(10); - - let scaled = if input_scale == $output_scale { - Some(variant) - } else if input_scale < $output_scale { - // scale_up means output has more fractional digits than input - // multiply integer by 10^(output_scale - input_scale) - let delta = ($output_scale - input_scale) as u32; - let mul = ten.checked_pow(delta)?; - variant.checked_mul(mul) - } else { - // scale_down means output has fewer fractional digits than input - // divide by 10^(input_scale - output_scale) with rounding - let delta = (input_scale - $output_scale) as u32; - let div = ten.checked_pow(delta)?; - let v = variant; - let d = v.checked_div(div)?; - let r = v % div; - - // rounding in the same way as convert_to_smaller_scale_decimal in arrow-cast - let half = div.checked_div($to_int_ty(2))?; - let half_neg = half.checked_neg()?; - let adjusted = match v >= $to_int_ty(0) { - true if r >= half => d.checked_add($to_int_ty(1))?, - false if r <= half_neg => d.checked_sub($to_int_ty(1))?, - _ => d, - }; - Some(adjusted) + let variant = $variant.$variant_method()?; + let input_scale = variant.scale() as i8; + let variant = $to_int_ty(variant.integer()); + let ten = $to_int_ty(10); + + let scaled = if input_scale == $output_scale { + Some(variant) + } else if input_scale < $output_scale { + // scale_up means output has more fractional digits than input + // multiply integer by 10^(output_scale - input_scale) + let delta = ($output_scale - input_scale) as u32; + let mul = ten.checked_pow(delta)?; + variant.checked_mul(mul) + } else { + // scale_down means output has fewer fractional digits than input + // divide by 10^(input_scale - output_scale) with rounding + let delta = (input_scale - $output_scale) as u32; + let div = ten.checked_pow(delta)?; + let d = variant.checked_div(div)?; + let r = variant % div; + + // rounding in the same way as convert_to_smaller_scale_decimal in arrow-cast + let half = div.checked_div($to_int_ty(2))?; + let half_neg = half.checked_neg()?; + let adjusted = match variant >= $to_int_ty(0) { + true if r >= half => d.checked_add($to_int_ty(1))?, + false if r <= half_neg => d.checked_sub($to_int_ty(1))?, + _ => d, }; + Some(adjusted) + }; - scaled.filter(|v| $validate(*v, $precision)) - })() + scaled.filter(|v| $validate(*v, $precision)) }}; } pub(crate) use scale_variant_decimal; @@ -151,9 +148,8 @@ macro_rules! decimal_to_variant_decimal { let (v, scale) = if *$scale < 0 { // For negative scale, we need to multiply the value by 10^|scale| // For example: 123 with scale -2 becomes 12300 with scale 0 - let v = (10 as $value_type) - .checked_pow((-*$scale) as u32) - .and_then(|m| m.checked_mul($v)); + let v = + <$value_type>::checked_pow(10, (-*$scale) as u32).and_then(|m| m.checked_mul($v)); (v, 0u8) } else { (Some($v), *$scale as u8) diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index b9f2a34d50b4..b9d93cd020df 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -308,10 +308,7 @@ mod test { use arrow::compute::CastOptions; use arrow::datatypes::DataType::{Int16, Int32, Int64}; use arrow::datatypes::i256; - use arrow_schema::{ - DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION, DECIMAL128_MAX_PRECISION, DataType, - Field, FieldRef, Fields, - }; + use arrow_schema::{DataType, Field, FieldRef, Fields}; use chrono::DateTime; use parquet_variant::{ EMPTY_VARIANT_METADATA_BYTES, Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16, @@ -2688,18 +2685,6 @@ mod test { Arc::new(struct_array) } - macro_rules! max_unscaled_value { - (32, $precision:expr) => { - (u32::pow(10, $precision as u32) - 1) as i32 - }; - (64, $precision:expr) => { - (u64::pow(10, $precision as u32) - 1) as i64 - }; - (128, $precision:expr) => { - (u128::pow(10, $precision as u32) - 1) as i128 - }; - } - #[test] fn get_decimal32_unshredded_var_scales_to_scale2() { // Build unshredded variant values with different scales @@ -2750,7 +2735,7 @@ mod test { // Exceed Decimal32 max precision (9) after scaling let mut builder = crate::VariantArrayBuilder::new(1); builder.append_variant(Variant::from( - VariantDecimal4::try_new(max_unscaled_value!(32, DECIMAL32_MAX_PRECISION), 0).unwrap(), + VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE as i32, 0).unwrap(), )); let variant_array: ArrayRef = ArrayRef::from(builder.build()); @@ -2766,7 +2751,7 @@ mod test { fn get_decimal32_precision_overflow_unsafe_errors() { let mut builder = crate::VariantArrayBuilder::new(1); builder.append_variant(Variant::from( - VariantDecimal4::try_new(max_unscaled_value!(32, DECIMAL32_MAX_PRECISION), 0).unwrap(), + VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE as i32, 0).unwrap(), )); let variant_array: ArrayRef = ArrayRef::from(builder.build()); @@ -2836,7 +2821,7 @@ mod test { // Exceed Decimal64 max precision (18) after scaling let mut builder = crate::VariantArrayBuilder::new(1); builder.append_variant(Variant::from( - VariantDecimal8::try_new(max_unscaled_value!(64, DECIMAL64_MAX_PRECISION), 0).unwrap(), + VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE as i64, 0).unwrap(), )); let variant_array: ArrayRef = ArrayRef::from(builder.build()); @@ -2852,7 +2837,7 @@ mod test { fn get_decimal64_precision_overflow_unsafe_errors() { let mut builder = crate::VariantArrayBuilder::new(1); builder.append_variant(Variant::from( - VariantDecimal8::try_new(max_unscaled_value!(64, DECIMAL64_MAX_PRECISION), 0).unwrap(), + VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE as i64, 0).unwrap(), )); let variant_array: ArrayRef = ArrayRef::from(builder.build()); @@ -2922,8 +2907,7 @@ mod test { // Exceed Decimal128 max precision (38) after scaling let mut builder = crate::VariantArrayBuilder::new(1); builder.append_variant(Variant::from( - VariantDecimal16::try_new(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION), 0) - .unwrap(), + VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE as i128, 0).unwrap(), )); let variant_array: ArrayRef = ArrayRef::from(builder.build()); @@ -2939,8 +2923,7 @@ mod test { fn get_decimal128_precision_overflow_unsafe_errors() { let mut builder = crate::VariantArrayBuilder::new(1); builder.append_variant(Variant::from( - VariantDecimal16::try_new(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION), 0) - .unwrap(), + VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE as i128, 0).unwrap(), )); let variant_array: ArrayRef = ArrayRef::from(builder.build()); @@ -3009,12 +2992,10 @@ mod test { // Exceed Decimal128 max precision (38) after scaling let mut builder = crate::VariantArrayBuilder::new(2); builder.append_variant(Variant::from( - VariantDecimal16::try_new(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION), 1) - .unwrap(), + VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE as i128, 1).unwrap(), )); builder.append_variant(Variant::from( - VariantDecimal16::try_new(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION), 0) - .unwrap(), + VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE as i128, 0).unwrap(), )); let variant_array: ArrayRef = ArrayRef::from(builder.build()); @@ -3027,7 +3008,7 @@ mod test { // So expected integer is (10^38-1) * 10^(39-1) = (10^38-1) * 10^38 let base = i256::from_i128(10); let factor = base.checked_pow(38).unwrap(); - let expected = i256::from_i128(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION)) + let expected = i256::from_i128(VariantDecimal16::MAX_UNSCALED_VALUE as i128) .checked_mul(factor) .unwrap(); assert_eq!(result.value(0), expected); @@ -3039,12 +3020,10 @@ mod test { // Exceed Decimal128 max precision (38) after scaling let mut builder = crate::VariantArrayBuilder::new(2); builder.append_variant(Variant::from( - VariantDecimal16::try_new(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION), 1) - .unwrap(), + VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE as i128, 1).unwrap(), )); builder.append_variant(Variant::from( - VariantDecimal16::try_new(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION), 0) - .unwrap(), + VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE as i128, 0).unwrap(), )); let variant_array: ArrayRef = ArrayRef::from(builder.build()); diff --git a/parquet-variant-compute/src/variant_to_arrow.rs b/parquet-variant-compute/src/variant_to_arrow.rs index 1760bdf41892..b8f8f866e6d9 100644 --- a/parquet-variant-compute/src/variant_to_arrow.rs +++ b/parquet-variant-compute/src/variant_to_arrow.rs @@ -18,9 +18,9 @@ use arrow::array::{ArrayRef, BinaryViewArray, NullBufferBuilder, PrimitiveBuilder}; use arrow::compute::CastOptions; use arrow::datatypes::{ - self, ArrowPrimitiveType, DataType, Decimal32Type, Decimal64Type, Decimal128Type, - Decimal256Type, i256, is_validate_decimal_precision, is_validate_decimal32_precision, - is_validate_decimal64_precision, is_validate_decimal256_precision, + self, ArrowPrimitiveType, DataType, i256, is_validate_decimal_precision, + is_validate_decimal32_precision, is_validate_decimal64_precision, + is_validate_decimal256_precision, }; use arrow::error::{ArrowError, Result}; use parquet_variant::{Variant, VariantPath}; @@ -376,18 +376,18 @@ impl<'a, T: PrimitiveFromVariant> VariantToPrimitiveArrowRowBuilder<'a, T> { } // Minimal per-decimal hook: just wraps scale_variant_decimal! with correct parameters -pub(crate) trait VariantDecimalScaler: datatypes::DecimalType { - fn scale_from_variant( +pub(crate) trait RescaleVariantDecimal: datatypes::DecimalType { + fn rescale_variant_decimal( value: &Variant<'_, '_>, scale: i8, precision: u8, ) -> Option<::Native>; } -macro_rules! impl_variant_decimal_scaler { +macro_rules! impl_rescale_variant_decimal { ($t:ty, $variant_method:ident, $to_native:expr, $validate:path) => { - impl VariantDecimalScaler for $t { - fn scale_from_variant( + impl RescaleVariantDecimal for $t { + fn rescale_variant_decimal( value: &Variant<'_, '_>, scale: i8, precision: u8, @@ -405,43 +405,40 @@ macro_rules! impl_variant_decimal_scaler { }; } -impl_variant_decimal_scaler!( - Decimal32Type, +impl_rescale_variant_decimal!( + datatypes::Decimal32Type, as_decimal4, - |x: i32| x, + i32::from, is_validate_decimal32_precision ); -impl_variant_decimal_scaler!( - Decimal64Type, +impl_rescale_variant_decimal!( + datatypes::Decimal64Type, as_decimal8, - |x: i64| x, + i64::from, is_validate_decimal64_precision ); -impl_variant_decimal_scaler!( - Decimal128Type, +impl_rescale_variant_decimal!( + datatypes::Decimal128Type, as_decimal16, - |x: i128| x, + i128::from, is_validate_decimal_precision ); -impl_variant_decimal_scaler!( - Decimal256Type, +impl_rescale_variant_decimal!( + datatypes::Decimal256Type, as_decimal16, i256::from_i128, is_validate_decimal256_precision ); /// Builder for converting variant values to arrow Decimal values -pub(crate) struct VariantToDecimalArrowRowBuilder< - 'a, - T: datatypes::DecimalType + VariantDecimalScaler, -> { +pub(crate) struct VariantToDecimalArrowRowBuilder<'a, T: RescaleVariantDecimal> { builder: PrimitiveBuilder, cast_options: &'a CastOptions<'a>, precision: u8, scale: i8, } -impl<'a, T: datatypes::DecimalType + VariantDecimalScaler> VariantToDecimalArrowRowBuilder<'a, T> { +impl<'a, T: RescaleVariantDecimal> VariantToDecimalArrowRowBuilder<'a, T> { fn new( cast_options: &'a CastOptions<'a>, capacity: usize, @@ -464,7 +461,7 @@ impl<'a, T: datatypes::DecimalType + VariantDecimalScaler> VariantToDecimalArrow } fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { - if let Some(scaled) = T::scale_from_variant(value, self.scale, self.precision) { + if let Some(scaled) = T::rescale_variant_decimal(value, self.scale, self.precision) { self.builder.append_value(scaled); Ok(true) } else if self.cast_options.safe { diff --git a/parquet-variant/src/variant/decimal.rs b/parquet-variant/src/variant/decimal.rs index 4793d88569bf..0f395c1faa6f 100644 --- a/parquet-variant/src/variant/decimal.rs +++ b/parquet-variant/src/variant/decimal.rs @@ -87,7 +87,7 @@ pub struct VariantDecimal4 { impl VariantDecimal4 { pub(crate) const MAX_PRECISION: u8 = 9; - pub(crate) const MAX_UNSCALED_VALUE: u32 = u32::pow(10, Self::MAX_PRECISION as u32) - 1; + pub const MAX_UNSCALED_VALUE: u32 = u32::pow(10, Self::MAX_PRECISION as u32) - 1; pub fn try_new(integer: i32, scale: u8) -> Result { decimal_try_new!(integer, scale) @@ -137,7 +137,7 @@ pub struct VariantDecimal8 { impl VariantDecimal8 { pub(crate) const MAX_PRECISION: u8 = 18; - pub(crate) const MAX_UNSCALED_VALUE: u64 = u64::pow(10, Self::MAX_PRECISION as u32) - 1; + pub const MAX_UNSCALED_VALUE: u64 = u64::pow(10, Self::MAX_PRECISION as u32) - 1; pub fn try_new(integer: i64, scale: u8) -> Result { decimal_try_new!(integer, scale) @@ -187,7 +187,7 @@ pub struct VariantDecimal16 { impl VariantDecimal16 { const MAX_PRECISION: u8 = 38; - const MAX_UNSCALED_VALUE: u128 = u128::pow(10, Self::MAX_PRECISION as u32) - 1; + pub const MAX_UNSCALED_VALUE: u128 = u128::pow(10, Self::MAX_PRECISION as u32) - 1; pub fn try_new(integer: i128, scale: u8) -> Result { decimal_try_new!(integer, scale) From 54237fe83d391c2de707c07d425231660942130d Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Tue, 7 Oct 2025 20:09:39 -0400 Subject: [PATCH 10/26] add more overflow cases and valid cases that will overflow in current impl --- parquet-variant-compute/src/variant_get.rs | 24 ++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index b9d93cd020df..190242d331c1 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -2693,6 +2693,7 @@ mod test { builder.append_variant(Variant::from(VariantDecimal4::try_new(1234, 3).unwrap())); // 1.234 builder.append_variant(Variant::from(VariantDecimal4::try_new(1234, 0).unwrap())); // 1234 builder.append_null(); + builder.append_variant(Variant::from(VariantDecimal8::try_new((VariantDecimal4::MAX_UNSCALED_VALUE as i64)+1, 3).unwrap())); // should fit into Decimal32 let variant_array: ArrayRef = ArrayRef::from(builder.build()); let field = Field::new("result", DataType::Decimal32(9, 2), true); @@ -2706,28 +2707,35 @@ mod test { assert_eq!(result.value(1), 123); assert_eq!(result.value(2), 123400); assert!(result.is_null(3)); + assert!(result.is_null(4)); // should not be null as the final result fits into Decimal32 } #[test] fn get_decimal32_unshredded_scale_down_rounding() { let mut builder = crate::VariantArrayBuilder::new(2); + builder.append_variant(Variant::from(VariantDecimal4::try_new(1235, 0).unwrap())); + builder.append_variant(Variant::from(VariantDecimal4::try_new(1245, 0).unwrap())); + builder.append_variant(Variant::from(VariantDecimal4::try_new(-1235, 0).unwrap())); + builder.append_variant(Variant::from(VariantDecimal4::try_new(-1245, 0).unwrap())); + builder.append_variant(Variant::from(VariantDecimal4::try_new(1235, 2).unwrap())); builder.append_variant(Variant::from(VariantDecimal4::try_new(1235, 3).unwrap())); - builder.append_variant(Variant::from(VariantDecimal4::try_new(1245, 3).unwrap())); - builder.append_variant(Variant::from(VariantDecimal4::try_new(-1235, 3).unwrap())); - builder.append_variant(Variant::from(VariantDecimal4::try_new(-1245, 3).unwrap())); + builder.append_variant(Variant::from(VariantDecimal4::try_new(5235, 3).unwrap())); let variant_array: ArrayRef = ArrayRef::from(builder.build()); - let field = Field::new("result", DataType::Decimal32(9, 2), true); + let field = Field::new("result", DataType::Decimal32(9, -1), true); let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); let result = variant_get(&variant_array, options).unwrap(); let result = result.as_any().downcast_ref::().unwrap(); assert_eq!(result.precision(), 9); - assert_eq!(result.scale(), 2); + assert_eq!(result.scale(), -1); assert_eq!(result.value(0), 124); assert_eq!(result.value(1), 125); assert_eq!(result.value(2), -124); assert_eq!(result.value(3), -125); + assert_eq!(result.value(4), 1); + assert_eq!(result.value(5), 0); + assert_eq!(result.value(6), 1); } #[test] @@ -2737,14 +2745,18 @@ mod test { builder.append_variant(Variant::from( VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE as i32, 0).unwrap(), )); + builder.append_variant(Variant::from( + VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE as i32, 9).unwrap(), + )); // integer value round up overflows let variant_array: ArrayRef = ArrayRef::from(builder.build()); - let field = Field::new("result", DataType::Decimal32(9, 2), true); + let field = Field::new("result", DataType::Decimal32(2, 2), true); let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); let result = variant_get(&variant_array, options).unwrap(); let result = result.as_any().downcast_ref::().unwrap(); assert!(result.is_null(0)); + assert_eq!(result.value(1), 0); // should overflow because 1.00 does not fit into precision (2) } #[test] From e0b18da0046bbfc5dbf7b685cee86931e1a57d35 Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Wed, 8 Oct 2025 10:10:33 -0400 Subject: [PATCH 11/26] WIP --- .../src/type_conversion.rs | 51 ++++++++++++++++++- parquet-variant/src/variant/decimal.rs | 6 +-- 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index 6b45b38c428f..a4ba88437f68 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -17,8 +17,8 @@ //! Module for transforming a typed arrow `Array` to `VariantArray`. -use arrow::datatypes::{self, ArrowPrimitiveType}; -use parquet_variant::Variant; +use arrow::datatypes::{self, ArrowPrimitiveType, Decimal32Type, DecimalType, MAX_DECIMAL32_FOR_EACH_PRECISION}; +use parquet_variant::{Variant, VariantDecimal4}; /// Options for controlling the behavior of `cast_to_variant_with_options`. #[derive(Debug, Clone, PartialEq, Eq)] @@ -100,6 +100,53 @@ macro_rules! scale_variant_decimal { } pub(crate) use scale_variant_decimal; +fn variant_to_unscaled_decimal32( + variant: Variant<'_, '_>, + precision: u8, + scale: i8, +) -> Option { + match variant { + Variant::Int32(i) => scale_variant_decimal_new::(i, VariantDecimal4::MAX_PRECISION, 0, precision, scale), + Variant::Decimal4(d) => scale_variant_decimal_new::(d.integer(), VariantDecimal4::MAX_PRECISION, d.scale() as i8, precision, scale), + _ => None, + } +} + +// fn rescale_variant(integer: i32, input_precision: u8, input_scale: i8, output_precision: u8, output_scale: i8) -> Option { +// let input_precision = input_precision as i8; +// let output_precision = output_precision as i8; +// let mut input_integer_digits = input_precision - input_scale; +// let output_integer_digits = output_precision - output_scale; + +// // +// if input_integer_digits > output_integer_digits { +// if !Decimal32Type::is_valid_decimal_precision(integer, (output_integer_digits + input_scale) as u8) { +// return None; +// } +// input_integer_digits = output_integer_digits; +// } + +// if input_integer_digits == output_integer_digits { +// let rescaled = +// } +// } + + + +fn scale_variant_decimal_new( + integer: I::Native, + input_precision: u8, + input_scale: i8, + output_precision: u8, + output_scale: i8, +) -> Option +where + I: DecimalType, + O: DecimalType, +{ + return None; +} + /// Convert the value at a specific index in the given array into a `Variant`. macro_rules! non_generic_conversion_single_value { ($array:expr, $cast_fn:expr, $index:expr) => {{ diff --git a/parquet-variant/src/variant/decimal.rs b/parquet-variant/src/variant/decimal.rs index 0f395c1faa6f..2464d122c3a5 100644 --- a/parquet-variant/src/variant/decimal.rs +++ b/parquet-variant/src/variant/decimal.rs @@ -86,7 +86,7 @@ pub struct VariantDecimal4 { } impl VariantDecimal4 { - pub(crate) const MAX_PRECISION: u8 = 9; + pub const MAX_PRECISION: u8 = 9; pub const MAX_UNSCALED_VALUE: u32 = u32::pow(10, Self::MAX_PRECISION as u32) - 1; pub fn try_new(integer: i32, scale: u8) -> Result { @@ -136,7 +136,7 @@ pub struct VariantDecimal8 { } impl VariantDecimal8 { - pub(crate) const MAX_PRECISION: u8 = 18; + pub const MAX_PRECISION: u8 = 18; pub const MAX_UNSCALED_VALUE: u64 = u64::pow(10, Self::MAX_PRECISION as u32) - 1; pub fn try_new(integer: i64, scale: u8) -> Result { @@ -186,7 +186,7 @@ pub struct VariantDecimal16 { } impl VariantDecimal16 { - const MAX_PRECISION: u8 = 38; + pub const MAX_PRECISION: u8 = 38; pub const MAX_UNSCALED_VALUE: u128 = u128::pow(10, Self::MAX_PRECISION as u32) - 1; pub fn try_new(integer: i128, scale: u8) -> Result { From 1f19580d6571cc6f59c6b61905d618bd79b4415c Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Wed, 8 Oct 2025 10:27:29 -0400 Subject: [PATCH 12/26] Refactor common logic --- arrow-cast/src/cast/decimal.rs | 104 +++++++++++++++++++++++++-------- 1 file changed, 79 insertions(+), 25 deletions(-) diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index f7235d17f3a9..06c49b426b7f 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -139,6 +139,72 @@ impl DecimalCast for i256 { } } +/// Build a rescale function from (input_precision, input_scale) to (output_precision, output_scale) +/// returning a closure `Fn(I::Native) -> Option` that performs the conversion. +pub(crate) fn rescale_decimal( + _input_precision: u8, + input_scale: i8, + _output_precision: u8, + output_scale: i8, +) -> impl Fn(I::Native) -> Option +where + I: DecimalType, + O: DecimalType, + I::Native: DecimalCast + ArrowNativeTypeOp, + O::Native: DecimalCast + ArrowNativeTypeOp, +{ + let delta_scale = output_scale - input_scale; + + // Precompute parameters and capture them in a single closure type + let mul_opt = if delta_scale > 0 { + O::Native::from_decimal(10_i128) + .and_then(|t| t.pow_checked(delta_scale as u32).ok()) + } else { + None + }; + + let (div_opt, half_opt, half_neg_opt) = if delta_scale < 0 { + let div = I::Native::from_decimal(10_i128) + .and_then(|t| t.pow_checked(delta_scale.unsigned_abs() as u32).ok()); + if let Some(div) = div { + let half = div.div_wrapping(I::Native::from_usize(2).unwrap()); + let half_neg = half.neg_wrapping(); + (Some(div), Some(half), Some(half_neg)) + } else { + (None, None, None) + } + } else { + (None, None, None) + }; + + move |x: I::Native| { + if delta_scale == 0 { + return O::Native::from_decimal(x); + } + + if let Some(mul) = mul_opt { + return O::Native::from_decimal(x).and_then(|x| x.mul_checked(mul).ok()); + } + + // Decrease scale path + let div = div_opt.unwrap(); + let half = half_opt.unwrap(); + let half_neg = half_neg_opt.unwrap(); + + // div is >= 10 and so this cannot overflow + let d = x.div_wrapping(div); + let r = x.mod_wrapping(div); + + // Round result + let adjusted = match x >= I::Native::ZERO { + true if r >= half => d.add_wrapping(I::Native::ONE), + false if r <= half_neg => d.sub_wrapping(I::Native::ONE), + _ => d, + }; + O::Native::from_decimal(adjusted) + } +} + pub(crate) fn cast_decimal_to_decimal_error( output_precision: u8, output_scale: i8, @@ -188,26 +254,12 @@ where // [99999] -> [99] + 1 = [100], a cast to Decimal(2, 0) would not be possible let is_infallible_cast = (input_precision as i8) - delta_scale < (output_precision as i8); - let div = I::Native::from_decimal(10_i128) - .unwrap() - .pow_checked(delta_scale as u32)?; - - let half = div.div_wrapping(I::Native::from_usize(2).unwrap()); - let half_neg = half.neg_wrapping(); - - let f = |x: I::Native| { - // div is >= 10 and so this cannot overflow - let d = x.div_wrapping(div); - let r = x.mod_wrapping(div); - - // Round result - let adjusted = match x >= I::Native::ZERO { - true if r >= half => d.add_wrapping(I::Native::ONE), - false if r <= half_neg => d.sub_wrapping(I::Native::ONE), - _ => d, - }; - O::Native::from_decimal(adjusted) - }; + let f = rescale_decimal::( + input_precision, + input_scale, + output_precision, + output_scale, + ); Ok(if is_infallible_cast { // make sure we don't perform calculations that don't make sense w/o validation @@ -242,9 +294,6 @@ where { let error = cast_decimal_to_decimal_error::(output_precision, output_scale); let delta_scale = output_scale - input_scale; - let mul = O::Native::from_decimal(10_i128) - .unwrap() - .pow_checked(delta_scale as u32)?; // if the gain in precision (digits) is greater than the multiplication due to scaling // every number will fit into the output type @@ -253,13 +302,18 @@ where // [xxxxx] -> [xxxxx000], so for the cast to be infallible, the output type // needs to provide at least 8 digits precision let is_infallible_cast = (input_precision as i8) + delta_scale <= (output_precision as i8); - let f = |x| O::Native::from_decimal(x).and_then(|x| x.mul_checked(mul).ok()); + let f = rescale_decimal::( + input_precision, + input_scale, + output_precision, + output_scale, + ); Ok(if is_infallible_cast { // make sure we don't perform calculations that don't make sense w/o validation validate_decimal_precision_and_scale::(output_precision, output_scale)?; // unwrapping is safe since the result is guaranteed to fit into the target type - let f = |x| O::Native::from_decimal(x).unwrap().mul_wrapping(mul); + let f = |x: I::Native| f(x).unwrap(); array.unary(f) } else if cast_options.safe { array.unary_opt(|x| f(x).filter(|v| O::is_valid_decimal_precision(*v, output_precision))) From c163a91e255a288696598ee5c853e2f6f3b678b9 Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Wed, 8 Oct 2025 11:55:59 -0400 Subject: [PATCH 13/26] Refactor common logic --- arrow-cast/src/cast/decimal.rs | 163 +++++++++++++++++---------------- 1 file changed, 85 insertions(+), 78 deletions(-) diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index 06c49b426b7f..e2c8e2836f69 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -142,11 +142,11 @@ impl DecimalCast for i256 { /// Build a rescale function from (input_precision, input_scale) to (output_precision, output_scale) /// returning a closure `Fn(I::Native) -> Option` that performs the conversion. pub(crate) fn rescale_decimal( - _input_precision: u8, + input_precision: u8, input_scale: i8, - _output_precision: u8, + output_precision: u8, output_scale: i8, -) -> impl Fn(I::Native) -> Option +) -> impl Fn(I::Native) -> Result where I: DecimalType, O: DecimalType, @@ -154,54 +154,83 @@ where O::Native: DecimalCast + ArrowNativeTypeOp, { let delta_scale = output_scale - input_scale; - - // Precompute parameters and capture them in a single closure type - let mul_opt = if delta_scale > 0 { - O::Native::from_decimal(10_i128) - .and_then(|t| t.pow_checked(delta_scale as u32).ok()) + let input_precision_i8 = input_precision as i8; + let output_precision_i8 = output_precision as i8; + + // Determine if the cast is infallible based on precision/scale math + let is_infallible_cast = input_precision_i8 + delta_scale < output_precision_i8; + + // Build a single mode once and use a thin closure that calls into it + enum RescaleMode { + SameScale, + Up { mul: O }, + Down { div: I, half: I, half_neg: I }, + Invalid, + } + + let mode = if delta_scale == 0 { + RescaleMode::SameScale + } else if delta_scale > 0 { + match O::Native::from_decimal(10_i128).and_then(|t| t.pow_checked(delta_scale as u32).ok()) + { + Some(mul) => RescaleMode::Up { mul }, + None => RescaleMode::Invalid, + } } else { - None + // delta_scale < 0 + match I::Native::from_decimal(10_i128) + .and_then(|t| t.pow_checked(delta_scale.unsigned_abs() as u32).ok()) + { + Some(div) => { + let half = div.div_wrapping(I::Native::from_usize(2).unwrap()); + let half_neg = half.neg_wrapping(); + RescaleMode::Down { + div, + half, + half_neg, + } + } + None => RescaleMode::Invalid, + } }; - let (div_opt, half_opt, half_neg_opt) = if delta_scale < 0 { - let div = I::Native::from_decimal(10_i128) - .and_then(|t| t.pow_checked(delta_scale.unsigned_abs() as u32).ok()); - if let Some(div) = div { - let half = div.div_wrapping(I::Native::from_usize(2).unwrap()); - let half_neg = half.neg_wrapping(); - (Some(div), Some(half), Some(half_neg)) - } else { - (None, None, None) + let f = move |x: I::Native| { + match &mode { + RescaleMode::SameScale => O::Native::from_decimal(x), + RescaleMode::Up { mul } => { + O::Native::from_decimal(x).and_then(|x| x.mul_checked(*mul).ok()) + } + RescaleMode::Down { + div, + half, + half_neg, + } => { + // div is >= 10 and so this cannot overflow + let d = x.div_wrapping(*div); + let r = x.mod_wrapping(*div); + + // Round result + let adjusted = match x >= I::Native::ZERO { + true if r >= *half => d.add_wrapping(I::Native::ONE), + false if r <= *half_neg => d.sub_wrapping(I::Native::ONE), + _ => d, + }; + O::Native::from_decimal(adjusted) + } + RescaleMode::Invalid => None, } - } else { - (None, None, None) }; - move |x: I::Native| { - if delta_scale == 0 { - return O::Native::from_decimal(x); - } + let error = cast_decimal_to_decimal_error::(output_precision, output_scale); - if let Some(mul) = mul_opt { - return O::Native::from_decimal(x).and_then(|x| x.mul_checked(mul).ok()); + move |x| { + if is_infallible_cast { + f(x).ok_or_else(|| error(x)) + } else { + f(x).ok_or_else(|| error(x)).and_then(|v| { + O::validate_decimal_precision(v, output_precision, output_scale).map(|_| v) + }) } - - // Decrease scale path - let div = div_opt.unwrap(); - let half = half_opt.unwrap(); - let half_neg = half_neg_opt.unwrap(); - - // div is >= 10 and so this cannot overflow - let d = x.div_wrapping(div); - let r = x.mod_wrapping(div); - - // Round result - let adjusted = match x >= I::Native::ZERO { - true if r >= half => d.add_wrapping(I::Native::ONE), - false if r <= half_neg => d.sub_wrapping(I::Native::ONE), - _ => d, - }; - O::Native::from_decimal(adjusted) } } @@ -240,7 +269,8 @@ where I::Native: DecimalCast + ArrowNativeTypeOp, O::Native: DecimalCast + ArrowNativeTypeOp, { - let error = cast_decimal_to_decimal_error::(output_precision, output_scale); + // make sure we don't perform calculations that don't make sense w/o validation + validate_decimal_precision_and_scale::(output_precision, output_scale)?; let delta_scale = input_scale - output_scale; // if the reduction of the input number through scaling (dividing) is greater // than a possible precision loss (plus potential increase via rounding) @@ -254,27 +284,15 @@ where // [99999] -> [99] + 1 = [100], a cast to Decimal(2, 0) would not be possible let is_infallible_cast = (input_precision as i8) - delta_scale < (output_precision as i8); - let f = rescale_decimal::( - input_precision, - input_scale, - output_precision, - output_scale, - ); + let f = rescale_decimal::(input_precision, input_scale, output_precision, output_scale); Ok(if is_infallible_cast { - // make sure we don't perform calculations that don't make sense w/o validation - validate_decimal_precision_and_scale::(output_precision, output_scale)?; - let g = |x: I::Native| f(x).unwrap(); // unwrapping is safe since the result is guaranteed - // to fit into the target type - array.unary(g) + // unwrapping is safe since the result is guaranteed to fit into the target type + array.unary(|x| f(x).unwrap()) } else if cast_options.safe { - array.unary_opt(|x| f(x).filter(|v| O::is_valid_decimal_precision(*v, output_precision))) + array.unary_opt(|x| f(x).ok()) } else { - array.try_unary(|x| { - f(x).ok_or_else(|| error(x)).and_then(|v| { - O::validate_decimal_precision(v, output_precision, output_scale).map(|_| v) - }) - })? + array.try_unary(|x| f(x))? }) } @@ -292,7 +310,8 @@ where I::Native: DecimalCast + ArrowNativeTypeOp, O::Native: DecimalCast + ArrowNativeTypeOp, { - let error = cast_decimal_to_decimal_error::(output_precision, output_scale); + // make sure we don't perform calculations that don't make sense w/o validation + validate_decimal_precision_and_scale::(output_precision, output_scale)?; let delta_scale = output_scale - input_scale; // if the gain in precision (digits) is greater than the multiplication due to scaling @@ -302,27 +321,15 @@ where // [xxxxx] -> [xxxxx000], so for the cast to be infallible, the output type // needs to provide at least 8 digits precision let is_infallible_cast = (input_precision as i8) + delta_scale <= (output_precision as i8); - let f = rescale_decimal::( - input_precision, - input_scale, - output_precision, - output_scale, - ); + let f = rescale_decimal::(input_precision, input_scale, output_precision, output_scale); Ok(if is_infallible_cast { - // make sure we don't perform calculations that don't make sense w/o validation - validate_decimal_precision_and_scale::(output_precision, output_scale)?; // unwrapping is safe since the result is guaranteed to fit into the target type - let f = |x: I::Native| f(x).unwrap(); - array.unary(f) + array.unary(|x| f(x).unwrap()) } else if cast_options.safe { - array.unary_opt(|x| f(x).filter(|v| O::is_valid_decimal_precision(*v, output_precision))) + array.unary_opt(|x| f(x).ok()) } else { - array.try_unary(|x| { - f(x).ok_or_else(|| error(x)).and_then(|v| { - O::validate_decimal_precision(v, output_precision, output_scale).map(|_| v) - }) - })? + array.try_unary(|x| f(x))? }) } From 274a02824668dc73e5c0242baec393de9cec12ee Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Wed, 8 Oct 2025 12:27:37 -0400 Subject: [PATCH 14/26] Refactor common logic --- arrow-cast/src/cast/decimal.rs | 68 +++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 26 deletions(-) diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index e2c8e2836f69..318bbf0326b2 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -150,15 +150,14 @@ pub(crate) fn rescale_decimal( where I: DecimalType, O: DecimalType, - I::Native: DecimalCast + ArrowNativeTypeOp, - O::Native: DecimalCast + ArrowNativeTypeOp, + I::Native: DecimalCast, + O::Native: DecimalCast, { let delta_scale = output_scale - input_scale; - let input_precision_i8 = input_precision as i8; - let output_precision_i8 = output_precision as i8; // Determine if the cast is infallible based on precision/scale math - let is_infallible_cast = input_precision_i8 + delta_scale < output_precision_i8; + let is_infallible_cast = + is_infallible_decimal_cast(input_precision, input_scale, output_precision, output_scale); // Build a single mode once and use a thin closure that calls into it enum RescaleMode { @@ -177,7 +176,6 @@ where None => RescaleMode::Invalid, } } else { - // delta_scale < 0 match I::Native::from_decimal(10_i128) .and_then(|t| t.pow_checked(delta_scale.unsigned_abs() as u32).ok()) { @@ -234,6 +232,40 @@ where } } +/// Returns true if casting from (input_precision, input_scale) to +/// (output_precision, output_scale) is infallible based on precision/scale math. +fn is_infallible_decimal_cast( + input_precision: u8, + input_scale: i8, + output_precision: u8, + output_scale: i8, +) -> bool { + let delta_scale = output_scale - input_scale; + let input_precision_i8 = input_precision as i8; + let output_precision_i8 = output_precision as i8; + if delta_scale >= 0 { + // if the gain in precision (digits) is greater than the multiplication due to scaling + // every number will fit into the output type + // Example: If we are starting with any number of precision 5 [xxxxx], + // then an increase of scale by 3 will have the following effect on the representation: + // [xxxxx] -> [xxxxx000], so for the cast to be infallible, the output type + // needs to provide at least 8 digits precision + input_precision_i8 + delta_scale <= output_precision_i8 + } else { + // if the reduction of the input number through scaling (dividing) is greater + // than a possible precision loss (plus potential increase via rounding) + // every input number will fit into the output type + // Example: If we are starting with any number of precision 5 [xxxxx], + // then and decrease the scale by 3 will have the following effect on the representation: + // [xxxxx] -> [xx] (+ 1 possibly, due to rounding). + // The rounding may add an additional digit, so the cast to be infallible, + // the output type needs to have at least 3 digits of precision. + // e.g. Decimal(5, 3) 99.999 to Decimal(3, 0) will result in 100: + // [99999] -> [99] + 1 = [100], a cast to Decimal(2, 0) would not be possible + input_precision_i8 + delta_scale < output_precision_i8 + } +} + pub(crate) fn cast_decimal_to_decimal_error( output_precision: u8, output_scale: i8, @@ -271,18 +303,8 @@ where { // make sure we don't perform calculations that don't make sense w/o validation validate_decimal_precision_and_scale::(output_precision, output_scale)?; - let delta_scale = input_scale - output_scale; - // if the reduction of the input number through scaling (dividing) is greater - // than a possible precision loss (plus potential increase via rounding) - // every input number will fit into the output type - // Example: If we are starting with any number of precision 5 [xxxxx], - // then and decrease the scale by 3 will have the following effect on the representation: - // [xxxxx] -> [xx] (+ 1 possibly, due to rounding). - // The rounding may add an additional digit, so the cast to be infallible, - // the output type needs to have at least 3 digits of precision. - // e.g. Decimal(5, 3) 99.999 to Decimal(3, 0) will result in 100: - // [99999] -> [99] + 1 = [100], a cast to Decimal(2, 0) would not be possible - let is_infallible_cast = (input_precision as i8) - delta_scale < (output_precision as i8); + let is_infallible_cast = + is_infallible_decimal_cast(input_precision, input_scale, output_precision, output_scale); let f = rescale_decimal::(input_precision, input_scale, output_precision, output_scale); @@ -312,15 +334,9 @@ where { // make sure we don't perform calculations that don't make sense w/o validation validate_decimal_precision_and_scale::(output_precision, output_scale)?; - let delta_scale = output_scale - input_scale; - // if the gain in precision (digits) is greater than the multiplication due to scaling - // every number will fit into the output type - // Example: If we are starting with any number of precision 5 [xxxxx], - // then an increase of scale by 3 will have the following effect on the representation: - // [xxxxx] -> [xxxxx000], so for the cast to be infallible, the output type - // needs to provide at least 8 digits precision - let is_infallible_cast = (input_precision as i8) + delta_scale <= (output_precision as i8); + let is_infallible_cast = + is_infallible_decimal_cast(input_precision, input_scale, output_precision, output_scale); let f = rescale_decimal::(input_precision, input_scale, output_precision, output_scale); Ok(if is_infallible_cast { From a7cdd339bc44dc1a3c44fab3b5a4776208778a20 Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Wed, 8 Oct 2025 14:32:16 -0400 Subject: [PATCH 15/26] Use rescale_decimal for variant decimal scaling --- arrow-cast/src/cast/decimal.rs | 10 +- arrow-cast/src/cast/mod.rs | 2 + .../src/type_conversion.rs | 137 ++++++++---------- parquet-variant-compute/src/variant_get.rs | 113 ++++++++++----- .../src/variant_to_arrow.rs | 79 ++-------- 5 files changed, 157 insertions(+), 184 deletions(-) diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index 318bbf0326b2..9a6598aeb1c1 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -19,17 +19,23 @@ use crate::cast::*; /// A utility trait that provides checked conversions between /// decimal types inspired by [`NumCast`] -pub(crate) trait DecimalCast: Sized { +pub trait DecimalCast: Sized { + /// Convert the decimal to an i32 fn to_i32(self) -> Option; + /// Convert the decimal to an i64 fn to_i64(self) -> Option; + /// Convert the decimal to an i128 fn to_i128(self) -> Option; + /// Convert the decimal to an i256 fn to_i256(self) -> Option; + /// Convert a decimal from a decimal fn from_decimal(n: T) -> Option; + /// Convert a decimal from a f64 fn from_f64(n: f64) -> Option; } @@ -141,7 +147,7 @@ impl DecimalCast for i256 { /// Build a rescale function from (input_precision, input_scale) to (output_precision, output_scale) /// returning a closure `Fn(I::Native) -> Option` that performs the conversion. -pub(crate) fn rescale_decimal( +pub fn rescale_decimal( input_precision: u8, input_scale: i8, output_precision: u8, diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index f89b7eab7f41..77f70584cc00 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -67,6 +67,8 @@ use arrow_schema::*; use arrow_select::take::take; use num_traits::{NumCast, ToPrimitive, cast::AsPrimitive}; +pub use decimal::{DecimalCast, rescale_decimal}; + /// CastOptions provides a way to override the default cast behaviors #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct CastOptions<'a> { diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index a4ba88437f68..8581f13f5441 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -17,8 +17,14 @@ //! Module for transforming a typed arrow `Array` to `VariantArray`. -use arrow::datatypes::{self, ArrowPrimitiveType, Decimal32Type, DecimalType, MAX_DECIMAL32_FOR_EACH_PRECISION}; -use parquet_variant::{Variant, VariantDecimal4}; +use arrow::{ + compute::{DecimalCast, rescale_decimal}, + datatypes::{ + self, ArrowPrimitiveType, Decimal32Type, Decimal64Type, Decimal128Type, DecimalType, + }, +}; +use arrow_schema::ArrowError; +use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16}; /// Options for controlling the behavior of `cast_to_variant_with_options`. #[derive(Debug, Clone, PartialEq, Eq)] @@ -61,90 +67,61 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16); impl_primitive_from_variant!(datatypes::Float32Type, as_f32); impl_primitive_from_variant!(datatypes::Float64Type, as_f64); -macro_rules! scale_variant_decimal { - ($variant:expr, $variant_method:ident, $to_int_ty:expr, $output_scale:expr, $precision:expr, $validate:path) => {{ - let variant = $variant.$variant_method()?; - let input_scale = variant.scale() as i8; - let variant = $to_int_ty(variant.integer()); - let ten = $to_int_ty(10); - - let scaled = if input_scale == $output_scale { - Some(variant) - } else if input_scale < $output_scale { - // scale_up means output has more fractional digits than input - // multiply integer by 10^(output_scale - input_scale) - let delta = ($output_scale - input_scale) as u32; - let mul = ten.checked_pow(delta)?; - variant.checked_mul(mul) - } else { - // scale_down means output has fewer fractional digits than input - // divide by 10^(input_scale - output_scale) with rounding - let delta = (input_scale - $output_scale) as u32; - let div = ten.checked_pow(delta)?; - let d = variant.checked_div(div)?; - let r = variant % div; - - // rounding in the same way as convert_to_smaller_scale_decimal in arrow-cast - let half = div.checked_div($to_int_ty(2))?; - let half_neg = half.checked_neg()?; - let adjusted = match variant >= $to_int_ty(0) { - true if r >= half => d.checked_add($to_int_ty(1))?, - false if r <= half_neg => d.checked_sub($to_int_ty(1))?, - _ => d, - }; - Some(adjusted) - }; - - scaled.filter(|v| $validate(*v, $precision)) - }}; -} -pub(crate) use scale_variant_decimal; - -fn variant_to_unscaled_decimal32( - variant: Variant<'_, '_>, +pub(crate) fn variant_to_unscaled_decimal( + variant: &Variant<'_, '_>, precision: u8, scale: i8, -) -> Option { - match variant { - Variant::Int32(i) => scale_variant_decimal_new::(i, VariantDecimal4::MAX_PRECISION, 0, precision, scale), - Variant::Decimal4(d) => scale_variant_decimal_new::(d.integer(), VariantDecimal4::MAX_PRECISION, d.scale() as i8, precision, scale), - _ => None, - } -} - -// fn rescale_variant(integer: i32, input_precision: u8, input_scale: i8, output_precision: u8, output_scale: i8) -> Option { -// let input_precision = input_precision as i8; -// let output_precision = output_precision as i8; -// let mut input_integer_digits = input_precision - input_scale; -// let output_integer_digits = output_precision - output_scale; - -// // -// if input_integer_digits > output_integer_digits { -// if !Decimal32Type::is_valid_decimal_precision(integer, (output_integer_digits + input_scale) as u8) { -// return None; -// } -// input_integer_digits = output_integer_digits; -// } - -// if input_integer_digits == output_integer_digits { -// let rescaled = -// } -// } - - - -fn scale_variant_decimal_new( - integer: I::Native, - input_precision: u8, - input_scale: i8, - output_precision: u8, - output_scale: i8, ) -> Option where - I: DecimalType, O: DecimalType, + O::Native: DecimalCast, { - return None; + let maybe_rescaled = match variant { + Variant::Int8(i) => { + rescale_decimal::(VariantDecimal4::MAX_PRECISION, 0, precision, scale)( + *i as i32, + ) + } + Variant::Int16(i) => { + rescale_decimal::(VariantDecimal4::MAX_PRECISION, 0, precision, scale)( + *i as i32, + ) + } + Variant::Int32(i) => { + rescale_decimal::(VariantDecimal4::MAX_PRECISION, 0, precision, scale)( + *i, + ) + } + Variant::Int64(i) => { + rescale_decimal::(VariantDecimal8::MAX_PRECISION, 0, precision, scale)( + *i, + ) + } + Variant::Decimal4(d) => rescale_decimal::( + VariantDecimal4::MAX_PRECISION, + d.scale() as i8, + precision, + scale, + )(d.integer()), + Variant::Decimal8(d) => rescale_decimal::( + VariantDecimal8::MAX_PRECISION, + d.scale() as i8, + precision, + scale, + )(d.integer()), + Variant::Decimal16(d) => rescale_decimal::( + VariantDecimal16::MAX_PRECISION, + d.scale() as i8, + precision, + scale, + )(d.integer()), + _ => Err(ArrowError::InvalidArgumentError(format!( + "Invalid variant type: {:?}", + variant + ))), + }; + + maybe_rescaled.ok() } /// Convert the value at a specific index in the given array into a `Variant`. diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 190242d331c1..0fdc19ffbd54 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -2686,14 +2686,16 @@ mod test { } #[test] - fn get_decimal32_unshredded_var_scales_to_scale2() { + fn get_decimal32_rescaled_to_scale2() { // Build unshredded variant values with different scales let mut builder = crate::VariantArrayBuilder::new(4); builder.append_variant(Variant::from(VariantDecimal4::try_new(1234, 2).unwrap())); // 12.34 builder.append_variant(Variant::from(VariantDecimal4::try_new(1234, 3).unwrap())); // 1.234 builder.append_variant(Variant::from(VariantDecimal4::try_new(1234, 0).unwrap())); // 1234 builder.append_null(); - builder.append_variant(Variant::from(VariantDecimal8::try_new((VariantDecimal4::MAX_UNSCALED_VALUE as i64)+1, 3).unwrap())); // should fit into Decimal32 + builder.append_variant(Variant::from( + VariantDecimal8::try_new((VariantDecimal4::MAX_UNSCALED_VALUE as i64) + 1, 3).unwrap(), + )); // should fit into Decimal32 let variant_array: ArrayRef = ArrayRef::from(builder.build()); let field = Field::new("result", DataType::Decimal32(9, 2), true); @@ -2707,19 +2709,22 @@ mod test { assert_eq!(result.value(1), 123); assert_eq!(result.value(2), 123400); assert!(result.is_null(3)); - assert!(result.is_null(4)); // should not be null as the final result fits into Decimal32 + assert_eq!( + result.value(4), + (VariantDecimal4::MAX_UNSCALED_VALUE as i32) / 10 + 1 + ); // should not be null as the final result fits into Decimal32 } #[test] - fn get_decimal32_unshredded_scale_down_rounding() { + fn get_decimal32_scale_down_rounding() { let mut builder = crate::VariantArrayBuilder::new(2); builder.append_variant(Variant::from(VariantDecimal4::try_new(1235, 0).unwrap())); builder.append_variant(Variant::from(VariantDecimal4::try_new(1245, 0).unwrap())); builder.append_variant(Variant::from(VariantDecimal4::try_new(-1235, 0).unwrap())); builder.append_variant(Variant::from(VariantDecimal4::try_new(-1245, 0).unwrap())); - builder.append_variant(Variant::from(VariantDecimal4::try_new(1235, 2).unwrap())); - builder.append_variant(Variant::from(VariantDecimal4::try_new(1235, 3).unwrap())); - builder.append_variant(Variant::from(VariantDecimal4::try_new(5235, 3).unwrap())); + builder.append_variant(Variant::from(VariantDecimal4::try_new(1235, 2).unwrap())); // 12.35 rounded down to 10 for scale -1 + builder.append_variant(Variant::from(VariantDecimal4::try_new(1235, 3).unwrap())); // 1.235 rounded down to 0 for scale -1 + builder.append_variant(Variant::from(VariantDecimal4::try_new(5235, 3).unwrap())); // 5.235 rounded up to 10 for scale -1 let variant_array: ArrayRef = ArrayRef::from(builder.build()); let field = Field::new("result", DataType::Decimal32(9, -1), true); @@ -2740,7 +2745,7 @@ mod test { #[test] fn get_decimal32_precision_overflow_safe() { - // Exceed Decimal32 max precision (9) after scaling + // Exceed Decimal32 after scaling and rounding let mut builder = crate::VariantArrayBuilder::new(1); builder.append_variant(Variant::from( VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE as i32, 0).unwrap(), @@ -2756,7 +2761,7 @@ mod test { let result = result.as_any().downcast_ref::().unwrap(); assert!(result.is_null(0)); - assert_eq!(result.value(1), 0); // should overflow because 1.00 does not fit into precision (2) + assert!(result.is_null(1)); // should overflow because 1.00 does not fit into precision (2) } #[test] @@ -2785,12 +2790,16 @@ mod test { } #[test] - fn get_decimal64_unshredded_var_scales_to_scale2() { + fn get_decimal64_rescaled_to_scale2() { let mut builder = crate::VariantArrayBuilder::new(4); builder.append_variant(Variant::from(VariantDecimal8::try_new(1234, 2).unwrap())); // 12.34 builder.append_variant(Variant::from(VariantDecimal8::try_new(1234, 3).unwrap())); // 1.234 builder.append_variant(Variant::from(VariantDecimal8::try_new(1234, 0).unwrap())); // 1234 builder.append_null(); + builder.append_variant(Variant::from( + VariantDecimal16::try_new((VariantDecimal8::MAX_UNSCALED_VALUE as i128) + 1, 3) + .unwrap(), + )); // should fit into Decimal64 let variant_array: ArrayRef = ArrayRef::from(builder.build()); let field = Field::new("result", DataType::Decimal64(18, 2), true); @@ -2804,45 +2813,59 @@ mod test { assert_eq!(result.value(1), 123); assert_eq!(result.value(2), 123400); assert!(result.is_null(3)); + assert_eq!( + result.value(4), + (VariantDecimal8::MAX_UNSCALED_VALUE as i64) / 10 + 1 + ); // should not be null as the final result fits into Decimal64 } #[test] - fn get_decimal64_unshredded_scale_down_rounding() { + fn get_decimal64_scale_down_rounding() { let mut builder = crate::VariantArrayBuilder::new(2); - builder.append_variant(Variant::from(VariantDecimal8::try_new(1235, 3).unwrap())); - builder.append_variant(Variant::from(VariantDecimal8::try_new(1245, 3).unwrap())); - builder.append_variant(Variant::from(VariantDecimal8::try_new(-1235, 3).unwrap())); - builder.append_variant(Variant::from(VariantDecimal8::try_new(-1245, 3).unwrap())); + builder.append_variant(Variant::from(VariantDecimal8::try_new(1235, 0).unwrap())); + builder.append_variant(Variant::from(VariantDecimal8::try_new(1245, 0).unwrap())); + builder.append_variant(Variant::from(VariantDecimal8::try_new(-1235, 0).unwrap())); + builder.append_variant(Variant::from(VariantDecimal8::try_new(-1245, 0).unwrap())); + builder.append_variant(Variant::from(VariantDecimal8::try_new(1235, 2).unwrap())); // 12.35 rounded down to 10 for scale -1 + builder.append_variant(Variant::from(VariantDecimal8::try_new(1235, 3).unwrap())); // 1.235 rounded down to 0 for scale -1 + builder.append_variant(Variant::from(VariantDecimal8::try_new(5235, 3).unwrap())); // 5.235 rounded up to 10 for scale -1 let variant_array: ArrayRef = ArrayRef::from(builder.build()); - let field = Field::new("result", DataType::Decimal64(18, 2), true); + let field = Field::new("result", DataType::Decimal64(18, -1), true); let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); let result = variant_get(&variant_array, options).unwrap(); let result = result.as_any().downcast_ref::().unwrap(); assert_eq!(result.precision(), 18); - assert_eq!(result.scale(), 2); + assert_eq!(result.scale(), -1); assert_eq!(result.value(0), 124); assert_eq!(result.value(1), 125); assert_eq!(result.value(2), -124); assert_eq!(result.value(3), -125); + assert_eq!(result.value(4), 1); + assert_eq!(result.value(5), 0); + assert_eq!(result.value(6), 1); } #[test] fn get_decimal64_precision_overflow_safe() { - // Exceed Decimal64 max precision (18) after scaling + // Exceed Decimal64 after scaling and rounding let mut builder = crate::VariantArrayBuilder::new(1); builder.append_variant(Variant::from( VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE as i64, 0).unwrap(), )); + builder.append_variant(Variant::from( + VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE as i64, 18).unwrap(), + )); // integer value round up overflows let variant_array: ArrayRef = ArrayRef::from(builder.build()); - let field = Field::new("result", DataType::Decimal64(18, 2), true); + let field = Field::new("result", DataType::Decimal64(2, 2), true); let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); let result = variant_get(&variant_array, options).unwrap(); let result = result.as_any().downcast_ref::().unwrap(); assert!(result.is_null(0)); + assert!(result.is_null(1)); } #[test] @@ -2871,7 +2894,7 @@ mod test { } #[test] - fn get_decimal128_unshredded_var_scales_to_scale2() { + fn get_decimal128_rescaled_to_scale2() { let mut builder = crate::VariantArrayBuilder::new(4); builder.append_variant(Variant::from(VariantDecimal16::try_new(1234, 2).unwrap())); builder.append_variant(Variant::from(VariantDecimal16::try_new(1234, 3).unwrap())); @@ -2893,42 +2916,52 @@ mod test { } #[test] - fn get_decimal128_unshredded_scale_down_rounding() { + fn get_decimal128_scale_down_rounding() { let mut builder = crate::VariantArrayBuilder::new(2); - builder.append_variant(Variant::from(VariantDecimal16::try_new(1235, 3).unwrap())); - builder.append_variant(Variant::from(VariantDecimal16::try_new(1245, 3).unwrap())); - builder.append_variant(Variant::from(VariantDecimal16::try_new(-1235, 3).unwrap())); - builder.append_variant(Variant::from(VariantDecimal16::try_new(-1245, 3).unwrap())); + builder.append_variant(Variant::from(VariantDecimal16::try_new(1235, 0).unwrap())); + builder.append_variant(Variant::from(VariantDecimal16::try_new(1245, 0).unwrap())); + builder.append_variant(Variant::from(VariantDecimal16::try_new(-1235, 0).unwrap())); + builder.append_variant(Variant::from(VariantDecimal16::try_new(-1245, 0).unwrap())); + builder.append_variant(Variant::from(VariantDecimal16::try_new(1235, 2).unwrap())); // 12.35 rounded down to 10 for scale -1 + builder.append_variant(Variant::from(VariantDecimal16::try_new(1235, 3).unwrap())); // 1.235 rounded down to 0 for scale -1 + builder.append_variant(Variant::from(VariantDecimal16::try_new(5235, 3).unwrap())); // 5.235 rounded up to 10 for scale -1 let variant_array: ArrayRef = ArrayRef::from(builder.build()); - let field = Field::new("result", DataType::Decimal128(38, 2), true); + let field = Field::new("result", DataType::Decimal128(38, -1), true); let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); let result = variant_get(&variant_array, options).unwrap(); let result = result.as_any().downcast_ref::().unwrap(); assert_eq!(result.precision(), 38); - assert_eq!(result.scale(), 2); + assert_eq!(result.scale(), -1); assert_eq!(result.value(0), 124); assert_eq!(result.value(1), 125); assert_eq!(result.value(2), -124); assert_eq!(result.value(3), -125); + assert_eq!(result.value(4), 1); + assert_eq!(result.value(5), 0); + assert_eq!(result.value(6), 1); } #[test] fn get_decimal128_precision_overflow_safe() { - // Exceed Decimal128 max precision (38) after scaling + // Exceed Decimal128 after scaling and rounding let mut builder = crate::VariantArrayBuilder::new(1); builder.append_variant(Variant::from( VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE as i128, 0).unwrap(), )); + builder.append_variant(Variant::from( + VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE as i128, 38).unwrap(), + )); // integer value round up overflows let variant_array: ArrayRef = ArrayRef::from(builder.build()); - let field = Field::new("result", DataType::Decimal128(38, 2), true); + let field = Field::new("result", DataType::Decimal128(2, 2), true); let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); let result = variant_get(&variant_array, options).unwrap(); let result = result.as_any().downcast_ref::().unwrap(); assert!(result.is_null(0)); + assert!(result.is_null(1)); // should overflow because 1.00 does not fit into precision (2) } #[test] @@ -2955,7 +2988,7 @@ mod test { } #[test] - fn get_decimal256_unshredded_var_scales_to_scale2_from_decimal16() { + fn get_decimal256_rescaled_to_scale2() { // Build unshredded variant values with different scales using Decimal16 source let mut builder = crate::VariantArrayBuilder::new(4); builder.append_variant(Variant::from(VariantDecimal16::try_new(1234, 2).unwrap())); // 12.34 @@ -2978,25 +3011,31 @@ mod test { } #[test] - fn get_decimal256_unshredded_scale_down_rounding() { + fn get_decimal256_scale_down_rounding() { let mut builder = crate::VariantArrayBuilder::new(2); - builder.append_variant(Variant::from(VariantDecimal16::try_new(1235, 3).unwrap())); - builder.append_variant(Variant::from(VariantDecimal16::try_new(1245, 3).unwrap())); - builder.append_variant(Variant::from(VariantDecimal16::try_new(-1235, 3).unwrap())); - builder.append_variant(Variant::from(VariantDecimal16::try_new(-1245, 3).unwrap())); + builder.append_variant(Variant::from(VariantDecimal16::try_new(1235, 0).unwrap())); + builder.append_variant(Variant::from(VariantDecimal16::try_new(1245, 0).unwrap())); + builder.append_variant(Variant::from(VariantDecimal16::try_new(-1235, 0).unwrap())); + builder.append_variant(Variant::from(VariantDecimal16::try_new(-1245, 0).unwrap())); + builder.append_variant(Variant::from(VariantDecimal16::try_new(1235, 2).unwrap())); // 12.35 rounded down to 10 for scale -1 + builder.append_variant(Variant::from(VariantDecimal16::try_new(1235, 3).unwrap())); // 1.235 rounded down to 0 for scale -1 + builder.append_variant(Variant::from(VariantDecimal16::try_new(5235, 3).unwrap())); // 5.235 rounded up to 10 for scale -1 let variant_array: ArrayRef = ArrayRef::from(builder.build()); - let field = Field::new("result", DataType::Decimal256(76, 2), true); + let field = Field::new("result", DataType::Decimal256(76, -1), true); let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); let result = variant_get(&variant_array, options).unwrap(); let result = result.as_any().downcast_ref::().unwrap(); assert_eq!(result.precision(), 76); - assert_eq!(result.scale(), 2); + assert_eq!(result.scale(), -1); assert_eq!(result.value(0), i256::from_i128(124)); assert_eq!(result.value(1), i256::from_i128(125)); assert_eq!(result.value(2), i256::from_i128(-124)); assert_eq!(result.value(3), i256::from_i128(-125)); + assert_eq!(result.value(4), i256::from_i128(1)); + assert_eq!(result.value(5), i256::from_i128(0)); + assert_eq!(result.value(6), i256::from_i128(1)); } #[test] diff --git a/parquet-variant-compute/src/variant_to_arrow.rs b/parquet-variant-compute/src/variant_to_arrow.rs index b8f8f866e6d9..93056a330b05 100644 --- a/parquet-variant-compute/src/variant_to_arrow.rs +++ b/parquet-variant-compute/src/variant_to_arrow.rs @@ -16,16 +16,12 @@ // under the License. use arrow::array::{ArrayRef, BinaryViewArray, NullBufferBuilder, PrimitiveBuilder}; -use arrow::compute::CastOptions; -use arrow::datatypes::{ - self, ArrowPrimitiveType, DataType, i256, is_validate_decimal_precision, - is_validate_decimal32_precision, is_validate_decimal64_precision, - is_validate_decimal256_precision, -}; +use arrow::compute::{CastOptions, DecimalCast}; +use arrow::datatypes::{self, ArrowPrimitiveType, DataType, DecimalType}; use arrow::error::{ArrowError, Result}; use parquet_variant::{Variant, VariantPath}; -use crate::type_conversion::{PrimitiveFromVariant, scale_variant_decimal}; +use crate::type_conversion::{PrimitiveFromVariant, variant_to_unscaled_decimal}; use crate::{VariantArray, VariantValueArrayBuilder}; use std::sync::Arc; @@ -375,70 +371,23 @@ impl<'a, T: PrimitiveFromVariant> VariantToPrimitiveArrowRowBuilder<'a, T> { } } -// Minimal per-decimal hook: just wraps scale_variant_decimal! with correct parameters -pub(crate) trait RescaleVariantDecimal: datatypes::DecimalType { - fn rescale_variant_decimal( - value: &Variant<'_, '_>, - scale: i8, - precision: u8, - ) -> Option<::Native>; -} - -macro_rules! impl_rescale_variant_decimal { - ($t:ty, $variant_method:ident, $to_native:expr, $validate:path) => { - impl RescaleVariantDecimal for $t { - fn rescale_variant_decimal( - value: &Variant<'_, '_>, - scale: i8, - precision: u8, - ) -> Option<::Native> { - scale_variant_decimal!( - value, - $variant_method, - $to_native, - scale, - precision, - $validate - ) - } - } - }; -} - -impl_rescale_variant_decimal!( - datatypes::Decimal32Type, - as_decimal4, - i32::from, - is_validate_decimal32_precision -); -impl_rescale_variant_decimal!( - datatypes::Decimal64Type, - as_decimal8, - i64::from, - is_validate_decimal64_precision -); -impl_rescale_variant_decimal!( - datatypes::Decimal128Type, - as_decimal16, - i128::from, - is_validate_decimal_precision -); -impl_rescale_variant_decimal!( - datatypes::Decimal256Type, - as_decimal16, - i256::from_i128, - is_validate_decimal256_precision -); - /// Builder for converting variant values to arrow Decimal values -pub(crate) struct VariantToDecimalArrowRowBuilder<'a, T: RescaleVariantDecimal> { +pub(crate) struct VariantToDecimalArrowRowBuilder<'a, T> +where + T: DecimalType, + T::Native: DecimalCast, +{ builder: PrimitiveBuilder, cast_options: &'a CastOptions<'a>, precision: u8, scale: i8, } -impl<'a, T: RescaleVariantDecimal> VariantToDecimalArrowRowBuilder<'a, T> { +impl<'a, T> VariantToDecimalArrowRowBuilder<'a, T> +where + T: DecimalType, + T::Native: DecimalCast, +{ fn new( cast_options: &'a CastOptions<'a>, capacity: usize, @@ -461,7 +410,7 @@ impl<'a, T: RescaleVariantDecimal> VariantToDecimalArrowRowBuilder<'a, T> { } fn append_value(&mut self, value: &Variant<'_, '_>) -> Result { - if let Some(scaled) = T::rescale_variant_decimal(value, self.scale, self.precision) { + if let Some(scaled) = variant_to_unscaled_decimal::(value, self.precision, self.scale) { self.builder.append_value(scaled); Ok(true) } else if self.cast_options.safe { From 94d60c050877b98e2f09e0da35d33d1cbed9e5e7 Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Wed, 8 Oct 2025 15:40:06 -0400 Subject: [PATCH 16/26] Fix clippy --- arrow-cast/src/cast/decimal.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index 9a6598aeb1c1..b2c744dbec98 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -320,7 +320,7 @@ where } else if cast_options.safe { array.unary_opt(|x| f(x).ok()) } else { - array.try_unary(|x| f(x))? + array.try_unary(f)? }) } @@ -351,7 +351,7 @@ where } else if cast_options.safe { array.unary_opt(|x| f(x).ok()) } else { - array.try_unary(|x| f(x))? + array.try_unary(f)? }) } From e1febf6353e4d6956b3e0428b8c61cf4e211d245 Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Thu, 9 Oct 2025 18:59:33 -0400 Subject: [PATCH 17/26] Address comments --- arrow-cast/src/cast/decimal.rs | 10 +++++----- parquet-variant-compute/src/type_conversion.rs | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index b2c744dbec98..fab9544e665d 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -247,8 +247,8 @@ fn is_infallible_decimal_cast( output_scale: i8, ) -> bool { let delta_scale = output_scale - input_scale; - let input_precision_i8 = input_precision as i8; - let output_precision_i8 = output_precision as i8; + let input_precision = input_precision as i8; + let output_precision = output_precision as i8; if delta_scale >= 0 { // if the gain in precision (digits) is greater than the multiplication due to scaling // every number will fit into the output type @@ -256,7 +256,7 @@ fn is_infallible_decimal_cast( // then an increase of scale by 3 will have the following effect on the representation: // [xxxxx] -> [xxxxx000], so for the cast to be infallible, the output type // needs to provide at least 8 digits precision - input_precision_i8 + delta_scale <= output_precision_i8 + input_precision + delta_scale <= output_precision } else { // if the reduction of the input number through scaling (dividing) is greater // than a possible precision loss (plus potential increase via rounding) @@ -264,11 +264,11 @@ fn is_infallible_decimal_cast( // Example: If we are starting with any number of precision 5 [xxxxx], // then and decrease the scale by 3 will have the following effect on the representation: // [xxxxx] -> [xx] (+ 1 possibly, due to rounding). - // The rounding may add an additional digit, so the cast to be infallible, + // The rounding may add an additional digit, so for the cast to be infallible, // the output type needs to have at least 3 digits of precision. // e.g. Decimal(5, 3) 99.999 to Decimal(3, 0) will result in 100: // [99999] -> [99] + 1 = [100], a cast to Decimal(2, 0) would not be possible - input_precision_i8 + delta_scale < output_precision_i8 + input_precision + delta_scale < output_precision } } diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index db95f52d9727..b50ec084fe8c 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -20,8 +20,8 @@ use arrow::{ compute::{DecimalCast, rescale_decimal}, datatypes::{ - self, ArrowPrimitiveType, ArrowTimestampType, Date32Type, Decimal32Type, Decimal64Type, - Decimal128Type, DecimalType, + self, ArrowPrimitiveType, ArrowTimestampType, Decimal32Type, Decimal64Type, Decimal128Type, + DecimalType, }, }; use arrow_schema::ArrowError; @@ -89,7 +89,7 @@ impl_primitive_from_variant!(datatypes::Float64Type, as_f64); impl_primitive_from_variant!( datatypes::Date32Type, as_naive_date, - Date32Type::from_naive_date + datatypes::Date32Type::from_naive_date ); impl_timestamp_from_variant!( datatypes::TimestampMicrosecondType, From 51648fdc821fb604f0ce19cd1ea0fd5dd4b5e7cf Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Thu, 9 Oct 2025 20:10:09 -0400 Subject: [PATCH 18/26] Move rescale_decimal into variant-compute --- .../src/type_conversion.rs | 243 +++++++++++++++++- .../src/variant_to_arrow.rs | 6 +- 2 files changed, 232 insertions(+), 17 deletions(-) diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index b50ec084fe8c..5435cfefcf4f 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -17,14 +17,11 @@ //! Module for transforming a typed arrow `Array` to `VariantArray`. -use arrow::{ - compute::{DecimalCast, rescale_decimal}, - datatypes::{ - self, ArrowPrimitiveType, ArrowTimestampType, Decimal32Type, Decimal64Type, Decimal128Type, - DecimalType, - }, +use arrow::array::ArrowNativeTypeOp; +use arrow::datatypes::{ + self, ArrowNativeType, ArrowPrimitiveType, ArrowTimestampType, Decimal32Type, Decimal64Type, + Decimal128Type, DecimalType, i256, }; -use arrow_schema::ArrowError; use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16}; /// Options for controlling the behavior of `cast_to_variant_with_options`. @@ -116,6 +113,108 @@ impl_timestamp_from_variant!( |timestamp| Self::make_value(timestamp.naive_utc()) ); +/// A utility trait that provides checked conversions between +/// decimal types inspired by [`NumCast`] +pub(crate) trait DecimalCast: Sized { + fn to_i32(self) -> Option; + + fn to_i64(self) -> Option; + + fn to_i128(self) -> Option; + + fn to_i256(self) -> Option; + + fn from_decimal(n: T) -> Option; +} + +impl DecimalCast for i32 { + fn to_i32(self) -> Option { + Some(self) + } + + fn to_i64(self) -> Option { + Some(self as i64) + } + + fn to_i128(self) -> Option { + Some(self as i128) + } + + fn to_i256(self) -> Option { + Some(i256::from_i128(self as i128)) + } + + fn from_decimal(n: T) -> Option { + n.to_i32() + } +} + +impl DecimalCast for i64 { + fn to_i32(self) -> Option { + i32::try_from(self).ok() + } + + fn to_i64(self) -> Option { + Some(self) + } + + fn to_i128(self) -> Option { + Some(self as i128) + } + + fn to_i256(self) -> Option { + Some(i256::from_i128(self as i128)) + } + + fn from_decimal(n: T) -> Option { + n.to_i64() + } +} + +impl DecimalCast for i128 { + fn to_i32(self) -> Option { + i32::try_from(self).ok() + } + + fn to_i64(self) -> Option { + i64::try_from(self).ok() + } + + fn to_i128(self) -> Option { + Some(self) + } + + fn to_i256(self) -> Option { + Some(i256::from_i128(self)) + } + + fn from_decimal(n: T) -> Option { + n.to_i128() + } +} + +impl DecimalCast for i256 { + fn to_i32(self) -> Option { + self.to_i128().map(|x| i32::try_from(x).ok())? + } + + fn to_i64(self) -> Option { + self.to_i128().map(|x| i64::try_from(x).ok())? + } + + fn to_i128(self) -> Option { + self.to_i128() + } + + fn to_i256(self) -> Option { + Some(self) + } + + fn from_decimal(n: T) -> Option { + n.to_i256() + } +} + pub(crate) fn variant_to_unscaled_decimal( variant: &Variant<'_, '_>, precision: u8, @@ -125,7 +224,7 @@ where O: DecimalType, O::Native: DecimalCast, { - let maybe_rescaled = match variant { + match variant { Variant::Int8(i) => { rescale_decimal::(VariantDecimal4::MAX_PRECISION, 0, precision, scale)( *i as i32, @@ -164,13 +263,131 @@ where precision, scale, )(d.integer()), - _ => Err(ArrowError::InvalidArgumentError(format!( - "Invalid variant type: {:?}", - variant - ))), + _ => None, + } +} + +/// Build a rescale function from (input_precision, input_scale) to (output_precision, output_scale) +/// returning a closure `Fn(I::Native) -> Option` that performs the conversion. +pub(crate) fn rescale_decimal( + input_precision: u8, + input_scale: i8, + output_precision: u8, + output_scale: i8, +) -> impl Fn(I::Native) -> Option +where + I: DecimalType, + O: DecimalType, + I::Native: DecimalCast, + O::Native: DecimalCast, +{ + let delta_scale = output_scale - input_scale; + + // Determine if the cast is infallible based on precision/scale math + let is_infallible_cast = + is_infallible_decimal_cast(input_precision, input_scale, output_precision, output_scale); + + // Build a single mode once and use a thin closure that calls into it + enum RescaleMode { + SameScale, + Up { mul: O }, + Down { div: I, half: I, half_neg: I }, + Invalid, + } + + let mode = if delta_scale == 0 { + RescaleMode::SameScale + } else if delta_scale > 0 { + match O::Native::from_decimal(10_i128).and_then(|t| t.pow_checked(delta_scale as u32).ok()) + { + Some(mul) => RescaleMode::Up { mul }, + None => RescaleMode::Invalid, + } + } else { + match I::Native::from_decimal(10_i128) + .and_then(|t| t.pow_checked(delta_scale.unsigned_abs() as u32).ok()) + { + Some(div) => { + let half = div.div_wrapping(I::Native::from_usize(2).unwrap()); + let half_neg = half.neg_wrapping(); + RescaleMode::Down { + div, + half, + half_neg, + } + } + None => RescaleMode::Invalid, + } }; - maybe_rescaled.ok() + let f = move |x: I::Native| { + match &mode { + RescaleMode::SameScale => O::Native::from_decimal(x), + RescaleMode::Up { mul } => { + O::Native::from_decimal(x).and_then(|x| x.mul_checked(*mul).ok()) + } + RescaleMode::Down { + div, + half, + half_neg, + } => { + // div is >= 10 and so this cannot overflow + let d = x.div_wrapping(*div); + let r = x.mod_wrapping(*div); + + // Round result + let adjusted = match x >= I::Native::ZERO { + true if r >= *half => d.add_wrapping(I::Native::ONE), + false if r <= *half_neg => d.sub_wrapping(I::Native::ONE), + _ => d, + }; + O::Native::from_decimal(adjusted) + } + RescaleMode::Invalid => None, + } + }; + + move |x| { + if is_infallible_cast { + f(x) + } else { + f(x).filter(|v| O::is_valid_decimal_precision(*v, output_precision)) + } + } +} + +/// Returns true if casting from (input_precision, input_scale) to +/// (output_precision, output_scale) is infallible based on precision/scale math. +fn is_infallible_decimal_cast( + input_precision: u8, + input_scale: i8, + output_precision: u8, + output_scale: i8, +) -> bool { + let delta_scale = output_scale - input_scale; + let input_precision = input_precision as i8; + let output_precision = output_precision as i8; + if delta_scale >= 0 { + // if the gain in precision (digits) is greater than the multiplication due to scaling + // every number will fit into the output type + // Example: If we are starting with any number of precision 5 [xxxxx], + // then an increase of scale by 3 will have the following effect on the representation: + // [xxxxx] -> [xxxxx000], so for the cast to be infallible, the output type + // needs to provide at least 8 digits precision + input_precision + delta_scale <= output_precision + } else { + // if the reduction of the input number through scaling (dividing) is greater + // than a possible precision loss (plus potential increase via rounding) + // every input number will fit into the output type + // Example: If we are starting with any number of precision 5 [xxxxx], + // then and decrease the scale by 3 will have the following effect on the representation: + // [xxxxx] -> [xx] (+ 1 possibly, due to rounding). + // The rounding may add an additional digit, so for the cast to be infallible, + // the output type needs to have at least 3 digits of precision. + // e.g. Decimal(5, 3) 99.999 to Decimal(3, 0) will result in 100: + // [99999] -> [99] + 1 = [100], a cast to Decimal(2, 0) would not be possible + input_precision + delta_scale < output_precision + } } /// Convert the value at a specific index in the given array into a `Variant`. diff --git a/parquet-variant-compute/src/variant_to_arrow.rs b/parquet-variant-compute/src/variant_to_arrow.rs index f2ac4a722fff..2b0effd9a9d0 100644 --- a/parquet-variant-compute/src/variant_to_arrow.rs +++ b/parquet-variant-compute/src/variant_to_arrow.rs @@ -18,14 +18,12 @@ use arrow::array::{ ArrayRef, BinaryViewArray, BooleanBuilder, NullBufferBuilder, PrimitiveBuilder, }; -use arrow::compute::{CastOptions, DecimalCast}; +use arrow::compute::CastOptions; use arrow::datatypes::{self, ArrowPrimitiveType, DataType, DecimalType}; use arrow::error::{ArrowError, Result}; use parquet_variant::{Variant, VariantPath}; -use crate::type_conversion::{ - PrimitiveFromVariant, TimestampFromVariant, variant_to_unscaled_decimal, -}; +use crate::type_conversion::{PrimitiveFromVariant, TimestampFromVariant, variant_to_unscaled_decimal, DecimalCast}; use crate::{VariantArray, VariantValueArrayBuilder}; use arrow_schema::TimeUnit; From a48bbf41c9ce72595a22e5d152c51e5374818ef9 Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Thu, 9 Oct 2025 20:11:16 -0400 Subject: [PATCH 19/26] Revert changes in arrow-cast --- arrow-cast/src/cast/decimal.rs | 221 ++++++++++----------------------- arrow-cast/src/cast/mod.rs | 2 - 2 files changed, 69 insertions(+), 154 deletions(-) diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index fab9544e665d..f7235d17f3a9 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -19,23 +19,17 @@ use crate::cast::*; /// A utility trait that provides checked conversions between /// decimal types inspired by [`NumCast`] -pub trait DecimalCast: Sized { - /// Convert the decimal to an i32 +pub(crate) trait DecimalCast: Sized { fn to_i32(self) -> Option; - /// Convert the decimal to an i64 fn to_i64(self) -> Option; - /// Convert the decimal to an i128 fn to_i128(self) -> Option; - /// Convert the decimal to an i256 fn to_i256(self) -> Option; - /// Convert a decimal from a decimal fn from_decimal(n: T) -> Option; - /// Convert a decimal from a f64 fn from_f64(n: f64) -> Option; } @@ -145,133 +139,6 @@ impl DecimalCast for i256 { } } -/// Build a rescale function from (input_precision, input_scale) to (output_precision, output_scale) -/// returning a closure `Fn(I::Native) -> Option` that performs the conversion. -pub fn rescale_decimal( - input_precision: u8, - input_scale: i8, - output_precision: u8, - output_scale: i8, -) -> impl Fn(I::Native) -> Result -where - I: DecimalType, - O: DecimalType, - I::Native: DecimalCast, - O::Native: DecimalCast, -{ - let delta_scale = output_scale - input_scale; - - // Determine if the cast is infallible based on precision/scale math - let is_infallible_cast = - is_infallible_decimal_cast(input_precision, input_scale, output_precision, output_scale); - - // Build a single mode once and use a thin closure that calls into it - enum RescaleMode { - SameScale, - Up { mul: O }, - Down { div: I, half: I, half_neg: I }, - Invalid, - } - - let mode = if delta_scale == 0 { - RescaleMode::SameScale - } else if delta_scale > 0 { - match O::Native::from_decimal(10_i128).and_then(|t| t.pow_checked(delta_scale as u32).ok()) - { - Some(mul) => RescaleMode::Up { mul }, - None => RescaleMode::Invalid, - } - } else { - match I::Native::from_decimal(10_i128) - .and_then(|t| t.pow_checked(delta_scale.unsigned_abs() as u32).ok()) - { - Some(div) => { - let half = div.div_wrapping(I::Native::from_usize(2).unwrap()); - let half_neg = half.neg_wrapping(); - RescaleMode::Down { - div, - half, - half_neg, - } - } - None => RescaleMode::Invalid, - } - }; - - let f = move |x: I::Native| { - match &mode { - RescaleMode::SameScale => O::Native::from_decimal(x), - RescaleMode::Up { mul } => { - O::Native::from_decimal(x).and_then(|x| x.mul_checked(*mul).ok()) - } - RescaleMode::Down { - div, - half, - half_neg, - } => { - // div is >= 10 and so this cannot overflow - let d = x.div_wrapping(*div); - let r = x.mod_wrapping(*div); - - // Round result - let adjusted = match x >= I::Native::ZERO { - true if r >= *half => d.add_wrapping(I::Native::ONE), - false if r <= *half_neg => d.sub_wrapping(I::Native::ONE), - _ => d, - }; - O::Native::from_decimal(adjusted) - } - RescaleMode::Invalid => None, - } - }; - - let error = cast_decimal_to_decimal_error::(output_precision, output_scale); - - move |x| { - if is_infallible_cast { - f(x).ok_or_else(|| error(x)) - } else { - f(x).ok_or_else(|| error(x)).and_then(|v| { - O::validate_decimal_precision(v, output_precision, output_scale).map(|_| v) - }) - } - } -} - -/// Returns true if casting from (input_precision, input_scale) to -/// (output_precision, output_scale) is infallible based on precision/scale math. -fn is_infallible_decimal_cast( - input_precision: u8, - input_scale: i8, - output_precision: u8, - output_scale: i8, -) -> bool { - let delta_scale = output_scale - input_scale; - let input_precision = input_precision as i8; - let output_precision = output_precision as i8; - if delta_scale >= 0 { - // if the gain in precision (digits) is greater than the multiplication due to scaling - // every number will fit into the output type - // Example: If we are starting with any number of precision 5 [xxxxx], - // then an increase of scale by 3 will have the following effect on the representation: - // [xxxxx] -> [xxxxx000], so for the cast to be infallible, the output type - // needs to provide at least 8 digits precision - input_precision + delta_scale <= output_precision - } else { - // if the reduction of the input number through scaling (dividing) is greater - // than a possible precision loss (plus potential increase via rounding) - // every input number will fit into the output type - // Example: If we are starting with any number of precision 5 [xxxxx], - // then and decrease the scale by 3 will have the following effect on the representation: - // [xxxxx] -> [xx] (+ 1 possibly, due to rounding). - // The rounding may add an additional digit, so for the cast to be infallible, - // the output type needs to have at least 3 digits of precision. - // e.g. Decimal(5, 3) 99.999 to Decimal(3, 0) will result in 100: - // [99999] -> [99] + 1 = [100], a cast to Decimal(2, 0) would not be possible - input_precision + delta_scale < output_precision - } -} - pub(crate) fn cast_decimal_to_decimal_error( output_precision: u8, output_scale: i8, @@ -307,20 +174,55 @@ where I::Native: DecimalCast + ArrowNativeTypeOp, O::Native: DecimalCast + ArrowNativeTypeOp, { - // make sure we don't perform calculations that don't make sense w/o validation - validate_decimal_precision_and_scale::(output_precision, output_scale)?; - let is_infallible_cast = - is_infallible_decimal_cast(input_precision, input_scale, output_precision, output_scale); + let error = cast_decimal_to_decimal_error::(output_precision, output_scale); + let delta_scale = input_scale - output_scale; + // if the reduction of the input number through scaling (dividing) is greater + // than a possible precision loss (plus potential increase via rounding) + // every input number will fit into the output type + // Example: If we are starting with any number of precision 5 [xxxxx], + // then and decrease the scale by 3 will have the following effect on the representation: + // [xxxxx] -> [xx] (+ 1 possibly, due to rounding). + // The rounding may add an additional digit, so the cast to be infallible, + // the output type needs to have at least 3 digits of precision. + // e.g. Decimal(5, 3) 99.999 to Decimal(3, 0) will result in 100: + // [99999] -> [99] + 1 = [100], a cast to Decimal(2, 0) would not be possible + let is_infallible_cast = (input_precision as i8) - delta_scale < (output_precision as i8); + + let div = I::Native::from_decimal(10_i128) + .unwrap() + .pow_checked(delta_scale as u32)?; + + let half = div.div_wrapping(I::Native::from_usize(2).unwrap()); + let half_neg = half.neg_wrapping(); + + let f = |x: I::Native| { + // div is >= 10 and so this cannot overflow + let d = x.div_wrapping(div); + let r = x.mod_wrapping(div); - let f = rescale_decimal::(input_precision, input_scale, output_precision, output_scale); + // Round result + let adjusted = match x >= I::Native::ZERO { + true if r >= half => d.add_wrapping(I::Native::ONE), + false if r <= half_neg => d.sub_wrapping(I::Native::ONE), + _ => d, + }; + O::Native::from_decimal(adjusted) + }; Ok(if is_infallible_cast { - // unwrapping is safe since the result is guaranteed to fit into the target type - array.unary(|x| f(x).unwrap()) + // make sure we don't perform calculations that don't make sense w/o validation + validate_decimal_precision_and_scale::(output_precision, output_scale)?; + let g = |x: I::Native| f(x).unwrap(); // unwrapping is safe since the result is guaranteed + // to fit into the target type + array.unary(g) } else if cast_options.safe { - array.unary_opt(|x| f(x).ok()) + array.unary_opt(|x| f(x).filter(|v| O::is_valid_decimal_precision(*v, output_precision))) } else { - array.try_unary(f)? + array.try_unary(|x| { + f(x).ok_or_else(|| error(x)).and_then(|v| { + O::validate_decimal_precision(v, output_precision, output_scale).map(|_| v) + }) + })? }) } @@ -338,20 +240,35 @@ where I::Native: DecimalCast + ArrowNativeTypeOp, O::Native: DecimalCast + ArrowNativeTypeOp, { - // make sure we don't perform calculations that don't make sense w/o validation - validate_decimal_precision_and_scale::(output_precision, output_scale)?; - - let is_infallible_cast = - is_infallible_decimal_cast(input_precision, input_scale, output_precision, output_scale); - let f = rescale_decimal::(input_precision, input_scale, output_precision, output_scale); + let error = cast_decimal_to_decimal_error::(output_precision, output_scale); + let delta_scale = output_scale - input_scale; + let mul = O::Native::from_decimal(10_i128) + .unwrap() + .pow_checked(delta_scale as u32)?; + + // if the gain in precision (digits) is greater than the multiplication due to scaling + // every number will fit into the output type + // Example: If we are starting with any number of precision 5 [xxxxx], + // then an increase of scale by 3 will have the following effect on the representation: + // [xxxxx] -> [xxxxx000], so for the cast to be infallible, the output type + // needs to provide at least 8 digits precision + let is_infallible_cast = (input_precision as i8) + delta_scale <= (output_precision as i8); + let f = |x| O::Native::from_decimal(x).and_then(|x| x.mul_checked(mul).ok()); Ok(if is_infallible_cast { + // make sure we don't perform calculations that don't make sense w/o validation + validate_decimal_precision_and_scale::(output_precision, output_scale)?; // unwrapping is safe since the result is guaranteed to fit into the target type - array.unary(|x| f(x).unwrap()) + let f = |x| O::Native::from_decimal(x).unwrap().mul_wrapping(mul); + array.unary(f) } else if cast_options.safe { - array.unary_opt(|x| f(x).ok()) + array.unary_opt(|x| f(x).filter(|v| O::is_valid_decimal_precision(*v, output_precision))) } else { - array.try_unary(f)? + array.try_unary(|x| { + f(x).ok_or_else(|| error(x)).and_then(|v| { + O::validate_decimal_precision(v, output_precision, output_scale).map(|_| v) + }) + })? }) } diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 77f70584cc00..f89b7eab7f41 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -67,8 +67,6 @@ use arrow_schema::*; use arrow_select::take::take; use num_traits::{NumCast, ToPrimitive, cast::AsPrimitive}; -pub use decimal::{DecimalCast, rescale_decimal}; - /// CastOptions provides a way to override the default cast behaviors #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct CastOptions<'a> { From cb2576cf8c9996bae0c9ddfd1414bdfb1cc5469d Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Thu, 9 Oct 2025 20:41:05 -0400 Subject: [PATCH 20/26] Fix doc --- parquet-variant-compute/src/type_conversion.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index 5435cfefcf4f..7cd032a7198c 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -113,8 +113,7 @@ impl_timestamp_from_variant!( |timestamp| Self::make_value(timestamp.naive_utc()) ); -/// A utility trait that provides checked conversions between -/// decimal types inspired by [`NumCast`] +/// A utility trait that provides checked conversions between decimal types pub(crate) trait DecimalCast: Sized { fn to_i32(self) -> Option; From ef624745e56fd670df1fda327295562969e94911 Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Fri, 10 Oct 2025 09:19:12 -0400 Subject: [PATCH 21/26] Return value instead of fn --- .../src/type_conversion.rs | 149 +++++++----------- 1 file changed, 60 insertions(+), 89 deletions(-) diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index 7cd032a7198c..3eea8f5374af 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -224,56 +224,68 @@ where O::Native: DecimalCast, { match variant { - Variant::Int8(i) => { - rescale_decimal::(VariantDecimal4::MAX_PRECISION, 0, precision, scale)( - *i as i32, - ) - } - Variant::Int16(i) => { - rescale_decimal::(VariantDecimal4::MAX_PRECISION, 0, precision, scale)( - *i as i32, - ) - } - Variant::Int32(i) => { - rescale_decimal::(VariantDecimal4::MAX_PRECISION, 0, precision, scale)( - *i, - ) - } - Variant::Int64(i) => { - rescale_decimal::(VariantDecimal8::MAX_PRECISION, 0, precision, scale)( - *i, - ) - } + Variant::Int8(i) => rescale_decimal::( + *i as i32, + VariantDecimal4::MAX_PRECISION, + 0, + precision, + scale, + ), + Variant::Int16(i) => rescale_decimal::( + *i as i32, + VariantDecimal4::MAX_PRECISION, + 0, + precision, + scale, + ), + Variant::Int32(i) => rescale_decimal::( + *i, + VariantDecimal4::MAX_PRECISION, + 0, + precision, + scale, + ), + Variant::Int64(i) => rescale_decimal::( + *i, + VariantDecimal8::MAX_PRECISION, + 0, + precision, + scale, + ), Variant::Decimal4(d) => rescale_decimal::( + d.integer(), VariantDecimal4::MAX_PRECISION, d.scale() as i8, precision, scale, - )(d.integer()), + ), Variant::Decimal8(d) => rescale_decimal::( + d.integer(), VariantDecimal8::MAX_PRECISION, d.scale() as i8, precision, scale, - )(d.integer()), + ), Variant::Decimal16(d) => rescale_decimal::( + d.integer(), VariantDecimal16::MAX_PRECISION, d.scale() as i8, precision, scale, - )(d.integer()), + ), _ => None, } } -/// Build a rescale function from (input_precision, input_scale) to (output_precision, output_scale) -/// returning a closure `Fn(I::Native) -> Option` that performs the conversion. +/// Rescale a decimal from (input_precision, input_scale) to (output_precision, output_scale) +/// and return the scaled value if it fits the output precision. pub(crate) fn rescale_decimal( + value: I::Native, input_precision: u8, input_scale: i8, output_precision: u8, output_scale: i8, -) -> impl Fn(I::Native) -> Option +) -> Option where I: DecimalType, O: DecimalType, @@ -286,73 +298,32 @@ where let is_infallible_cast = is_infallible_decimal_cast(input_precision, input_scale, output_precision, output_scale); - // Build a single mode once and use a thin closure that calls into it - enum RescaleMode { - SameScale, - Up { mul: O }, - Down { div: I, half: I, half_neg: I }, - Invalid, - } - - let mode = if delta_scale == 0 { - RescaleMode::SameScale + let scaled = if delta_scale == 0 { + O::Native::from_decimal(value) } else if delta_scale > 0 { - match O::Native::from_decimal(10_i128).and_then(|t| t.pow_checked(delta_scale as u32).ok()) - { - Some(mul) => RescaleMode::Up { mul }, - None => RescaleMode::Invalid, - } + let mul = O::Native::from_decimal(10_i128) + .and_then(|t| t.pow_checked(delta_scale as u32).ok())?; + O::Native::from_decimal(value).and_then(|x| x.mul_checked(mul).ok()) } else { - match I::Native::from_decimal(10_i128) - .and_then(|t| t.pow_checked(delta_scale.unsigned_abs() as u32).ok()) - { - Some(div) => { - let half = div.div_wrapping(I::Native::from_usize(2).unwrap()); - let half_neg = half.neg_wrapping(); - RescaleMode::Down { - div, - half, - half_neg, - } - } - None => RescaleMode::Invalid, - } - }; - - let f = move |x: I::Native| { - match &mode { - RescaleMode::SameScale => O::Native::from_decimal(x), - RescaleMode::Up { mul } => { - O::Native::from_decimal(x).and_then(|x| x.mul_checked(*mul).ok()) - } - RescaleMode::Down { - div, - half, - half_neg, - } => { - // div is >= 10 and so this cannot overflow - let d = x.div_wrapping(*div); - let r = x.mod_wrapping(*div); - - // Round result - let adjusted = match x >= I::Native::ZERO { - true if r >= *half => d.add_wrapping(I::Native::ONE), - false if r <= *half_neg => d.sub_wrapping(I::Native::ONE), - _ => d, - }; - O::Native::from_decimal(adjusted) - } - RescaleMode::Invalid => None, - } + let div = I::Native::from_decimal(10_i128) + .and_then(|t| t.pow_checked(delta_scale.unsigned_abs() as u32).ok())?; + let half = div.div_wrapping(I::Native::ONE.add_wrapping(I::Native::ONE)); + let half_neg = half.neg_wrapping(); + + // div is >= 10 and so this cannot overflow + let d = value.div_wrapping(div); + let r = value.mod_wrapping(div); + + // Round result + let adjusted = match value >= I::Native::ZERO { + true if r >= half => d.add_wrapping(I::Native::ONE), + false if r <= half_neg => d.sub_wrapping(I::Native::ONE), + _ => d, + }; + O::Native::from_decimal(adjusted) }; - move |x| { - if is_infallible_cast { - f(x) - } else { - f(x).filter(|v| O::is_valid_decimal_precision(*v, output_precision)) - } - } + scaled.filter(|v| is_infallible_cast || O::is_valid_decimal_precision(*v, output_precision)) } /// Returns true if casting from (input_precision, input_scale) to From 25e4aa9c553cd079ab2ebbff63f3477db2613676 Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Fri, 10 Oct 2025 09:27:09 -0400 Subject: [PATCH 22/26] Fix large scale reduction case --- .../src/type_conversion.rs | 20 +++-- parquet-variant-compute/src/variant_get.rs | 90 +++++++++++++++++-- 2 files changed, 96 insertions(+), 14 deletions(-) diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index 3eea8f5374af..2542fdac1beb 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -19,8 +19,8 @@ use arrow::array::ArrowNativeTypeOp; use arrow::datatypes::{ - self, ArrowNativeType, ArrowPrimitiveType, ArrowTimestampType, Decimal32Type, Decimal64Type, - Decimal128Type, DecimalType, i256, + self, ArrowPrimitiveType, ArrowTimestampType, Decimal32Type, Decimal64Type, Decimal128Type, + DecimalType, i256, }; use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16}; @@ -278,7 +278,8 @@ where } /// Rescale a decimal from (input_precision, input_scale) to (output_precision, output_scale) -/// and return the scaled value if it fits the output precision. +/// and return the scaled value if it fits the output precision. Similar to the implementation in +/// decimal.rs in arrow-cast. pub(crate) fn rescale_decimal( value: I::Native, input_precision: u8, @@ -305,8 +306,17 @@ where .and_then(|t| t.pow_checked(delta_scale as u32).ok())?; O::Native::from_decimal(value).and_then(|x| x.mul_checked(mul).ok()) } else { - let div = I::Native::from_decimal(10_i128) - .and_then(|t| t.pow_checked(delta_scale.unsigned_abs() as u32).ok())?; + // delta_scale is guaranteed to be > 0, but may also be larger than I::MAX_PRECISION. If so, the + // scale change divides out more digits than the input has precision and the result of the cast + // is always zero. For example, if we try to apply delta_scale=10 a decimal32 value, the largest + // possible result is 999999999/10000000000 = 0.0999999999, which rounds to zero. Smaller values + // (e.g. 1/10000000000) or larger delta_scale (e.g. 999999999/10000000000000) produce even + // smaller results, which also round to zero. In that case, just return an array of zeros. + let delta_scale = delta_scale.unsigned_abs() as usize; + let Some(max) = I::MAX_FOR_EACH_PRECISION.get(delta_scale) else { + return Some(O::Native::ZERO); + }; + let div = max.add_wrapping(I::Native::ONE); let half = div.div_wrapping(I::Native::ONE.add_wrapping(I::Native::ONE)); let half_neg = half.neg_wrapping(); diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 06b605fd5f7c..ea7976a4c1c4 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -2851,7 +2851,7 @@ mod test { #[test] fn get_decimal32_rescaled_to_scale2() { // Build unshredded variant values with different scales - let mut builder = crate::VariantArrayBuilder::new(4); + let mut builder = crate::VariantArrayBuilder::new(5); builder.append_variant(Variant::from(VariantDecimal4::try_new(1234, 2).unwrap())); // 12.34 builder.append_variant(Variant::from(VariantDecimal4::try_new(1234, 3).unwrap())); // 1.234 builder.append_variant(Variant::from(VariantDecimal4::try_new(1234, 0).unwrap())); // 1234 @@ -2880,7 +2880,7 @@ mod test { #[test] fn get_decimal32_scale_down_rounding() { - let mut builder = crate::VariantArrayBuilder::new(2); + let mut builder = crate::VariantArrayBuilder::new(7); builder.append_variant(Variant::from(VariantDecimal4::try_new(1235, 0).unwrap())); builder.append_variant(Variant::from(VariantDecimal4::try_new(1245, 0).unwrap())); builder.append_variant(Variant::from(VariantDecimal4::try_new(-1235, 0).unwrap())); @@ -2902,14 +2902,49 @@ mod test { assert_eq!(result.value(2), -124); assert_eq!(result.value(3), -125); assert_eq!(result.value(4), 1); + assert!(result.is_valid(5)); assert_eq!(result.value(5), 0); assert_eq!(result.value(6), 1); } + #[test] + fn get_decimal32_large_scale_reduction() { + let mut builder = crate::VariantArrayBuilder::new(2); + builder.append_variant(Variant::from( + VariantDecimal4::try_new(-(VariantDecimal4::MAX_UNSCALED_VALUE as i32), 0).unwrap(), + )); + builder.append_variant(Variant::from( + VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE as i32, 0).unwrap(), + )); + let variant_array: ArrayRef = ArrayRef::from(builder.build()); + + let field = Field::new("result", DataType::Decimal32(9, -9), true); + let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); + let result = variant_get(&variant_array, options).unwrap(); + let result = result.as_any().downcast_ref::().unwrap(); + + assert_eq!(result.precision(), 9); + assert_eq!(result.scale(), -9); + assert_eq!(result.value(0), -1); + assert_eq!(result.value(1), 1); + + let field = Field::new("result", DataType::Decimal32(9, -10), true); + let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); + let result = variant_get(&variant_array, options).unwrap(); + let result = result.as_any().downcast_ref::().unwrap(); + + assert_eq!(result.precision(), 9); + assert_eq!(result.scale(), -10); + assert!(result.is_valid(0)); + assert_eq!(result.value(0), 0); + assert!(result.is_valid(1)); + assert_eq!(result.value(1), 0); + } + #[test] fn get_decimal32_precision_overflow_safe() { // Exceed Decimal32 after scaling and rounding - let mut builder = crate::VariantArrayBuilder::new(1); + let mut builder = crate::VariantArrayBuilder::new(2); builder.append_variant(Variant::from( VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE as i32, 0).unwrap(), )); @@ -2954,7 +2989,7 @@ mod test { #[test] fn get_decimal64_rescaled_to_scale2() { - let mut builder = crate::VariantArrayBuilder::new(4); + let mut builder = crate::VariantArrayBuilder::new(5); builder.append_variant(Variant::from(VariantDecimal8::try_new(1234, 2).unwrap())); // 12.34 builder.append_variant(Variant::from(VariantDecimal8::try_new(1234, 3).unwrap())); // 1.234 builder.append_variant(Variant::from(VariantDecimal8::try_new(1234, 0).unwrap())); // 1234 @@ -2984,7 +3019,7 @@ mod test { #[test] fn get_decimal64_scale_down_rounding() { - let mut builder = crate::VariantArrayBuilder::new(2); + let mut builder = crate::VariantArrayBuilder::new(7); builder.append_variant(Variant::from(VariantDecimal8::try_new(1235, 0).unwrap())); builder.append_variant(Variant::from(VariantDecimal8::try_new(1245, 0).unwrap())); builder.append_variant(Variant::from(VariantDecimal8::try_new(-1235, 0).unwrap())); @@ -3006,14 +3041,49 @@ mod test { assert_eq!(result.value(2), -124); assert_eq!(result.value(3), -125); assert_eq!(result.value(4), 1); + assert!(result.is_valid(5)); assert_eq!(result.value(5), 0); assert_eq!(result.value(6), 1); } + #[test] + fn get_decimal64_large_scale_reduction() { + let mut builder = crate::VariantArrayBuilder::new(2); + builder.append_variant(Variant::from( + VariantDecimal8::try_new(-(VariantDecimal8::MAX_UNSCALED_VALUE as i64), 0).unwrap(), + )); + builder.append_variant(Variant::from( + VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE as i64, 0).unwrap(), + )); + let variant_array: ArrayRef = ArrayRef::from(builder.build()); + + let field = Field::new("result", DataType::Decimal64(18, -18), true); + let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); + let result = variant_get(&variant_array, options).unwrap(); + let result = result.as_any().downcast_ref::().unwrap(); + + assert_eq!(result.precision(), 18); + assert_eq!(result.scale(), -18); + assert_eq!(result.value(0), -1); + assert_eq!(result.value(1), 1); + + let field = Field::new("result", DataType::Decimal64(18, -19), true); + let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); + let result = variant_get(&variant_array, options).unwrap(); + let result = result.as_any().downcast_ref::().unwrap(); + + assert_eq!(result.precision(), 18); + assert_eq!(result.scale(), -19); + assert!(result.is_valid(0)); + assert_eq!(result.value(0), 0); + assert!(result.is_valid(1)); + assert_eq!(result.value(1), 0); + } + #[test] fn get_decimal64_precision_overflow_safe() { // Exceed Decimal64 after scaling and rounding - let mut builder = crate::VariantArrayBuilder::new(1); + let mut builder = crate::VariantArrayBuilder::new(2); builder.append_variant(Variant::from( VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE as i64, 0).unwrap(), )); @@ -3080,7 +3150,7 @@ mod test { #[test] fn get_decimal128_scale_down_rounding() { - let mut builder = crate::VariantArrayBuilder::new(2); + let mut builder = crate::VariantArrayBuilder::new(7); builder.append_variant(Variant::from(VariantDecimal16::try_new(1235, 0).unwrap())); builder.append_variant(Variant::from(VariantDecimal16::try_new(1245, 0).unwrap())); builder.append_variant(Variant::from(VariantDecimal16::try_new(-1235, 0).unwrap())); @@ -3102,6 +3172,7 @@ mod test { assert_eq!(result.value(2), -124); assert_eq!(result.value(3), -125); assert_eq!(result.value(4), 1); + assert!(result.is_valid(5)); assert_eq!(result.value(5), 0); assert_eq!(result.value(6), 1); } @@ -3109,7 +3180,7 @@ mod test { #[test] fn get_decimal128_precision_overflow_safe() { // Exceed Decimal128 after scaling and rounding - let mut builder = crate::VariantArrayBuilder::new(1); + let mut builder = crate::VariantArrayBuilder::new(2); builder.append_variant(Variant::from( VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE as i128, 0).unwrap(), )); @@ -3175,7 +3246,7 @@ mod test { #[test] fn get_decimal256_scale_down_rounding() { - let mut builder = crate::VariantArrayBuilder::new(2); + let mut builder = crate::VariantArrayBuilder::new(7); builder.append_variant(Variant::from(VariantDecimal16::try_new(1235, 0).unwrap())); builder.append_variant(Variant::from(VariantDecimal16::try_new(1245, 0).unwrap())); builder.append_variant(Variant::from(VariantDecimal16::try_new(-1235, 0).unwrap())); @@ -3197,6 +3268,7 @@ mod test { assert_eq!(result.value(2), i256::from_i128(-124)); assert_eq!(result.value(3), i256::from_i128(-125)); assert_eq!(result.value(4), i256::from_i128(1)); + assert!(result.is_valid(5)); assert_eq!(result.value(5), i256::from_i128(0)); assert_eq!(result.value(6), i256::from_i128(1)); } From 539d73fa5021f7079bd20a3282e02f38c48850c9 Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Fri, 10 Oct 2025 09:39:21 -0400 Subject: [PATCH 23/26] Reuse DecimalCast --- arrow-cast/src/cast/decimal.rs | 8 +- arrow-cast/src/cast/mod.rs | 2 + .../src/type_conversion.rs | 104 +----------------- .../src/variant_to_arrow.rs | 6 +- 4 files changed, 15 insertions(+), 105 deletions(-) diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index 1fcae9f66aca..907e61b09f7b 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -19,17 +19,23 @@ use crate::cast::*; /// A utility trait that provides checked conversions between /// decimal types inspired by [`NumCast`] -pub(crate) trait DecimalCast: Sized { +pub trait DecimalCast: Sized { + /// Convert the decimal to an i32 fn to_i32(self) -> Option; + /// Convert the decimal to an i64 fn to_i64(self) -> Option; + /// Convert the decimal to an i128 fn to_i128(self) -> Option; + /// Convert the decimal to an i256 fn to_i256(self) -> Option; + /// Convert a decimal from a decimal fn from_decimal(n: T) -> Option; + /// Convert a decimal from a f64 fn from_f64(n: f64) -> Option; } diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index aa9d4b021f5c..868d1294982d 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -67,6 +67,8 @@ use arrow_schema::*; use arrow_select::take::take; use num_traits::{NumCast, ToPrimitive, cast::AsPrimitive}; +pub use decimal::DecimalCast; + /// CastOptions provides a way to override the default cast behaviors #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct CastOptions<'a> { diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index 2542fdac1beb..5976464151c7 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -18,9 +18,10 @@ //! Module for transforming a typed arrow `Array` to `VariantArray`. use arrow::array::ArrowNativeTypeOp; +use arrow::compute::DecimalCast; use arrow::datatypes::{ self, ArrowPrimitiveType, ArrowTimestampType, Decimal32Type, Decimal64Type, Decimal128Type, - DecimalType, i256, + DecimalType, }; use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16}; @@ -113,107 +114,6 @@ impl_timestamp_from_variant!( |timestamp| Self::make_value(timestamp.naive_utc()) ); -/// A utility trait that provides checked conversions between decimal types -pub(crate) trait DecimalCast: Sized { - fn to_i32(self) -> Option; - - fn to_i64(self) -> Option; - - fn to_i128(self) -> Option; - - fn to_i256(self) -> Option; - - fn from_decimal(n: T) -> Option; -} - -impl DecimalCast for i32 { - fn to_i32(self) -> Option { - Some(self) - } - - fn to_i64(self) -> Option { - Some(self as i64) - } - - fn to_i128(self) -> Option { - Some(self as i128) - } - - fn to_i256(self) -> Option { - Some(i256::from_i128(self as i128)) - } - - fn from_decimal(n: T) -> Option { - n.to_i32() - } -} - -impl DecimalCast for i64 { - fn to_i32(self) -> Option { - i32::try_from(self).ok() - } - - fn to_i64(self) -> Option { - Some(self) - } - - fn to_i128(self) -> Option { - Some(self as i128) - } - - fn to_i256(self) -> Option { - Some(i256::from_i128(self as i128)) - } - - fn from_decimal(n: T) -> Option { - n.to_i64() - } -} - -impl DecimalCast for i128 { - fn to_i32(self) -> Option { - i32::try_from(self).ok() - } - - fn to_i64(self) -> Option { - i64::try_from(self).ok() - } - - fn to_i128(self) -> Option { - Some(self) - } - - fn to_i256(self) -> Option { - Some(i256::from_i128(self)) - } - - fn from_decimal(n: T) -> Option { - n.to_i128() - } -} - -impl DecimalCast for i256 { - fn to_i32(self) -> Option { - self.to_i128().map(|x| i32::try_from(x).ok())? - } - - fn to_i64(self) -> Option { - self.to_i128().map(|x| i64::try_from(x).ok())? - } - - fn to_i128(self) -> Option { - self.to_i128() - } - - fn to_i256(self) -> Option { - Some(self) - } - - fn from_decimal(n: T) -> Option { - n.to_i256() - } -} - pub(crate) fn variant_to_unscaled_decimal( variant: &Variant<'_, '_>, precision: u8, diff --git a/parquet-variant-compute/src/variant_to_arrow.rs b/parquet-variant-compute/src/variant_to_arrow.rs index 2b0effd9a9d0..f2ac4a722fff 100644 --- a/parquet-variant-compute/src/variant_to_arrow.rs +++ b/parquet-variant-compute/src/variant_to_arrow.rs @@ -18,12 +18,14 @@ use arrow::array::{ ArrayRef, BinaryViewArray, BooleanBuilder, NullBufferBuilder, PrimitiveBuilder, }; -use arrow::compute::CastOptions; +use arrow::compute::{CastOptions, DecimalCast}; use arrow::datatypes::{self, ArrowPrimitiveType, DataType, DecimalType}; use arrow::error::{ArrowError, Result}; use parquet_variant::{Variant, VariantPath}; -use crate::type_conversion::{PrimitiveFromVariant, TimestampFromVariant, variant_to_unscaled_decimal, DecimalCast}; +use crate::type_conversion::{ + PrimitiveFromVariant, TimestampFromVariant, variant_to_unscaled_decimal, +}; use crate::{VariantArray, VariantValueArrayBuilder}; use arrow_schema::TimeUnit; From cfc8580ff5b58dbd34e9c755eb28906e4bc51108 Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Tue, 14 Oct 2025 20:03:08 -0400 Subject: [PATCH 24/26] Use trait VariantDecimalType --- parquet-variant-compute/src/variant_get.rs | 42 +++++++++++----------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index ea7976a4c1c4..a840ca680a3a 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -313,7 +313,7 @@ mod test { use chrono::DateTime; use parquet_variant::{ EMPTY_VARIANT_METADATA_BYTES, Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16, - VariantPath, + VariantDecimalType, VariantPath, }; fn single_variant_get_test(input_json: &str, path: VariantPath, expected_json: &str) { @@ -2874,7 +2874,7 @@ mod test { assert!(result.is_null(3)); assert_eq!( result.value(4), - (VariantDecimal4::MAX_UNSCALED_VALUE as i32) / 10 + 1 + VariantDecimal4::MAX_UNSCALED_VALUE / 10 + 1 ); // should not be null as the final result fits into Decimal32 } @@ -2911,10 +2911,10 @@ mod test { fn get_decimal32_large_scale_reduction() { let mut builder = crate::VariantArrayBuilder::new(2); builder.append_variant(Variant::from( - VariantDecimal4::try_new(-(VariantDecimal4::MAX_UNSCALED_VALUE as i32), 0).unwrap(), + VariantDecimal4::try_new(-VariantDecimal4::MAX_UNSCALED_VALUE, 0).unwrap(), )); builder.append_variant(Variant::from( - VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE as i32, 0).unwrap(), + VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 0).unwrap(), )); let variant_array: ArrayRef = ArrayRef::from(builder.build()); @@ -2946,10 +2946,10 @@ mod test { // Exceed Decimal32 after scaling and rounding let mut builder = crate::VariantArrayBuilder::new(2); builder.append_variant(Variant::from( - VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE as i32, 0).unwrap(), + VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 0).unwrap(), )); builder.append_variant(Variant::from( - VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE as i32, 9).unwrap(), + VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 9).unwrap(), )); // integer value round up overflows let variant_array: ArrayRef = ArrayRef::from(builder.build()); @@ -2966,7 +2966,7 @@ mod test { fn get_decimal32_precision_overflow_unsafe_errors() { let mut builder = crate::VariantArrayBuilder::new(1); builder.append_variant(Variant::from( - VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE as i32, 0).unwrap(), + VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 0).unwrap(), )); let variant_array: ArrayRef = ArrayRef::from(builder.build()); @@ -3013,7 +3013,7 @@ mod test { assert!(result.is_null(3)); assert_eq!( result.value(4), - (VariantDecimal8::MAX_UNSCALED_VALUE as i64) / 10 + 1 + VariantDecimal8::MAX_UNSCALED_VALUE / 10 + 1 ); // should not be null as the final result fits into Decimal64 } @@ -3050,10 +3050,10 @@ mod test { fn get_decimal64_large_scale_reduction() { let mut builder = crate::VariantArrayBuilder::new(2); builder.append_variant(Variant::from( - VariantDecimal8::try_new(-(VariantDecimal8::MAX_UNSCALED_VALUE as i64), 0).unwrap(), + VariantDecimal8::try_new(-VariantDecimal8::MAX_UNSCALED_VALUE, 0).unwrap(), )); builder.append_variant(Variant::from( - VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE as i64, 0).unwrap(), + VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 0).unwrap(), )); let variant_array: ArrayRef = ArrayRef::from(builder.build()); @@ -3085,10 +3085,10 @@ mod test { // Exceed Decimal64 after scaling and rounding let mut builder = crate::VariantArrayBuilder::new(2); builder.append_variant(Variant::from( - VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE as i64, 0).unwrap(), + VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 0).unwrap(), )); builder.append_variant(Variant::from( - VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE as i64, 18).unwrap(), + VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 18).unwrap(), )); // integer value round up overflows let variant_array: ArrayRef = ArrayRef::from(builder.build()); @@ -3105,7 +3105,7 @@ mod test { fn get_decimal64_precision_overflow_unsafe_errors() { let mut builder = crate::VariantArrayBuilder::new(1); builder.append_variant(Variant::from( - VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE as i64, 0).unwrap(), + VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 0).unwrap(), )); let variant_array: ArrayRef = ArrayRef::from(builder.build()); @@ -3182,10 +3182,10 @@ mod test { // Exceed Decimal128 after scaling and rounding let mut builder = crate::VariantArrayBuilder::new(2); builder.append_variant(Variant::from( - VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE as i128, 0).unwrap(), + VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0).unwrap(), )); builder.append_variant(Variant::from( - VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE as i128, 38).unwrap(), + VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 38).unwrap(), )); // integer value round up overflows let variant_array: ArrayRef = ArrayRef::from(builder.build()); @@ -3202,7 +3202,7 @@ mod test { fn get_decimal128_precision_overflow_unsafe_errors() { let mut builder = crate::VariantArrayBuilder::new(1); builder.append_variant(Variant::from( - VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE as i128, 0).unwrap(), + VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0).unwrap(), )); let variant_array: ArrayRef = ArrayRef::from(builder.build()); @@ -3278,10 +3278,10 @@ mod test { // Exceed Decimal128 max precision (38) after scaling let mut builder = crate::VariantArrayBuilder::new(2); builder.append_variant(Variant::from( - VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE as i128, 1).unwrap(), + VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 1).unwrap(), )); builder.append_variant(Variant::from( - VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE as i128, 0).unwrap(), + VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0).unwrap(), )); let variant_array: ArrayRef = ArrayRef::from(builder.build()); @@ -3294,7 +3294,7 @@ mod test { // So expected integer is (10^38-1) * 10^(39-1) = (10^38-1) * 10^38 let base = i256::from_i128(10); let factor = base.checked_pow(38).unwrap(); - let expected = i256::from_i128(VariantDecimal16::MAX_UNSCALED_VALUE as i128) + let expected = i256::from_i128(VariantDecimal16::MAX_UNSCALED_VALUE) .checked_mul(factor) .unwrap(); assert_eq!(result.value(0), expected); @@ -3306,10 +3306,10 @@ mod test { // Exceed Decimal128 max precision (38) after scaling let mut builder = crate::VariantArrayBuilder::new(2); builder.append_variant(Variant::from( - VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE as i128, 1).unwrap(), + VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 1).unwrap(), )); builder.append_variant(Variant::from( - VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE as i128, 0).unwrap(), + VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0).unwrap(), )); let variant_array: ArrayRef = ArrayRef::from(builder.build()); From 9ed0d7a702a7cd2fa4445f51e50ab888b58b0642 Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Tue, 14 Oct 2025 20:36:34 -0400 Subject: [PATCH 25/26] Add doc --- parquet-variant-compute/src/type_conversion.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index c151ab387e5f..83ffc8f08dc3 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -114,6 +114,15 @@ impl_timestamp_from_variant!( |timestamp| Self::make_value(timestamp.naive_utc()) ); +/// Returns the unscaled integer representation for Arrow decimal type `O` +/// from a `Variant`. +/// +/// - `precision` and `scale` specify the target Arrow decimal parameters +/// - Integer variants (`Int8/16/32/64`) are treated as decimals with scale 0 +/// - Decimal variants (`Decimal4/8/16`) use their embedded precision and scale +/// +/// The value is rescaled to (`precision`, `scale`) using `rescale_decimal` and +/// returns `None` if it cannot fit the requested precision. pub(crate) fn variant_to_unscaled_decimal( variant: &Variant<'_, '_>, precision: u8, From 0567cb66e60b786683379819184ec07f3822be09 Mon Sep 17 00:00:00 2001 From: Liam Bao Date: Tue, 14 Oct 2025 22:24:04 -0400 Subject: [PATCH 26/26] Refactor tests to use `into` --- parquet-variant-compute/src/variant_get.rs | 231 ++++++++++++--------- 1 file changed, 134 insertions(+), 97 deletions(-) diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index a840ca680a3a..fda41cc84a35 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -2852,13 +2852,15 @@ mod test { fn get_decimal32_rescaled_to_scale2() { // Build unshredded variant values with different scales let mut builder = crate::VariantArrayBuilder::new(5); - builder.append_variant(Variant::from(VariantDecimal4::try_new(1234, 2).unwrap())); // 12.34 - builder.append_variant(Variant::from(VariantDecimal4::try_new(1234, 3).unwrap())); // 1.234 - builder.append_variant(Variant::from(VariantDecimal4::try_new(1234, 0).unwrap())); // 1234 + builder.append_variant(VariantDecimal4::try_new(1234, 2).unwrap().into()); // 12.34 + builder.append_variant(VariantDecimal4::try_new(1234, 3).unwrap().into()); // 1.234 + builder.append_variant(VariantDecimal4::try_new(1234, 0).unwrap().into()); // 1234 builder.append_null(); - builder.append_variant(Variant::from( - VariantDecimal8::try_new((VariantDecimal4::MAX_UNSCALED_VALUE as i64) + 1, 3).unwrap(), - )); // should fit into Decimal32 + builder.append_variant( + VariantDecimal8::try_new((VariantDecimal4::MAX_UNSCALED_VALUE as i64) + 1, 3) + .unwrap() + .into(), + ); // should fit into Decimal32 let variant_array: ArrayRef = ArrayRef::from(builder.build()); let field = Field::new("result", DataType::Decimal32(9, 2), true); @@ -2881,13 +2883,13 @@ mod test { #[test] fn get_decimal32_scale_down_rounding() { let mut builder = crate::VariantArrayBuilder::new(7); - builder.append_variant(Variant::from(VariantDecimal4::try_new(1235, 0).unwrap())); - builder.append_variant(Variant::from(VariantDecimal4::try_new(1245, 0).unwrap())); - builder.append_variant(Variant::from(VariantDecimal4::try_new(-1235, 0).unwrap())); - builder.append_variant(Variant::from(VariantDecimal4::try_new(-1245, 0).unwrap())); - builder.append_variant(Variant::from(VariantDecimal4::try_new(1235, 2).unwrap())); // 12.35 rounded down to 10 for scale -1 - builder.append_variant(Variant::from(VariantDecimal4::try_new(1235, 3).unwrap())); // 1.235 rounded down to 0 for scale -1 - builder.append_variant(Variant::from(VariantDecimal4::try_new(5235, 3).unwrap())); // 5.235 rounded up to 10 for scale -1 + builder.append_variant(VariantDecimal4::try_new(1235, 0).unwrap().into()); + builder.append_variant(VariantDecimal4::try_new(1245, 0).unwrap().into()); + builder.append_variant(VariantDecimal4::try_new(-1235, 0).unwrap().into()); + builder.append_variant(VariantDecimal4::try_new(-1245, 0).unwrap().into()); + builder.append_variant(VariantDecimal4::try_new(1235, 2).unwrap().into()); // 12.35 rounded down to 10 for scale -1 + builder.append_variant(VariantDecimal4::try_new(1235, 3).unwrap().into()); // 1.235 rounded down to 0 for scale -1 + builder.append_variant(VariantDecimal4::try_new(5235, 3).unwrap().into()); // 5.235 rounded up to 10 for scale -1 let variant_array: ArrayRef = ArrayRef::from(builder.build()); let field = Field::new("result", DataType::Decimal32(9, -1), true); @@ -2910,12 +2912,16 @@ mod test { #[test] fn get_decimal32_large_scale_reduction() { let mut builder = crate::VariantArrayBuilder::new(2); - builder.append_variant(Variant::from( - VariantDecimal4::try_new(-VariantDecimal4::MAX_UNSCALED_VALUE, 0).unwrap(), - )); - builder.append_variant(Variant::from( - VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 0).unwrap(), - )); + builder.append_variant( + VariantDecimal4::try_new(-VariantDecimal4::MAX_UNSCALED_VALUE, 0) + .unwrap() + .into(), + ); + builder.append_variant( + VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 0) + .unwrap() + .into(), + ); let variant_array: ArrayRef = ArrayRef::from(builder.build()); let field = Field::new("result", DataType::Decimal32(9, -9), true); @@ -2945,12 +2951,16 @@ mod test { fn get_decimal32_precision_overflow_safe() { // Exceed Decimal32 after scaling and rounding let mut builder = crate::VariantArrayBuilder::new(2); - builder.append_variant(Variant::from( - VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 0).unwrap(), - )); - builder.append_variant(Variant::from( - VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 9).unwrap(), - )); // integer value round up overflows + builder.append_variant( + VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 0) + .unwrap() + .into(), + ); + builder.append_variant( + VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 9) + .unwrap() + .into(), + ); // integer value round up overflows let variant_array: ArrayRef = ArrayRef::from(builder.build()); let field = Field::new("result", DataType::Decimal32(2, 2), true); @@ -2965,9 +2975,11 @@ mod test { #[test] fn get_decimal32_precision_overflow_unsafe_errors() { let mut builder = crate::VariantArrayBuilder::new(1); - builder.append_variant(Variant::from( - VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 0).unwrap(), - )); + builder.append_variant( + VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 0) + .unwrap() + .into(), + ); let variant_array: ArrayRef = ArrayRef::from(builder.build()); let field = Field::new("result", DataType::Decimal32(9, 2), true); @@ -2990,14 +3002,15 @@ mod test { #[test] fn get_decimal64_rescaled_to_scale2() { let mut builder = crate::VariantArrayBuilder::new(5); - builder.append_variant(Variant::from(VariantDecimal8::try_new(1234, 2).unwrap())); // 12.34 - builder.append_variant(Variant::from(VariantDecimal8::try_new(1234, 3).unwrap())); // 1.234 - builder.append_variant(Variant::from(VariantDecimal8::try_new(1234, 0).unwrap())); // 1234 + builder.append_variant(VariantDecimal8::try_new(1234, 2).unwrap().into()); // 12.34 + builder.append_variant(VariantDecimal8::try_new(1234, 3).unwrap().into()); // 1.234 + builder.append_variant(VariantDecimal8::try_new(1234, 0).unwrap().into()); // 1234 builder.append_null(); - builder.append_variant(Variant::from( + builder.append_variant( VariantDecimal16::try_new((VariantDecimal8::MAX_UNSCALED_VALUE as i128) + 1, 3) - .unwrap(), - )); // should fit into Decimal64 + .unwrap() + .into(), + ); // should fit into Decimal64 let variant_array: ArrayRef = ArrayRef::from(builder.build()); let field = Field::new("result", DataType::Decimal64(18, 2), true); @@ -3020,13 +3033,13 @@ mod test { #[test] fn get_decimal64_scale_down_rounding() { let mut builder = crate::VariantArrayBuilder::new(7); - builder.append_variant(Variant::from(VariantDecimal8::try_new(1235, 0).unwrap())); - builder.append_variant(Variant::from(VariantDecimal8::try_new(1245, 0).unwrap())); - builder.append_variant(Variant::from(VariantDecimal8::try_new(-1235, 0).unwrap())); - builder.append_variant(Variant::from(VariantDecimal8::try_new(-1245, 0).unwrap())); - builder.append_variant(Variant::from(VariantDecimal8::try_new(1235, 2).unwrap())); // 12.35 rounded down to 10 for scale -1 - builder.append_variant(Variant::from(VariantDecimal8::try_new(1235, 3).unwrap())); // 1.235 rounded down to 0 for scale -1 - builder.append_variant(Variant::from(VariantDecimal8::try_new(5235, 3).unwrap())); // 5.235 rounded up to 10 for scale -1 + builder.append_variant(VariantDecimal8::try_new(1235, 0).unwrap().into()); + builder.append_variant(VariantDecimal8::try_new(1245, 0).unwrap().into()); + builder.append_variant(VariantDecimal8::try_new(-1235, 0).unwrap().into()); + builder.append_variant(VariantDecimal8::try_new(-1245, 0).unwrap().into()); + builder.append_variant(VariantDecimal8::try_new(1235, 2).unwrap().into()); // 12.35 rounded down to 10 for scale -1 + builder.append_variant(VariantDecimal8::try_new(1235, 3).unwrap().into()); // 1.235 rounded down to 0 for scale -1 + builder.append_variant(VariantDecimal8::try_new(5235, 3).unwrap().into()); // 5.235 rounded up to 10 for scale -1 let variant_array: ArrayRef = ArrayRef::from(builder.build()); let field = Field::new("result", DataType::Decimal64(18, -1), true); @@ -3049,12 +3062,16 @@ mod test { #[test] fn get_decimal64_large_scale_reduction() { let mut builder = crate::VariantArrayBuilder::new(2); - builder.append_variant(Variant::from( - VariantDecimal8::try_new(-VariantDecimal8::MAX_UNSCALED_VALUE, 0).unwrap(), - )); - builder.append_variant(Variant::from( - VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 0).unwrap(), - )); + builder.append_variant( + VariantDecimal8::try_new(-VariantDecimal8::MAX_UNSCALED_VALUE, 0) + .unwrap() + .into(), + ); + builder.append_variant( + VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 0) + .unwrap() + .into(), + ); let variant_array: ArrayRef = ArrayRef::from(builder.build()); let field = Field::new("result", DataType::Decimal64(18, -18), true); @@ -3084,12 +3101,16 @@ mod test { fn get_decimal64_precision_overflow_safe() { // Exceed Decimal64 after scaling and rounding let mut builder = crate::VariantArrayBuilder::new(2); - builder.append_variant(Variant::from( - VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 0).unwrap(), - )); - builder.append_variant(Variant::from( - VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 18).unwrap(), - )); // integer value round up overflows + builder.append_variant( + VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 0) + .unwrap() + .into(), + ); + builder.append_variant( + VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 18) + .unwrap() + .into(), + ); // integer value round up overflows let variant_array: ArrayRef = ArrayRef::from(builder.build()); let field = Field::new("result", DataType::Decimal64(2, 2), true); @@ -3104,9 +3125,11 @@ mod test { #[test] fn get_decimal64_precision_overflow_unsafe_errors() { let mut builder = crate::VariantArrayBuilder::new(1); - builder.append_variant(Variant::from( - VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 0).unwrap(), - )); + builder.append_variant( + VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 0) + .unwrap() + .into(), + ); let variant_array: ArrayRef = ArrayRef::from(builder.build()); let field = Field::new("result", DataType::Decimal64(18, 2), true); @@ -3129,9 +3152,9 @@ mod test { #[test] fn get_decimal128_rescaled_to_scale2() { let mut builder = crate::VariantArrayBuilder::new(4); - builder.append_variant(Variant::from(VariantDecimal16::try_new(1234, 2).unwrap())); - builder.append_variant(Variant::from(VariantDecimal16::try_new(1234, 3).unwrap())); - builder.append_variant(Variant::from(VariantDecimal16::try_new(1234, 0).unwrap())); + builder.append_variant(VariantDecimal16::try_new(1234, 2).unwrap().into()); + builder.append_variant(VariantDecimal16::try_new(1234, 3).unwrap().into()); + builder.append_variant(VariantDecimal16::try_new(1234, 0).unwrap().into()); builder.append_null(); let variant_array: ArrayRef = ArrayRef::from(builder.build()); @@ -3151,13 +3174,13 @@ mod test { #[test] fn get_decimal128_scale_down_rounding() { let mut builder = crate::VariantArrayBuilder::new(7); - builder.append_variant(Variant::from(VariantDecimal16::try_new(1235, 0).unwrap())); - builder.append_variant(Variant::from(VariantDecimal16::try_new(1245, 0).unwrap())); - builder.append_variant(Variant::from(VariantDecimal16::try_new(-1235, 0).unwrap())); - builder.append_variant(Variant::from(VariantDecimal16::try_new(-1245, 0).unwrap())); - builder.append_variant(Variant::from(VariantDecimal16::try_new(1235, 2).unwrap())); // 12.35 rounded down to 10 for scale -1 - builder.append_variant(Variant::from(VariantDecimal16::try_new(1235, 3).unwrap())); // 1.235 rounded down to 0 for scale -1 - builder.append_variant(Variant::from(VariantDecimal16::try_new(5235, 3).unwrap())); // 5.235 rounded up to 10 for scale -1 + builder.append_variant(VariantDecimal16::try_new(1235, 0).unwrap().into()); + builder.append_variant(VariantDecimal16::try_new(1245, 0).unwrap().into()); + builder.append_variant(VariantDecimal16::try_new(-1235, 0).unwrap().into()); + builder.append_variant(VariantDecimal16::try_new(-1245, 0).unwrap().into()); + builder.append_variant(VariantDecimal16::try_new(1235, 2).unwrap().into()); // 12.35 rounded down to 10 for scale -1 + builder.append_variant(VariantDecimal16::try_new(1235, 3).unwrap().into()); // 1.235 rounded down to 0 for scale -1 + builder.append_variant(VariantDecimal16::try_new(5235, 3).unwrap().into()); // 5.235 rounded up to 10 for scale -1 let variant_array: ArrayRef = ArrayRef::from(builder.build()); let field = Field::new("result", DataType::Decimal128(38, -1), true); @@ -3181,12 +3204,16 @@ mod test { fn get_decimal128_precision_overflow_safe() { // Exceed Decimal128 after scaling and rounding let mut builder = crate::VariantArrayBuilder::new(2); - builder.append_variant(Variant::from( - VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0).unwrap(), - )); - builder.append_variant(Variant::from( - VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 38).unwrap(), - )); // integer value round up overflows + builder.append_variant( + VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0) + .unwrap() + .into(), + ); + builder.append_variant( + VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 38) + .unwrap() + .into(), + ); // integer value round up overflows let variant_array: ArrayRef = ArrayRef::from(builder.build()); let field = Field::new("result", DataType::Decimal128(2, 2), true); @@ -3201,9 +3228,11 @@ mod test { #[test] fn get_decimal128_precision_overflow_unsafe_errors() { let mut builder = crate::VariantArrayBuilder::new(1); - builder.append_variant(Variant::from( - VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0).unwrap(), - )); + builder.append_variant( + VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0) + .unwrap() + .into(), + ); let variant_array: ArrayRef = ArrayRef::from(builder.build()); let field = Field::new("result", DataType::Decimal128(38, 2), true); @@ -3225,9 +3254,9 @@ mod test { fn get_decimal256_rescaled_to_scale2() { // Build unshredded variant values with different scales using Decimal16 source let mut builder = crate::VariantArrayBuilder::new(4); - builder.append_variant(Variant::from(VariantDecimal16::try_new(1234, 2).unwrap())); // 12.34 - builder.append_variant(Variant::from(VariantDecimal16::try_new(1234, 3).unwrap())); // 1.234 - builder.append_variant(Variant::from(VariantDecimal16::try_new(1234, 0).unwrap())); // 1234 + builder.append_variant(VariantDecimal16::try_new(1234, 2).unwrap().into()); // 12.34 + builder.append_variant(VariantDecimal16::try_new(1234, 3).unwrap().into()); // 1.234 + builder.append_variant(VariantDecimal16::try_new(1234, 0).unwrap().into()); // 1234 builder.append_null(); let variant_array: ArrayRef = ArrayRef::from(builder.build()); @@ -3247,13 +3276,13 @@ mod test { #[test] fn get_decimal256_scale_down_rounding() { let mut builder = crate::VariantArrayBuilder::new(7); - builder.append_variant(Variant::from(VariantDecimal16::try_new(1235, 0).unwrap())); - builder.append_variant(Variant::from(VariantDecimal16::try_new(1245, 0).unwrap())); - builder.append_variant(Variant::from(VariantDecimal16::try_new(-1235, 0).unwrap())); - builder.append_variant(Variant::from(VariantDecimal16::try_new(-1245, 0).unwrap())); - builder.append_variant(Variant::from(VariantDecimal16::try_new(1235, 2).unwrap())); // 12.35 rounded down to 10 for scale -1 - builder.append_variant(Variant::from(VariantDecimal16::try_new(1235, 3).unwrap())); // 1.235 rounded down to 0 for scale -1 - builder.append_variant(Variant::from(VariantDecimal16::try_new(5235, 3).unwrap())); // 5.235 rounded up to 10 for scale -1 + builder.append_variant(VariantDecimal16::try_new(1235, 0).unwrap().into()); + builder.append_variant(VariantDecimal16::try_new(1245, 0).unwrap().into()); + builder.append_variant(VariantDecimal16::try_new(-1235, 0).unwrap().into()); + builder.append_variant(VariantDecimal16::try_new(-1245, 0).unwrap().into()); + builder.append_variant(VariantDecimal16::try_new(1235, 2).unwrap().into()); // 12.35 rounded down to 10 for scale -1 + builder.append_variant(VariantDecimal16::try_new(1235, 3).unwrap().into()); // 1.235 rounded down to 0 for scale -1 + builder.append_variant(VariantDecimal16::try_new(5235, 3).unwrap().into()); // 5.235 rounded up to 10 for scale -1 let variant_array: ArrayRef = ArrayRef::from(builder.build()); let field = Field::new("result", DataType::Decimal256(76, -1), true); @@ -3277,12 +3306,16 @@ mod test { fn get_decimal256_precision_overflow_safe() { // Exceed Decimal128 max precision (38) after scaling let mut builder = crate::VariantArrayBuilder::new(2); - builder.append_variant(Variant::from( - VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 1).unwrap(), - )); - builder.append_variant(Variant::from( - VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0).unwrap(), - )); + builder.append_variant( + VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 1) + .unwrap() + .into(), + ); + builder.append_variant( + VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0) + .unwrap() + .into(), + ); let variant_array: ArrayRef = ArrayRef::from(builder.build()); let field = Field::new("result", DataType::Decimal256(76, 39), true); @@ -3305,12 +3338,16 @@ mod test { fn get_decimal256_precision_overflow_unsafe_errors() { // Exceed Decimal128 max precision (38) after scaling let mut builder = crate::VariantArrayBuilder::new(2); - builder.append_variant(Variant::from( - VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 1).unwrap(), - )); - builder.append_variant(Variant::from( - VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0).unwrap(), - )); + builder.append_variant( + VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 1) + .unwrap() + .into(), + ); + builder.append_variant( + VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0) + .unwrap() + .into(), + ); let variant_array: ArrayRef = ArrayRef::from(builder.build()); let field = Field::new("result", DataType::Decimal256(76, 39), true);