From 34e983d5b7e4ca3f0ee67360ff74d3d5ebe9e8a7 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Mon, 30 Jun 2025 15:01:22 -0700 Subject: [PATCH 01/11] v1 data files should have a defualt DATA content type --- crates/iceberg/src/spec/manifest/entry.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/crates/iceberg/src/spec/manifest/entry.rs b/crates/iceberg/src/spec/manifest/entry.rs index 7d2f982d0d..cc0c1dce22 100644 --- a/crates/iceberg/src/spec/manifest/entry.rs +++ b/crates/iceberg/src/spec/manifest/entry.rs @@ -563,6 +563,16 @@ pub(super) fn manifest_schema_v2(partition_type: &StructType) -> Result Vec { vec![ + // Content is always 1. + Arc::new(NestedField::builder() + .id(134) + .name("content") + .required(false) + .field_type(Type::Primitive(PrimitiveType::Int)) + .initial_default(Some(serde_json::Value::Number(1.into()))) + .write_default(Some(serde_json::Value::Number(1.into()))) + .build() + ), FILE_PATH.clone(), FILE_FORMAT.clone(), Arc::new(NestedField::required( From 4f595250b7a7d84449b7e13bce47dd4f1c600ed2 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Wed, 2 Jul 2025 14:54:14 -0700 Subject: [PATCH 02/11] Add test and change initial_default --- crates/iceberg/src/spec/manifest/_serde.rs | 71 ++++++++++++++++++++++ crates/iceberg/src/spec/manifest/entry.rs | 15 +---- 2 files changed, 73 insertions(+), 13 deletions(-) diff --git a/crates/iceberg/src/spec/manifest/_serde.rs b/crates/iceberg/src/spec/manifest/_serde.rs index 97923c7a86..7629a2f899 100644 --- a/crates/iceberg/src/spec/manifest/_serde.rs +++ b/crates/iceberg/src/spec/manifest/_serde.rs @@ -398,4 +398,75 @@ mod tests { assert_eq!(data_files, actual_data_file); } + + #[tokio::test] + async fn test_data_file_serialize_deserialize_v1() { + let schema = Arc::new( + Schema::builder() + .with_fields(vec![ + Arc::new(NestedField::optional( + 1, + "v1", + Type::Primitive(PrimitiveType::Int), + )), + Arc::new(NestedField::optional( + 2, + "v2", + Type::Primitive(PrimitiveType::String), + )), + Arc::new(NestedField::optional( + 3, + "v3", + Type::Primitive(PrimitiveType::String), + )), + ]) + .build() + .unwrap(), + ); + let data_files = vec![DataFile { + content: DataContentType::Data, + file_path: "s3://testbucket/iceberg_data/iceberg_ctl/iceberg_db/iceberg_tbl/data/00000-7-45268d71-54eb-476c-b42c-942d880c04a1-00001.parquet".to_string(), + file_format: DataFileFormat::Parquet, + partition: Struct::empty(), + record_count: 1, + file_size_in_bytes: 875, + column_sizes: HashMap::from([(1,47),(2,48),(3,52)]), + value_counts: HashMap::from([(1,1),(2,1),(3,1)]), + null_value_counts: HashMap::from([(1,0),(2,0),(3,0)]), + nan_value_counts: HashMap::new(), + lower_bounds: HashMap::from([(1,Datum::int(1)),(2,Datum::string("a")),(3,Datum::string("AC/DC"))]), + upper_bounds: HashMap::from([(1,Datum::int(1)),(2,Datum::string("a")),(3,Datum::string("AC/DC"))]), + key_metadata: None, + split_offsets: vec![4], + equality_ids: vec![], + sort_order_id: Some(0), + partition_spec_id: 0, + first_row_id: None, + referenced_data_file: None, + content_offset: None, + content_size_in_bytes: None, + }]; + + let mut buffer = Vec::new(); + let _ = write_data_files_to_avro( + &mut buffer, + data_files.clone().into_iter(), + &StructType::new(vec![]), + FormatVersion::V1, + ) + .unwrap(); + + let actual_data_file = read_data_files_from_avro( + &mut Cursor::new(buffer), + &schema, + 0, + &StructType::new(vec![]), + FormatVersion::V1, + ) + .unwrap(); + + assert_eq!(actual_data_file[0].content, DataContentType::Data) + + + } } diff --git a/crates/iceberg/src/spec/manifest/entry.rs b/crates/iceberg/src/spec/manifest/entry.rs index cc0c1dce22..c01c768783 100644 --- a/crates/iceberg/src/spec/manifest/entry.rs +++ b/crates/iceberg/src/spec/manifest/entry.rs @@ -24,8 +24,7 @@ use typed_builder::TypedBuilder; use crate::avro::schema_to_avro_schema; use crate::error::Result; use crate::spec::{ - DataContentType, DataFile, INITIAL_SEQUENCE_NUMBER, ListType, ManifestFile, MapType, - NestedField, NestedFieldRef, PrimitiveType, Schema, StructType, Type, + DataContentType, DataFile, ListType, Literal, ManifestFile, MapType, NestedField, NestedFieldRef, PrimitiveLiteral, PrimitiveType, Schema, StructType, Type, INITIAL_SEQUENCE_NUMBER }; use crate::{Error, ErrorKind}; @@ -236,7 +235,7 @@ static CONTENT: Lazy = { 134, "content", Type::Primitive(PrimitiveType::Int), - )) + ).with_initial_default(Literal::Primitive(PrimitiveLiteral::Int(1)))) }) }; @@ -563,16 +562,6 @@ pub(super) fn manifest_schema_v2(partition_type: &StructType) -> Result Vec { vec![ - // Content is always 1. - Arc::new(NestedField::builder() - .id(134) - .name("content") - .required(false) - .field_type(Type::Primitive(PrimitiveType::Int)) - .initial_default(Some(serde_json::Value::Number(1.into()))) - .write_default(Some(serde_json::Value::Number(1.into()))) - .build() - ), FILE_PATH.clone(), FILE_FORMAT.clone(), Arc::new(NestedField::required( From 2ce38f6bd952363ec70e62562a452e0c4981db41 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Wed, 2 Jul 2025 14:58:06 -0700 Subject: [PATCH 03/11] fmt changes --- crates/iceberg/src/spec/manifest/_serde.rs | 12 +++++------- crates/iceberg/src/spec/manifest/entry.rs | 12 ++++++------ 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/crates/iceberg/src/spec/manifest/_serde.rs b/crates/iceberg/src/spec/manifest/_serde.rs index 7629a2f899..ac5dfc146c 100644 --- a/crates/iceberg/src/spec/manifest/_serde.rs +++ b/crates/iceberg/src/spec/manifest/_serde.rs @@ -319,10 +319,10 @@ mod tests { #[test] fn test_parse_negative_manifest_entry() { - let entries = vec![I64Entry { key: 1, value: -1 }, I64Entry { - key: 2, - value: 3, - }]; + let entries = vec![ + I64Entry { key: 1, value: -1 }, + I64Entry { key: 2, value: 3 }, + ]; let ret = parse_i64_entry(entries).unwrap(); @@ -465,8 +465,6 @@ mod tests { ) .unwrap(); - assert_eq!(actual_data_file[0].content, DataContentType::Data) - - + assert_eq!(actual_data_file[0].content, DataContentType::Data) } } diff --git a/crates/iceberg/src/spec/manifest/entry.rs b/crates/iceberg/src/spec/manifest/entry.rs index c01c768783..69b6bf5954 100644 --- a/crates/iceberg/src/spec/manifest/entry.rs +++ b/crates/iceberg/src/spec/manifest/entry.rs @@ -24,7 +24,8 @@ use typed_builder::TypedBuilder; use crate::avro::schema_to_avro_schema; use crate::error::Result; use crate::spec::{ - DataContentType, DataFile, ListType, Literal, ManifestFile, MapType, NestedField, NestedFieldRef, PrimitiveLiteral, PrimitiveType, Schema, StructType, Type, INITIAL_SEQUENCE_NUMBER + DataContentType, DataFile, INITIAL_SEQUENCE_NUMBER, ListType, Literal, ManifestFile, MapType, + NestedField, NestedFieldRef, PrimitiveLiteral, PrimitiveType, Schema, StructType, Type, }; use crate::{Error, ErrorKind}; @@ -231,11 +232,10 @@ static FILE_SEQUENCE_NUMBER: Lazy = { static CONTENT: Lazy = { Lazy::new(|| { - Arc::new(NestedField::required( - 134, - "content", - Type::Primitive(PrimitiveType::Int), - ).with_initial_default(Literal::Primitive(PrimitiveLiteral::Int(1)))) + Arc::new( + NestedField::required(134, "content", Type::Primitive(PrimitiveType::Int)) + .with_initial_default(Literal::Primitive(PrimitiveLiteral::Int(1))), + ) }) }; From 99e8c2003467fd5bba8403f780eb01c5639dcfb2 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Wed, 2 Jul 2025 15:58:29 -0700 Subject: [PATCH 04/11] added initial default logic --- crates/iceberg/src/avro/schema.rs | 40 +++++++++++++++++++++- crates/iceberg/src/spec/manifest/_serde.rs | 10 ++++-- crates/iceberg/src/spec/manifest/entry.rs | 2 +- 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/crates/iceberg/src/avro/schema.rs b/crates/iceberg/src/avro/schema.rs index 8c3d07a590..38a5f7e7ff 100644 --- a/crates/iceberg/src/avro/schema.rs +++ b/crates/iceberg/src/avro/schema.rs @@ -43,6 +43,41 @@ const MAP_LOGICAL_TYPE: &str = "map"; // This const may better to maintain in avro-rs. const LOGICAL_TYPE: &str = "logicalType"; +fn literal_to_json(literal: &crate::spec::Literal) -> Result { + match literal { + crate::spec::Literal::Primitive(p) => match p { + crate::spec::PrimitiveLiteral::Boolean(b) => Ok(Value::Bool(*b)), + crate::spec::PrimitiveLiteral::Int(i) => Ok(Value::Number(Number::from(*i))), + crate::spec::PrimitiveLiteral::Long(l) => Ok(Value::Number(Number::from(*l))), + crate::spec::PrimitiveLiteral::Float(f) => Ok(Value::Number( + Number::from_f64(f.0 as f64).ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + "Failed to convert float to json number", + ) + })?, + )), + crate::spec::PrimitiveLiteral::Double(d) => Ok(Value::Number( + Number::from_f64(d.0).ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + "Failed to convert double to json number", + ) + })?, + )), + crate::spec::PrimitiveLiteral::String(s) => Ok(Value::String(s.clone())), + _ => Err(Error::new( + ErrorKind::FeatureUnsupported, + "Unsupported literal type to convert to json", + )), + }, + _ => Err(Error::new( + ErrorKind::FeatureUnsupported, + "Unsupported literal type to convert to json", + )), + } +} + struct SchemaToAvroSchema { schema: String, } @@ -92,9 +127,12 @@ impl SchemaVisitor for SchemaToAvroSchema { custom_attributes: Default::default(), }; - if !field.required { + if let Some(default) = &field.initial_default { + avro_record_field.default = Some(literal_to_json(default)?); + } else if !field.required { avro_record_field.default = Some(Value::Null); } + avro_record_field.custom_attributes.insert( FILED_ID_PROP.to_string(), Value::Number(Number::from(field.id)), diff --git a/crates/iceberg/src/spec/manifest/_serde.rs b/crates/iceberg/src/spec/manifest/_serde.rs index ac5dfc146c..aff54f070b 100644 --- a/crates/iceberg/src/spec/manifest/_serde.rs +++ b/crates/iceberg/src/spec/manifest/_serde.rs @@ -99,7 +99,7 @@ impl ManifestEntryV1 { #[serde_as] #[derive(Serialize, Deserialize)] pub(super) struct DataFileSerde { - #[serde(default)] + #[serde(default = "default_to_zero")] content: i32, file_path: String, file_format: String, @@ -125,6 +125,10 @@ pub(super) struct DataFileSerde { content_size_in_bytes: Option, } +fn default_to_zero() -> i32 { + 0 +} + impl DataFileSerde { pub fn try_from( value: super::DataFile, @@ -400,7 +404,7 @@ mod tests { } #[tokio::test] - async fn test_data_file_serialize_deserialize_v1() { + async fn test_data_file_serialize_deserialize_v1_data_on_v2_reader() { let schema = Arc::new( Schema::builder() .with_fields(vec![ @@ -461,7 +465,7 @@ mod tests { &schema, 0, &StructType::new(vec![]), - FormatVersion::V1, + FormatVersion::V2, ) .unwrap(); diff --git a/crates/iceberg/src/spec/manifest/entry.rs b/crates/iceberg/src/spec/manifest/entry.rs index 69b6bf5954..3281d5fa0c 100644 --- a/crates/iceberg/src/spec/manifest/entry.rs +++ b/crates/iceberg/src/spec/manifest/entry.rs @@ -234,7 +234,7 @@ static CONTENT: Lazy = { Lazy::new(|| { Arc::new( NestedField::required(134, "content", Type::Primitive(PrimitiveType::Int)) - .with_initial_default(Literal::Primitive(PrimitiveLiteral::Int(1))), + .with_initial_default(Literal::Primitive(PrimitiveLiteral::Int(0))), ) }) }; From 1eff2ffd33e54c071f1c376303de98be30f6c29f Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Wed, 2 Jul 2025 16:15:43 -0700 Subject: [PATCH 05/11] fmt --- crates/iceberg/src/avro/schema.rs | 8 ++++---- crates/iceberg/src/spec/manifest/_serde.rs | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/iceberg/src/avro/schema.rs b/crates/iceberg/src/avro/schema.rs index 38a5f7e7ff..571eef19b1 100644 --- a/crates/iceberg/src/avro/schema.rs +++ b/crates/iceberg/src/avro/schema.rs @@ -57,14 +57,14 @@ fn literal_to_json(literal: &crate::spec::Literal) -> Result { ) })?, )), - crate::spec::PrimitiveLiteral::Double(d) => Ok(Value::Number( - Number::from_f64(d.0).ok_or_else(|| { + crate::spec::PrimitiveLiteral::Double(d) => { + Ok(Value::Number(Number::from_f64(d.0).ok_or_else(|| { Error::new( ErrorKind::DataInvalid, "Failed to convert double to json number", ) - })?, - )), + })?)) + } crate::spec::PrimitiveLiteral::String(s) => Ok(Value::String(s.clone())), _ => Err(Error::new( ErrorKind::FeatureUnsupported, diff --git a/crates/iceberg/src/spec/manifest/_serde.rs b/crates/iceberg/src/spec/manifest/_serde.rs index aff54f070b..63f9103b97 100644 --- a/crates/iceberg/src/spec/manifest/_serde.rs +++ b/crates/iceberg/src/spec/manifest/_serde.rs @@ -323,10 +323,10 @@ mod tests { #[test] fn test_parse_negative_manifest_entry() { - let entries = vec![ - I64Entry { key: 1, value: -1 }, - I64Entry { key: 2, value: 3 }, - ]; + let entries = vec![I64Entry { key: 1, value: -1 }, I64Entry { + key: 2, + value: 3, + }]; let ret = parse_i64_entry(entries).unwrap(); From 93e8fc46c63c2f06430832874d06e828c6bd633c Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Wed, 2 Jul 2025 16:17:30 -0700 Subject: [PATCH 06/11] can remove this part --- crates/iceberg/src/spec/manifest/_serde.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/crates/iceberg/src/spec/manifest/_serde.rs b/crates/iceberg/src/spec/manifest/_serde.rs index 63f9103b97..3321193977 100644 --- a/crates/iceberg/src/spec/manifest/_serde.rs +++ b/crates/iceberg/src/spec/manifest/_serde.rs @@ -99,7 +99,7 @@ impl ManifestEntryV1 { #[serde_as] #[derive(Serialize, Deserialize)] pub(super) struct DataFileSerde { - #[serde(default = "default_to_zero")] + #[serde(default)] content: i32, file_path: String, file_format: String, @@ -125,10 +125,6 @@ pub(super) struct DataFileSerde { content_size_in_bytes: Option, } -fn default_to_zero() -> i32 { - 0 -} - impl DataFileSerde { pub fn try_from( value: super::DataFile, From 09bd23638fcea789ef9e6f130767f971424b79bf Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Mon, 7 Jul 2025 11:39:17 -0700 Subject: [PATCH 07/11] first set of changes --- crates/iceberg/src/avro/schema.rs | 16 +++--- crates/iceberg/src/spec/manifest/_serde.rs | 67 ++++++---------------- crates/iceberg/src/spec/manifest/entry.rs | 1 + 3 files changed, 26 insertions(+), 58 deletions(-) diff --git a/crates/iceberg/src/avro/schema.rs b/crates/iceberg/src/avro/schema.rs index 571eef19b1..ac47791811 100644 --- a/crates/iceberg/src/avro/schema.rs +++ b/crates/iceberg/src/avro/schema.rs @@ -116,6 +116,14 @@ impl SchemaVisitor for SchemaToAvroSchema { field_schema = avro_optional(field_schema)?; } + let default = if let Some(default) = &field.initial_default { + Some(literal_to_json(default)?) + } else if !field.required { + Some(Value::Null) + } else { + None + }; + let mut avro_record_field = AvroRecordField { name: field.name.clone(), schema: field_schema, @@ -123,16 +131,10 @@ impl SchemaVisitor for SchemaToAvroSchema { position: 0, doc: field.doc.clone(), aliases: None, - default: None, + default: default, custom_attributes: Default::default(), }; - if let Some(default) = &field.initial_default { - avro_record_field.default = Some(literal_to_json(default)?); - } else if !field.required { - avro_record_field.default = Some(Value::Null); - } - avro_record_field.custom_attributes.insert( FILED_ID_PROP.to_string(), Value::Number(Number::from(field.id)), diff --git a/crates/iceberg/src/spec/manifest/_serde.rs b/crates/iceberg/src/spec/manifest/_serde.rs index 3321193977..fd7bc2e69a 100644 --- a/crates/iceberg/src/spec/manifest/_serde.rs +++ b/crates/iceberg/src/spec/manifest/_serde.rs @@ -330,9 +330,8 @@ mod tests { assert_eq!(ret, expected_ret, "Negative i64 entry should be ignored!"); } - #[tokio::test] - async fn test_data_file_serialize_deserialize() { - let schema = Arc::new( + fn schema() -> Arc { + Arc::new( Schema::builder() .with_fields(vec![ Arc::new(NestedField::optional( @@ -353,8 +352,11 @@ mod tests { ]) .build() .unwrap(), - ); - let data_files = vec![DataFile { + ) + } + + fn data_files() -> Vec { + vec![DataFile { content: DataContentType::Data, file_path: "s3://testbucket/iceberg_data/iceberg_ctl/iceberg_db/iceberg_tbl/data/00000-7-45268d71-54eb-476c-b42c-942d880c04a1-00001.parquet".to_string(), file_format: DataFileFormat::Parquet, @@ -376,7 +378,13 @@ mod tests { referenced_data_file: None, content_offset: None, content_size_in_bytes: None, - }]; + }] + } + + #[tokio::test] + async fn test_data_file_serialize_deserialize() { + let schema = schema(); + let data_files = data_files(); let mut buffer = Vec::new(); let _ = write_data_files_to_avro( @@ -401,51 +409,8 @@ mod tests { #[tokio::test] async fn test_data_file_serialize_deserialize_v1_data_on_v2_reader() { - let schema = Arc::new( - Schema::builder() - .with_fields(vec![ - Arc::new(NestedField::optional( - 1, - "v1", - Type::Primitive(PrimitiveType::Int), - )), - Arc::new(NestedField::optional( - 2, - "v2", - Type::Primitive(PrimitiveType::String), - )), - Arc::new(NestedField::optional( - 3, - "v3", - Type::Primitive(PrimitiveType::String), - )), - ]) - .build() - .unwrap(), - ); - let data_files = vec![DataFile { - content: DataContentType::Data, - file_path: "s3://testbucket/iceberg_data/iceberg_ctl/iceberg_db/iceberg_tbl/data/00000-7-45268d71-54eb-476c-b42c-942d880c04a1-00001.parquet".to_string(), - file_format: DataFileFormat::Parquet, - partition: Struct::empty(), - record_count: 1, - file_size_in_bytes: 875, - column_sizes: HashMap::from([(1,47),(2,48),(3,52)]), - value_counts: HashMap::from([(1,1),(2,1),(3,1)]), - null_value_counts: HashMap::from([(1,0),(2,0),(3,0)]), - nan_value_counts: HashMap::new(), - lower_bounds: HashMap::from([(1,Datum::int(1)),(2,Datum::string("a")),(3,Datum::string("AC/DC"))]), - upper_bounds: HashMap::from([(1,Datum::int(1)),(2,Datum::string("a")),(3,Datum::string("AC/DC"))]), - key_metadata: None, - split_offsets: vec![4], - equality_ids: vec![], - sort_order_id: Some(0), - partition_spec_id: 0, - first_row_id: None, - referenced_data_file: None, - content_offset: None, - content_size_in_bytes: None, - }]; + let schema = schema(); + let data_files = data_files(); let mut buffer = Vec::new(); let _ = write_data_files_to_avro( diff --git a/crates/iceberg/src/spec/manifest/entry.rs b/crates/iceberg/src/spec/manifest/entry.rs index 3281d5fa0c..7ba9efb3b9 100644 --- a/crates/iceberg/src/spec/manifest/entry.rs +++ b/crates/iceberg/src/spec/manifest/entry.rs @@ -234,6 +234,7 @@ static CONTENT: Lazy = { Lazy::new(|| { Arc::new( NestedField::required(134, "content", Type::Primitive(PrimitiveType::Int)) + // 0 refers to DataContentType::DATA .with_initial_default(Literal::Primitive(PrimitiveLiteral::Int(0))), ) }) From 68bd35d75e3ddb42bff845dee14665bbf5d7d4a7 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Mon, 7 Jul 2025 12:18:06 -0700 Subject: [PATCH 08/11] new attempt to serialize --- crates/iceberg/src/avro/schema.rs | 50 ++++++++----------------------- 1 file changed, 13 insertions(+), 37 deletions(-) diff --git a/crates/iceberg/src/avro/schema.rs b/crates/iceberg/src/avro/schema.rs index ac47791811..c539ccddd8 100644 --- a/crates/iceberg/src/avro/schema.rs +++ b/crates/iceberg/src/avro/schema.rs @@ -16,6 +16,7 @@ // under the License. //! Conversion between iceberg and avro schema. +use std::any::Any; use std::collections::BTreeMap; use apache_avro::Schema as AvroSchema; @@ -28,7 +29,7 @@ use serde_json::{Number, Value}; use crate::spec::{ ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, Schema, SchemaVisitor, - StructType, Type, visit_schema, + StructType, Type, visit_schema, Datum, RawLiteral, }; use crate::{Error, ErrorKind, Result, ensure_data_valid}; @@ -43,40 +44,7 @@ const MAP_LOGICAL_TYPE: &str = "map"; // This const may better to maintain in avro-rs. const LOGICAL_TYPE: &str = "logicalType"; -fn literal_to_json(literal: &crate::spec::Literal) -> Result { - match literal { - crate::spec::Literal::Primitive(p) => match p { - crate::spec::PrimitiveLiteral::Boolean(b) => Ok(Value::Bool(*b)), - crate::spec::PrimitiveLiteral::Int(i) => Ok(Value::Number(Number::from(*i))), - crate::spec::PrimitiveLiteral::Long(l) => Ok(Value::Number(Number::from(*l))), - crate::spec::PrimitiveLiteral::Float(f) => Ok(Value::Number( - Number::from_f64(f.0 as f64).ok_or_else(|| { - Error::new( - ErrorKind::DataInvalid, - "Failed to convert float to json number", - ) - })?, - )), - crate::spec::PrimitiveLiteral::Double(d) => { - Ok(Value::Number(Number::from_f64(d.0).ok_or_else(|| { - Error::new( - ErrorKind::DataInvalid, - "Failed to convert double to json number", - ) - })?)) - } - crate::spec::PrimitiveLiteral::String(s) => Ok(Value::String(s.clone())), - _ => Err(Error::new( - ErrorKind::FeatureUnsupported, - "Unsupported literal type to convert to json", - )), - }, - _ => Err(Error::new( - ErrorKind::FeatureUnsupported, - "Unsupported literal type to convert to json", - )), - } -} + struct SchemaToAvroSchema { schema: String, @@ -116,8 +84,16 @@ impl SchemaVisitor for SchemaToAvroSchema { field_schema = avro_optional(field_schema)?; } - let default = if let Some(default) = &field.initial_default { - Some(literal_to_json(default)?) + let default = if let Some(literal) = &field.initial_default { + let raw_literal = RawLiteral::try_from(literal.clone(), &field.field_type)?; + let json_value = serde_json::to_value(raw_literal).map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + "Failed to serialize default value to json", + ) + .with_source(e) + })?; + Some(json_value) } else if !field.required { Some(Value::Null) } else { From 9f9590a5090d4e14d5149b999ca301c6e911972a Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Mon, 7 Jul 2025 15:31:06 -0700 Subject: [PATCH 09/11] formatting --- crates/iceberg/src/avro/schema.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/crates/iceberg/src/avro/schema.rs b/crates/iceberg/src/avro/schema.rs index c539ccddd8..c5669e5ed4 100644 --- a/crates/iceberg/src/avro/schema.rs +++ b/crates/iceberg/src/avro/schema.rs @@ -28,8 +28,8 @@ use itertools::{Either, Itertools}; use serde_json::{Number, Value}; use crate::spec::{ - ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, Schema, SchemaVisitor, - StructType, Type, visit_schema, Datum, RawLiteral, + Datum, ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, RawLiteral, Schema, + SchemaVisitor, StructType, Type, visit_schema, }; use crate::{Error, ErrorKind, Result, ensure_data_valid}; @@ -44,8 +44,6 @@ const MAP_LOGICAL_TYPE: &str = "map"; // This const may better to maintain in avro-rs. const LOGICAL_TYPE: &str = "logicalType"; - - struct SchemaToAvroSchema { schema: String, } From c7b5e9ba51d3800034620b7ecd3832dca8477a5c Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Mon, 7 Jul 2025 16:41:42 -0700 Subject: [PATCH 10/11] clippy --- crates/iceberg/src/avro/schema.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/iceberg/src/avro/schema.rs b/crates/iceberg/src/avro/schema.rs index c5669e5ed4..6b26ba9944 100644 --- a/crates/iceberg/src/avro/schema.rs +++ b/crates/iceberg/src/avro/schema.rs @@ -16,7 +16,6 @@ // under the License. //! Conversion between iceberg and avro schema. -use std::any::Any; use std::collections::BTreeMap; use apache_avro::Schema as AvroSchema; @@ -28,7 +27,7 @@ use itertools::{Either, Itertools}; use serde_json::{Number, Value}; use crate::spec::{ - Datum, ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, RawLiteral, Schema, + ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, RawLiteral, Schema, SchemaVisitor, StructType, Type, visit_schema, }; use crate::{Error, ErrorKind, Result, ensure_data_valid}; @@ -105,7 +104,7 @@ impl SchemaVisitor for SchemaToAvroSchema { position: 0, doc: field.doc.clone(), aliases: None, - default: default, + default, custom_attributes: Default::default(), }; From 25dc31e2fe27193da25102a87e2510c228b09b11 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Tue, 8 Jul 2025 16:55:55 -0700 Subject: [PATCH 11/11] change method --- crates/iceberg/src/avro/schema.rs | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/crates/iceberg/src/avro/schema.rs b/crates/iceberg/src/avro/schema.rs index 6b26ba9944..e53f286792 100644 --- a/crates/iceberg/src/avro/schema.rs +++ b/crates/iceberg/src/avro/schema.rs @@ -27,8 +27,8 @@ use itertools::{Either, Itertools}; use serde_json::{Number, Value}; use crate::spec::{ - ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, RawLiteral, Schema, - SchemaVisitor, StructType, Type, visit_schema, + ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, Schema, SchemaVisitor, + StructType, Type, visit_schema, }; use crate::{Error, ErrorKind, Result, ensure_data_valid}; @@ -82,15 +82,7 @@ impl SchemaVisitor for SchemaToAvroSchema { } let default = if let Some(literal) = &field.initial_default { - let raw_literal = RawLiteral::try_from(literal.clone(), &field.field_type)?; - let json_value = serde_json::to_value(raw_literal).map_err(|e| { - Error::new( - ErrorKind::DataInvalid, - "Failed to serialize default value to json", - ) - .with_source(e) - })?; - Some(json_value) + Some(literal.clone().try_into_json(&field.field_type)?) } else if !field.required { Some(Value::Null) } else {