diff --git a/crates/iceberg/src/avro/schema.rs b/crates/iceberg/src/avro/schema.rs index 8c3d07a590..e53f286792 100644 --- a/crates/iceberg/src/avro/schema.rs +++ b/crates/iceberg/src/avro/schema.rs @@ -81,6 +81,14 @@ impl SchemaVisitor for SchemaToAvroSchema { field_schema = avro_optional(field_schema)?; } + let default = if let Some(literal) = &field.initial_default { + Some(literal.clone().try_into_json(&field.field_type)?) + } else if !field.required { + Some(Value::Null) + } else { + None + }; + let mut avro_record_field = AvroRecordField { name: field.name.clone(), schema: field_schema, @@ -88,13 +96,10 @@ impl SchemaVisitor for SchemaToAvroSchema { position: 0, doc: field.doc.clone(), aliases: None, - default: None, + default, custom_attributes: Default::default(), }; - if !field.required { - avro_record_field.default = Some(Value::Null); - } avro_record_field.custom_attributes.insert( FILED_ID_PROP.to_string(), Value::Number(Number::from(field.id)), diff --git a/crates/iceberg/src/spec/manifest/_serde.rs b/crates/iceberg/src/spec/manifest/_serde.rs index 97923c7a86..fd7bc2e69a 100644 --- a/crates/iceberg/src/spec/manifest/_serde.rs +++ b/crates/iceberg/src/spec/manifest/_serde.rs @@ -330,9 +330,8 @@ mod tests { assert_eq!(ret, expected_ret, "Negative i64 entry should be ignored!"); } - #[tokio::test] - async fn test_data_file_serialize_deserialize() { - let schema = Arc::new( + fn schema() -> Arc { + Arc::new( Schema::builder() .with_fields(vec![ Arc::new(NestedField::optional( @@ -353,8 +352,11 @@ mod tests { ]) .build() .unwrap(), - ); - let data_files = vec![DataFile { + ) + } + + fn data_files() -> Vec { + vec![DataFile { content: DataContentType::Data, file_path: "s3://testbucket/iceberg_data/iceberg_ctl/iceberg_db/iceberg_tbl/data/00000-7-45268d71-54eb-476c-b42c-942d880c04a1-00001.parquet".to_string(), file_format: DataFileFormat::Parquet, @@ -376,7 +378,13 @@ mod tests { referenced_data_file: None, content_offset: None, content_size_in_bytes: None, - }]; + }] + } + + #[tokio::test] + async fn test_data_file_serialize_deserialize() { + let schema = schema(); + let data_files = data_files(); let mut buffer = Vec::new(); let _ = write_data_files_to_avro( @@ -398,4 +406,30 @@ mod tests { assert_eq!(data_files, actual_data_file); } + + #[tokio::test] + async fn test_data_file_serialize_deserialize_v1_data_on_v2_reader() { + let schema = schema(); + let data_files = data_files(); + + let mut buffer = Vec::new(); + let _ = write_data_files_to_avro( + &mut buffer, + data_files.clone().into_iter(), + &StructType::new(vec![]), + FormatVersion::V1, + ) + .unwrap(); + + let actual_data_file = read_data_files_from_avro( + &mut Cursor::new(buffer), + &schema, + 0, + &StructType::new(vec![]), + FormatVersion::V2, + ) + .unwrap(); + + assert_eq!(actual_data_file[0].content, DataContentType::Data) + } } diff --git a/crates/iceberg/src/spec/manifest/entry.rs b/crates/iceberg/src/spec/manifest/entry.rs index 7d2f982d0d..7ba9efb3b9 100644 --- a/crates/iceberg/src/spec/manifest/entry.rs +++ b/crates/iceberg/src/spec/manifest/entry.rs @@ -24,8 +24,8 @@ use typed_builder::TypedBuilder; use crate::avro::schema_to_avro_schema; use crate::error::Result; use crate::spec::{ - DataContentType, DataFile, INITIAL_SEQUENCE_NUMBER, ListType, ManifestFile, MapType, - NestedField, NestedFieldRef, PrimitiveType, Schema, StructType, Type, + DataContentType, DataFile, INITIAL_SEQUENCE_NUMBER, ListType, Literal, ManifestFile, MapType, + NestedField, NestedFieldRef, PrimitiveLiteral, PrimitiveType, Schema, StructType, Type, }; use crate::{Error, ErrorKind}; @@ -232,11 +232,11 @@ static FILE_SEQUENCE_NUMBER: Lazy = { static CONTENT: Lazy = { Lazy::new(|| { - Arc::new(NestedField::required( - 134, - "content", - Type::Primitive(PrimitiveType::Int), - )) + Arc::new( + NestedField::required(134, "content", Type::Primitive(PrimitiveType::Int)) + // 0 refers to DataContentType::DATA + .with_initial_default(Literal::Primitive(PrimitiveLiteral::Int(0))), + ) }) };