Skip to content

Commit 1dd2f2e

Browse files
committed
first set of changes
1 parent 812ca85 commit 1dd2f2e

File tree

3 files changed

+26
-58
lines changed

3 files changed

+26
-58
lines changed

crates/iceberg/src/avro/schema.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -116,23 +116,25 @@ impl SchemaVisitor for SchemaToAvroSchema {
116116
field_schema = avro_optional(field_schema)?;
117117
}
118118

119+
let default = if let Some(default) = &field.initial_default {
120+
Some(literal_to_json(default)?)
121+
} else if !field.required {
122+
Some(Value::Null)
123+
} else {
124+
None
125+
};
126+
119127
let mut avro_record_field = AvroRecordField {
120128
name: field.name.clone(),
121129
schema: field_schema,
122130
order: RecordFieldOrder::Ignore,
123131
position: 0,
124132
doc: field.doc.clone(),
125133
aliases: None,
126-
default: None,
134+
default: default,
127135
custom_attributes: Default::default(),
128136
};
129137

130-
if let Some(default) = &field.initial_default {
131-
avro_record_field.default = Some(literal_to_json(default)?);
132-
} else if !field.required {
133-
avro_record_field.default = Some(Value::Null);
134-
}
135-
136138
avro_record_field.custom_attributes.insert(
137139
FILED_ID_PROP.to_string(),
138140
Value::Number(Number::from(field.id)),

crates/iceberg/src/spec/manifest/_serde.rs

Lines changed: 16 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -330,9 +330,8 @@ mod tests {
330330
assert_eq!(ret, expected_ret, "Negative i64 entry should be ignored!");
331331
}
332332

333-
#[tokio::test]
334-
async fn test_data_file_serialize_deserialize() {
335-
let schema = Arc::new(
333+
fn schema() -> Arc<Schema> {
334+
Arc::new(
336335
Schema::builder()
337336
.with_fields(vec![
338337
Arc::new(NestedField::optional(
@@ -353,8 +352,11 @@ mod tests {
353352
])
354353
.build()
355354
.unwrap(),
356-
);
357-
let data_files = vec![DataFile {
355+
)
356+
}
357+
358+
fn data_files() -> Vec<DataFile> {
359+
vec![DataFile {
358360
content: DataContentType::Data,
359361
file_path: "s3://testbucket/iceberg_data/iceberg_ctl/iceberg_db/iceberg_tbl/data/00000-7-45268d71-54eb-476c-b42c-942d880c04a1-00001.parquet".to_string(),
360362
file_format: DataFileFormat::Parquet,
@@ -376,7 +378,13 @@ mod tests {
376378
referenced_data_file: None,
377379
content_offset: None,
378380
content_size_in_bytes: None,
379-
}];
381+
}]
382+
}
383+
384+
#[tokio::test]
385+
async fn test_data_file_serialize_deserialize() {
386+
let schema = schema();
387+
let data_files = data_files();
380388

381389
let mut buffer = Vec::new();
382390
let _ = write_data_files_to_avro(
@@ -401,51 +409,8 @@ mod tests {
401409

402410
#[tokio::test]
403411
async fn test_data_file_serialize_deserialize_v1_data_on_v2_reader() {
404-
let schema = Arc::new(
405-
Schema::builder()
406-
.with_fields(vec![
407-
Arc::new(NestedField::optional(
408-
1,
409-
"v1",
410-
Type::Primitive(PrimitiveType::Int),
411-
)),
412-
Arc::new(NestedField::optional(
413-
2,
414-
"v2",
415-
Type::Primitive(PrimitiveType::String),
416-
)),
417-
Arc::new(NestedField::optional(
418-
3,
419-
"v3",
420-
Type::Primitive(PrimitiveType::String),
421-
)),
422-
])
423-
.build()
424-
.unwrap(),
425-
);
426-
let data_files = vec![DataFile {
427-
content: DataContentType::Data,
428-
file_path: "s3://testbucket/iceberg_data/iceberg_ctl/iceberg_db/iceberg_tbl/data/00000-7-45268d71-54eb-476c-b42c-942d880c04a1-00001.parquet".to_string(),
429-
file_format: DataFileFormat::Parquet,
430-
partition: Struct::empty(),
431-
record_count: 1,
432-
file_size_in_bytes: 875,
433-
column_sizes: HashMap::from([(1,47),(2,48),(3,52)]),
434-
value_counts: HashMap::from([(1,1),(2,1),(3,1)]),
435-
null_value_counts: HashMap::from([(1,0),(2,0),(3,0)]),
436-
nan_value_counts: HashMap::new(),
437-
lower_bounds: HashMap::from([(1,Datum::int(1)),(2,Datum::string("a")),(3,Datum::string("AC/DC"))]),
438-
upper_bounds: HashMap::from([(1,Datum::int(1)),(2,Datum::string("a")),(3,Datum::string("AC/DC"))]),
439-
key_metadata: None,
440-
split_offsets: vec![4],
441-
equality_ids: vec![],
442-
sort_order_id: Some(0),
443-
partition_spec_id: 0,
444-
first_row_id: None,
445-
referenced_data_file: None,
446-
content_offset: None,
447-
content_size_in_bytes: None,
448-
}];
412+
let schema = schema();
413+
let data_files = data_files();
449414

450415
let mut buffer = Vec::new();
451416
let _ = write_data_files_to_avro(

crates/iceberg/src/spec/manifest/entry.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ static CONTENT: Lazy<NestedFieldRef> = {
234234
Lazy::new(|| {
235235
Arc::new(
236236
NestedField::required(134, "content", Type::Primitive(PrimitiveType::Int))
237+
// 0 refers to DataContentType::Data
237238
.with_initial_default(Literal::Primitive(PrimitiveLiteral::Int(0))),
238239
)
239240
})

0 commit comments

Comments
 (0)