@@ -398,4 +398,75 @@ mod tests {
398
398
399
399
assert_eq ! ( data_files, actual_data_file) ;
400
400
}
401
+
402
+ #[ tokio:: test]
403
+ async fn test_data_file_serialize_deserialize_v1 ( ) {
404
+ let schema = Arc :: new (
405
+ Schema :: builder ( )
406
+ . with_fields ( vec ! [
407
+ Arc :: new( NestedField :: optional(
408
+ 1 ,
409
+ "v1" ,
410
+ Type :: Primitive ( PrimitiveType :: Int ) ,
411
+ ) ) ,
412
+ Arc :: new( NestedField :: optional(
413
+ 2 ,
414
+ "v2" ,
415
+ Type :: Primitive ( PrimitiveType :: String ) ,
416
+ ) ) ,
417
+ Arc :: new( NestedField :: optional(
418
+ 3 ,
419
+ "v3" ,
420
+ Type :: Primitive ( PrimitiveType :: String ) ,
421
+ ) ) ,
422
+ ] )
423
+ . build ( )
424
+ . unwrap ( ) ,
425
+ ) ;
426
+ let data_files = vec ! [ DataFile {
427
+ content: DataContentType :: Data ,
428
+ file_path: "s3://testbucket/iceberg_data/iceberg_ctl/iceberg_db/iceberg_tbl/data/00000-7-45268d71-54eb-476c-b42c-942d880c04a1-00001.parquet" . to_string( ) ,
429
+ file_format: DataFileFormat :: Parquet ,
430
+ partition: Struct :: empty( ) ,
431
+ record_count: 1 ,
432
+ file_size_in_bytes: 875 ,
433
+ column_sizes: HashMap :: from( [ ( 1 , 47 ) , ( 2 , 48 ) , ( 3 , 52 ) ] ) ,
434
+ value_counts: HashMap :: from( [ ( 1 , 1 ) , ( 2 , 1 ) , ( 3 , 1 ) ] ) ,
435
+ null_value_counts: HashMap :: from( [ ( 1 , 0 ) , ( 2 , 0 ) , ( 3 , 0 ) ] ) ,
436
+ nan_value_counts: HashMap :: new( ) ,
437
+ lower_bounds: HashMap :: from( [ ( 1 , Datum :: int( 1 ) ) , ( 2 , Datum :: string( "a" ) ) , ( 3 , Datum :: string( "AC/DC" ) ) ] ) ,
438
+ upper_bounds: HashMap :: from( [ ( 1 , Datum :: int( 1 ) ) , ( 2 , Datum :: string( "a" ) ) , ( 3 , Datum :: string( "AC/DC" ) ) ] ) ,
439
+ key_metadata: None ,
440
+ split_offsets: vec![ 4 ] ,
441
+ equality_ids: vec![ ] ,
442
+ sort_order_id: Some ( 0 ) ,
443
+ partition_spec_id: 0 ,
444
+ first_row_id: None ,
445
+ referenced_data_file: None ,
446
+ content_offset: None ,
447
+ content_size_in_bytes: None ,
448
+ } ] ;
449
+
450
+ let mut buffer = Vec :: new ( ) ;
451
+ let _ = write_data_files_to_avro (
452
+ & mut buffer,
453
+ data_files. clone ( ) . into_iter ( ) ,
454
+ & StructType :: new ( vec ! [ ] ) ,
455
+ FormatVersion :: V1 ,
456
+ )
457
+ . unwrap ( ) ;
458
+
459
+ let actual_data_file = read_data_files_from_avro (
460
+ & mut Cursor :: new ( buffer) ,
461
+ & schema,
462
+ 0 ,
463
+ & StructType :: new ( vec ! [ ] ) ,
464
+ FormatVersion :: V1 ,
465
+ )
466
+ . unwrap ( ) ;
467
+
468
+ assert_eq ! ( actual_data_file[ 0 ] . content, DataContentType :: Data )
469
+
470
+
471
+ }
401
472
}
0 commit comments