@@ -54,7 +54,7 @@ async fn test_virtual_column_builder() -> Result<()> {
54
54
0 ,
55
55
) ; // Dummy location
56
56
57
- let builder = VirtualColumnBuilder :: try_create ( ctx, fuse_table , schema) . unwrap ( ) ;
57
+ let mut builder = VirtualColumnBuilder :: try_create ( ctx, schema) . unwrap ( ) ;
58
58
59
59
let block = DataBlock :: new (
60
60
vec ! [
@@ -83,7 +83,8 @@ async fn test_virtual_column_builder() -> Result<()> {
83
83
3 ,
84
84
) ;
85
85
86
- let result = builder. add_block ( & block, & write_settings, & location) ?;
86
+ builder. add_block ( & block) ?;
87
+ let result = builder. finalize ( & write_settings, & location) ?;
87
88
88
89
assert ! ( !result. data. is_empty( ) ) ;
89
90
assert_eq ! (
@@ -195,7 +196,8 @@ async fn test_virtual_column_builder() -> Result<()> {
195
196
8 ,
196
197
) ;
197
198
198
- let result = builder. add_block ( & block, & write_settings, & location) ?;
199
+ builder. add_block ( & block) ?;
200
+ let result = builder. finalize ( & write_settings, & location) ?;
199
201
200
202
// Expected columns: id, create, text, user.id, replies, geo.lat
201
203
assert_eq ! (
@@ -302,14 +304,196 @@ async fn test_virtual_column_builder() -> Result<()> {
302
304
entries, 8 , // Number of rows
303
305
) ;
304
306
305
- let result = builder. add_block ( & block, & write_settings, & location) ?;
307
+ builder. add_block ( & block) ?;
308
+ let result = builder. finalize ( & write_settings, & location) ?;
306
309
307
310
// all columns should be discarded due to > 70% nulls
308
311
assert ! ( result. data. is_empty( ) ) ;
309
312
310
313
Ok ( ( ) )
311
314
}
312
315
316
+ #[ tokio:: test( flavor = "multi_thread" ) ]
317
+ async fn test_virtual_column_builder_stream_write ( ) -> Result < ( ) > {
318
+ let fixture = TestFixture :: setup_with_custom ( EESetup :: new ( ) ) . await ?;
319
+
320
+ fixture
321
+ . default_session ( )
322
+ . get_settings ( )
323
+ . set_enable_experimental_virtual_column ( 1 ) ?;
324
+ fixture. create_default_database ( ) . await ?;
325
+ fixture. create_variant_table ( ) . await ?;
326
+
327
+ let ctx = fixture. new_query_ctx ( ) . await ?;
328
+
329
+ let table = fixture. latest_default_table ( ) . await ?;
330
+ let table_info = table. get_table_info ( ) ;
331
+ let schema = table_info. meta . schema . clone ( ) ;
332
+
333
+ let fuse_table = FuseTable :: try_from_table ( table. as_ref ( ) ) ?;
334
+
335
+ let write_settings = fuse_table. get_write_settings ( ) ;
336
+ let location = (
337
+ "_b/h0196236b460676369cfcf6fec0dedefa_v2.parquet" . to_string ( ) ,
338
+ 0 ,
339
+ ) ; // Dummy location
340
+
341
+ let mut builder = VirtualColumnBuilder :: try_create ( ctx, schema) . unwrap ( ) ;
342
+
343
+ // Create blocks with consistent schema across all blocks
344
+ let blocks = vec ! [
345
+ // Block 1: Simple nested structure
346
+ DataBlock :: new(
347
+ vec![
348
+ ( Int32Type :: from_data( vec![ 1 , 2 , 3 ] ) ) . into( ) ,
349
+ ( VariantType :: from_opt_data( vec![
350
+ Some (
351
+ OwnedJsonb :: from_str( r#"{"user": {"id": 1, "name": "Alice"}, "score": 100}"# )
352
+ . unwrap( )
353
+ . to_vec( ) ,
354
+ ) ,
355
+ Some (
356
+ OwnedJsonb :: from_str( r#"{"user": {"id": 2, "name": "Bob"}, "score": 85}"# )
357
+ . unwrap( )
358
+ . to_vec( ) ,
359
+ ) ,
360
+ Some (
361
+ OwnedJsonb :: from_str( r#"{"user": {"id": 3, "name": "Charlie"}, "score": 92}"# )
362
+ . unwrap( )
363
+ . to_vec( ) ,
364
+ ) ,
365
+ ] ) )
366
+ . into( ) ,
367
+ ] ,
368
+ 3 ,
369
+ ) ,
370
+ // Block 2: Same structure, different values
371
+ DataBlock :: new(
372
+ vec![
373
+ ( Int32Type :: from_data( vec![ 4 , 5 , 6 ] ) ) . into( ) ,
374
+ ( VariantType :: from_opt_data( vec![
375
+ Some (
376
+ OwnedJsonb :: from_str( r#"{"user": {"id": 4, "name": "Dave"}, "score": 78}"# )
377
+ . unwrap( )
378
+ . to_vec( ) ,
379
+ ) ,
380
+ Some (
381
+ OwnedJsonb :: from_str( r#"{"user": {"id": 5, "name": "Eve"}, "score": 95}"# )
382
+ . unwrap( )
383
+ . to_vec( ) ,
384
+ ) ,
385
+ Some (
386
+ OwnedJsonb :: from_str( r#"{"user": {"id": 6, "name": "Frank"}, "score": 88}"# )
387
+ . unwrap( )
388
+ . to_vec( ) ,
389
+ ) ,
390
+ ] ) )
391
+ . into( ) ,
392
+ ] ,
393
+ 3 ,
394
+ ) ,
395
+ // Block 3: Same structure with additional fields
396
+ DataBlock :: new(
397
+ vec![
398
+ ( Int32Type :: from_data( vec![ 7 , 8 , 9 ] ) ) . into( ) ,
399
+ ( VariantType :: from_opt_data( vec![
400
+ Some (
401
+ OwnedJsonb :: from_str( r#"{"user": {"id": 7, "name": "Grace", "active": true}, "score": 91, "tags": ["expert"]}"# )
402
+ . unwrap( )
403
+ . to_vec( ) ,
404
+ ) ,
405
+ Some (
406
+ OwnedJsonb :: from_str( r#"{"user": {"id": 8, "name": "Heidi", "active": false}, "score": 75, "tags": ["novice"]}"# )
407
+ . unwrap( )
408
+ . to_vec( ) ,
409
+ ) ,
410
+ Some (
411
+ OwnedJsonb :: from_str( r#"{"user": {"id": 9, "name": "Ivan", "active": true}, "score": 89, "tags": ["intermediate"]}"# )
412
+ . unwrap( )
413
+ . to_vec( ) ,
414
+ ) ,
415
+ ] ) )
416
+ . into( ) ,
417
+ ] ,
418
+ 3 ,
419
+ ) ,
420
+ ] ;
421
+
422
+ // Stream write: add each block to the builder
423
+ for block in & blocks {
424
+ builder. add_block ( block) ?;
425
+ }
426
+
427
+ // Finalize once after adding all blocks
428
+ let result = builder. finalize ( & write_settings, & location) ?;
429
+
430
+ // Verify the result
431
+ assert ! ( !result. data. is_empty( ) ) ;
432
+
433
+ // We expect virtual columns for user.id, user.name, user.active, score, and tags[0]
434
+ assert_eq ! (
435
+ result. draft_virtual_block_meta. virtual_column_metas. len( ) ,
436
+ 5
437
+ ) ;
438
+
439
+ // Check user.id column
440
+ let meta_user_id = find_virtual_col (
441
+ & result. draft_virtual_block_meta . virtual_column_metas ,
442
+ 1 ,
443
+ "['user']['id']" ,
444
+ )
445
+ . expect ( "Virtual column ['user']['id'] not found" ) ;
446
+ assert_eq ! ( meta_user_id. source_column_id, 1 ) ;
447
+ assert_eq ! ( meta_user_id. name, "['user']['id']" ) ;
448
+ assert_eq ! ( meta_user_id. data_type, VariantDataType :: UInt64 ) ;
449
+
450
+ // Check user.name column
451
+ let meta_user_name = find_virtual_col (
452
+ & result. draft_virtual_block_meta . virtual_column_metas ,
453
+ 1 ,
454
+ "['user']['name']" ,
455
+ )
456
+ . expect ( "Virtual column ['user']['name'] not found" ) ;
457
+ assert_eq ! ( meta_user_name. source_column_id, 1 ) ;
458
+ assert_eq ! ( meta_user_name. name, "['user']['name']" ) ;
459
+ assert_eq ! ( meta_user_name. data_type, VariantDataType :: String ) ;
460
+
461
+ // Check score column
462
+ let meta_score = find_virtual_col (
463
+ & result. draft_virtual_block_meta . virtual_column_metas ,
464
+ 1 ,
465
+ "['score']" ,
466
+ )
467
+ . expect ( "Virtual column ['score'] not found" ) ;
468
+ assert_eq ! ( meta_score. source_column_id, 1 ) ;
469
+ assert_eq ! ( meta_score. name, "['score']" ) ;
470
+ assert_eq ! ( meta_score. data_type, VariantDataType :: UInt64 ) ;
471
+
472
+ // Check user.active column (only present in the third block)
473
+ let meta_user_active = find_virtual_col (
474
+ & result. draft_virtual_block_meta . virtual_column_metas ,
475
+ 1 ,
476
+ "['user']['active']" ,
477
+ )
478
+ . expect ( "Virtual column ['user']['active'] not found" ) ;
479
+ assert_eq ! ( meta_user_active. source_column_id, 1 ) ;
480
+ assert_eq ! ( meta_user_active. name, "['user']['active']" ) ;
481
+ assert_eq ! ( meta_user_active. data_type, VariantDataType :: Boolean ) ;
482
+
483
+ // Check tags[0] column (only present in the third block)
484
+ let meta_tags = find_virtual_col (
485
+ & result. draft_virtual_block_meta . virtual_column_metas ,
486
+ 1 ,
487
+ "['tags'][0]" ,
488
+ )
489
+ . expect ( "Virtual column ['tags'][0] not found" ) ;
490
+ assert_eq ! ( meta_tags. source_column_id, 1 ) ;
491
+ assert_eq ! ( meta_tags. name, "['tags'][0]" ) ;
492
+ assert_eq ! ( meta_tags. data_type, VariantDataType :: String ) ;
493
+
494
+ Ok ( ( ) )
495
+ }
496
+
313
497
fn find_virtual_col < ' a > (
314
498
metas : & ' a [ DraftVirtualColumnMeta ] ,
315
499
source_id : ColumnId ,
0 commit comments