@@ -327,7 +327,8 @@ class RNTupleDescriptorBuilder {
327
327
representationIndex,
328
328
firstElementIndex,
329
329
minValue,
330
- maxValue
330
+ maxValue,
331
+ index : i
331
332
} ;
332
333
column . isDeferred = function ( ) {
333
334
return ( this . flags & RNTupleDescriptorBuilder . kFlagDeferredColumn ) !== 0 ;
@@ -574,7 +575,7 @@ async function readHeaderFooter(tuple) {
574
575
R__unzip ( blobs [ 1 ] , tuple . fLenFooter )
575
576
] ) . then ( unzip_blobs => {
576
577
const header_blob = unzip_blobs [ 0 ] ,
577
- footer_blob = unzip_blobs [ 1 ] ;
578
+ footer_blob = unzip_blobs [ 1 ] ;
578
579
if ( ! header_blob || ! footer_blob )
579
580
return false ;
580
581
@@ -586,21 +587,24 @@ async function readHeaderFooter(tuple) {
586
587
587
588
tuple . builder . deserializeFooter ( footer_blob ) ;
588
589
589
- // Extract first column and corresponding field
590
- const firstColumn = tuple . builder . columnDescriptors ?. [ 0 ] ;
591
- if ( ! firstColumn )
592
- throw new Error ( 'No column descriptor found' ) ;
593
-
594
- const field = tuple . builder . fieldDescriptors ?. [ firstColumn . fieldId ] ,
590
+ // Build fieldToColumns mapping
591
+ tuple . fieldToColumns = { } ;
592
+ for ( const colDesc of tuple . builder . columnDescriptors ) {
593
+ const fieldDesc = tuple . builder . fieldDescriptors [ colDesc . fieldId ] ,
594
+ fieldName = fieldDesc . fieldName ;
595
+ if ( ! tuple . fieldToColumns [ fieldName ] )
596
+ tuple . fieldToColumns [ fieldName ] = [ ] ;
597
+ tuple . fieldToColumns [ fieldName ] . push ( colDesc ) ;
598
+ }
595
599
596
- // Deserialize the Page List Envelope
597
- group = tuple . builder . clusterGroups ?. [ 0 ] ;
600
+ // Deserialize Page List
601
+ const group = tuple . builder . clusterGroups ?. [ 0 ] ;
598
602
if ( ! group || ! group . pageListLocator )
599
603
throw new Error ( 'No valid cluster group or page list locator found' ) ;
600
604
601
605
const offset = Number ( group . pageListLocator . offset ) ,
602
- size = Number ( group . pageListLocator . size ) ,
603
- uncompressedSize = Number ( group . pageListLength ) ;
606
+ size = Number ( group . pageListLocator . size ) ,
607
+ uncompressedSize = Number ( group . pageListLength ) ;
604
608
605
609
return tuple . $file . readBuffer ( [ offset , size ] ) . then ( page_list_blob => {
606
610
if ( ! ( page_list_blob instanceof DataView ) )
@@ -611,32 +615,7 @@ async function readHeaderFooter(tuple) {
611
615
throw new Error ( `Unzipped page list is not a DataView, got ${ Object . prototype . toString . call ( unzipped_blob ) } ` ) ;
612
616
613
617
tuple . builder . deserializePageList ( unzipped_blob ) ;
614
-
615
-
616
- // Access first page metadata
617
- const firstPage = tuple . builder ?. pageLocations ?. [ 0 ] ?. [ 0 ] ?. pages ?. [ 0 ] ;
618
- if ( ! firstPage || ! firstPage . locator )
619
- throw new Error ( 'No valid first page found in pageLocations' ) ;
620
-
621
- const pageOffset = Number ( firstPage . locator . offset ) ,
622
- pageSize = Number ( firstPage . locator . size ) ,
623
- elementSize = firstColumn . bitsOnStorage / 8 ,
624
- numElements = Number ( firstPage . numElements ) ,
625
- uncompressedPageSize = elementSize * numElements ;
626
-
627
-
628
- return tuple . $file . readBuffer ( [ pageOffset , pageSize ] ) . then ( compressedPage => {
629
- if ( ! ( compressedPage instanceof DataView ) )
630
- throw new Error ( 'Compressed page readBuffer did not return a DataView' ) ;
631
-
632
- return R__unzip ( compressedPage , uncompressedPageSize ) . then ( unzippedPage => {
633
- if ( ! ( unzippedPage instanceof DataView ) )
634
- throw new Error ( 'Unzipped page is not a DataView' ) ;
635
-
636
- tuple . builder . deserializePage ( unzippedPage , firstColumn , field ) ;
637
- return true ;
638
- } ) ;
639
- } ) ;
618
+ return true ;
640
619
} ) ;
641
620
} ) ;
642
621
} ) ;
@@ -649,36 +628,46 @@ async function readHeaderFooter(tuple) {
649
628
// Read and process the next data cluster from the RNTuple
650
629
function readNextCluster ( rntuple , selector ) {
651
630
const builder = rntuple . builder ,
652
- clusterSummary = builder . clusterSummaries [ selector . currentCluster ] ,
653
- pages = builder . pageLocations [ selector . currentCluster ] [ 0 ] . pages ;
631
+ clusterIndex = selector . currentCluster ,
632
+ clusterSummary = builder . clusterSummaries [ clusterIndex ] ,
633
+
634
+ // Gather all pages for this cluster from all columns
635
+ pages = [ ] ;
636
+
637
+ for ( const columns of Object . values ( rntuple . fieldToColumns ) ) {
638
+ for ( const colDesc of columns ) {
639
+ const colPages = builder . pageLocations [ clusterIndex ] [ colDesc . index ] . pages ;
640
+ for ( const page of colPages )
641
+ pages . push ( { page, colDesc } ) ;
642
+ }
643
+ }
654
644
655
645
selector . currentCluster ++ ;
656
646
657
647
// Build flat array of [offset, size, offset, size, ...] to read pages
658
- const dataToRead = pages . flatMap ( p => [ Number ( p . locator . offset ) , Number ( p . locator . size ) ] ) ;
648
+ const dataToRead = pages . flatMap ( p =>
649
+ [ Number ( p . page . locator . offset ) , Number ( p . page . locator . size ) ]
650
+ ) ;
659
651
660
652
return rntuple . $file . readBuffer ( dataToRead ) . then ( blobsRaw => {
661
653
const blobs = Array . isArray ( blobsRaw ) ? blobsRaw : [ blobsRaw ] ,
662
- unzipPromises = blobs . map ( ( blob , idx ) => {
663
- const numElements = Number ( pages [ idx ] . numElements ) ;
664
- return R__unzip ( blob , 8 * numElements ) ;
665
- } ) ;
654
+ unzipPromises = blobs . map ( ( blob , idx ) => {
655
+ const { page, colDesc } = pages [ idx ] ,
656
+ numElements = Number ( page . numElements ) ,
657
+ elementSize = colDesc . bitsOnStorage / 8 ;
658
+ return R__unzip ( blob , numElements * elementSize ) ;
659
+ } ) ;
666
660
667
- // Wait for all pages to be decompressed
668
661
return Promise . all ( unzipPromises ) . then ( unzipBlobs => {
669
- const totalSize = unzipBlobs . reduce ( ( sum , b ) => sum + b . byteLength , 0 ) ,
670
- flat = new Uint8Array ( totalSize ) ;
671
-
672
- let offset = 0 ;
673
- for ( const blob of unzipBlobs ) {
674
- flat . set ( new Uint8Array ( blob . buffer || blob ) , offset ) ;
675
- offset += blob . byteLength ;
662
+ for ( let i = 0 ; i < unzipBlobs . length ; ++ i ) {
663
+ const { colDesc } = pages [ i ] ,
664
+ field = builder . fieldDescriptors [ colDesc . fieldId ] ;
665
+ rntuple . builder . deserializePage ( unzipBlobs [ i ] , colDesc , field ) ;
676
666
}
677
667
678
- // Create reader and deserialize doubles from the buffer
679
- const reader = new RBufferReader ( flat . buffer ) ;
668
+ const reader = new RBufferReader ( unzipBlobs [ 0 ] . buffer ) ; // pick one column as example
680
669
for ( let i = 0 ; i < clusterSummary . numEntries ; ++ i ) {
681
- selector . tgtobj . myDouble = reader . readF64 ( ) ;
670
+ selector . tgtobj . myDouble = reader . readF64 ( ) ; // TODO: Replace with real field extraction later
682
671
selector . Process ( ) ;
683
672
}
684
673
0 commit comments