Commit 652ab5d

Merge pull request #350 from Krmjn09/feature/Field-to-Columns

Field-to-columns mapping: no hardcoding of columns.

2 parents 714de61 + 9debc25

2 files changed: +46, -57 lines
demo/node/rntuple_selector.js (1 addition, 1 deletion)

@@ -1,5 +1,5 @@
 import { rntupleProcess } from '../../modules/rntuple.mjs';
-import { TSelector, openFile} from 'jsroot';
+import { TSelector, openFile } from 'jsroot';
 
 const selector = new TSelector();
 selector.sum = 0;

modules/rntuple.mjs (45 additions, 56 deletions)
@@ -327,7 +327,8 @@ class RNTupleDescriptorBuilder {
          representationIndex,
          firstElementIndex,
          minValue,
-         maxValue
+         maxValue,
+         index: i
       };
       column.isDeferred = function() {
          return (this.flags & RNTupleDescriptorBuilder.kFlagDeferredColumn) !== 0;
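
Note: the new index property records each column's position in the header's column list; readNextCluster (further down) uses it to find that column's pages in builder.pageLocations instead of hardcoding column 0. A minimal standalone sketch of the lookup, with hypothetical stand-in shapes rather than the real JSROOT descriptors:

// pageLocations is indexed as [clusterIndex][columnIndex] (stand-in data).
const pageLocations = [[{ pages: ['pages of column 0'] }, { pages: ['pages of column 1'] }]],
      colDesc = { fieldId: 0, index: 1 },  // index stored by this commit
      clusterIndex = 0;

console.log(pageLocations[clusterIndex][colDesc.index].pages); // ['pages of column 1']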
@@ -574,7 +575,7 @@ async function readHeaderFooter(tuple) {
       R__unzip(blobs[1], tuple.fLenFooter)
    ]).then(unzip_blobs => {
       const header_blob = unzip_blobs[0],
-          footer_blob = unzip_blobs[1];
+            footer_blob = unzip_blobs[1];
       if (!header_blob || !footer_blob)
          return false;
 
@@ -586,21 +587,24 @@ async function readHeaderFooter(tuple) {
 
       tuple.builder.deserializeFooter(footer_blob);
 
-      // Extract first column and corresponding field
-      const firstColumn = tuple.builder.columnDescriptors?.[0];
-      if (!firstColumn)
-         throw new Error('No column descriptor found');
-
-      const field = tuple.builder.fieldDescriptors?.[firstColumn.fieldId],
+      // Build fieldToColumns mapping
+      tuple.fieldToColumns = {};
+      for (const colDesc of tuple.builder.columnDescriptors) {
+         const fieldDesc = tuple.builder.fieldDescriptors[colDesc.fieldId],
+               fieldName = fieldDesc.fieldName;
+         if (!tuple.fieldToColumns[fieldName])
+            tuple.fieldToColumns[fieldName] = [];
+         tuple.fieldToColumns[fieldName].push(colDesc);
+      }
 
-      // Deserialize the Page List Envelope
-      group = tuple.builder.clusterGroups?.[0];
+      // Deserialize Page List
+      const group = tuple.builder.clusterGroups?.[0];
       if (!group || !group.pageListLocator)
          throw new Error('No valid cluster group or page list locator found');
 
       const offset = Number(group.pageListLocator.offset),
-         size = Number(group.pageListLocator.size),
-         uncompressedSize = Number(group.pageListLength);
+             size = Number(group.pageListLocator.size),
+             uncompressedSize = Number(group.pageListLength);
 
       return tuple.$file.readBuffer([offset, size]).then(page_list_blob => {
          if (!(page_list_blob instanceof DataView))
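
Note: a standalone sketch of the mapping this hunk builds, using simplified stand-in descriptors (only fieldName, fieldId, index and bitsOnStorage are assumed; the real descriptors come from deserializing the header):

// Hypothetical descriptors standing in for the deserialized header data.
const fieldDescriptors = [{ fieldName: 'myDouble' }, { fieldName: 'myInt' }],
      columnDescriptors = [
         { fieldId: 0, index: 0, bitsOnStorage: 64 },
         { fieldId: 1, index: 1, bitsOnStorage: 32 }
      ];

// Same grouping logic as above: one field name -> all of its columns.
const fieldToColumns = {};
for (const colDesc of columnDescriptors) {
   const fieldName = fieldDescriptors[colDesc.fieldId].fieldName;
   if (!fieldToColumns[fieldName])
      fieldToColumns[fieldName] = [];
   fieldToColumns[fieldName].push(colDesc);
}

console.log(fieldToColumns);
// { myDouble: [{ fieldId: 0, index: 0, bitsOnStorage: 64 }],
//   myInt:    [{ fieldId: 1, index: 1, bitsOnStorage: 32 }] }

Keeping an array per field leaves room for fields stored in more than one column (for example an index column plus a payload column for variable-size types).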
@@ -611,32 +615,7 @@ async function readHeaderFooter(tuple) {
             throw new Error(`Unzipped page list is not a DataView, got ${Object.prototype.toString.call(unzipped_blob)}`);
 
          tuple.builder.deserializePageList(unzipped_blob);
-
-
-         // Access first page metadata
-         const firstPage = tuple.builder?.pageLocations?.[0]?.[0]?.pages?.[0];
-         if (!firstPage || !firstPage.locator)
-            throw new Error('No valid first page found in pageLocations');
-
-         const pageOffset = Number(firstPage.locator.offset),
-            pageSize = Number(firstPage.locator.size),
-            elementSize = firstColumn.bitsOnStorage / 8,
-            numElements = Number(firstPage.numElements),
-            uncompressedPageSize = elementSize * numElements;
-
-
-         return tuple.$file.readBuffer([pageOffset, pageSize]).then(compressedPage => {
-            if (!(compressedPage instanceof DataView))
-               throw new Error('Compressed page readBuffer did not return a DataView');
-
-            return R__unzip(compressedPage, uncompressedPageSize).then(unzippedPage => {
-               if (!(unzippedPage instanceof DataView))
-                  throw new Error('Unzipped page is not a DataView');
-
-               tuple.builder.deserializePage(unzippedPage, firstColumn, field);
-               return true;
-            });
-         });
+         return true;
       });
    });
 });
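
Note: with this hunk, readHeaderFooter stops after deserializing the page list; the eager read of the first page, including its elementSize * numElements size computation, reappears generalized over all columns in readNextCluster below. A standalone sketch of that computation, with hypothetical values:

// Uncompressed page size = bytes per element * number of elements.
// Number(...) mirrors the BigInt-valued fields of the real descriptors.
const colDesc = { bitsOnStorage: 64 },         // e.g. a double column
      page = { numElements: 1000n },
      elementSize = colDesc.bitsOnStorage / 8, // 8 bytes per element
      uncompressedPageSize = elementSize * Number(page.numElements);

console.log(uncompressedPageSize); // 8000, the target size handed to R__unzip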
@@ -649,36 +628,46 @@ async function readHeaderFooter(tuple) {
 // Read and process the next data cluster from the RNTuple
 function readNextCluster(rntuple, selector) {
    const builder = rntuple.builder,
-      clusterSummary = builder.clusterSummaries[selector.currentCluster],
-      pages = builder.pageLocations[selector.currentCluster][0].pages;
+         clusterIndex = selector.currentCluster,
+         clusterSummary = builder.clusterSummaries[clusterIndex],
+
+   // Gather all pages for this cluster from all columns
+         pages = [];
+
+   for (const columns of Object.values(rntuple.fieldToColumns)) {
+      for (const colDesc of columns) {
+         const colPages = builder.pageLocations[clusterIndex][colDesc.index].pages;
+         for (const page of colPages)
+            pages.push({ page, colDesc });
+      }
+   }
 
    selector.currentCluster++;
 
    // Build flat array of [offset, size, offset, size, ...] to read pages
-   const dataToRead = pages.flatMap(p => [Number(p.locator.offset), Number(p.locator.size)]);
+   const dataToRead = pages.flatMap(p =>
+      [Number(p.page.locator.offset), Number(p.page.locator.size)]
+   );
 
    return rntuple.$file.readBuffer(dataToRead).then(blobsRaw => {
       const blobs = Array.isArray(blobsRaw) ? blobsRaw : [blobsRaw],
-         unzipPromises = blobs.map((blob, idx) => {
-            const numElements = Number(pages[idx].numElements);
-            return R__unzip(blob, 8 * numElements);
-         });
+            unzipPromises = blobs.map((blob, idx) => {
+               const { page, colDesc } = pages[idx],
+                     numElements = Number(page.numElements),
+                     elementSize = colDesc.bitsOnStorage / 8;
+               return R__unzip(blob, numElements * elementSize);
+            });
 
-      // Wait for all pages to be decompressed
       return Promise.all(unzipPromises).then(unzipBlobs => {
-         const totalSize = unzipBlobs.reduce((sum, b) => sum + b.byteLength, 0),
-            flat = new Uint8Array(totalSize);
-
-         let offset = 0;
-         for (const blob of unzipBlobs) {
-            flat.set(new Uint8Array(blob.buffer || blob), offset);
-            offset += blob.byteLength;
+         for (let i = 0; i < unzipBlobs.length; ++i) {
+            const { colDesc } = pages[i],
+                  field = builder.fieldDescriptors[colDesc.fieldId];
+            rntuple.builder.deserializePage(unzipBlobs[i], colDesc, field);
          }
 
-         // Create reader and deserialize doubles from the buffer
-         const reader = new RBufferReader(flat.buffer);
+         const reader = new RBufferReader(unzipBlobs[0].buffer); // pick one column as example
          for (let i = 0; i < clusterSummary.numEntries; ++i) {
-            selector.tgtobj.myDouble = reader.readF64();
+            selector.tgtobj.myDouble = reader.readF64(); // TODO: Replace with real field extraction later
             selector.Process();
          }
 
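
Note: the per-entry loop still hardcodes a single double field, as the TODO says. Under the hood each readF64 call is one 8-byte read per entry; a plain DataView sketch (hypothetical data, assuming the little-endian encoding RNTuple uses on disk, not JSROOT's actual RBufferReader):

// Write two doubles into a buffer, then read them back entry by entry.
const buf = new ArrayBuffer(16),
      view = new DataView(buf);
view.setFloat64(0, 1.5, true);  // true = little-endian
view.setFloat64(8, 2.5, true);

for (let i = 0; i < 2; ++i)
   console.log(view.getFloat64(i * 8, true)); // 1.5, then 2.5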