-
Notifications
You must be signed in to change notification settings - Fork 2
Support partial reading of Zarr datasets #106
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
792d144
1bd22db
603eb9b
6f763b9
9936f79
7889203
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -53,6 +53,38 @@ | |
isZgroup = isfile(fullfile(path, '.zgroup')); | ||
end | ||
|
||
function newParams = validatePartialReadParams(params, dims, defaultValues) | ||
% Validate the parameters for partial read (Start, Stride, | ||
% Count) | ||
% | ||
% params        - user-supplied Start, Stride or Count value; may | ||
%                 be empty | ||
% dims          - size of the dataset along each dimension | ||
% defaultValues - value returned when params is empty | ||
% | ||
% Returns newParams, a row vector that the output arguments | ||
% block converts to int64. Errors with | ||
% MATLAB:Zarr:badPartialReadDimensions when params is not empty, | ||
% is not a scalar for a two-dimensional dataset with a singleton | ||
% dimension, and does not have one element per entry of dims. | ||
|
||
arguments (Output) | ||
newParams (1,:) int64 | ||
end | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Surprised considering the name of the function, you didn't also use arguments block to validate inputs There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah I am doing the basic arguments block validation for Start/Stride/Count in zarrread (better error message and faster erroring out), so not much left to validate here. Not great to validate different things in different places though.. |
||
|
||
% Nothing supplied by the caller: fall back to the defaults | ||
if isempty(params) | ||
newParams = defaultValues; | ||
return | ||
end | ||
|
||
% Allow using a scalar value for indexing into row or column | ||
% datasets | ||
if isscalar(params) && any(dims==1) && numel(dims)==2 | ||
newParams = defaultValues; | ||
% use the provided value for the non-singleton dimension | ||
% NOTE(review): when every entry of dims is 1, dims~=1 selects | ||
% nothing and the scalar is silently ignored - confirm intended | ||
newParams(dims~=1) = params; | ||
return | ||
end | ||
|
||
% Otherwise one value per dataset dimension is required | ||
if numel(params) ~= numel(dims) | ||
error("MATLAB:Zarr:badPartialReadDimensions",... | ||
"Length of parameters for partial reading " +... | ||
"(Start, Stride, Count) must be the same "+... | ||
"as the number of dataset dimensions.") | ||
end | ||
|
||
newParams = params; | ||
end | ||
|
||
function resolvedPath = getFullPath(path) | ||
% Given a path, resolves it to a full path. The trailing | ||
% directories do not have to exist. | ||
|
@@ -200,7 +232,7 @@ function makeZarrGroups(existingParentPath, newGroupsPath) | |
end | ||
|
||
|
||
function data = read(obj) | ||
function data = read(obj, start, count, stride) | ||
% Function to read the Zarr array | ||
|
||
% If the Zarr array is local, verify that it is a valid folder | ||
|
@@ -214,7 +246,27 @@ function makeZarrGroups(existingParentPath, newGroupsPath) | |
end | ||
end | ||
|
||
ndArrayData = py.ZarrPy.readZarr(obj.KVStoreSchema); | ||
% Validate partial read parameters. Start, Stride and Count are each
% expanded to one int64 value per dataset dimension; empty inputs
% fall back to the defaults passed as the third argument.
info = zarrinfo(obj.Path);
numDims = numel(info.shape);
start = Zarr.validatePartialReadParams(start, info.shape,...
    ones([1,numDims]));
stride = Zarr.validatePartialReadParams(stride, info.shape,...
    ones([1,numDims]));

% Default Count is the number of elements reachable from Start with
% the given Stride: ceil((shape - start + 1) / stride).
% Plain ./ on int64 rounds to the NEAREST integer, which drops the
% last reachable element whenever the fractional part is below 0.5
% (e.g. shape 10, Start 1, Stride 3 gave 3 instead of 4), so round
% up explicitly.
maxCount = idivide(int64(info.shape') - start + 1, stride, "ceil"); % has to be a row vector
count = Zarr.validatePartialReadParams(count, info.shape,...
    maxCount);

% Convert partial read parameters to tensorstore-style
% indexing
start = start - 1; % tensorstore is 0-based
% Tensorstore uses end index instead of count
% (it does NOT include element at the end index).
% Use the tightest exclusive end, one past the last element read:
% with step `stride` the interval [start, endInds) still selects
% exactly `count` elements, and endInds never exceeds the dataset
% extent when the last requested element is in bounds.
endInds = start + stride.*(count - 1) + 1;

% Read the data
ndArrayData = py.ZarrPy.readZarr(obj.KVStoreSchema,...
    start, endInds, stride);
|
||
% Store the datatype | ||
obj.Datatype = ZarrDatatype.fromTensorstoreType(ndArrayData.dtype.name); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"chunks":[3,4],"compressor":null,"dimension_separator":".","dtype":"<f8","fill_value":null,"filters":null,"order":"C","shape":[3,4],"zarr_format":2} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"chunks":[1,10],"compressor":null,"dimension_separator":".","dtype":"<f8","fill_value":null,"filters":null,"order":"C","shape":[1,10],"zarr_format":2} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,37 @@ | ||
function data = zarrread(filepath) | ||
function data = zarrread(filepath, options) | ||
%ZARRREAD Read data from Zarr array | ||
% DATA = ZARRREAD(FILEPATH) retrieves all the data from the Zarr array | ||
% located at FILEPATH. | ||
% The datatype of DATA is the MATLAB equivalent of the Zarr datatype of the | ||
% array located at FILEPATH. | ||
% located at FILEPATH. The datatype of DATA is the MATLAB equivalent of | ||
% the Zarr datatype of the array located at FILEPATH. | ||
% | ||
% DATA = ZARRREAD(FILEPATH, Start=start) retrieves a subset of the data | ||
% from the Zarr array located at FILEPATH. Start is a row vector of | ||
% one-based indices of the first element to be read in each dimension. | ||
% Default is to read all the elements starting from the first | ||
% (Start=[1,1,...]). | ||
% | ||
% DATA = ZARRREAD(FILEPATH, Count=count) retrieves a subset of the data | ||
% from the Zarr array located at FILEPATH. Count is a row vector | ||
% of number of elements to be read in each dimension. Default is to read | ||
% all the available elements (based on dimension size and the specified | ||
% Start and Stride). | ||
% | ||
% DATA = ZARRREAD(FILEPATH, Stride=stride) retrieves a subset of the data | ||
% from the Zarr array located at FILEPATH. Stride is a row vector of | ||
% spaces between indices along each dimension. A value of 1 accesses | ||
% adjacent elements in the corresponding dimension, a value of 2 | ||
% accesses every other element in the corresponding dimension, etc. | ||
% Default is to read all elements without skipping (Stride=[1,1,...]). | ||
|
||
% Copyright 2025 The MathWorks, Inc. | ||
|
||
arguments | ||
filepath {mustBeTextScalar, mustBeNonzeroLengthText} | ||
options.Start (1,:) {mustBeInteger, mustBePositive} = []; | ||
options.Count (1,:) {mustBeInteger, mustBePositive} = []; | ||
options.Stride (1,:) {mustBeInteger, mustBePositive} = []; | ||
end | ||
|
||
zarrObj = Zarr(filepath); | ||
data = zarrObj.read; | ||
data = zarrObj.read(options.Start, options.Count, options.Stride); | ||
end |
Uh oh!
There was an error while loading. Please reload this page.