diff --git a/PythonModule/ZarrPy.py b/PythonModule/ZarrPy.py index 3e51709..6a182b9 100644 --- a/PythonModule/ZarrPy.py +++ b/PythonModule/ZarrPy.py @@ -81,12 +81,16 @@ def writeZarr (kvstore_schema, data): zarr_file[...] = data -def readZarr (kvstore_schema): +def readZarr (kvstore_schema, starts, ends, strides): """ Reads a subset of data from a Zarr file. Parameters: - kvstore_schema (dictionary): Schema for the file store (local or remote) + - starts (numpy.ndarray): Array of start indices for each dimension (0-based) + - ends (numpy.ndarray): Array of end indices for each dimension (elements + at the end index will not be read) + - strides (numpy.ndarray): Array of strides for each dimension Returns: - numpy.ndarray: The subset of the data read from the Zarr file. @@ -96,6 +100,10 @@ def readZarr (kvstore_schema): 'kvstore': kvstore_schema, }).result() + # Construct the indexing slices + slices = tuple(slice(start, end, stride) for start, end, stride in zip(starts, ends, strides)) + # Read a subset of the data - data = zarr_file[...].read().result() + data = zarr_file[slices].read().result() + return data diff --git a/Zarr.m b/Zarr.m index e8acc5a..3088425 100644 --- a/Zarr.m +++ b/Zarr.m @@ -53,6 +53,46 @@ isZgroup = isfile(fullfile(path, '.zgroup')); end + function newParams = processPartialReadParams(params, dims,... 
+ defaultValues, paramName) + % Process the parameters for partial read (Start, Stride, + % Count) + arguments (Input) + params % Start/Stride/Count parameter to be validated + dims (1,:) double % Zarr array dimensions + defaultValues (1,:) + paramName (1,1) string + end + + arguments (Output) + newParams (1,:) int64 % must be integers for tensorstore + end + + if isempty(params) + newParams = defaultValues; + return + end + + % Allow using a scalar value for indexing into row or column + % datasets + if isscalar(params) && any(dims==1) && numel(dims)==2 + newParams = defaultValues; + % use the provided value for the non-scalar dimension + newParams(dims~=1) = params; + return + end + + if numel(params) ~= numel(dims) + error("MATLAB:Zarr:badPartialReadDimensions",... + "Number of elements in " +... + "%s must be the same "+... + "as the number of Zarr array dimensions.",... + paramName) + end + + newParams = params; + end + function resolvedPath = getFullPath(path) % Given a path, resolves it to a full path. The trailing % directories do not have to exist. @@ -200,7 +240,7 @@ function makeZarrGroups(existingParentPath, newGroupsPath) end - function data = read(obj) + function data = read(obj, start, count, stride) % Function to read the Zarr array % If the Zarr array is local, verify that it is a valid folder @@ -214,7 +254,27 @@ function makeZarrGroups(existingParentPath, newGroupsPath) end end - ndArrayData = py.ZarrPy.readZarr(obj.KVStoreSchema); + % Validate partial read parameters + info = zarrinfo(obj.Path); + numDims = numel(info.shape); + start = Zarr.processPartialReadParams(start, info.shape,... + ones([1,numDims]), "Start"); + stride = Zarr.processPartialReadParams(stride, info.shape,... + ones([1,numDims]), "Stride"); + maxCount = (int64(info.shape') - start + 1)./stride; % has to be a row vector + count = Zarr.processPartialReadParams(count, info.shape,... 
+ maxCount, "Count"); + + % Convert partial read parameters to tensorstore-style + % indexing + start = start - 1; % tensorstore is 0-based + % Tensorstore uses end index instead of count + % (it does NOT include element at the end index) + endInds = start + stride.*count; + + % Read the data + ndArrayData = py.ZarrPy.readZarr(obj.KVStoreSchema,... + start, endInds, stride); % Store the datatype obj.Datatype = ZarrDatatype.fromTensorstoreType(ndArrayData.dtype.name); diff --git a/doc/documentation.md b/doc/documentation.md index 9d6a556..6ae5757 100644 --- a/doc/documentation.md +++ b/doc/documentation.md @@ -68,11 +68,32 @@ do not already exist. If `FILEPATH` exists already, the contents are overwritten Write the MATLAB variable data (specified by DATA) to the path specified by `FILEPATH`. The size of `DATA` must match the size of the Zarr array specified during creation. -## `DATA = zarrread(FILEPATH)` -Retrieve all the data from the Zarr array located at `FILEPATH`. +## `DATA = zarrread(FILEPATH, Name=Value)` +Retrieve data from the Zarr array located at `FILEPATH`. The datatype of DATA is the MATLAB equivalent of the Zarr datatype of the array located at `FILEPATH`. +### Name - Value Pairs +``` +Start - a row vector of one-based indices of the first + elements to be read in each dimension. If you + do not specify start, then the function starts + reading the dataset from the first index along + each dimension. + +Count - a row vector of numbers of elements to + be read in each dimension. If you do not specify + count, then the function reads data until the end + of each dimension. + +Stride - a row vector of differences between indices along + each dimension. A value of 1 accesses adjacent elements + in the corresponding dimension, a value of 2 accesses + every other element in the corresponding dimension, and + so on. If you do not specify stride, then the function + reads data without skipping indices along each dimension. 
+``` + ## `INFO = zarrinfo(FILEPATH)` Read the metadata associated with a Zarr array or group located at `FILEPATH` and return the information in a structure INFO, whose fields are the names of the metadata keys. If `FILEPATH` is a Zarr array (has a valid `.zarray` file), the value of `node_type` is "array"; if `FILEPATH` is a Zarr group (has a valid `.zgroup` file), the value of the field `node_type` is "group". diff --git a/doc/examples.md b/doc/examples.md index 3233ea1..a573822 100644 --- a/doc/examples.md +++ b/doc/examples.md @@ -6,6 +6,18 @@ filepath = "group1\dset1"; data = zarrread(filepath) ``` +### Read a subset of a Zarr array +Read a 2x2 subset of a 3x4 array, starting with the second element in the second dimension, and only reading every other element in the second dimension. +``` MATLAB +filepath = "grp_v2/smallArr"; +d = zarrread(filepath, Start=[1,2], Stride=[1,2], Count=[2,2]) + +d = + + 4 10 + 5 11 +``` + ### Create and write to a Zarr array ``` MATLAB filepath = "myZarrfiles\singleDset"; diff --git a/test/dataFiles/grp_v2/smallArr/.zarray b/test/dataFiles/grp_v2/smallArr/.zarray new file mode 100644 index 0000000..b0f27fa --- /dev/null +++ b/test/dataFiles/grp_v2/smallArr/.zarray @@ -0,0 +1 @@ +{"chunks":[3,4],"compressor":null,"dimension_separator":".","dtype":"