Skip to content

Unfiltering tests and cleaning up edge-cases #105

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 46 additions & 38 deletions Zarr.m
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,43 @@ function makeZarrGroups(existingParentPath, newGroupsPath)
end

end

function [bucketName, objectPath] = extractS3BucketNameAndPath(url)
    % Split an S3 location into its bucket name and object path.
    % Both values are needed to fill the KVstore hash map for tensorstore.
    %
    % Throws MATLAB:Zarr:invalidS3URL when url matches none of the
    % supported S3 URL/URI layouts.

    % Each row is {pattern, bucket token position, path token position}.
    % The positions refer to the tokens regexp captures for that pattern.
    % Rows are tried from top to bottom and the first match wins, so the
    % AWS-specific layouts are listed ahead of the generic custom-endpoint
    % layouts.
    rules = { ...
        '^https://([^.]+)\.s3\.([^.]+)\.amazonaws\.com/(.+)$', 1, 3; ... % 1: AWS virtual-hosted, region (https://mybucket.s3.us-west-2.amazonaws.com/path/to/myZarrFile)
        '^https://([^.]+)\.s3\.amazonaws\.com/(.+)$',          1, 2; ... % 2: AWS virtual-hosted, no region (https://mybucket.s3.amazonaws.com/path/to/myZarrFile)
        '^https://([^.]+)\.s3\.[^/]+/(.+)$',                   1, 2; ... % 3: Custom endpoint virtual-hosted (https://mybucket.s3.custom-endpoint.org/path/to/myZarrFile)
        '^https://s3\.amazonaws\.com/([^/]+)/(.+)$',           1, 2; ... % 4: AWS path-style (https://s3.amazonaws.com/mybucket/path/to/myZarrFile)
        '^https://s3\.[^/]+/([^/]+)/(.+)$',                    1, 2; ... % 5: Custom endpoint path-style (https://s3.eu-central-1.example.edu/mybucket/path/to/myZarrFile)
        '^s3://([^/]+)/(.+)$',                                 1, 2  ... % 6: S3 URI (s3://mybucket/path/to/myZarrFile)
        };

    for ruleIdx = 1:size(rules, 1)
        [pattern, bucketPos, pathPos] = rules{ruleIdx, :};

        matches = regexp(url, pattern, 'tokens');
        if isempty(matches)
            % This layout does not apply; try the next one.
            continue;
        end

        % The patterns are anchored with ^ and $, so only the first
        % (and only) match is inspected.
        captured = matches{1};
        bucketName = captured{bucketPos};
        objectPath = captured{pathPos};
        return;
    end

    % No supported layout matched.
    error("MATLAB:Zarr:invalidS3URL","Invalid S3 URI format.");
end
end

methods
Expand All @@ -183,7 +220,7 @@ function makeZarrGroups(existingParentPath, newGroupsPath)
obj.isRemote = matlab.io.internal.vfs.validators.hasIriPrefix(obj.Path);
if obj.isRemote % Remote file (only S3 support at the moment)
% Extract the S3 bucket name and path
[bucketName, objectPath] = obj.extractS3BucketNameAndPath(obj.Path);
[bucketName, objectPath] = Zarr.extractS3BucketNameAndPath(obj.Path);
% Create a Python dictionary for the KV store driver
obj.KVStoreSchema = py.ZarrPy.createKVStore(obj.isRemote, objectPath, bucketName);

Expand Down Expand Up @@ -241,7 +278,13 @@ function create(obj, dtype, data_size, chunk_size, fillvalue, compression)
if isempty(fillvalue)
obj.FillValue = py.None;
else
obj.FillValue = cast(fillvalue, obj.Datatype.MATLABType);
% Fill value must be of the same datatype as data.
if ~isa(fillvalue, dtype)
error("MATLAB:zarrcreate:invalidFillValueType",...
"Fill value must have the same data type (""%s"") as the Zarr array.",...
dtype)
end
obj.FillValue = fillvalue;
end

% see how much of the provided path exists already
Expand Down Expand Up @@ -334,42 +377,7 @@ function write(obj, data)
end
end

function [bucketName, objectPath] = extractS3BucketNameAndPath(~,url)
    % Derive the S3 bucket name and the object path from a URL or URI.
    % The two outputs are used to fill the KVstore hash map for
    % tensorstore. The first input (the object) is not used.
    %
    % Throws MATLAB:Zarr:invalidS3URL if url has none of the layouts
    % listed below.

    % Supported S3 URL/URI layouts, in order of precedence.
    patterns = { ...
        '^https://([^.]+)\.s3\.([^.]+)\.amazonaws\.com/(.+)$', ... % 1: AWS virtual-hosted, region (https://mybucket.s3.us-west-2.amazonaws.com/path/to/myZarrFile)
        '^https://([^.]+)\.s3\.amazonaws\.com/(.+)$', ...          % 2: AWS virtual-hosted, no region (https://mybucket.s3.amazonaws.com/path/to/myZarrFile)
        '^https://([^.]+)\.s3\.[^/]+/(.+)$', ...                   % 3: Custom endpoint virtual-hosted (https://mybucket.s3.custom-endpoint.org/path/to/myZarrFile)
        '^https://s3\.amazonaws\.com/([^/]+)/(.+)$', ...           % 4: AWS path-style (https://s3.amazonaws.com/mybucket/path/to/myZarrFile)
        '^https://s3\.[^/]+/([^/]+)/(.+)$', ...                    % 5: Custom endpoint path-style (https://s3.eu-central-1.example.edu/mybucket/path/to/myZarrFile)
        '^s3://([^/]+)/(.+)$' ...                                  % 6: S3 URI (s3://mybucket/path/to/myZarrFile)
        };

    % Position of the bucket token and of the path token among the
    % tokens captured by the pattern with the same index.
    bucketIdx = [1, 1, 1, 1, 1, 1];
    pathIdx   = [3, 2, 2, 2, 2, 2];

    % Run every pattern against the input and keep the earliest one
    % that produced a match.
    tokensPerPattern = cellfun(@(p) regexp(url, p, 'tokens'), patterns, ...
        'UniformOutput', false);
    firstHit = find(~cellfun(@isempty, tokensPerPattern), 1);

    if isempty(firstHit)
        error("MATLAB:Zarr:invalidS3URL","Invalid S3 URI format.");
    end

    % Tokens of the first match of the winning pattern.
    t = tokensPerPattern{firstHit}{1};
    bucketName = t{bucketIdx(firstHit)};
    objectPath = t{pathIdx(firstHit)};
end

end

end
75 changes: 45 additions & 30 deletions test/tZarrAttributes.m
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@
function createZarrArrayWithAttrs(testcase)
% Create Zarr array and add some attributes.
zarrcreate(testcase.ArrPathWrite,testcase.ArrSize);

% Write array attributes
zarrwriteatt(testcase.ArrPathWrite,'attr1','This is an array attribute.');
zarrwriteatt(testcase.ArrPathWrite,'attr2',{1,2,3});
attr3.numVal = 10;
attr3.strArr = ["array","attribute"];
zarrwriteatt(testcase.ArrPathWrite,'attr3',attr3);
zarrwriteatt(testcase.ArrPathWrite,'scalarText','This is an array attribute.');
zarrwriteatt(testcase.ArrPathWrite,'numericVector',[1,2,3]);
zarrwriteatt(testcase.ArrPathWrite,'numericCellArray',{1,2,3});
zarrwriteatt(testcase.ArrPathWrite,'mixedCellArray',{1,'two',3});
attrStruct.numVal = 10;
attrStruct.strArr = ["array","attribute"];
zarrwriteatt(testcase.ArrPathWrite,'struct',attrStruct);

% Write group attributes
zarrwriteatt(testcase.GrpPathWrite,'grp_description','This is a group');
Expand All @@ -23,38 +25,51 @@ function createZarrArrayWithAttrs(testcase)

methods(Test)
function verifyArrayAttributeInfo(testcase)
% Write attribute info using zarrwriteatt function to an array.

arrInfo = zarrinfo(testcase.ArrPathWrite);
actAttr.attr1 = arrInfo.attr1;

% TODO: Enable code once Issue-34 is fixed.
%actAttr.attr2 = arrInfo.attr2;
%actAttr.attr3 = arrInfo.attr3;

expAttr.attr1 = 'This is an array attribute.';
%expAttr.attr2 = {1,2,3};
%expAttr.attr3.numVal = 10;
%expAttr.attr4.strArr = ["array","attribute"];

testcase.verifyEqual(actAttr,expAttr,'Failed to verify attribute info.');
% Write attribute info using zarrwriteatt function to an array
% (during test setup) and verify written values using zarrinfo

actInfo = zarrinfo(testcase.ArrPathWrite);

testcase.verifyEqual(actInfo.scalarText,...
'This is an array attribute.',...
'Failed to verify attribute info for scalar text.');
testcase.verifyEqual(actInfo.numericVector,...
[1;2;3],... % JSON stores all vectors as column vectors
'Failed to verify attribute info for numeric vector.');
testcase.verifyEqual(actInfo.numericCellArray,...
[1;2;3],... % JSON stores numeric cell array as column vector
'Failed to verify attribute info for numeric cell array.');
testcase.verifyEqual(actInfo.mixedCellArray,...
{1; 'two'; 3},...% JSON stores all vectors as column vectors
'Failed to verify attribute info for mixed cell array.');

expStruct.numVal = 10;
% JSON stores string arrays as column cell arrays of char
% vectors
expStruct.strArr = {'array';'attribute'};
testcase.verifyEqual(actInfo.struct,...
expStruct,...
'Failed to verify attribute info for struct.');
end

function verifyAttrOverwrite(testcase)
% Verify attribute value after overwrite.
expAttrStr = ["new","attribute","value"];
zarrwriteatt(testcase.ArrPathWrite,'attr1',expAttrStr);

expAttrStr = 'New attribute value';
zarrwriteatt(testcase.ArrPathWrite,'scalarText',expAttrStr);

expAttrDbl = 10;
zarrwriteatt(testcase.ArrPathWrite,'attr2',expAttrDbl);
zarrwriteatt(testcase.ArrPathWrite,'numericVector',expAttrDbl);

arrInfo = zarrinfo(testcase.ArrPathWrite);

% TODO: Enable code once Issue-34 is fixed.
%actAttrStr = arrInfo.attr1;
actAttrDbl = arrInfo.attr2;

%testcase.verifyEqual(actAttrStr,expAttrStr,'Failed to verify string attribute info');
testcase.verifyEqual(actAttrDbl,expAttrDbl,'Failed to verify double attribute info');
actAttrStr = arrInfo.scalarText;
actAttrDbl = arrInfo.numericVector;

testcase.verifyEqual(actAttrStr,expAttrStr,...
'Failed to verify string attribute info');
testcase.verifyEqual(actAttrDbl,expAttrDbl,...
'Failed to verify double attribute info');
end

function verifyGroupAttributeInfo(testcase)
Expand Down
67 changes: 56 additions & 11 deletions test/tZarrCreate.m
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,30 @@
% Copyright 2025 The MathWorks, Inc.

methods(Test)

function createDefaultArray(testcase)
    % Create a Zarr array passing only the path and the size, then
    % check that zarrinfo reports exactly the expected metadata.

    zarrcreate(testcase.ArrPathWrite,testcase.ArrSize);
    actInfo = zarrinfo(testcase.ArrPathWrite);

    % Expected metadata: shape and chunks are compared as the
    % transposed ArrSize; compressor, fill value and filters are
    % expected to be empty.
    arrShape = testcase.ArrSize';
    expInfo = struct( ...
        'chunks',              arrShape, ...
        'compressor',          [], ...
        'dimension_separator', '.', ...
        'dtype',               '<f8', ...
        'fill_value',          [], ...
        'filters',             [], ...
        'order',               'C', ...
        'shape',               arrShape, ...
        'zarr_format',         2, ...
        'node_type',           'array');

    testcase.verifyEqual(actInfo, expInfo,...
        'Failed to verify creating Zarr array with default properties');
end

function createIntermediateZgroups(testcase)
% Verify that zarrcreate creates zarr groups when given a
% nested path
Expand Down Expand Up @@ -36,7 +60,7 @@ function createArrayRelativePath(testcase)
inpPath = fullfile('..','myGrp','myArr');
zarrcreate(inpPath,[10 10]);
arrInfo = zarrinfo(inpPath);
testcase.verifyEqual(arrInfo.zarr_format,2,'Failed to Zarr array format');
testcase.verifyEqual(arrInfo.zarr_format,2,'Failed to verify Zarr array format');
testcase.verifyEqual(arrInfo.node_type,'array','Unexpected Zarr array node type');
end

Expand Down Expand Up @@ -172,29 +196,50 @@ function invalidShuffleBlosc(testcase)

function invalidChunkSize(testcase)
% Verify error when an invalid chunk size is used (wrong number of dimensions, empty, or non-positive entries).
testcase.assumeTrue(false,'Filtered until issue 25 is fixed.');

testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,testcase.ArrSize, ...
'ChunkSize',5),testcase.PyException);
'ChunkSize',5),'MATLAB:zarrcreate:chunkDimsMismatch');
testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,testcase.ArrSize, ...
'ChunkSize',[]),testcase.PyException);
'ChunkSize',[]),'MATLAB:zarrcreate:chunkDimsMismatch');
testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,testcase.ArrSize, ...
'ChunkSize',[0 0]),testcase.PyException);
'ChunkSize',[0 0]),'MATLAB:validators:mustBePositive');
end

function invalidFillValue(testcase)
% Verify error when an invalid type for the fill value is used.

testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,testcase.ArrSize, ...
"FillValue",[-9 -9]),testcase.PyException);
% testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,testcase.ArrSize, ...
% "FillValue",NaN),testcase.PyException);
% testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,testcase.ArrSize, ...
% "FillValue",inf),testcase.PyException);
testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,testcase.ArrSize, ...
"FillValue","none"),'MATLAB:validators:mustBeNumericOrLogical');
testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,testcase.ArrSize,...
Datatype="int8", FillValue=1.4), 'MATLAB:zarrcreate:invalidFillValueType')
end

function specialFillValue(testcase)
    % Verify creating Zarr arrays using special fill values like
    % NaN and Inf.
    %
    % For each special value a 1-by-2 array is created with that fill
    % value and read back without writing any data; the test expects
    % every element read to equal the fill value.

    for fillValue = [NaN, Inf]
        expData = [fillValue, fillValue];

        zarrcreate(testcase.ArrPathWrite, [1,2], FillValue=fillValue)
        actData = zarrread(testcase.ArrPathWrite);

        % verifyEqual takes (actual, expected); keeping that order
        % makes the failure diagnostics label the values correctly.
        testcase.verifyEqual(actData, expData, ...
            sprintf('Unexpected data read from array created with FillValue=%g.', fillValue))
    end
end

function invalidSizeInput(testcase)
% Verify error when an invalid size input is used.
% testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,[]), ...
% testcase.PyException);

testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,[]), ...
'MATLAB:validators:mustBeNonempty');
testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,Inf), ...
'MATLAB:validators:mustBeFinite');
testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,-2), ...
'MATLAB:validators:mustBePositive');
end

function invalidDatatype(testcase)
Expand Down
2 changes: 1 addition & 1 deletion test/tZarrWrite.m
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ function createArrayLocalUserDefinedSyntax(testcase,DataType,CompId)
% Verify the data when creating and writing to arrays with
% user-defined properties using zarrcreate and zarrwrite locally.
comp.level = 5;
fillValue = -9;
fillValue = cast(-9, DataType);
expData = cast(ones(testcase.ArrSize),DataType);
comp.id = CompId;
zarrcreate(testcase.ArrPathWrite,testcase.ArrSize,'ChunkSize',testcase.ChunkSize, ...
Expand Down
13 changes: 6 additions & 7 deletions zarrcreate.m
Original file line number Diff line number Diff line change
Expand Up @@ -81,30 +81,29 @@ function zarrcreate(filepath, datasize, options)

arguments
filepath {mustBeTextScalar, mustBeNonempty}
datasize (1,:) double {mustBeFinite, mustBeNonnegative}
options.ChunkSize (1,:) double {mustBeFinite, mustBeNonnegative} = datasize
datasize (1,:) double {mustBeFinite, mustBePositive, mustBeNonempty}
options.ChunkSize (1,:) double {mustBeFinite, mustBePositive} = datasize
options.Datatype {mustBeTextScalar, mustBeNonempty} = 'double'
options.FillValue {mustBeNumeric} = []
options.FillValue {mustBeNumericOrLogical} = []
options.Compression {mustBeStructOrEmpty} = []
end

zarrObj = Zarr(filepath);

% Dimensionality of the dataset and the chunk size must be the same
if any(size(datasize) ~= size(options.ChunkSize))
error("MATLAB:zarrcreate:chunkDimsMismatch",...
"Invalid chunk size. Chunk size must have the same number of dimensions as data size.");
"Invalid chunk size. Chunk size must have the same number of dimensions as Zarr array size.");
end

if any(options.ChunkSize > datasize)
error("MATLAB:zarrcreate:chunkSizeGreater",...
"Invalid chunk size. Each entry of ChunkSize must be less than or equal to the corresponding entry of datasize.");
"Invalid chunk size. Each entry of ChunkSize must be less than or equal to the corresponding entry of Zarr array size.");
end
if isscalar(datasize)
datasize = [1 datasize];
options.ChunkSize = [1 options.ChunkSize];
end

zarrObj = Zarr(filepath);
zarrObj.create(options.Datatype, datasize, options.ChunkSize, options.FillValue, options.Compression)

end
Expand Down