From 8a63c4ef471a642741780989183e8c684edd8420 Mon Sep 17 00:00:00 2001 From: Kris Fedorenko <1648280+krisfed@users.noreply.github.com> Date: Thu, 12 Jun 2025 11:55:27 -0400 Subject: [PATCH 1/3] Tests and edge-cases clean-up --- Zarr.m | 84 +++++++++++++++++++++++------------------- test/tZarrAttributes.m | 73 +++++++++++++++++++++--------------- test/tZarrCreate.m | 67 +++++++++++++++++++++++++++------ test/tZarrWrite.m | 2 +- zarrcreate.m | 9 ++--- 5 files changed, 150 insertions(+), 85 deletions(-) diff --git a/Zarr.m b/Zarr.m index e8acc5a..e430402 100644 --- a/Zarr.m +++ b/Zarr.m @@ -171,6 +171,43 @@ function makeZarrGroups(existingParentPath, newGroupsPath) end end + + function [bucketName, objectPath] = extractS3BucketNameAndPath(url) + % Helper function to extract S3 bucket name and path to file + % bucketName and objectPath are needed to fill the KVstore hash + % map for tensorstore. + % Define the regular expression patterns for matching S3 URLs and URIs + % S3 URLs can have the following patterns. + patterns = { ... + '^https://([^.]+)\.s3\.([^.]+)\.amazonaws\.com/(.+)$', ... % 1: AWS virtual-hosted, region (https://mybucket.s3.us-west-2.amazonaws.com/path/to/myZarrFile) + '^https://([^.]+)\.s3\.amazonaws\.com/(.+)$', ... % 2: AWS virtual-hosted, no region (https://mybucket.s3.amazonaws.com/path/to/myZarrFile) + '^https://([^.]+)\.s3\.[^/]+/(.+)$', ... % 3: Custom endpoint virtual-hosted (https://mybucket.s3.custom-endpoint.org/path/to/myZarrFile) + '^https://s3\.amazonaws\.com/([^/]+)/(.+)$', ... % 4: AWS path-style (https://s3.amazonaws.com/mybucket/path/to/myZarrFile) + '^https://s3\.[^/]+/([^/]+)/(.+)$', ... % 5: Custom endpoint path-style (https://s3.eu-central-1.example.edu/mybucket/path/to/myZarrFile) + '^s3://([^/]+)/(.+)$' ... % 6: S3 URI (s3://mybucket/path/to/myZarrFile) + }; + + % For each pattern, specify which group is bucket and which is path + % regexp will extract multiple tokens from the patterns above. + % For each pattern, the indices below denote the location of + % the bucket and the path name. + bucketIdx = [1, 1, 1, 1, 1, 1]; + pathIdx = [3, 2, 2, 2, 2, 2]; + + % Iterate through the patterns and identify the pattern which matches the + % URI. Extract the bucket name and the path. + for patternIdx = 1:numel(patterns) + tokens = regexp(url, patterns{patternIdx}, 'tokens'); + if ~isempty(tokens) + t = tokens{1}; + bucketName = t{bucketIdx(patternIdx)}; + objectPath = t{pathIdx(patternIdx)}; + return; + end + end + + error("MATLAB:Zarr:invalidS3URL","Invalid S3 URI format."); + end end methods @@ -183,7 +220,7 @@ function makeZarrGroups(existingParentPath, newGroupsPath) obj.isRemote = matlab.io.internal.vfs.validators.hasIriPrefix(obj.Path); if obj.isRemote % Remote file (only S3 support at the moment) % Extract the S3 bucket name and path - [bucketName, objectPath] = obj.extractS3BucketNameAndPath(obj.Path); + [bucketName, objectPath] = Zarr.extractS3BucketNameAndPath(obj.Path); % Create a Python dictionary for the KV store driver obj.KVStoreSchema = py.ZarrPy.createKVStore(obj.isRemote, objectPath, bucketName); @@ -241,7 +278,13 @@ function create(obj, dtype, data_size, chunk_size, fillvalue, compression) if isempty(fillvalue) obj.FillValue = py.None; else - obj.FillValue = cast(fillvalue, obj.Datatype.MATLABType); + % Fill value must be of the same datatype as data. + if ~isa(fillvalue, dtype) + error("MATLAB:zarrcreate:invalidFillValueType",... + "FillValue must be of the same datatype as data (""%s"").",... + dtype) + end + obj.FillValue = fillvalue; end % see how much of the provided path exists already @@ -334,42 +377,7 @@ function write(obj, data) end end - function [bucketName, objectPath] = extractS3BucketNameAndPath(~,url) - % Helper function to extract S3 bucket name and path to file - % bucketName and objectPath are needed to fill the KVstore hash - % map for tensorstore. - % Define the regular expression patterns for matching S3 URLs and URIs - % S3 URLs can have the following patterns. - patterns = { ... - '^https://([^.]+)\.s3\.([^.]+)\.amazonaws\.com/(.+)$', ... % 1: AWS virtual-hosted, region (https://mybucket.s3.us-west-2.amazonaws.com/path/to/myZarrFile) - '^https://([^.]+)\.s3\.amazonaws\.com/(.+)$', ... % 2: AWS virtual-hosted, no region (https://mybucket.s3.amazonaws.com/path/to/myZarrFile) - '^https://([^.]+)\.s3\.[^/]+/(.+)$', ... % 3: Custom endpoint virtual-hosted (https://mybucket.s3.custom-endpoint.org/path/to/myZarrFile) - '^https://s3\.amazonaws\.com/([^/]+)/(.+)$', ... % 4: AWS path-style (https://s3.amazonaws.com/mybucket/path/to/myZarrFile) - '^https://s3\.[^/]+/([^/]+)/(.+)$', ... % 5: Custom endpoint path-style (https://s3.eu-central-1.example.edu/mybucket/path/to/myZarrFile) - '^s3://([^/]+)/(.+)$' ... % 6: S3 URI (s3://mybucket/path/to/myZarrFile) - }; - - % For each pattern, specify which group is bucket and which is path - % regexp will extract multiple tokens from the patterns above. - % For each pattern, the indices below denote the location of - % the bucket and the path name. - bucketIdx = [1, 1, 1, 1, 1, 1]; - pathIdx = [3, 2, 2, 2, 2, 2]; - - % Iterate through the patterns and identify the pattern which matches the - % URI. Extract the bucket name and the path. - for patternIdx = 1:numel(patterns) - tokens = regexp(url, patterns{patternIdx}, 'tokens'); - if ~isempty(tokens) - t = tokens{1}; - bucketName = t{bucketIdx(patternIdx)}; - objectPath = t{pathIdx(patternIdx)}; - return; - end - end - - error("MATLAB:Zarr:invalidS3URL","Invalid S3 URI format."); - end + end end \ No newline at end of file diff --git a/test/tZarrAttributes.m b/test/tZarrAttributes.m index b5d4a5a..7a28c61 100644 --- a/test/tZarrAttributes.m +++ b/test/tZarrAttributes.m @@ -7,49 +7,62 @@ function createZarrArrayWithAttrs(testcase) % Create Zarr array and add some attributes. zarrcreate(testcase.ArrPathWrite,testcase.ArrSize); - zarrwriteatt(testcase.ArrPathWrite,'attr1','This is an array attribute.'); - zarrwriteatt(testcase.ArrPathWrite,'attr2',{1,2,3}); - attr3.numVal = 10; - attr3.strArr = ["array","attribute"]; - zarrwriteatt(testcase.ArrPathWrite,'attr3',attr3); + zarrwriteatt(testcase.ArrPathWrite,'scalarText','This is an array attribute.'); + zarrwriteatt(testcase.ArrPathWrite,'numericVector',[1,2,3]); + zarrwriteatt(testcase.ArrPathWrite,'numericCellArray',{1,2,3}); + zarrwriteatt(testcase.ArrPathWrite,'mixedCellArray',{1,'two',3}); + attrStruct.numVal = 10; + attrStruct.strArr = ["array","attribute"]; + zarrwriteatt(testcase.ArrPathWrite,'struct',attrStruct); end end methods(Test) function verifyArrayAttributeInfo(testcase) - % Write attribute info using zarrwriteatt function to an array. - - arrInfo = zarrinfo(testcase.ArrPathWrite); - actAttr.attr1 = arrInfo.attr1; - - % TODO: Enable code once Issue-34 is fixed. - %actAttr.attr2 = arrInfo.attr2; - %actAttr.attr3 = arrInfo.attr3; - - expAttr.attr1 = 'This is an array attribute.'; - %expAttr.attr2 = {1,2,3}; - %expAttr.attr3.numVal = 10; - %expAttr.attr4.strArr = ["array","attribute"]; - - testcase.verifyEqual(actAttr,expAttr,'Failed to verify attribute info.'); + % Write attribute info using zarrwriteatt function to an array + % (during test setup) and verify written values using zarrinfo + + actInfo = zarrinfo(testcase.ArrPathWrite); + + testcase.verifyEqual(actInfo.scalarText,... + 'This is an array attribute.',... + 'Failed to verify attribute info for scalar text.'); + testcase.verifyEqual(actInfo.numericVector,... + [1;2;3],... % JSON stores all vectors as column vectors + 'Failed to verify attribute info for numeric vector.'); + testcase.verifyEqual(actInfo.numericCellArray,... + [1;2;3],... % JSON stores numeric cell array as column vector + 'Failed to verify attribute info for numeric cell array.'); + testcase.verifyEqual(actInfo.mixedCellArray,... + {1; 'two'; 3},...% JSON stores all vectors as column vectors + 'Failed to verify attribute info for mixed cell array.'); + + expStruct.numVal = 10; + % JSON stores string arrays as column cell arrays of char + % vectors + expStruct.strArr = {'array';'attribute'}; + testcase.verifyEqual(actInfo.struct,... + expStruct,... + 'Failed to verify attribute info for struct.'); end function verifyAttrOverwrite(testcase) % Verify attribute value after overwrite. - %testcase.assumeTrue(false,'Filtered until the attributes display is fixed.'); - expAttrStr = ["new","attribute","value"]; - zarrwriteatt(testcase.ArrPathWrite,'attr1',expAttrStr); + + expAttrStr = 'New attribute value'; + zarrwriteatt(testcase.ArrPathWrite,'scalarText',expAttrStr); expAttrDbl = 10; - zarrwriteatt(testcase.ArrPathWrite,'attr2',expAttrDbl); + zarrwriteatt(testcase.ArrPathWrite,'numericVector',expAttrDbl); arrInfo = zarrinfo(testcase.ArrPathWrite); - - % TODO: Enable code once Issue-34 is fixed. - %actAttrStr = arrInfo.attr1; - actAttrDbl = arrInfo.attr2; - %testcase.verifyEqual(actAttrStr,expAttrStr,'Failed to verify string attribute info'); - testcase.verifyEqual(actAttrDbl,expAttrDbl,'Failed to verify double attribute info'); + actAttrStr = arrInfo.scalarText; + actAttrDbl = arrInfo.numericVector; + + testcase.verifyEqual(actAttrStr,expAttrStr,... + 'Failed to verify string attribute info'); + testcase.verifyEqual(actAttrDbl,expAttrDbl,... + 'Failed to verify double attribute info'); end function verifyGroupAttributeInfo(testcase) diff --git a/test/tZarrCreate.m b/test/tZarrCreate.m index 545ad67..7b4878b 100644 --- a/test/tZarrCreate.m +++ b/test/tZarrCreate.m @@ -4,6 +4,30 @@ % Copyright 2025 The MathWorks, Inc. methods(Test) + + function createDefaultArray(testcase) + % Verify that zarrcreate correctly creates a Zarr array with + % all default properties + + zarrcreate(testcase.ArrPathWrite,testcase.ArrSize); + + expInfo.chunks = testcase.ArrSize'; + expInfo.compressor = []; + expInfo.dimension_separator = '.'; + expInfo.dtype = ' Date: Thu, 12 Jun 2025 12:15:44 -0400 Subject: [PATCH 2/3] Fix error ID for invalidFillValue test --- test/tZarrCreate.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/tZarrCreate.m b/test/tZarrCreate.m index 7b4878b..8361aef 100644 --- a/test/tZarrCreate.m +++ b/test/tZarrCreate.m @@ -211,7 +211,7 @@ function invalidFillValue(testcase) testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,testcase.ArrSize, ... "FillValue",[-9 -9]),testcase.PyException); testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,testcase.ArrSize, ... - "FillValue","none"),'MATLAB:validators:mustBeNumeric'); + "FillValue","none"),'MATLAB:validators:mustBeNumericOrLogical'); testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,testcase.ArrSize,... Datatype="int8", FillValue=1.4), 'MATLAB:zarrcreate:invalidFillValueType') end From 61deac3a99e2739e4e4e22c9ac0322ec19e60753 Mon Sep 17 00:00:00 2001 From: Kris Fedorenko <1648280+krisfed@users.noreply.github.com> Date: Fri, 20 Jun 2025 10:59:26 -0400 Subject: [PATCH 3/3] Error messages updates --- Zarr.m | 2 +- zarrcreate.m | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Zarr.m b/Zarr.m index e430402..7c6fa9a 100644 --- a/Zarr.m +++ b/Zarr.m @@ -281,7 +281,7 @@ function create(obj, dtype, data_size, chunk_size, fillvalue, compression) % Fill value must be of the same datatype as data. if ~isa(fillvalue, dtype) error("MATLAB:zarrcreate:invalidFillValueType",... - "FillValue must be of the same datatype as data (""%s"").",... + "Fill value must have the same data type (""%s"") as the Zarr array.",... dtype) end obj.FillValue = fillvalue; diff --git a/zarrcreate.m b/zarrcreate.m index 651f2e0..01ceff2 100644 --- a/zarrcreate.m +++ b/zarrcreate.m @@ -91,12 +91,12 @@ function zarrcreate(filepath, datasize, options) % Dimensionality of the dataset and the chunk size must be the same if any(size(datasize) ~= size(options.ChunkSize)) error("MATLAB:zarrcreate:chunkDimsMismatch",... - "Invalid chunk size. Chunk size must have the same number of dimensions as data size."); + "Invalid chunk size. Chunk size must have the same number of dimensions as Zarr array size."); end if any(options.ChunkSize > datasize) error("MATLAB:zarrcreate:chunkSizeGreater",... - "Invalid chunk size. Each entry of ChunkSize must be less than or equal to the corresponding entry of datasize."); + "Invalid chunk size. Each entry of ChunkSize must be less than or equal to the corresponding entry of Zarr array size."); end if isscalar(datasize) datasize = [1 datasize];