Skip to content

[Bug]: Cannot read remote files #228

Open
@mavaylon1

Description

@mavaylon1

What happened?

class TestFSSpecStreaming(unittest.TestCase):

    def setUp(self):
        # PLACEHOLDER test file from Allen Institute for Neural Dynamics
        # TODO: store a small test file and use it to speed up testing
        self.s3_aind_path = (
            "s3://aind-open-data/ecephys_625749_2022-08-03_15-15-06_nwb_2023-05-16_16-34-55/"
            "ecephys_625749_2022-08-03_15-15-06_nwb/"
            "ecephys_625749_2022-08-03_15-15-06_experiment1_recording1.nwb.zarr/"
        )
        # DANDISET: 000719/icephys_9_27_2024
        self.https_s3_path = "https://dandiarchive.s3.amazonaws.com/zarr/7515c603-9940-4598-aa1b-8bf32dc9b10c/"

The file at https_s3_path can be opened, but reading it fails with an AttributeError (see the traceback below).

Steps to Reproduce

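Open the file at https_s3_path with NWBZarrIO and call read() on it, as done in test_s3_open_with_consolidated_ in the traceback below.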

Traceback

______________________________________________________ TestFSSpecStreaming.test_s3_open_with_consolidated_ _______________________________________________________

self = <tests.unit.test_fsspec_streaming.TestFSSpecStreaming testMethod=test_s3_open_with_consolidated_>

    @unittest.skipIf(not HAVE_FSSPEC, "fsspec not installed")
    def test_s3_open_with_consolidated_(self):
        """
        The file is a Zarr file with consolidated metadata.
        """
        with NWBZarrIO(self.https_s3_path, mode='r') as read_io:
            read_io.open()
            self.assertIsInstance(read_io.file.store, zarr.storage.ConsolidatedMetadataStore)
        with NWBZarrIO(self.https_s3_path, mode='-r') as read_io:
            read_io.open()
            breakpoint()
>           read_io.read()

tests/unit/test_fsspec_streaming.py:46: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
../hdmf/src/hdmf/utils.py:668: in func_call
    return func(args[0], **pargs)
../hdmf/src/hdmf/backends/io.py:60: in read
    container = self.__manager.construct(f_builder)
../hdmf/src/hdmf/utils.py:668: in func_call
    return func(args[0], **pargs)
../hdmf/src/hdmf/build/manager.py:286: in construct
    result = self.__type_map.construct(builder, self, None)
../hdmf/src/hdmf/utils.py:668: in func_call
    return func(args[0], **pargs)
../hdmf/src/hdmf/build/manager.py:827: in construct
    return obj_mapper.construct(builder, build_manager, parent)
../hdmf/src/hdmf/utils.py:668: in func_call
    return func(args[0], **pargs)
../hdmf/src/hdmf/build/objectmapper.py:1357: in construct
    override = self.__get_override_carg(argname, builder, manager)
../hdmf/src/hdmf/build/objectmapper.py:536: in __get_override_carg
    return func(self, *remaining_args)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <pynwb.io.file.NWBFileMap object at 0x12b385190>
builder = root GroupBuilder {'attributes': {'.specloc': 'specifications', 'namespace': 'core', 'neurodata_type': 'NWBFile', 'nwb_version': '2.3.0', 'object_id': 'ef216af1-2b6e-45d5-98ce-687394b5a132'}, 'groups': {}, 'datasets': {}, 'links': {}}
manager = <hdmf.build.manager.BuildManager object at 0x12baff440>

    @ObjectMapper.constructor_arg('session_start_time')
    def dateconversion(self, builder, manager):
        """Set the constructor arg for 'session_start_time' to a datetime object.
    
        Used when constructing the NWBFile container from a written file.
    
        Dates are read into builders as strings and are parsed into datetime objects
        for user convenience and consistency with how they are written.
        """
>       datestr = builder.get('session_start_time').data
E       AttributeError: 'NoneType' object has no attribute 'data'

../pynwb/src/pynwb/io/file.py:192: AttributeError
======================================================================== warnings summary ========================================================================
../../../anaconda3/envs/nwb_clean/lib/python3.12/site-packages/dateutil/tz/tz.py:37
  /Users/mavaylon/anaconda3/envs/nwb_clean/lib/python3.12/site-packages/dateutil/tz/tz.py:37: DeprecationWarning: datetime.datetime.utcfromtimestamp() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.fromtimestamp(timestamp, datetime.UTC).
    EPOCH = datetime.datetime.utcfromtimestamp(0)

-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
==================================================================== short test summary info =====================================================================
FAILED tests/unit/test_fsspec_streaming.py::TestFSSpecStreaming::test_s3_open_with_consolidated_ - AttributeError: 'NoneType' object has no attribute 'data'
========================================================== 1 failed, 2 deselected, 1 warning in 17.81s ===========================================================
(nwb

Operating System

Windows

Python Executable

Conda

Python Version

3.8

Package Versions

No response

Code of Conduct

Metadata

Metadata

Assignees

Labels

category: bug (errors in the code or code behavior)
priority: medium (non-critical problem and/or affecting only a small set of users)

Type

No type

Projects

No projects

Relationships

None yet

Development

No branches or pull requests

Issue actions