Skip to content
5 changes: 5 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,11 @@ jobs:
run: |
pip install ${{ matrix.DEPENDENCIES }}

- name: Install hyperspy dev (with file_handle)
if: ${{ ! contains(matrix.LABEL, 'without-hyperspy')}}
run: |
pip install git+https://github.com/ericpre/hyperspy.git@close_file_with_handle

- name: Pip list
run: |
pip list
Expand Down
1 change: 1 addition & 0 deletions doc/api/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ The **dictionary** contains the following fields:
* ``'metadata'`` -- dictionary containing the parsed metadata
* ``'original_metadata'`` -- dictionary containing the full metadata tree from the
input file
* ``'file_handle'`` -- (compatible formats only: :ref:`emd <emd-format>`, :ref:`hspy <hspy-format>`, :ref:`NeXus <nexus-format>`, :ref:`tiff <tiff-format>`, :ref:`USID <usid-format>`) the file handle when ``lazy`` is ``True``, otherwise ``None``

Interfacing the reader from one of the IO plugins:

Expand Down
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,7 @@ doc = [
"sphinx-favicon",
"sphinxcontrib-towncrier",
"sphinx-copybutton",
# unpin when sphinxcontrib-towncrier supports towncrier >=24
"towncrier<24",
"towncrier",
"sphinx_design",
]
all = [
Expand Down
1 change: 1 addition & 0 deletions rsciio/_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@
containing the full axes vector
- 'metadata' – dictionary containing the parsed metadata
- 'original_metadata' – dictionary containing the full metadata tree from the input file
- 'file_handle' – (compatible formats only) the file handle when ``lazy`` is ``True``, otherwise ``None``
When the file contains several datasets, each dataset will be loaded as separate dictionary.
"""
4 changes: 4 additions & 0 deletions rsciio/_hierarchical.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ def group2signaldict(self, group, lazy=False):
if not isinstance(data, da.Array):
data = da.from_array(data, chunks=data.chunks)
exp["attributes"]["_lazy"] = True
exp["file_handle"] = self.file
else:
if isinstance(data, da.Array):
data = data.compute()
Expand Down Expand Up @@ -859,6 +860,9 @@ def write_signal(
learning_results = group.require_group("learning_results")
self.dict2group(signal["learning_results"], learning_results, **kwds)
attributes = group.require_group("attributes")
if "_file_handle" in signal["attributes"].keys():
# no need to save the file handle!
del signal["attributes"]["_file_handle"]
self.dict2group(signal["attributes"], attributes, **kwds)

if signal["models"]:
Expand Down
1 change: 1 addition & 0 deletions rsciio/emd/_emd_ncem.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ def read_file(self, file, lazy=None, dataset_path=None, stack_group=None):
"axes": axes,
"metadata": md,
"original_metadata": om,
"file_handle": self.file if lazy else None,
}
self.dictionaries.append(d)

Expand Down
8 changes: 5 additions & 3 deletions rsciio/emd/_emd_velox.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,6 @@ def __init__(
):
# TODO: Finish lazy implementation using the `FrameLocationTable`
# Parallelise streams reading
self.filename = filename
self.select_type = select_type
self.dictionaries = []
self.first_frame = first_frame
Expand All @@ -119,7 +118,7 @@ def __init__(
self._map_label_dict = {}

def read_file(self, f):
self.filename = f.filename
self.file = f
self.version = _parse_json(f["Version"][0])["version"]
_logger.info(f"EMD file version: {self.version}")
self.d_grp = f.get("Data")
Expand Down Expand Up @@ -238,6 +237,7 @@ def _read_spectrum(self, spectrum_group, spectrum_sub_group_key):
"metadata": md,
"original_metadata": original_metadata,
"mapping": self._get_mapping(),
"file_handle": self.file if self.lazy else None,
}

def _read_images(self):
Expand Down Expand Up @@ -406,6 +406,7 @@ def _read_image(self, image_group, image_sub_group_key):
"mapping": self._get_mapping(
map_selected_element=False, parse_individual_EDS_detector_metadata=False
),
"file_handle": self.file if self.lazy else None,
}

def _get_detector_name(self, key):
Expand Down Expand Up @@ -691,6 +692,7 @@ def _read_stream(key):
"mapping": self._get_mapping(
parse_individual_EDS_detector_metadata=not self.sum_frames
),
"file_handle": self.file if self.lazy else None,
}
)

Expand Down Expand Up @@ -723,7 +725,7 @@ def _convert_scale_units(self, value, units, factor=1):

def _get_metadata_dict(self, om):
meta_gen = {}
meta_gen["original_filename"] = os.path.split(self.filename)[1]
meta_gen["original_filename"] = os.path.split(self.file.filename)[1]
if self.detector_name is not None:
meta_gen["title"] = self.detector_name
# We have only one entry in the original_metadata, so we can't use
Expand Down
4 changes: 2 additions & 2 deletions rsciio/hspy/_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
SIGNAL_DOC,
)
from rsciio._hierarchical import HierarchicalReader, HierarchicalWriter, version
from rsciio.utils.tools import dummy_context_manager, get_file_handle
from rsciio.utils.tools import dummy_context_manager

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -216,7 +216,7 @@ def file_writer(

f = None
if signal["attributes"]["_lazy"] and Path(filename).absolute() == original_path:
f = get_file_handle(signal["data"], warn=False)
f = signal["attributes"].get("_file_handle", None)
if f is not None and f.mode == "r":
# when the file is read only, force to reopen it in writing mode
raise OSError(
Expand Down
2 changes: 2 additions & 0 deletions rsciio/nexus/_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,7 @@ def file_reader(
dictionary["metadata"] = hyper_metadata
else:
dictionary["original_metadata"] = {}
dictionary["file_handle"] = fin if lazy else None

signal_dict_list.append(dictionary)

Expand All @@ -628,6 +629,7 @@ def file_reader(
}
}
datadict["metadata"].update(basic_metadata)
datadict["file_handle"] = fin if lazy else None
signal_dict_list.append(datadict)

return signal_dict_list
Expand Down
49 changes: 26 additions & 23 deletions rsciio/tests/test_emd_ncem.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,15 @@
import tempfile
from pathlib import Path

import dask.array as da
import numpy as np
import pytest
from packaging.version import Version

h5py = pytest.importorskip("h5py", reason="h5py not installed")
hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed")

import hyperspy # noqa: E402

TEST_DATA_PATH = Path(__file__).parent / "data" / "emd"


Expand All @@ -52,11 +54,17 @@
test_title = "This is a test!"


@pytest.mark.parametrize("lazy", (True, False))
@pytest.mark.skipif(
Version(hyperspy.__version__) <= Version("2.3.0"),
reason="HyperSpy > 2.3.0 required.",
)
@pytest.mark.parametrize("lazy", (False, True))
def test_signal_3d_loading(lazy):
signal = hs.load(TEST_DATA_PATH / "example_signal.emd", lazy=lazy)
if lazy:
assert signal._file_handle
signal.compute(close_file=True)
assert not signal._file_handle
np.testing.assert_equal(signal.data, data_signal)
assert isinstance(signal, hs.signals.BaseSignal)

Expand Down Expand Up @@ -197,7 +205,7 @@ def test_load_file(tmp_path):
assert _s.metadata.General.title in path


@pytest.mark.parametrize("lazy", (True, False))
@pytest.mark.parametrize("lazy", (False, True))
def test_save_and_read(lazy, tmp_path):
signal_ref = hs.signals.BaseSignal(np.arange(24).reshape((2, 3, 4)))
signal_ref.metadata.General.title = test_title
Expand Down Expand Up @@ -245,25 +253,20 @@ def test_save_and_read(lazy, tmp_path):
assert isinstance(signal, hs.signals.BaseSignal)


def test_chunking_saving_lazy(tmp_path):
s = hs.signals.Signal2D(da.zeros((50, 100, 100))).as_lazy()
s.data = s.data.rechunk([50, 25, 25])
@pytest.mark.parametrize("save_kwargs", ({}, {"chunks": (50, 20, 20)}))
def test_chunking_saving_lazy(tmp_path, save_kwargs):
original_chunks = (50, 25, 25)
s = hs.signals.Signal2D(np.zeros((50, 100, 100))).as_lazy(chunks=original_chunks)
filename = tmp_path / "test_chunking_saving_lazy.emd"
filename2 = tmp_path / "test_chunking_saving_lazy_chunks_True.emd"
filename3 = tmp_path / "test_chunking_saving_lazy_chunks_specify.emd"
s.save(filename)
s.save(filename, **save_kwargs)

chunks = save_kwargs.get("chunks")
if isinstance(chunks, tuple):
# chunks passed as an argument
expected_chunks = chunks
else:
# the current chunks are used for saving
expected_chunks = original_chunks

s1 = hs.load(filename, lazy=True)
assert s.data.chunks == s1.data.chunks

# with chunks=True, use h5py chunking
s.save(filename2, chunks=True)
s2 = hs.load(filename2, lazy=True)
assert tuple([c[0] for c in s2.data.chunks]) == (13, 25, 13)
s1.close_file()
s2.close_file()

# Specify chunks
chunks = (50, 20, 20)
s.save(filename3, chunks=chunks)
s3 = hs.load(filename3, lazy=True)
assert tuple([c[0] for c in s3.data.chunks]) == chunks
assert tuple([c[0] for c in s1.data.chunks]) == expected_chunks
4 changes: 2 additions & 2 deletions rsciio/tests/test_emd_prismatic.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def test_load_specific_datasets():
assert len(s) == 2


@pytest.mark.parametrize("lazy", (True, False))
@pytest.mark.parametrize("lazy", (False, True))
def test_3D_only(lazy):
filename = TEST_DATA_PATH / "Si100_3D.emd"
s = hs.load(filename, lazy=lazy)
Expand Down Expand Up @@ -195,7 +195,7 @@ def test_non_square_3D():
np.testing.assert_allclose(axis.offset, 0)


@pytest.mark.parametrize("lazy", (True, False))
@pytest.mark.parametrize("lazy", (False, True))
def test_4D(lazy):
filename = TEST_DATA_PATH / "Si100_4D.emd"
s = hs.load(filename, lazy=lazy)
Expand Down
17 changes: 12 additions & 5 deletions rsciio/tests/test_emd_velox.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@
import numpy as np
import pytest
from dateutil import tz
from packaging.version import Version

from rsciio.utils.tests import assert_deep_almost_equal

hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed")
pytest.importorskip("sparse")

import hyperspy # noqa: E402

TEST_DATA_PATH = Path(__file__).parent / "data" / "emd"

Expand Down Expand Up @@ -154,6 +156,10 @@ def test_fei_emd_si(self, lazy):
np.testing.assert_equal(signal[1].data, fei_si)
assert isinstance(signal[1], hs.signals.Signal1D)

@pytest.mark.skipif(
Version(hyperspy.__version__) <= Version("2.3.0"),
reason="HyperSpy > 2.3.0 required.",
)
@pytest.mark.parametrize("lazy", (True, False))
def test_fei_emd_si_non_square_10frames(self, lazy):
s = hs.load(
Expand All @@ -163,7 +169,7 @@ def test_fei_emd_si_non_square_10frames(self, lazy):
signal = s[1]
if lazy:
assert signal._lazy
signal.compute(close_file=True)
signal.compute(close_file=False)
assert signal.metadata.Signal.signal_type == "EDS_TEM"
assert isinstance(signal, hs.signals.Signal1D)
assert signal.axes_manager[0].name == "x"
Expand All @@ -182,7 +188,7 @@ def test_fei_emd_si_non_square_10frames(self, lazy):
signal0 = s[0]
if lazy:
assert signal0._lazy
signal0.compute(close_file=True)
signal0.compute(close_file=False)
assert isinstance(signal0, hs.signals.Signal2D)
assert signal0.axes_manager[0].name == "x"
assert signal0.axes_manager[0].size == 10
Expand All @@ -200,7 +206,7 @@ def test_fei_emd_si_non_square_10frames(self, lazy):
signal = s[1]
if lazy:
assert signal._lazy
signal.compute(close_file=True)
signal.compute(close_file=False)
assert signal.metadata.Signal.signal_type == "EDS_TEM"
assert isinstance(signal, hs.signals.Signal1D)
assert signal.axes_manager[0].name == "x"
Expand All @@ -219,7 +225,7 @@ def test_fei_emd_si_non_square_10frames(self, lazy):
signal0 = s[0]
if lazy:
assert signal0._lazy
signal0.compute(close_file=True)
signal0.compute(close_file=False)
assert isinstance(signal0, hs.signals.Signal2D)
assert signal0.axes_manager[0].name == "Time"
assert signal0.axes_manager[0].size == 10
Expand All @@ -242,7 +248,7 @@ def test_fei_emd_si_non_square_10frames(self, lazy):
signal = s[1]
if lazy:
assert signal._lazy
signal.compute(close_file=True)
signal.compute(close_file=False)
assert signal.metadata.Signal.signal_type == "EDS_TEM"
assert isinstance(signal, hs.signals.Signal1D)
assert signal.axes_manager.navigation_shape == (10, 50, 10)
Expand Down Expand Up @@ -307,6 +313,7 @@ def test_fei_emd_si_non_square_10frames(self, lazy):
if lazy:
assert signal._lazy
signal.compute(close_file=True)
assert not signal._file_handle
assert signal.metadata.Signal.signal_type == "EDS_TEM"
assert isinstance(signal, hs.signals.Signal1D)
assert signal.axes_manager.navigation_shape == (10, 50, 6)
Expand Down
Loading
Loading