
Conversation

ericpre (Member) commented Jun 28, 2025

Attempt to fix the following example:

import hyperspy.api as hs
import numpy as np

s = hs.signals.Signal1D(np.arange(100))
s.save("test.rpl", overwrite=True)

s2 = hs.load("test.rpl", lazy=True, chunks=(50,))
s2.compute(close_file=True)

which gives the error:

Error traceback
     11 s = hs.signals.Signal1D(np.arange(100))
     12 s.save("test.rpl", overwrite=True)
---> 14 s2 = hs.load("test.rpl", lazy=True, chunks=(50,))
     15 s2.compute(close_file=True)
     18 # from rsciio.utils.distributed import get_chunk_slice
     19 
     20 # slices, chunks = get_chunk_slice(shape=(100, ), chunks=(50,), dtype=int)

File ~\Dev\hyperspy\hyperspy\io.py:550, in load(filenames, signal_type, stack, stack_axis, new_axis_name, lazy, convert_units, escape_square_brackets, stack_metadata, load_original_metadata, show_progressbar, **kwds)
    546         objects.append(signal)
    547 else:
    548     # No stack, so simply we load all signals in all files separately
    549     objects = [
--> 550         load_single_file(filename, lazy=lazy, **kwds) for filename in filenames
    551     ]
    553 if len(objects) == 1:
    554     objects = objects[0]

File ~\Dev\hyperspy\hyperspy\io.py:609, in load_single_file(filename, **kwds)
    603     raise ValueError(
    604         "`reader` should be one of None, str, or a custom file reader object"
    605     )
    607 try:
    608     # Try and load the file
--> 609     return load_with_reader(filename=filename, reader=reader, **kwds)
    611 except BaseException:
    612     _logger.error(
    613         "If this file format is supported, please "
    614         "report this error to the RosettaSciIO developers at "
    615         "https://github.com/hyperspy/rosettasciio/issues"
    616     )

File ~\Dev\hyperspy\hyperspy\io.py:631, in load_with_reader(filename, reader, signal_type, convert_units, load_original_metadata, **kwds)
    629 lazy = kwds.get("lazy", False)
    630 if isinstance(reader, dict):
--> 631     file_data_list = importlib.import_module(reader["api"]).file_reader(
    632         filename, **kwds
    633     )
    634 else:
    635     # We assume it is a module
    636     file_data_list = reader.file_reader(filename, **kwds)

File ~\Dev\rosettasciio\rsciio\utils\_deprecated.py:154, in deprecated_argument.__call__.<locals>.wrapped(*args, **kwargs)
    147     func_code = func.__code__
    148     warnings.warn_explicit(
    149         message=msg,
    150         category=VisibleDeprecationWarning,
    151         filename=func_code.co_filename,
    152         lineno=func_code.co_firstlineno + 1,
    153     )
--> 154 return func(*args, **kwargs)

File ~\Dev\rosettasciio\rsciio\utils\_deprecated.py:154, in deprecated_argument.__call__.<locals>.wrapped(*args, **kwargs)
    147     func_code = func.__code__
    148     warnings.warn_explicit(
    149         message=msg,
    150         category=VisibleDeprecationWarning,
    151         filename=func_code.co_filename,
    152         lineno=func_code.co_firstlineno + 1,
    153     )
--> 154 return func(*args, **kwargs)

File ~\Dev\rosettasciio\rsciio\ripple\_api.py:308, in file_reader(filename, lazy, rpl_info, encoding, chunks)
    305 if not rawfname:
    306     raise IOError(f'RAW file "{rawfname}" does not exists')
--> 308 data = read_raw(rpl_info, rawfname, chunks=chunks)
    310 if not lazy:
    311     data = data.compute()

File ~\Dev\rosettasciio\rsciio\utils\_deprecated.py:154, in deprecated_argument.__call__.<locals>.wrapped(*args, **kwargs)
    147     func_code = func.__code__
    148     warnings.warn_explicit(
    149         message=msg,
    150         category=VisibleDeprecationWarning,
    151         filename=func_code.co_filename,
    152         lineno=func_code.co_firstlineno + 1,
    153     )
--> 154 return func(*args, **kwargs)

File ~\Dev\rosettasciio\rsciio\utils\_deprecated.py:154, in deprecated_argument.__call__.<locals>.wrapped(*args, **kwargs)
    147     func_code = func.__code__
    148     warnings.warn_explicit(
    149         message=msg,
    150         category=VisibleDeprecationWarning,
    151         filename=func_code.co_filename,
    152         lineno=func_code.co_firstlineno + 1,
    153     )
--> 154 return func(*args, **kwargs)

File ~\Dev\rosettasciio\rsciio\ripple\_api.py:253, in read_raw(rpl_info, filename, chunks)
    250 elif record_by == "dont-care":  # stack of images
    251     shape = (height, width)
--> 253 data = memmap_distributed(
    254     filename,
    255     offset=offset,
    256     shape=shape,
    257     dtype=data_type,
    258     chunks=chunks,
    259 )
    261 return data.squeeze()

File ~\Dev\rosettasciio\rsciio\utils\distributed.py:266, in memmap_distributed(filename, dtype, positions, offset, shape, order, chunks, block_size_limit, key)
    263     shape = (len(positions),) + shape[-2:]  # update the shape to be linear
    264 else:
    265     # Separates slices into appropriately sized chunks.
--> 266     chunked_slices, data_chunks = get_chunk_slice(
    267         shape=shape + sub_array_shape,
    268         chunks=chunks,
    269         block_size_limit=block_size_limit,
    270         dtype=array_dtype,
    271     )
    272     drop_axes = (
    273         num_dim,
    274         num_dim + 1,
    275     )  # Dask 2021.10.0 minimum to use negative indexing
    276     use_positions = False

File ~\Dev\rosettasciio\rsciio\utils\distributed.py:60, in get_chunk_slice(shape, chunks, block_size_limit, dtype)
     25 def get_chunk_slice(
     26     shape,
     27     chunks="auto",
     28     block_size_limit=None,
     29     dtype=None,
     30 ):
     31     """
     32     Get chunk slices for the :func:`rsciio.utils.distributed.slice_memmap` function.
     33 
   (...)
     57         Tuple of the chunks.
     58     """
---> 60     chunks = da.core.normalize_chunks(
     61         chunks=chunks, shape=shape, limit=block_size_limit, dtype=dtype
     62     )
     63     chunks_shape = tuple([len(c) for c in chunks])
     64     slices = np.empty(
     65         shape=chunks_shape + (len(chunks_shape), 2),
     66         dtype=int,
     67     )

File ~\miniforge3\Lib\site-packages\dask\array\core.py:3184, in normalize_chunks(chunks, shape, limit, dtype, previous_chunks)
   3181     chunks = (chunks,)
   3183 if shape and len(chunks) != len(shape):
-> 3184     raise ValueError(
   3185         "Chunks and shape must be of the same length/dimension. "
   3186         "Got chunks=%s, shape=%s" % (chunks, shape)
   3187     )
   3188 if -1 in chunks or None in chunks:
   3189     chunks = tuple(s if c == -1 or c is None else c for c, s in zip(chunks, shape))

ValueError: Chunks and shape must be of the same length/dimension. Got chunks=(50,), shape=(1, 1, 100)
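For reference, the mismatch can be reproduced with dask directly. This is a minimal sketch mirroring the values in the traceback above; padding the chunks to the full dimensionality is only meant to show what normalize_chunks expects, not necessarily the exact fix implemented in this PR:

import dask.array as da

# The reader ends up calling normalize_chunks with a fully padded 3D shape,
# while the user-supplied chunks tuple only covers the signal axis:
try:
    da.core.normalize_chunks(chunks=(50,), shape=(1, 1, 100), dtype=int)
except ValueError as e:
    print(e)  # Chunks and shape must be of the same length/dimension. ...

# Padding the chunks to the full dimensionality makes the call succeed:
print(da.core.normalize_chunks(chunks=(1, 1, 50), shape=(1, 1, 100), dtype=int))
# ((1,), (1,), (50, 50))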

After fixing the shape (this PR), there is a different error, which I have seen before and was actually trying to reproduce in the context of #418:

Error traceback
     12 s.save("test.rpl", overwrite=True)
     14 s2 = hs.load("test.rpl", lazy=True, chunks=(50,))
---> 15 s2.compute(close_file=True)
     18 # from rsciio.utils.distributed import get_chunk_slice
     19 
     20 # slices, chunks = get_chunk_slice(shape=(100, ), chunks=(50,), dtype=int)

File ~\Dev\hyperspy\hyperspy\_signals\lazy.py:244, in LazySignal.compute(self, close_file, show_progressbar, **kwargs)
    196 def compute(self, close_file=False, show_progressbar=None, **kwargs):
    197     """
    198     Attempt to store the full signal in memory.
    199 
   (...)
    242 
    243     """
--> 244     self.data = _compute(self.data, show_progressbar=show_progressbar, **kwargs)
    245     if close_file:
    246         self.close_file()

File ~\Dev\hyperspy\hyperspy\misc\utils.py:1463, in _compute(array, store_to, show_progressbar, **kwargs)
   1459     da.store(
   1460         array, store_to, dtype=array.dtype, compute=True, lock=False, **kwargs
   1461     )
   1462 else:
-> 1463     return array.compute(**kwargs)

File ~\miniforge3\Lib\site-packages\dask\base.py:373, in DaskMethodsMixin.compute(self, **kwargs)
    349 def compute(self, **kwargs):
    350     """Compute this dask collection
    351 
    352     This turns a lazy Dask collection into its in-memory equivalent.
   (...)
    371     dask.compute
    372     """
--> 373     (result,) = compute(self, traverse=False, **kwargs)
    374     return result

File ~\miniforge3\Lib\site-packages\dask\base.py:681, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
    678     expr = expr.optimize()
    679     keys = list(flatten(expr.__dask_keys__()))
--> 681     results = schedule(expr, keys, **kwargs)
    683 return repack(results)

File ~\Dev\rosettasciio\rsciio\utils\distributed.py:177, in slice_memmap(slices, file, dtypes, shape, key, positions, **kwargs)
    175         return data[slices_]
    176 else:
--> 177     slices_ = tuple([slice(s[0], s[1]) for s in slices_])
    178     return data[slices_]

IndexError: invalid index to scalar variable.

@CSSFrancis, it looks to me that, in the case of an array with a single dimension, there is an empty slice that causes the error. Do you have an idea of a good fix for it?
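To illustrate what I mean with a minimal numpy sketch (the exact shape of the slice entry here is my guess at the degenerate 1-D case, not taken from the real get_chunk_slice output):

import numpy as np

# A degenerate (2,) slice entry yields scalars when iterated over,
# which is what slice_memmap seems to receive for 1-D data:
slices_ = np.array([0, 50])
try:
    tuple(slice(s[0], s[1]) for s in slices_)
except IndexError as e:
    print(e)  # invalid index to scalar variable.

# With the expected (n_dims, 2) layout the same code works:
slices_ok = np.array([[0, 50]])
print(tuple(slice(s[0], s[1]) for s in slices_ok))  # (slice(0, 50, None),)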

Progress of the PR

  • Change implemented (can be split into several points),
  • update docstring (if appropriate),
  • update user guide (if appropriate),
  • add a changelog entry in the upcoming_changes folder (see upcoming_changes/README.rst),
  • Check formatting of the changelog entry (and eventual user guide changes) in the docs/readthedocs.org:rosettasciio build of this PR (link in github checks)
  • add tests,
  • ready for review.

codecov bot commented Jun 28, 2025

Codecov Report

✅ All modified and coverable lines are covered by tests.
✅ Project coverage is 87.90%. Comparing base (a647f2e) to head (63b6d3f).
⚠️ Report is 58 commits behind head on main.

Additional details and impacted files
@@           Coverage Diff           @@
##             main     #419   +/-   ##
=======================================
  Coverage   87.90%   87.90%           
=======================================
  Files          89       89           
  Lines       11464    11465    +1     
  Branches     2116     2116           
=======================================
+ Hits        10077    10078    +1     
  Misses        878      878           
  Partials      509      509           


CSSFrancis (Member) commented:

@ericpre hmmm sorry it's taken me a bit to get around to this. I've been sick the last couple of weeks.

I'd have to step through the code to see where this goes wrong. It's probably just something with get_chunk_slice; I don't know if I fully considered the 1D case.
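One possible direction for the 1-D case (a hypothetical sketch only, not necessarily what should go into get_chunk_slice or slice_memmap) would be to make the per-chunk slice array at least two-dimensional before building the slice tuple:

import numpy as np

def _as_slice_tuple(slices_):
    # Hypothetical helper: ensure the per-chunk slice array has shape
    # (n_dims, 2) so the degenerate 1-D case behaves like the N-D one.
    slices_ = np.atleast_2d(slices_)
    return tuple(slice(s[0], s[1]) for s in slices_)

print(_as_slice_tuple(np.array([0, 50])))            # (slice(0, 50, None),)
print(_as_slice_tuple(np.array([[0, 2], [0, 50]])))  # N-D input unchanged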

ericpre added this to the v0.11 milestone Jul 26, 2025
ericpre added the "type: bug" label Jul 26, 2025
ericpre modified the milestones: v0.11, v0.12 Oct 12, 2025
ericpre (Member, Author) commented Nov 1, 2025

@CSSFrancis, any chance you could help with this PR? Thanks!
