From 5788385b6717142fb94f068e546606ec3efda957 Mon Sep 17 00:00:00 2001
From: Ziwen Liu
Date: Thu, 3 Jul 2025 10:07:14 -0700
Subject: [PATCH 01/21] add zarrs and organize dependency groups

---
 pyproject.toml | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index e3660fe0..60185c60 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,7 +46,12 @@ dependencies = [
 
 [project.optional-dependencies]
 # note that dev dependencies are only pinned to major versions
+segmentation = ["cellpose"]
+sharding = ["zarrs"]
+build = ["build", "twine"]
+all = ["biahub[segmentation,sharding,build]"]
 dev = [
+    "biahub[all]",
     "black==25.1",
     "flake8==7.2",
     "isort==6.0",
@@ -54,12 +59,6 @@ dev = [
     "pre-commit~=4.2",
 ]
 
-segmentation = [
-    "cellpose",
-]
-
-build = ["build", "twine"]
-
 [project.scripts]
 biahub = "biahub.cli.main:cli"

From 838e46be2dcc51e64af2fbeb0dc2f4f303575487 Mon Sep 17 00:00:00 2001
From: Ziwen Liu
Date: Thu, 3 Jul 2025 10:07:49 -0700
Subject: [PATCH 02/21] configurable sharding

---
 biahub/concatenate.py | 14 ++++++++++++++
 biahub/settings.py    |  2 ++
 2 files changed, 16 insertions(+)

diff --git a/biahub/concatenate.py b/biahub/concatenate.py
index 0306fdef..62363f0b 100644
--- a/biahub/concatenate.py
+++ b/biahub/concatenate.py
@@ -1,5 +1,6 @@
 import glob
 import os
+import warnings
 
 from pathlib import Path
 
@@ -332,10 +333,23 @@ def concatenate(
     else:
         chunk_size = settings.chunks_czyx
 
+    if settings.shards_ratio is not None:
+        try:
+            import zarr
+            import zarrs  # noqa: F401
+
+            zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"})
+        except ImportError:
+            warnings.warn(
+                "zarrs is not installed. Writing sharded array will be very slow."
+            )
+
     # Logic for creation of zarr and metadata
     output_metadata = {
         "shape": (len(input_time_indices), len(all_channel_names)) + tuple(cropped_shape_zyx),
         "chunks": chunk_size,
+        "shards_ratio": settings.shards_ratio,
+        "version": settings.output_ome_zarr_version,
         "scale": (1,) * 2 + tuple(output_voxel_size),
         "channel_names": all_channel_names,
         "dtype": dtype,
diff --git a/biahub/settings.py b/biahub/settings.py
index b0f8b143..680dd225 100644
--- a/biahub/settings.py
+++ b/biahub/settings.py
@@ -158,7 +158,9 @@ class ConcatenateSettings(MyBaseModel):
     Y_slice: Union[list, list[Union[list, Literal["all"]]], Literal["all"]] = "all"
     Z_slice: Union[list, list[Union[list, Literal["all"]]], Literal["all"]] = "all"
     chunks_czyx: Union[Literal[None], list[int]] = None
+    shards_ratio: list[int] | None = None
     ensure_unique_positions: Optional[bool] = False
+    output_ome_zarr_version: Literal["0.4", "0.5"] = "0.4"
 
     @field_validator("concat_data_paths")
     @classmethod
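A note on the semantics introduced above: with Zarr v3 sharding (OME-Zarr 0.5), a shard stores a group of whole chunks, so the on-disk shard shape is the elementwise product of the chunk shape and shards_ratio. A minimal sketch with hypothetical values (the same arithmetic the updated test below asserts):

    chunks = (1, 1, 2, 4, 3)        # (T, C, Z, Y, X) chunk shape
    shards_ratio = (1, 1, 1, 2, 2)  # chunks per shard along each axis
    shards = tuple(c * r for c, r in zip(chunks, shards_ratio))
    assert shards == (1, 1, 2, 8, 6)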
From cc8bf6d6ef2f28f704cf429ba697c6403a57286b Mon Sep 17 00:00:00 2001
From: Ziwen Liu
Date: Thu, 3 Jul 2025 10:08:11 -0700
Subject: [PATCH 03/21] update chunking test

---
 biahub/tests/test_concatenate.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/biahub/tests/test_concatenate.py b/biahub/tests/test_concatenate.py
index 03edfe63..ba505bab 100644
--- a/biahub/tests/test_concatenate.py
+++ b/biahub/tests/test_concatenate.py
@@ -1,3 +1,5 @@
+import pytest
+
 from iohub import open_ome_zarr
 
 from biahub.concatenate import concatenate
@@ -204,7 +206,8 @@ def test_concatenate_with_cropping(create_custom_plate, tmp_path, sbatch_file):
     assert output_X == x_end - x_start
 
 
-def test_concatenate_with_custom_chunks(create_custom_plate, tmp_path, sbatch_file):
+@pytest.mark.parametrize("version", ["0.4", "0.5"])
+def test_concatenate_with_custom_chunks(create_custom_plate, tmp_path, sbatch_file, version):
     """
     Test concatenating with custom chunk sizes
     """
@@ -227,13 +230,19 @@ def test_concatenate_with_custom_chunks(create_custom_plate, tmp_path, sbatch_fi
     )
 
     # Define custom chunk sizes
-    custom_chunks = [1, 2, 4, 3]  # [C, Z, Y, X]
+    chunks = [1, 1, 2, 4, 3]  # [T, C, Z, Y, X]
+    if version == "0.5":
+        shards_ratio = [1, 1, 1, 2, 2]
+    elif version == "0.4":
+        shards_ratio = None
 
     settings = ConcatenateSettings(
         concat_data_paths=[str(plate_1_path) + "/*/*/*", str(plate_2_path) + "/*/*/*"],
         channel_names=['all', 'all'],
         time_indices='all',
-        chunks_czyx=custom_chunks,
+        chunks_czyx=chunks[1:],
+        shards_ratio=shards_ratio,
+        output_ome_zarr_version=version,
     )
 
     output_path = tmp_path / "output.zarr"
@@ -244,8 +253,11 @@ def test_concatenate_with_custom_chunks(create_custom_plate, tmp_path, sbatch_fi
         local=True,
     )
 
-    # We can't easily check the chunks directly, but we can verify the operation completed successfully
     output_plate = open_ome_zarr(output_path)
+    for pos_name, pos in output_plate.positions():
+        assert pos.data.chunks == tuple(chunks)
+        if version == "0.5":
+            assert pos.data.shards == tuple(c * s for c, s in zip(chunks, shards_ratio))
 
     # Check that the output plate has all the channels from the input plates
     output_channels = output_plate.channel_names

From dd0771cfd3d12a83ce8a232de2e2643f09bbff0c Mon Sep 17 00:00:00 2001
From: Ziwen Liu
Date: Thu, 3 Jul 2025 10:13:59 -0700
Subject: [PATCH 04/21] use clean env helper

https://github.com/czbiohub-sf/biahub/pull/96#issuecomment-2921169316
---
 biahub/concatenate.py | 65 ++++++++++++++++++++++---------------------
 1 file changed, 33 insertions(+), 32 deletions(-)

diff --git a/biahub/concatenate.py b/biahub/concatenate.py
index 62363f0b..b78ddb02 100644
--- a/biahub/concatenate.py
+++ b/biahub/concatenate.py
@@ -392,38 +392,39 @@ def concatenate(
     click.echo("Submitting SLURM jobs...")
 
     jobs = []
-    with executor.batch():
-        for i, (
-            input_position_path,
-            output_position_path,
-            input_channel_idx,
-            output_channel_idx,
-            zyx_slicing_params,
-        ) in enumerate(
-            zip(
-                all_data_paths,
-                output_position_paths_list,
-                input_channel_idx_list,
-                output_channel_idx_list,
-                all_slicing_params,
-            )
-        ):
-            # Create slicing parameters for this specific path
-            copy_n_paste_kwargs = {"zyx_slicing_params": zyx_slicing_params}
-
-            job = executor.submit(
-                process_single_position,
-                copy_n_paste,
-                input_position_path=input_position_path,
-                output_position_path=output_position_path,
-                input_channel_indices=input_channel_idx,
-                output_channel_indices=output_channel_idx,
-                input_time_indices=input_time_indices,
-                output_time_indices=list(range(len(input_time_indices))),
-                num_processes=int(slurm_args["slurm_cpus_per_task"]),
-                **copy_n_paste_kwargs,
-            )
-            jobs.append(job)
+    with submitit.helpers.clean_env():
+        with executor.batch():
+            for i, (
+                input_position_path,
+                output_position_path,
+                input_channel_idx,
+                output_channel_idx,
+                zyx_slicing_params,
+            ) in enumerate(
+                zip(
+                    all_data_paths,
+                    output_position_paths_list,
+                    input_channel_idx_list,
+                    output_channel_idx_list,
+                    all_slicing_params,
+                )
+            ):
+                # Create slicing parameters for this specific path
+                copy_n_paste_kwargs = {"zyx_slicing_params": zyx_slicing_params}
+
+                job = executor.submit(
+                    process_single_position,
+                    copy_n_paste,
+                    input_position_path=input_position_path,
+                    output_position_path=output_position_path,
+                    input_channel_indices=input_channel_idx,
+                    output_channel_indices=output_channel_idx,
+                    input_time_indices=input_time_indices,
+                    output_time_indices=list(range(len(input_time_indices))),
+                    num_processes=int(slurm_args["slurm_cpus_per_task"]),
+                    **copy_n_paste_kwargs,
+                )
+                jobs.append(job)
 
     # monitor_jobs(jobs, all_data_paths)
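Background on the helper: submitit.helpers.clean_env() temporarily strips scheduler variables (SLURM_* and submitit's own) from the environment, which avoids confusing SLURM when submitting from inside another job, while executor.batch() defers all submissions in the block into a single job array. A minimal self-contained sketch (local executor for illustration, not biahub code):

    import submitit

    executor = submitit.AutoExecutor(folder="slurm_output", cluster="local")
    executor.update_parameters(timeout_min=5)
    with submitit.helpers.clean_env():  # drop inherited SLURM_* variables
        with executor.batch():  # submissions are sent together on exit
            jobs = [executor.submit(pow, i, 2) for i in range(4)]
    print([job.result() for job in jobs])  # [0, 1, 4, 9]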
From 8c162c5856c07255bc3f61ed7d2e7190a6af1126 Mon Sep 17 00:00:00 2001
From: Ziwen Liu
Date: Thu, 3 Jul 2025 11:44:33 -0700
Subject: [PATCH 05/21] update example config

---
 settings/example_concatenate_settings.yml | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/settings/example_concatenate_settings.yml b/settings/example_concatenate_settings.yml
index 045b35e3..476c27b3 100644
--- a/settings/example_concatenate_settings.yml
+++ b/settings/example_concatenate_settings.yml
@@ -4,8 +4,8 @@
 # List of paths to concatenate - can use glob patterns
 # Each path will be treated as a separate input dataset
 concat_data_paths:
-  - "/path/to/data1.zarr/*/*/*" # First dataset
-  - "/path/to/data2.zarr/*/*/*" # Second dataset
+  - "/path/to/data1.zarr/*/*/*"  # First dataset
+  - "/path/to/data2.zarr/*/*/*"  # Second dataset
   # - "/path/to/data3.zarr/A/1/0" # You can also specify exact positions
 
 # Time indices to include in the output
@@ -22,8 +22,8 @@ time_indices: "all"
 #   - For multiple datasets, specify channels for each:
 #     [["DAPI"], ["GFP", "RFP"]] - Take DAPI from first dataset, GFP and RFP from second
 channel_names:
-  - "all" # Include all channels from first dataset
-  - "all" # Include all channels from second dataset
+  - "all"  # Include all channels from first dataset
+  - "all"  # Include all channels from second dataset
 
 # Spatial cropping options for X dimension
 # Options:
@@ -55,12 +55,23 @@ Z_slice: "all"
 #   - [1, 10, 100, 100]: Specify custom chunk sizes
 chunks_czyx: null
 
+# Number of chunks in a shard for each dimension [T, C, Z, Y, X]
+# Options:
+#   - null: No sharding
+#   - [1, 1, 4, 8, 8]: Specify custom shards ratio
+shards_ratio: null
+
+# Version of the OME-Zarr format to use for the output
+# Options:
+#   - "0.4" (default)
+#   - "0.5"
+output_ome_zarr_version: "0.4"
+
 # Whether to ensure unique position names in the output
 # Options:
 #   - false or null: Positions with the same name will overwrite each other
 #   - true: Ensure unique position names by adding suffixes (e.g., A/1d1/0)
 ensure_unique_positions: null
-
 # EXAMPLE USE CASES:
 
 # 1. Basic concatenation of all data:
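The example file maps directly onto ConcatenateSettings, so it can also be validated outside the CLI; a minimal sketch (assumed file path; the CLI itself goes through the yaml_to_model helper):

    import yaml

    from biahub.settings import ConcatenateSettings

    with open("settings/example_concatenate_settings.yml") as f:
        settings = ConcatenateSettings(**yaml.safe_load(f))
    print(settings.shards_ratio, settings.output_ome_zarr_version)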
From 83cd243d581d1832b430df1fc1c8d65ce9aa3e38 Mon Sep 17 00:00:00 2001
From: Ziwen Liu
Date: Thu, 3 Jul 2025 11:50:30 -0700
Subject: [PATCH 06/21] disable threading for the zarrs codec

---
 biahub/concatenate.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/biahub/concatenate.py b/biahub/concatenate.py
index b78ddb02..6e11e258 100644
--- a/biahub/concatenate.py
+++ b/biahub/concatenate.py
@@ -338,11 +338,11 @@ def concatenate(
             import zarr
             import zarrs  # noqa: F401
 
-            zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"})
-        except ImportError:
-            warnings.warn(
-                "zarrs is not installed. Writing sharded array will be very slow."
+            zarr.config.set(
+                {"codec_pipeline.path": "zarrs.ZarrsCodecPipeline", "threading.max_workers": 1}
             )
+        except ImportError:
+            warnings.warn("zarrs is not installed. Writing sharded array will be very slow.")
 
     # Logic for creation of zarr and metadata

From 0ee2e3384899004b429c925d67aa0d4783cc1033 Mon Sep 17 00:00:00 2001
From: Ziwen Liu
Date: Wed, 9 Jul 2025 15:56:18 -0700
Subject: [PATCH 07/21] test variable sharding in time

---
 biahub/tests/test_concatenate.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/biahub/tests/test_concatenate.py b/biahub/tests/test_concatenate.py
index ba505bab..51035715 100644
--- a/biahub/tests/test_concatenate.py
+++ b/biahub/tests/test_concatenate.py
@@ -206,8 +206,13 @@ def test_concatenate_with_cropping(create_custom_plate, tmp_path, sbatch_file):
     assert output_X == x_end - x_start
 
 
-@pytest.mark.parametrize("version", ["0.4", "0.5"])
-def test_concatenate_with_custom_chunks(create_custom_plate, tmp_path, sbatch_file, version):
+@pytest.mark.parametrize(
+    ["version", "shards_ratio_time"], [["0.4", 1], ["0.5", 1], ["0.5", 2], ["0.5", 5]]
+)
+@pytest.mark.parametrize("time_points", [3, 4])
+def test_concatenate_with_custom_chunks(
+    create_custom_plate, tmp_path, sbatch_file, version, time_points, shards_ratio_time
+):
     """
     Test concatenating with custom chunk sizes
     """
@@ -215,7 +220,7 @@ def test_concatenate_with_custom_chunks(
     plate_1_path, plate_1 = create_custom_plate(
         tmp_path / 'zarr1',
         channel_names=["DAPI", "Cy5"],
-        time_points=3,
+        time_points=time_points,
         z_size=4,
         y_size=8,
         x_size=6,
     )
     plate_2_path, plate_2 = create_custom_plate(
         tmp_path / 'zarr2',
         channel_names=["GFP", "RFP"],
-        time_points=3,
+        time_points=time_points,
         z_size=4,
         y_size=8,
         x_size=6,
@@ -232,7 +237,7 @@ def test_concatenate_with_custom_chunks(
     # Define custom chunk sizes
     chunks = [1, 1, 2, 4, 3]  # [T, C, Z, Y, X]
     if version == "0.5":
-        shards_ratio = [1, 1, 1, 2, 2]
+        shards_ratio = [shards_ratio_time, 1, 1, 2, 2]
     elif version == "0.4":
         shards_ratio = None

From edef62659617128b1c95b472f55c26c7c37b1ba2 Mon Sep 17 00:00:00 2001
From: Ziwen Liu
Date: Wed, 9 Jul 2025 16:33:22 -0700
Subject: [PATCH 08/21] print the correct cluster name

---
 biahub/concatenate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/biahub/concatenate.py b/biahub/concatenate.py
index 6e11e258..911e9c88 100644
--- a/biahub/concatenate.py
+++ b/biahub/concatenate.py
@@ -389,7 +389,7 @@ def concatenate(
     executor = submitit.AutoExecutor(folder=slurm_out_path, cluster=cluster)
     executor.update_parameters(**slurm_args)
 
-    click.echo("Submitting SLURM jobs...")
+    click.echo(f"Submitting {cluster} jobs...")
     jobs = []
From 203054a75453002ede8814ab2d21fce3ed90570a Mon Sep 17 00:00:00 2001
From: Ziwen Liu
Date: Thu, 10 Jul 2025 10:51:48 -0700
Subject: [PATCH 09/21] allow blocking

---
 biahub/concatenate.py | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/biahub/concatenate.py b/biahub/concatenate.py
index 911e9c88..ad7ce388 100644
--- a/biahub/concatenate.py
+++ b/biahub/concatenate.py
@@ -237,13 +237,25 @@ def calculate_cropped_size(
 def concatenate(
     settings: ConcatenateSettings,
     output_dirpath: Path,
-    sbatch_filepath: str = None,
+    sbatch_filepath: str | None = None,
     local: bool = False,
+    block: bool = False,
 ):
-    """
-    Concatenate datasets (with optional cropping)
-
-    >> biahub concatenate -c ./concat.yml -o ./output_concat.zarr -j 8
+    """Concatenate datasets (with optional cropping).
+
+    Parameters
+    ----------
+    settings : ConcatenateSettings
+        Configuration settings for concatenation
+    output_dirpath : Path
+        Path to the output dataset
+    sbatch_filepath : str | None, optional
+        Path to the SLURM batch file, by default None
+    local : bool, optional
+        Whether to run locally or on a cluster, by default False
+    block : bool, optional
+        Whether to block until all the jobs are complete,
+        by default False
     """
     slurm_out_path = output_dirpath.parent / "slurm_output"
 
@@ -437,6 +449,9 @@ def concatenate(
     with log_path.open("w") as log_file:
         log_file.write("\n".join(job_ids))
 
+    if block:
+        _ = [job.result() for job in jobs]
+
 
 @click.command("concatenate")
 @config_filepath()

From 25c55620cb7a6c3b54a810dd1d1f0e4dd7fb0646 Mon Sep 17 00:00:00 2001
From: Ziwen Liu
Date: Thu, 10 Jul 2025 10:54:57 -0700
Subject: [PATCH 10/21] fix typing

---
 biahub/concatenate.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/biahub/concatenate.py b/biahub/concatenate.py
index ad7ce388..4587138a 100644
--- a/biahub/concatenate.py
+++ b/biahub/concatenate.py
@@ -259,11 +259,7 @@ def concatenate(
     """
     slurm_out_path = output_dirpath.parent / "slurm_output"
 
-    slicing_params = [
-        settings.Z_slice,
-        settings.Y_slice,
-        settings.X_slice,
-    ]
+    slicing_params = [settings.Z_slice, settings.Y_slice, settings.X_slice]
     (
         all_data_paths,
         all_channel_names,
@@ -375,7 +371,7 @@ def concatenate(
     )
 
     # Estimate resources
-    num_cpus, gb_ram_per_cpu = estimate_resources(shape=[T, C, Z, Y, X], ram_multiplier=16)
+    num_cpus, gb_ram_per_cpu = estimate_resources(shape=(T, C, Z, Y, X), ram_multiplier=16)
     # Prepare SLURM arguments
     slurm_args = {
         "slurm_job_name": "concatenate",
@@ -461,7 +457,7 @@ def concatenate_cli(
     config_filepath: str,
     output_dirpath: str,
-    sbatch_filepath: str = None,
+    sbatch_filepath: str | None = None,
     local: bool = False,
 ):
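Context for the new block flag: submitit's Job.result() waits for the job to finish and re-raises any remote exception, so draining the job list is what turns fire-and-forget submission into a synchronous call. A minimal sketch (local executor for illustration):

    import submitit

    executor = submitit.AutoExecutor(folder="slurm_output", cluster="local")
    job = executor.submit(sum, [1, 2, 3])
    assert job.result() == 6  # blocks until the job completes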
From 01883f4fb3f806e8c461a727031c8850efcb2149 Mon Sep 17 00:00:00 2001
From: Ziwen Liu
Date: Thu, 10 Jul 2025 10:56:24 -0700
Subject: [PATCH 11/21] wip: test values of the concatenated array

---
 biahub/tests/test_concatenate.py | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/biahub/tests/test_concatenate.py b/biahub/tests/test_concatenate.py
index 51035715..58f30ec1 100644
--- a/biahub/tests/test_concatenate.py
+++ b/biahub/tests/test_concatenate.py
@@ -1,3 +1,4 @@
+import numpy as np
 import pytest
 
 from iohub import open_ome_zarr
@@ -207,11 +208,11 @@ def test_concatenate_with_cropping(create_custom_plate, tmp_path, sbatch_file):
 
 
 @pytest.mark.parametrize(
-    ["version", "shards_ratio_time"], [["0.4", 1], ["0.5", 1], ["0.5", 2], ["0.5", 5]]
+    ["version", "shards_ratio_time"],
+    [["0.4", 1], ["0.5", None], ["0.5", 1], ["0.5", 2], ["0.5", 5]],
 )
-@pytest.mark.parametrize("time_points", [3, 4])
 def test_concatenate_with_custom_chunks(
-    create_custom_plate, tmp_path, sbatch_file, version, time_points, shards_ratio_time
+    create_custom_plate, tmp_path, sbatch_file, version, shards_ratio_time
 ):
     """
     Test concatenating with custom chunk sizes
@@ -220,7 +221,7 @@ def test_concatenate_with_custom_chunks(
     plate_1_path, plate_1 = create_custom_plate(
         tmp_path / 'zarr1',
         channel_names=["DAPI", "Cy5"],
-        time_points=time_points,
+        time_points=3,
         z_size=4,
         y_size=8,
         x_size=6,
     )
     plate_2_path, plate_2 = create_custom_plate(
         tmp_path / 'zarr2',
         channel_names=["GFP", "RFP"],
-        time_points=time_points,
+        time_points=3,
         z_size=4,
         y_size=8,
         x_size=6,
@@ -237,7 +238,10 @@ def test_concatenate_with_custom_chunks(
     # Define custom chunk sizes
     chunks = [1, 1, 2, 4, 3]  # [T, C, Z, Y, X]
     if version == "0.5":
-        shards_ratio = [shards_ratio_time, 1, 1, 2, 2]
+        if shards_ratio_time is None:
+            shards_ratio = None
+        else:
+            shards_ratio = [shards_ratio_time, 1, 1, 2, 2]
     elif version == "0.4":
         shards_ratio = None
 
@@ -265,8 +266,14 @@ def test_concatenate_with_custom_chunks(
     output_plate = open_ome_zarr(output_path)
     for pos_name, pos in output_plate.positions():
         assert pos.data.chunks == tuple(chunks)
-        if version == "0.5":
+        if version == "0.5" and shards_ratio is not None:
             assert pos.data.shards == tuple(c * s for c, s in zip(chunks, shards_ratio))
+        np.testing.assert_array_equal(
+            pos.data.numpy(),
+            np.concatenate(
+                [plate_1[pos_name].data.numpy(), plate_2[pos_name].data.numpy()], axis=1
+            ),
+        )
 
     # Check that the output plate has all the channels from the input plates
     output_channels = output_plate.channel_names

From d25a090dc5d484e07db47a63ad3a6b230fa5fb23 Mon Sep 17 00:00:00 2001
From: Ziwen Liu
Date: Thu, 10 Jul 2025 10:58:38 -0700
Subject: [PATCH 12/21] fix monitoring

---
 biahub/concatenate.py            | 2 ++
 biahub/tests/test_concatenate.py | 1 +
 2 files changed, 3 insertions(+)

diff --git a/biahub/concatenate.py b/biahub/concatenate.py
index 62a0d1c6..d9b92cf4 100644
--- a/biahub/concatenate.py
+++ b/biahub/concatenate.py
@@ -259,6 +259,8 @@ def concatenate(
     block : bool, optional
         Whether to block until all the jobs are complete,
         by default False
+    monitor : bool, optional
+        Whether to monitor the jobs, by default True
     """
     slurm_out_path = output_dirpath.parent / "slurm_output"
 
diff --git a/biahub/tests/test_concatenate.py b/biahub/tests/test_concatenate.py
index 58f30ec1..b33363e4 100644
--- a/biahub/tests/test_concatenate.py
+++ b/biahub/tests/test_concatenate.py
@@ -260,6 +260,7 @@ def test_concatenate_with_custom_chunks(
         output_dirpath=output_path,
         sbatch_filepath=sbatch_file,
         local=True,
+        monitor=False,
     )
 
     output_plate = open_ome_zarr(output_path)

From f7e3555b5588e1dac0001cc6e9e0ad78210c564d Mon Sep 17 00:00:00 2001
From: Ziwen Liu
Date: Thu, 10 Jul 2025 15:21:58 -0700
Subject: [PATCH 13/21] remove zarrs codec

---
 biahub/concatenate.py | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/biahub/concatenate.py b/biahub/concatenate.py
index d9b92cf4..0543d118 100644
--- a/biahub/concatenate.py
+++ b/biahub/concatenate.py
@@ -1,6 +1,5 @@
 import glob
 import os
-import warnings
 
 from pathlib import Path
 
@@ -345,18 +344,6 @@ def concatenate(
         chunk_size = [1] + list(settings.chunks_czyx)
     else:
         chunk_size = settings.chunks_czyx
-
-    if settings.shards_ratio is not None:
-        try:
-            import zarr
-            import zarrs  # noqa: F401
-
-            zarr.config.set(
-                {"codec_pipeline.path": "zarrs.ZarrsCodecPipeline", "threading.max_workers": 1}
-            )
-        except ImportError:
-            warnings.warn("zarrs is not installed. Writing sharded array will be very slow.")
-
     # Logic for creation of zarr and metadata
     output_metadata = {
         "shape": (len(input_time_indices), len(all_channel_names)) + tuple(cropped_shape_zyx),
@@ -471,13 +458,14 @@ def concatenate_cli(
     """
     Concatenate datasets (with optional cropping)
 
-    >> biahub concatenate -c ./concat.yml -o ./output_concat.zarr -j 8
+    >> biahub concatenate -c ./concat.yml -o ./output_concat.zarr
     """
     concatenate(
        settings=yaml_to_model(config_filepath, ConcatenateSettings),
         output_dirpath=Path(output_dirpath),
         sbatch_filepath=sbatch_filepath,
         local=local,
+        block=False,
         monitor=monitor,
     )

From 65cbea52e1be6419d1a31c102ecf210aa57caf8c Mon Sep 17 00:00:00 2001
From: Ziwen Liu
Date: Thu, 10 Jul 2025 16:55:56 -0700
Subject: [PATCH 14/21] tweak resource estimation

---
 biahub/concatenate.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/biahub/concatenate.py b/biahub/concatenate.py
index 0543d118..b9303d4e 100644
--- a/biahub/concatenate.py
+++ b/biahub/concatenate.py
@@ -363,7 +363,10 @@ def concatenate(
     )
 
     # Estimate resources
-    num_cpus, gb_ram_per_cpu = estimate_resources(shape=(T, C, Z, Y, X), ram_multiplier=16)
+    batch_size = settings.shards_ratio[0] if settings.shards_ratio else 1
+    num_cpus, gb_ram_per_cpu = estimate_resources(
+        shape=(T // batch_size, C, Z, Y, X), ram_multiplier=4 * batch_size, max_num_cpus=48
+    )
     # Prepare SLURM arguments
     slurm_args = {
         "slurm_job_name": "concatenate",
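A note on the arithmetic in PATCH 14: presumably because each job must buffer whole shards spanning several timepoints, the time axis used for estimation shrinks by the shard batch size while the RAM multiplier grows with it. A sketch with hypothetical sizes (estimate_resources itself is a biahub helper; only its inputs are computed here):

    T, C, Z, Y, X = 8, 2, 4, 8, 6
    shards_ratio = [2, 1, 1, 2, 2]  # hypothetical settings.shards_ratio

    batch_size = shards_ratio[0] if shards_ratio else 1
    shape = (T // batch_size, C, Z, Y, X)
    ram_multiplier = 4 * batch_size
    assert shape == (4, 2, 4, 8, 6) and ram_multiplier == 8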
From 71f5fae09eaeb3087c1b0f489c137ff33bad5777 Mon Sep 17 00:00:00 2001
From: Ziwen Liu
Date: Thu, 10 Jul 2025 16:56:07 -0700
Subject: [PATCH 15/21] block in testing

---
 biahub/tests/test_concatenate.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/biahub/tests/test_concatenate.py b/biahub/tests/test_concatenate.py
index b33363e4..19ad8522 100644
--- a/biahub/tests/test_concatenate.py
+++ b/biahub/tests/test_concatenate.py
@@ -261,6 +261,7 @@ def test_concatenate_with_custom_chunks(
         sbatch_filepath=sbatch_file,
         local=True,
         monitor=False,
+        block=True,
     )
 
     output_plate = open_ome_zarr(output_path)

From 606a0c4c7f010d4f1a6a638dbdf99f239d120c69 Mon Sep 17 00:00:00 2001
From: Ziwen Liu
Date: Thu, 10 Jul 2025 16:56:18 -0700
Subject: [PATCH 16/21] require tensorstore

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index ac42f7a7..46a911a2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,7 @@ classifiers = [
 
 # list package dependencies here
 dependencies = [
-    "iohub>=0.2,<0.3",
+    "iohub[tensorstore]>=0.3,<0.4",
     "matplotlib",
     "napari",
     "PyQt6",

From 8394696a7ed6c5a540502a575e81e756ba6e47a0 Mon Sep 17 00:00:00 2001
From: Ziwen Liu
Date: Thu, 10 Jul 2025 17:36:18 -0700
Subject: [PATCH 17/21] update dependency groups

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 46a911a2..f16715be 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,7 @@ classifiers = [
 
 # list package dependencies here
 dependencies = [
-    "iohub[tensorstore]>=0.3,<0.4",
+    "iohub>=0.3,<0.4",
     "matplotlib",
     "napari",
     "PyQt6",
@@ -48,7 +48,7 @@ dependencies = [
 [project.optional-dependencies]
 # note that dev dependencies are only pinned to major versions
 segmentation = ["cellpose"]
-sharding = ["zarrs"]
+sharding = ["tensorstore"]
 build = ["build", "twine"]
 all = ["biahub[segmentation,sharding,build]"]
 dev = [
From ca97861595e5d00ddae49e2bb868b542f0a5c656 Mon Sep 17 00:00:00 2001
From: Ivan Ivanov
Date: Fri, 8 Aug 2025 14:57:40 -0700
Subject: [PATCH 18/21] combine context managers

---
 biahub/concatenate.py | 65 +++++++++++++++++++++----------------------
 1 file changed, 32 insertions(+), 33 deletions(-)

diff --git a/biahub/concatenate.py b/biahub/concatenate.py
index b9303d4e..3b5ae3af 100644
--- a/biahub/concatenate.py
+++ b/biahub/concatenate.py
@@ -395,39 +395,38 @@ def concatenate(
     click.echo(f"Submitting {cluster} jobs...")
 
     jobs = []
-    with submitit.helpers.clean_env():
-        with executor.batch():
-            for i, (
-                input_position_path,
-                output_position_path,
-                input_channel_idx,
-                output_channel_idx,
-                zyx_slicing_params,
-            ) in enumerate(
-                zip(
-                    all_data_paths,
-                    output_position_paths_list,
-                    input_channel_idx_list,
-                    output_channel_idx_list,
-                    all_slicing_params,
-                )
-            ):
-                # Create slicing parameters for this specific path
-                copy_n_paste_kwargs = {"zyx_slicing_params": zyx_slicing_params}
-
-                job = executor.submit(
-                    process_single_position,
-                    copy_n_paste,
-                    input_position_path=input_position_path,
-                    output_position_path=output_position_path,
-                    input_channel_indices=input_channel_idx,
-                    output_channel_indices=output_channel_idx,
-                    input_time_indices=input_time_indices,
-                    output_time_indices=list(range(len(input_time_indices))),
-                    num_processes=int(slurm_args["slurm_cpus_per_task"]),
-                    **copy_n_paste_kwargs,
-                )
-                jobs.append(job)
+    with submitit.helpers.clean_env(), executor.batch():
+        for i, (
+            input_position_path,
+            output_position_path,
+            input_channel_idx,
+            output_channel_idx,
+            zyx_slicing_params,
+        ) in enumerate(
+            zip(
+                all_data_paths,
+                output_position_paths_list,
+                input_channel_idx_list,
+                output_channel_idx_list,
+                all_slicing_params,
+            )
+        ):
+            # Create slicing parameters for this specific path
+            copy_n_paste_kwargs = {"zyx_slicing_params": zyx_slicing_params}
+
+            job = executor.submit(
+                process_single_position,
+                copy_n_paste,
+                input_position_path=input_position_path,
+                output_position_path=output_position_path,
+                input_channel_indices=input_channel_idx,
+                output_channel_indices=output_channel_idx,
+                input_time_indices=input_time_indices,
+                output_time_indices=list(range(len(input_time_indices))),
+                num_processes=int(slurm_args["slurm_cpus_per_task"]),
+                **copy_n_paste_kwargs,
+            )
+            jobs.append(job)
 
     job_ids = [job.job_id for job in jobs]  # Access job IDs after batch submission
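The rewrite is behavior-preserving: Python enters comma-combined context managers left to right and exits them in reverse, exactly like the nested form. A toy illustration of the ordering (not biahub code):

    from contextlib import contextmanager

    @contextmanager
    def cm(name):
        print("enter", name)
        yield
        print("exit", name)

    with cm("clean_env"), cm("batch"):
        pass
    # enter clean_env, enter batch, exit batch, exit clean_env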
From f2cc94855437000e0cea200a98a2d1e7d04b0487 Mon Sep 17 00:00:00 2001
From: Ivan Ivanov
Date: Fri, 15 Aug 2025 16:13:03 -0700
Subject: [PATCH 19/21] ultrack lazy import

---
 biahub/cli/resolve_function.py | 13 ++++++++++---
 biahub/track.py                | 16 +++++++++++-----
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/biahub/cli/resolve_function.py b/biahub/cli/resolve_function.py
index ea8eb0c5..3e901cbf 100644
--- a/biahub/cli/resolve_function.py
+++ b/biahub/cli/resolve_function.py
@@ -1,8 +1,15 @@
 import numpy as np
-import ultrack
 
-# List of modules to scan for functions
-VALID_MODULES = {"np": np, "ultrack.imgproc": ultrack.imgproc}
+# List of modules to scan for functions - ultrack imported only if available
+VALID_MODULES = {"np": np}
+
+# Try to import ultrack and add to VALID_MODULES if available
+try:
+    import ultrack
+    VALID_MODULES["ultrack.imgproc"] = ultrack.imgproc
+except ImportError:
+    # ultrack is not installed, skip adding it to VALID_MODULES
+    pass
 
 # Dynamically populate FUNCTION_MAP with functions from VALID_MODULES
 FUNCTION_MAP = {
diff --git a/biahub/track.py b/biahub/track.py
index 5c090361..285cb3ad 100644
--- a/biahub/track.py
+++ b/biahub/track.py
@@ -16,8 +16,7 @@
 from iohub import open_ome_zarr
 from iohub.ngff.utils import create_empty_plate
 from numpy.typing import ArrayLike
-from ultrack import MainConfig, Tracker
-from ultrack.utils.array import array_apply
+# Lazy imports for ultrack - imported only when needed in specific functions
 
 from biahub.cli.parsing import (
     config_filepath,
@@ -302,7 +301,7 @@ def resolve_z_slice(z_range: Tuple[int, int], z_shape: int) -> Tuple[slice, int]
 
 
 def run_ultrack(
-    tracking_config: MainConfig,
+    tracking_config: 'MainConfig',  # MainConfig type, imported lazily
     foreground_mask: ArrayLike,
     contour_gradient_map: ArrayLike,
     scale: Union[Tuple[float, float], Tuple[float, float, float]],
@@ -310,6 +309,8 @@ def run_ultrack(
 ):
     """
     Run object tracking using the Ultrack library.
+
+    Note: ultrack is imported lazily within this function.
 
     This function performs object tracking on time-series image data using a binary
     foreground mask and a contour gradient map. It outputs labeled segmentation results,
@@ -358,7 +359,9 @@ def run_ultrack(
     ...     database_path=Path("results/posA")
     ... )
     """
-    cfg = tracking_config
+    from ultrack import Tracker, MainConfig
+
+    cfg: MainConfig = tracking_config
 
     cfg.data_config.working_dir = database_path
 
@@ -450,6 +453,8 @@ def run_preprocessing_pipeline(
     >>> output["raw"].shape
     (10, 256, 256)  # Z-averaged
     """
+    from ultrack.utils.array import array_apply
+
     for image in input_images:
         for channel_name, pipeline in image.channels.items():
             for step in pipeline:
@@ -653,7 +658,7 @@ def track_one_position(
     position_key: str,
     input_images: List[ProcessingInputChannel],
     output_dirpath: Path,
-    tracking_config: MainConfig,
+    tracking_config: 'MainConfig',
     blank_frames_path: Path = None,
     z_slices: Tuple[int, int] = (0, 0),
     scale: Tuple[float, float, float, float, float] = (1, 1, 1, 1, 1),
@@ -792,6 +797,7 @@ def track(
     ...     local=False,
     ... )
     """
+    from ultrack import MainConfig
 
     output_dirpath = Path(output_dirpath)
     dataset_name = output_dirpath.stem
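The pattern in PATCH 19, reduced to a self-contained sketch (hypothetical names): the module imports cleanly without the optional dependency, and both the import cost and the ImportError are deferred to the function that actually needs it.

    def heavy_feature(data):
        # Deferred import: only raises if this feature is used without
        # the optional extra installed.
        import heavy_optional_dependency as heavy  # hypothetical package

        return heavy.process(data)

The quoted 'MainConfig' annotations keep the signatures informative without a module-scope ultrack import; the style pass below moves that annotation into a TYPE_CHECKING block.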
) """ + from ultrack import MainConfig output_dirpath = Path(output_dirpath) dataset_name = output_dirpath.stem From b6442be18fa37844249f00bb6e0fd989edde0c57 Mon Sep 17 00:00:00 2001 From: Ivan Ivanov Date: Fri, 15 Aug 2025 16:40:56 -0700 Subject: [PATCH 20/21] style --- biahub/cli/resolve_function.py | 1 + biahub/track.py | 18 +++++++++++------- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/biahub/cli/resolve_function.py b/biahub/cli/resolve_function.py index 3e901cbf..7fa80ca0 100644 --- a/biahub/cli/resolve_function.py +++ b/biahub/cli/resolve_function.py @@ -6,6 +6,7 @@ # Try to import ultrack and add to VALID_MODULES if available try: import ultrack + VALID_MODULES["ultrack.imgproc"] = ultrack.imgproc except ImportError: # ultrack is not installed, skip adding it to VALID_MODULES diff --git a/biahub/track.py b/biahub/track.py index d1a7fd2b..008d5ada 100644 --- a/biahub/track.py +++ b/biahub/track.py @@ -3,7 +3,10 @@ from glob import glob from pathlib import Path -from typing import Dict, List, Tuple, Union +from typing import TYPE_CHECKING, Dict, List, Tuple, Union + +if TYPE_CHECKING: + from ultrack import MainConfig import click import dask.array as da @@ -16,7 +19,6 @@ from iohub import open_ome_zarr from iohub.ngff.utils import create_empty_plate from numpy.typing import ArrayLike -# Lazy imports for ultrack - imported only when needed in specific functions from biahub.cli.parsing import ( config_filepath, @@ -33,6 +35,8 @@ ) from biahub.settings import ProcessingInputChannel, TrackingSettings +# Lazy imports for ultrack - imported only when needed in specific functions + def mem_nuc_contour(nuclei_prediction: ArrayLike, membrane_prediction: ArrayLike) -> ArrayLike: """ @@ -301,7 +305,7 @@ def resolve_z_slice(z_range: Tuple[int, int], z_shape: int) -> Tuple[slice, int] def run_ultrack( - tracking_config: 'MainConfig', # MainConfig type, imported lazily + tracking_config: MainConfig, foreground_mask: ArrayLike, contour_gradient_map: ArrayLike, scale: Union[Tuple[float, float], Tuple[float, float, float]], @@ -309,7 +313,7 @@ def run_ultrack( ): """ Run object tracking using the Ultrack library. - + Note: ultrack is imported lazily within this function. This function performs object tracking on time-series image data using a binary @@ -359,8 +363,8 @@ def run_ultrack( ... database_path=Path("results/posA") ... ) """ - from ultrack import Tracker, MainConfig - + from ultrack import MainConfig, Tracker + cfg: MainConfig = tracking_config cfg.data_config.working_dir = database_path @@ -660,7 +664,7 @@ def track_one_position( position_key: str, input_images: List[ProcessingInputChannel], output_dirpath: Path, - tracking_config: 'MainConfig', + tracking_config: MainConfig, blank_frames_path: Path = None, z_slices: Tuple[int, int] = (0, 0), scale: Tuple[float, float, float, float, float] = (1, 1, 1, 1, 1), From 980f8b819f301de8b4f984ced92dede8f1db94d6 Mon Sep 17 00:00:00 2001 From: Ziwen Liu Date: Thu, 11 Sep 2025 17:31:34 -0700 Subject: [PATCH 21/21] point to the pre-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 4922306a..cb26183f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ classifiers = [ # list package dependencies here dependencies = [ - "iohub>=0.3,<0.4", + "iohub>=0.3.0a2,<0.4", "matplotlib", "napari", "PyQt6",