Skip to content

Commit 2aeee99

Browse files
authored
Merge pull request #155 from thewtex/tensorstore-crash
BUG: Large tensorstore already-exists failure
2 parents 4ac1fc0 + 133de45 commit 2aeee99

File tree

3 files changed

+254
-160
lines changed

3 files changed

+254
-160
lines changed

ngff_zarr/to_ngff_zarr.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,17 +116,22 @@ def _write_with_tensorstore(
116116
zarr_format,
117117
dimension_names=None,
118118
internal_chunk_shape=None,
119+
full_array_shape=None,
120+
create_dataset=True,
119121
) -> None:
120122
"""Write array using tensorstore backend"""
121123
import tensorstore as ts
122124

125+
# Use full array shape if provided, otherwise use the region array shape
126+
dataset_shape = full_array_shape if full_array_shape is not None else array.shape
127+
123128
spec = {
124129
"kvstore": {
125130
"driver": "file",
126131
"path": store_path,
127132
},
128133
"metadata": {
129-
"shape": array.shape,
134+
"shape": dataset_shape,
130135
},
131136
}
132137
if zarr_format == 2:
@@ -152,8 +157,21 @@ def _write_with_tensorstore(
152157
]
153158
else:
154159
raise ValueError(f"Unsupported zarr format: {zarr_format}")
155-
dataset = ts.open(spec, create=True, dtype=array.dtype).result()
156-
dataset[...] = array[region]
160+
161+
# Create the dataset when requested; if it already exists, fall back to opening it
162+
try:
163+
if create_dataset:
164+
dataset = ts.open(spec, create=True, dtype=array.dtype).result()
165+
else:
166+
dataset = ts.open(spec, create=False, dtype=array.dtype).result()
167+
except Exception as e:
168+
if "ALREADY_EXISTS" in str(e) and create_dataset:
169+
# Dataset already exists, open it without creating
170+
dataset = ts.open(spec, create=False, dtype=array.dtype).result()
171+
else:
172+
raise
173+
174+
dataset[region] = array
157175

158176

159177
def _validate_ngff_parameters(
@@ -317,6 +335,8 @@ def _write_array_with_tensorstore(
317335
zarr_format: int,
318336
dimension_names: Optional[Tuple[str, ...]],
319337
region: Tuple[slice, ...],
338+
full_array_shape: Optional[Tuple[int, ...]] = None,
339+
create_dataset: bool = True,
320340
**kwargs,
321341
) -> None:
322342
"""Write an array using the TensorStore backend."""
@@ -329,6 +349,8 @@ def _write_array_with_tensorstore(
329349
chunks,
330350
zarr_format=zarr_format,
331351
dimension_names=dimension_names,
352+
full_array_shape=full_array_shape,
353+
create_dataset=create_dataset,
332354
**kwargs,
333355
)
334356
else: # Sharding
@@ -340,6 +362,8 @@ def _write_array_with_tensorstore(
340362
zarr_format=zarr_format,
341363
dimension_names=dimension_names,
342364
internal_chunk_shape=internal_chunk_shape,
365+
full_array_shape=full_array_shape,
366+
create_dataset=create_dataset,
343367
**kwargs,
344368
)
345369

@@ -472,6 +496,8 @@ def _handle_large_array_writing(
472496
zarr_format,
473497
dimension_names,
474498
region,
499+
full_array_shape=arr.shape,
500+
create_dataset=(region_index == 0), # Only create on first region
475501
**kwargs,
476502
)
477503
else:
@@ -854,6 +880,8 @@ def to_ngff_zarr(
854880
zarr_format,
855881
dimension_names,
856882
region,
883+
full_array_shape=arr.shape,
884+
create_dataset=True, # Always create for small arrays
857885
**kwargs,
858886
)
859887
else:

0 commit comments

Comments
 (0)