Skip to content

Commit 133de45

Browse files
committed
BUG: Large tensorstore already-exists failure
When writing large arrays in regions, the code calls _write_array_with_tensorstore multiple times (once per region), and each call tries to create a new TensorStore dataset with create=True. After the first region is written the dataset already exists, so every subsequent call fails with "ALREADY_EXISTS". Modify _write_with_tensorstore to handle an existing dataset: create the dataset only when writing the first region, and open the existing dataset for all subsequent regions. Re: #152
1 parent 4ac1fc0 commit 133de45

File tree

3 files changed

+254
-160
lines changed

3 files changed

+254
-160
lines changed

ngff_zarr/to_ngff_zarr.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,17 +116,22 @@ def _write_with_tensorstore(
116116
zarr_format,
117117
dimension_names=None,
118118
internal_chunk_shape=None,
119+
full_array_shape=None,
120+
create_dataset=True,
119121
) -> None:
120122
"""Write array using tensorstore backend"""
121123
import tensorstore as ts
122124

125+
# Use full array shape if provided, otherwise use the region array shape
126+
dataset_shape = full_array_shape if full_array_shape is not None else array.shape
127+
123128
spec = {
124129
"kvstore": {
125130
"driver": "file",
126131
"path": store_path,
127132
},
128133
"metadata": {
129-
"shape": array.shape,
134+
"shape": dataset_shape,
130135
},
131136
}
132137
if zarr_format == 2:
@@ -152,8 +157,21 @@ def _write_with_tensorstore(
152157
]
153158
else:
154159
raise ValueError(f"Unsupported zarr format: {zarr_format}")
155-
dataset = ts.open(spec, create=True, dtype=array.dtype).result()
156-
dataset[...] = array[region]
160+
161+
# Try to open existing dataset first, create only if needed
162+
try:
163+
if create_dataset:
164+
dataset = ts.open(spec, create=True, dtype=array.dtype).result()
165+
else:
166+
dataset = ts.open(spec, create=False, dtype=array.dtype).result()
167+
except Exception as e:
168+
if "ALREADY_EXISTS" in str(e) and create_dataset:
169+
# Dataset already exists, open it without creating
170+
dataset = ts.open(spec, create=False, dtype=array.dtype).result()
171+
else:
172+
raise
173+
174+
dataset[region] = array
157175

158176

159177
def _validate_ngff_parameters(
@@ -317,6 +335,8 @@ def _write_array_with_tensorstore(
317335
zarr_format: int,
318336
dimension_names: Optional[Tuple[str, ...]],
319337
region: Tuple[slice, ...],
338+
full_array_shape: Optional[Tuple[int, ...]] = None,
339+
create_dataset: bool = True,
320340
**kwargs,
321341
) -> None:
322342
"""Write an array using the TensorStore backend."""
@@ -329,6 +349,8 @@ def _write_array_with_tensorstore(
329349
chunks,
330350
zarr_format=zarr_format,
331351
dimension_names=dimension_names,
352+
full_array_shape=full_array_shape,
353+
create_dataset=create_dataset,
332354
**kwargs,
333355
)
334356
else: # Sharding
@@ -340,6 +362,8 @@ def _write_array_with_tensorstore(
340362
zarr_format=zarr_format,
341363
dimension_names=dimension_names,
342364
internal_chunk_shape=internal_chunk_shape,
365+
full_array_shape=full_array_shape,
366+
create_dataset=create_dataset,
343367
**kwargs,
344368
)
345369

@@ -472,6 +496,8 @@ def _handle_large_array_writing(
472496
zarr_format,
473497
dimension_names,
474498
region,
499+
full_array_shape=arr.shape,
500+
create_dataset=(region_index == 0), # Only create on first region
475501
**kwargs,
476502
)
477503
else:
@@ -854,6 +880,8 @@ def to_ngff_zarr(
854880
zarr_format,
855881
dimension_names,
856882
region,
883+
full_array_shape=arr.shape,
884+
create_dataset=True, # Always create for small arrays
857885
**kwargs,
858886
)
859887
else:

0 commit comments

Comments
 (0)