Skip to content

Add CLI for converting v2 metadata to v3 #3257

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 29 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
45bb4e5
add rough cli converter structure
K-Meech Jul 1, 2025
456c9e7
allow zstd, gzip and numcodecs zarr 3 compression
K-Meech Jul 1, 2025
242a338
convert filters to v3
K-Meech Jul 1, 2025
1045c33
create BytesCodec with correct endian
K-Meech Jul 1, 2025
4e2442f
handle C vs F order in v2 metadata
K-Meech Jul 1, 2025
c63f0b8
save group and array metadata to file
K-Meech Jul 2, 2025
2947ce4
create overall conversion functions for store, array or group
K-Meech Jul 2, 2025
ba81755
add minimal typer cli
K-Meech Jul 3, 2025
67f9580
add initial tests for converter
K-Meech Jul 3, 2025
0d7c2c8
add tests for conversion of groups and nested groups and arrays
K-Meech Jul 3, 2025
cf39580
add tests for conversion of compressors and filters
K-Meech Jul 3, 2025
11499e7
test conversion of order and endianness
K-Meech Jul 3, 2025
90b0996
add tests for edge cases of incorrect codecs
K-Meech Jul 3, 2025
85159bb
add tests for / separator
K-Meech Jul 4, 2025
53ba166
draft of metadata remover and add test for internal paths
K-Meech Jul 7, 2025
d4cdc04
add clear command to cli with tests
K-Meech Jul 7, 2025
dfdc729
add test for metadata removal with path#
K-Meech Jul 7, 2025
ad60991
add verbose logging option
K-Meech Jul 7, 2025
66bae0d
add dry run option to cli
K-Meech Jul 8, 2025
97df9bf
add test for dry-run
K-Meech Jul 8, 2025
42e0435
add zarr-converter script and enable cli dep in tests
K-Meech Jul 9, 2025
9e20b39
use v2 chunk key encoding type
K-Meech Jul 9, 2025
6586e66
Merge branch 'main' of github.com:K-Meech/zarr-python into km/v2-v3-c…
K-Meech Jul 14, 2025
ce409a3
update endianness of test data type
K-Meech Jul 14, 2025
fb7136b
Merge branch 'main' of github.com:K-Meech/zarr-python into km/v2-v3-c…
K-Meech Jul 16, 2025
6585f24
check converted arrays can be accessed
K-Meech Jul 16, 2025
46e958d
Merge branch 'main' of github.com:K-Meech/zarr-python into km/v2-v3-c…
K-Meech Jul 16, 2025
08fc138
remove uses of pathlib walk, as it didn't exist in python 3.11
K-Meech Jul 16, 2025
3540434
include tags in checkout for gpu test, to avoid numcodecs.zarr3 reque…
K-Meech Jul 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/gpu_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ jobs:

steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # grab all branches and tags
# - name: cuda-toolkit
# uses: Jimver/cuda-toolkit@v0.2.16
# id: cuda-toolkit
Expand Down
6 changes: 5 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ remote = [
gpu = [
"cupy-cuda12x",
]
cli = ["typer"]
# Development extras
test = [
"coverage",
Expand Down Expand Up @@ -113,6 +114,9 @@ docs = [
'pytest'
]

[project.scripts]
zarr-converter = "zarr.core.metadata.converter.cli:app"


[project.urls]
"Bug Tracker" = "https://github.com/zarr-developers/zarr-python/issues"
Expand Down Expand Up @@ -159,7 +163,7 @@ deps = ["minimal", "optional"]

[tool.hatch.envs.test.overrides]
matrix.deps.dependencies = [
{value = "zarr[remote, remote_tests, test, optional]", if = ["optional"]}
{value = "zarr[remote, remote_tests, test, optional, cli]", if = ["optional"]}
]

[tool.hatch.envs.test.scripts]
Expand Down
Empty file.
112 changes: 112 additions & 0 deletions src/zarr/core/metadata/converter/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import logging
from typing import Annotated, Literal, cast

import typer

from zarr.core.metadata.converter.converter_v2_v3 import convert_v2_to_v3, remove_metadata
from zarr.core.sync import sync

# Typer application object; the commands and callback defined in this module
# register themselves on it via ``@app.command()`` / ``@app.callback()``.
app = typer.Typer()

# Module-level logger, used by the commands to emit the dry-run notices.
logger = logging.getLogger(__name__)


def _set_logging_config(verbose: bool) -> None:
    """Apply the CLI's basic logging configuration.

    Parameters
    ----------
    verbose
        If true, the level passed to ``logging.basicConfig`` is
        ``logging.INFO``; otherwise it is ``logging.WARNING``.
    """
    level = logging.INFO if verbose else logging.WARNING
    # Records are printed as the bare message, with no level or logger-name prefix.
    logging.basicConfig(format="%(message)s", level=level)


def _set_verbose_level() -> None:
    """Set the root logger's level to ``logging.INFO``."""
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)


@app.command()  # type: ignore[misc]
def convert(
    store: Annotated[
        str,
        typer.Argument(
            help="Store or path to directory in file system or name of zip file e.g. 'data/example-1.zarr', 's3://example-bucket/example'..."
        ),
    ],
    path: Annotated[str | None, typer.Option(help="The path within the store to open")] = None,
    dry_run: Annotated[
        bool,
        typer.Option(
            help="Enable a dry-run: files that would be converted are logged, but no new files are actually created."
        ),
    ] = False,
) -> None:
    """Convert all v2 metadata in a zarr hierarchy to v3. This will create a zarr.json file at each level
    (for every group / array). V2 files (.zarray, .zattrs etc.) will be left as-is.
    """
    # NOTE: the docstring above doubles as the command's CLI help text, so it is kept verbatim.
    if dry_run:
        # Raise the root logger to INFO so the dry-run log is emitted regardless of --verbose.
        _set_verbose_level()
        dry_run_notice = "Dry run enabled - no new files will be created. Log of files that would be created on a real run:"
        logger.info(dry_run_notice)

    # The dry_run flag is forwarded unchanged to the converter.
    convert_v2_to_v3(store=store, path=path, dry_run=dry_run)


@app.command()  # type: ignore[misc]
def clear(
    store: Annotated[
        str,
        typer.Argument(
            help="Store or path to directory in file system or name of zip file e.g. 'data/example-1.zarr', 's3://example-bucket/example'..."
        ),
    ],
    zarr_format: Annotated[
        int,
        typer.Argument(
            help="Which format's metadata to remove - 2 or 3.",
            min=2,
            max=3,
        ),
    ],
    path: Annotated[str | None, typer.Option(help="The path within the store to open")] = None,
    dry_run: Annotated[
        bool,
        typer.Option(
            help="Enable a dry-run: files that would be deleted are logged, but no files are actually removed."
        ),
    ] = False,
) -> None:
    """Remove all v2 (.zarray, .zattrs, .zgroup, .zmetadata) or v3 (zarr.json) metadata files from the given Zarr.
    Note - this will remove metadata files at all levels of the hierarchy (every group and array).
    """
    # NOTE: the docstring above doubles as the command's CLI help text, so it is kept verbatim.
    if dry_run:
        # Raise the root logger to INFO so the dry-run log is emitted regardless of --verbose.
        _set_verbose_level()
        dry_run_notice = "Dry run enabled - no files will be deleted. Log of files that would be deleted on a real run:"
        logger.info(dry_run_notice)

    # The CLI argument is declared as an int bounded to 2..3 (min/max above);
    # the cast only narrows the static type and performs no runtime check.
    target_format = cast(Literal[2, 3], zarr_format)
    removal = remove_metadata(
        store=store, zarr_format=target_format, path=path, dry_run=dry_run
    )
    # The result of remove_metadata is handed to sync() to be run to completion.
    sync(removal)


@app.callback()  # type: ignore[misc]
def main(
    verbose: Annotated[
        bool,
        typer.Option(
            help="enable verbose logging - will print info about metadata files being deleted / saved."
        ),
    ] = False,
) -> None:
    """
    Convert metadata from v2 to v3. See available commands below - access help for individual commands with
    zarr-converter COMMAND --help.
    """
    # Registered as the app-level callback: its only job is to configure logging
    # from the top-level --verbose option.
    _set_logging_config(verbose=verbose)


# Allow running this module directly as a script: invoke the Typer app.
if __name__ == "__main__":
    app()
Loading
Loading