From 01e75181ee904282e656ee01180c2d1d3e679239 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 24 Oct 2024 17:48:00 -0400 Subject: [PATCH 1/4] new blank whatsnew --- doc/whats-new.rst | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9a451a836ad..18fae4e0151 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,6 +14,34 @@ What's New np.random.seed(123456) +.. _whats-new.2024.10.1: + +v.2024.10.1 (unreleased) +------------------------ + +New Features +~~~~~~~~~~~~ + + +Breaking changes +~~~~~~~~~~~~~~~~ + + +Deprecations +~~~~~~~~~~~~ + + +Bug fixes +~~~~~~~~~ + + +Documentation +~~~~~~~~~~~~~ + + +Internal Changes +~~~~~~~~~~~~~~~~ + .. _whats-new.2024.10.0: v2024.10.0 (Oct 24th, 2024) From 51be61f896e0b19e73088930bf4c0f8d0fbeed28 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 6 Jan 2025 11:47:13 -0500 Subject: [PATCH 2/4] explicitly check for http urls --- xarray/backends/common.py | 46 +++++++++++++++++++++------------------ xarray/core/utils.py | 10 ++++++++- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 58a98598a5b..0b7ddbcc735 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -22,6 +22,7 @@ NdimSizeLenMixin, attempt_import, emit_user_level_warning, + is_http_url, is_remote_uri, ) from xarray.namedarray.parallelcompat import get_chunked_array_type @@ -143,28 +144,31 @@ def _find_absolute_paths( ['common.py'] """ if isinstance(paths, str): - if is_remote_uri(paths) and kwargs.get("engine") == "zarr": - if TYPE_CHECKING: - import fsspec + if is_remote_uri(paths): + if not is_http_url(paths): + # remote uris that are not http are likely s3 or similar, which support globbing with fsspec + + if TYPE_CHECKING: + import fsspec + else: + fsspec = attempt_import("fsspec") + + fs, _, _ = fsspec.core.get_fs_token_paths( + paths, + mode="rb", + storage_options=kwargs.get("backend_kwargs", {}).get( + "storage_options", {} + ), + expand=False, + ) + tmp_paths = fs.glob(fs._strip_protocol(paths)) # finds directories + return [fs.get_mapper(path) for path in tmp_paths] else: - fsspec = attempt_import("fsspec") - - fs, _, _ = fsspec.core.get_fs_token_paths( - paths, - mode="rb", - storage_options=kwargs.get("backend_kwargs", {}).get( - "storage_options", {} - ), - expand=False, - ) - tmp_paths = fs.glob(fs._strip_protocol(paths)) # finds directories - return [fs.get_mapper(path) for path in tmp_paths] - elif is_remote_uri(paths): - raise ValueError( - "cannot do wild-card matching for paths that are remote URLs " - f"unless engine='zarr' is specified. Got paths: {paths}. " - "Instead, supply paths as an explicit list of strings." - ) + raise ValueError( + "cannot do wild-card matching for paths that are remote http URLs. " + f"Got paths: {paths}. " + "Instead, supply paths as an explicit list of strings." + ) else: return sorted(glob(_normalize_path(paths))) elif isinstance(paths, os.PathLike): diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 11f9ee49ca2..aecd91ea41e 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -617,7 +617,7 @@ def close_on_error(f): def is_remote_uri(path: str) -> bool: - """Finds URLs of the form protocol:// or protocol:: + """Matches URLs of the form protocol:// or protocol:: This also matches for http[s]://, which were the only remote URLs supported in <=v0.16.2. @@ -625,6 +625,14 @@ def is_remote_uri(path: str) -> bool: return bool(re.search(r"^[a-z][a-z0-9]*(\://|\:\:)", path)) +def is_http_url(path: str) -> bool: + """Matches URLs of the form http[s]:// + + Does not match for + """ + return bool(re.search(r"^https?\://", path)) + + def read_magic_number_from_file(filename_or_obj, count=8) -> bytes: # check byte header to determine file type if isinstance(filename_or_obj, bytes): From ae314d1574b26525bcf300d5fcf87431ac96409a Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 6 Jan 2025 11:47:41 -0500 Subject: [PATCH 3/4] test utility for recognising http urls --- xarray/tests/test_utils.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index 9873b271033..21d14daf3df 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -176,6 +176,15 @@ def test_is_remote_uri(): assert utils.is_remote_uri("https://example.com") assert not utils.is_remote_uri(" http://example.com") assert not utils.is_remote_uri("example.nc") + assert utils.is_remote_uri("s3://bucket/example.nc") + + +def test_is_http_url(): + assert utils.is_http_url("http://example.com") + assert utils.is_http_url("https://example.com") + assert not utils.is_http_url(" http://example.com") + assert not utils.is_http_url("example.nc") + assert not utils.is_http_url("s3://bucket/example.nc") class Test_is_uniform_and_sorted: From 06088c2b7ab1aa09c892ca88598b4077ebcf593c Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 7 Jan 2025 20:23:30 -0500 Subject: [PATCH 4/4] change test to match new expected behaviour --- xarray/tests/test_backends.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 330dd1dac1f..554f2d93d67 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4748,11 +4748,11 @@ def test_open_mfdataset(self) -> None: @requires_fsspec def test_open_mfdataset_no_files(self) -> None: - pytest.importorskip("aiobotocore") - - # glob is attempted as of #4823, but finds no files - with pytest.raises(OSError, match=r"no files"): - open_mfdataset("http://some/remote/uri", engine="zarr") + with pytest.raises( + ValueError, + match=r"cannot do wild-card matching for paths that are remote http URLs", + ): + open_mfdataset("http://some/remote/uri") def test_open_mfdataset_2d(self) -> None: original = Dataset({"foo": (["x", "y"], np.random.randn(10, 8))})