diff --git a/CHANGES.md b/CHANGES.md index 34d5a010..e9bbc934 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -5,6 +5,8 @@ ### Improvements - Capture all errors logged by gdal when opening a file fails (#495). +- Add support to read and write ".gpkg.zip" (GDAL >= 3.7), ".shp.zip", and ".shz" + files (#527). ### Bug fixes diff --git a/pyogrio/_compat.py b/pyogrio/_compat.py index acfea471..ff1d8029 100644 --- a/pyogrio/_compat.py +++ b/pyogrio/_compat.py @@ -41,6 +41,7 @@ PANDAS_GE_22 = pandas is not None and Version(pandas.__version__) >= Version("2.2.0") GDAL_GE_352 = __gdal_version__ >= (3, 5, 2) +GDAL_GE_37 = __gdal_version__ >= (3, 7, 0) GDAL_GE_38 = __gdal_version__ >= (3, 8, 0) HAS_GDAL_GEOS = __gdal_geos_version__ is not None diff --git a/pyogrio/tests/conftest.py b/pyogrio/tests/conftest.py index ebc4e18d..63df12c3 100644 --- a/pyogrio/tests/conftest.py +++ b/pyogrio/tests/conftest.py @@ -31,7 +31,10 @@ ".geojsonl": "GeoJSONSeq", ".geojsons": "GeoJSONSeq", ".gpkg": "GPKG", + ".gpkg.zip": "GPKG", ".shp": "ESRI Shapefile", + ".shp.zip": "ESRI Shapefile", + ".shz": "ESRI Shapefile", } # mapping of driver name to extension diff --git a/pyogrio/tests/test_arrow.py b/pyogrio/tests/test_arrow.py index 0a89a92a..67212d10 100644 --- a/pyogrio/tests/test_arrow.py +++ b/pyogrio/tests/test_arrow.py @@ -643,6 +643,9 @@ def test_write_append(request, tmp_path, naturalearth_lowres, ext): pytest.mark.xfail(reason="Bugs with append when writing Arrow to GeoJSON") ) + if ext == ".gpkg.zip": + pytest.skip("Append is not supported for .gpkg.zip") + meta, table = read_arrow(naturalearth_lowres) # coerce output layer to generic Geometry to avoid mixed type errors diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index a65b5baa..00132af1 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -16,7 +16,13 @@ vsi_listtree, vsi_unlink, ) -from pyogrio._compat import GDAL_GE_352, HAS_ARROW_WRITE_API, HAS_PYPROJ, PANDAS_GE_15 +from pyogrio._compat import ( + GDAL_GE_37, + GDAL_GE_352, + HAS_ARROW_WRITE_API, + HAS_PYPROJ, + PANDAS_GE_15, +) from pyogrio.errors import DataLayerError, DataSourceError, FeatureError, GeometryError from pyogrio.geopandas import PANDAS_GE_20, read_dataframe, write_dataframe from pyogrio.raw import ( @@ -1580,6 +1586,30 @@ def test_custom_crs_io(tmp_path, naturalearth_lowres_all_ext, use_arrow): assert df.crs.equals(expected.crs) +@pytest.mark.parametrize("ext", [".gpkg.zip", ".shp.zip", ".shz"]) +@pytest.mark.requires_arrow_write_api +def test_write_read_zipped_ext(tmp_path, naturalearth_lowres, ext, use_arrow): + """Run a basic read and write test on some extra (zipped) extensions.""" + if ext == ".gpkg.zip" and not GDAL_GE_37: + pytest.skip(".gpkg.zip support requires GDAL >= 3.7") + + input_gdf = read_dataframe(naturalearth_lowres) + output_path = tmp_path / f"test{ext}" + + write_dataframe(input_gdf, output_path, use_arrow=use_arrow) + + assert output_path.exists() + result_gdf = read_dataframe(output_path) + + geometry_types = result_gdf.geometry.type.unique() + if DRIVERS[ext] in DRIVERS_NO_MIXED_SINGLE_MULTI: + assert list(geometry_types) == ["MultiPolygon"] + else: + assert set(geometry_types) == {"MultiPolygon", "Polygon"} + + assert_geodataframe_equal(result_gdf, input_gdf, check_index_type=False) + + def test_write_read_mixed_column_values(tmp_path): # use_arrow=True is tested separately below mixed_values = ["test", 1.0, 1, datetime.now(), None, np.nan] diff --git a/pyogrio/tests/test_path.py b/pyogrio/tests/test_path.py index 9cc7943c..e64d6c2f 100644 --- a/pyogrio/tests/test_path.py +++ b/pyogrio/tests/test_path.py @@ -33,10 +33,20 @@ def change_cwd(path): [ # local file paths that should be passed through as is ("data.gpkg", "data.gpkg"), + ("data.gpkg.zip", "data.gpkg.zip"), + ("data.shp.zip", "data.shp.zip"), (Path("data.gpkg"), "data.gpkg"), + (Path("data.gpkg.zip"), "data.gpkg.zip"), + (Path("data.shp.zip"), "data.shp.zip"), ("/home/user/data.gpkg", "/home/user/data.gpkg"), + ("/home/user/data.gpkg.zip", "/home/user/data.gpkg.zip"), + ("/home/user/data.shp.zip", "/home/user/data.shp.zip"), (r"C:\User\Documents\data.gpkg", r"C:\User\Documents\data.gpkg"), + (r"C:\User\Documents\data.gpkg.zip", r"C:\User\Documents\data.gpkg.zip"), + (r"C:\User\Documents\data.shp.zip", r"C:\User\Documents\data.shp.zip"), ("file:///home/user/data.gpkg", "/home/user/data.gpkg"), + ("file:///home/user/data.gpkg.zip", "/home/user/data.gpkg.zip"), + ("file:///home/user/data.shp.zip", "/home/user/data.shp.zip"), ("/home/folder # with hash/data.gpkg", "/home/folder # with hash/data.gpkg"), # cloud URIs ("https://testing/data.gpkg", "/vsicurl/https://testing/data.gpkg"), diff --git a/pyogrio/tests/test_raw_io.py b/pyogrio/tests/test_raw_io.py index 54127d0b..e9a6176a 100644 --- a/pyogrio/tests/test_raw_io.py +++ b/pyogrio/tests/test_raw_io.py @@ -17,7 +17,7 @@ read_info, set_gdal_config_options, ) -from pyogrio._compat import HAS_PYARROW, HAS_SHAPELY +from pyogrio._compat import GDAL_GE_37, HAS_PYARROW, HAS_SHAPELY from pyogrio.errors import DataLayerError, DataSourceError, FeatureError from pyogrio.raw import open_arrow, read, write from pyogrio.tests.conftest import ( @@ -63,9 +63,10 @@ def test_read(naturalearth_lowres): @pytest.mark.parametrize("ext", DRIVERS) def test_read_autodetect_driver(tmp_path, naturalearth_lowres, ext): # Test all supported autodetect drivers + if ext == ".gpkg.zip" and not GDAL_GE_37: + pytest.skip(".gpkg.zip not supported for gdal < 3.7.0") testfile = prepare_testfile(naturalearth_lowres, dst_dir=tmp_path, ext=ext) - assert testfile.suffix == ext assert testfile.exists() meta, _, geometry, fields = read(testfile) @@ -703,6 +704,9 @@ def test_write_append(tmp_path, naturalearth_lowres, ext): if ext in (".geojsonl", ".geojsons") and __gdal_version__ < (3, 6, 0): pytest.skip("Append to GeoJSONSeq only available for GDAL >= 3.6.0") + if ext == ".gpkg.zip": + pytest.skip("Append to .gpkg.zip is not supported") + meta, _, geometry, field_data = read(naturalearth_lowres) # coerce output layer to MultiPolygon to avoid mixed type errors diff --git a/pyogrio/util.py b/pyogrio/util.py index b018ad79..a09b08ae 100644 --- a/pyogrio/util.py +++ b/pyogrio/util.py @@ -9,6 +9,8 @@ from pyogrio._vsi import vsimem_rmtree_toplevel as _vsimem_rmtree_toplevel +MULTI_EXTENSIONS = (".gpkg.zip", ".shp.zip") + def get_vsi_path_or_buffer(path_or_buffer): """Get VSI-prefixed path or bytes buffer depending on type of path_or_buffer. @@ -68,15 +70,23 @@ def vsi_path(path: Union[str, Path]) -> str: # Windows drive letters (e.g. "C:\") confuse `urlparse` as they look like # URL schemes if sys.platform == "win32" and re.match("^[a-zA-Z]\\:", path): + # If it is not a zip file or it is multi-extension zip file that is directly + # supported by a GDAL driver, return the path as is. if not path.split("!")[0].endswith(".zip"): return path + if path.split("!")[0].endswith(MULTI_EXTENSIONS): + return path # prefix then allow to proceed with remaining parsing path = f"zip://{path}" path, archive, scheme = _parse_uri(path) - if scheme or archive or path.endswith(".zip"): + if ( + scheme + or archive + or (path.endswith(".zip") and not path.endswith(MULTI_EXTENSIONS)) + ): return _construct_vsi_path(path, archive, scheme) return path @@ -146,7 +156,10 @@ def _construct_vsi_path(path, archive, scheme) -> str: suffix = "" schemes = scheme.split("+") - if "zip" not in schemes and (archive.endswith(".zip") or path.endswith(".zip")): + if "zip" not in schemes and ( + archive.endswith(".zip") + or (path.endswith(".zip") and not path.endswith(MULTI_EXTENSIONS)) + ): schemes.insert(0, "zip") if schemes: