From 7e327b972dcfddb7772cb7290a9bb5d68e8ab4cc Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sun, 4 Aug 2024 01:03:44 +0200 Subject: [PATCH 01/33] ENH: add support to use /vsimem/ input files --- pyogrio/_io.pyx | 35 +++++++++++++++++++----------- pyogrio/tests/test_geopandas_io.py | 17 +++++++++++++++ pyogrio/tests/test_path.py | 8 ------- pyogrio/util.py | 5 ----- 4 files changed, 39 insertions(+), 26 deletions(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 7c6427ce..126ceff6 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -13,12 +13,12 @@ import os import sys import warnings +from io import BytesIO from libc.stdint cimport uint8_t, uintptr_t from libc.stdlib cimport malloc, free from libc.string cimport strlen from libc.math cimport isnan from cpython.pycapsule cimport PyCapsule_GetPointer - cimport cython from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer @@ -2250,7 +2250,7 @@ def ogr_write( cdef int num_records = -1 cdef int num_field_data = len(field_data) if field_data is not None else 0 cdef int num_fields = len(fields) if fields is not None else 0 - cdef bint is_vsi = False + cdef bint is_tmp_vsi = False if num_fields != num_field_data: raise ValueError("field_data array needs to be same length as fields array") @@ -2291,12 +2291,16 @@ def ogr_write( try: # Setup in-memory handler if needed - path = get_ogr_vsimem_write_path(path_or_fp, driver) - is_vsi = path.startswith('/vsimem/') + if isinstance(path_or_fp, str): + path = path_or_fp + is_tmp_vsi = False + else: + path = get_ogr_vsimem_write_path(path_or_fp, driver) + is_tmp_vsi = path.startswith('/vsimem/') # Setup dataset and layer layer_created = create_ogr_dataset_layer( - path, is_vsi, layer, driver, crs, geometry_type, encoding, + path, is_tmp_vsi, layer, driver, crs, geometry_type, encoding, dataset_kwargs, layer_kwargs, append, dataset_metadata, layer_metadata, &ogr_dataset, &ogr_layer, @@ -2501,7 +2505,7 @@ def ogr_write( raise DataSourceError(f"Failed to write features to dataset {path}; {exc}") # copy in-memory file back to path_or_fp object - if is_vsi: + if is_tmp_vsi: read_vsimem_to_buffer(path, path_or_fp) finally: @@ -2523,7 +2527,7 @@ def ogr_write( if ogr_dataset != NULL: ogr_close(ogr_dataset) - if is_vsi: + if is_tmp_vsi: delete_vsimem_file(path) @@ -2548,7 +2552,7 @@ def ogr_write_arrow( cdef OGRDataSourceH ogr_dataset = NULL cdef OGRLayerH ogr_layer = NULL cdef char **options = NULL - cdef bint is_vsi = False + cdef bint is_tmp_vsi = False cdef ArrowArrayStream* stream = NULL cdef ArrowSchema schema cdef ArrowArray array @@ -2557,11 +2561,16 @@ def ogr_write_arrow( array.release = NULL try: - path = get_ogr_vsimem_write_path(path_or_fp, driver) - is_vsi = path.startswith('/vsimem/') + # Setup in-memory handler if needed + if isinstance(path_or_fp, str): + path = path_or_fp + is_tmp_vsi = False + else: + path = get_ogr_vsimem_write_path(path_or_fp, driver) + is_tmp_vsi = path.startswith('/vsimem/') layer_created = create_ogr_dataset_layer( - path, is_vsi, layer, driver, crs, geometry_type, encoding, + path, is_tmp_vsi, layer, driver, crs, geometry_type, encoding, dataset_kwargs, layer_kwargs, append, dataset_metadata, layer_metadata, &ogr_dataset, &ogr_layer, @@ -2622,7 +2631,7 @@ def ogr_write_arrow( raise DataSourceError(f"Failed to write features to dataset {path}; {exc}") # copy in-memory file back to path_or_fp object - if is_vsi: + if is_tmp_vsi: read_vsimem_to_buffer(path, path_or_fp) finally: @@ -2642,7 +2651,7 @@ def ogr_write_arrow( if ogr_dataset != NULL: ogr_close(ogr_dataset) - if is_vsi: + if is_tmp_vsi: delete_vsimem_file(path) diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index 12146c3d..e5da2da4 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -1534,6 +1534,23 @@ def test_write_read_null(tmp_path, use_arrow): assert result_gdf["object_str"][2] is None +def test_write_read_vsimem(naturalearth_lowres_vsi, use_arrow): + path, _ = naturalearth_lowres_vsi + mem_path = f"/vsimem/{str(path)}" + + input = read_dataframe(path, use_arrow=use_arrow) + assert len(input) == 177 + + try: + write_dataframe(input, mem_path, use_arrow=use_arrow) + result = read_dataframe(mem_path, use_arrow=use_arrow) + assert len(result) == 177 + finally: + # TODO: delete_vsimem_file isn't public (yet) + # delete_vsimem_file(mem_path) + pass + + @pytest.mark.parametrize( "wkt,geom_types", [ diff --git a/pyogrio/tests/test_path.py b/pyogrio/tests/test_path.py index 3dde5363..99c9a2cd 100644 --- a/pyogrio/tests/test_path.py +++ b/pyogrio/tests/test_path.py @@ -343,11 +343,3 @@ def test_get_vsi_path_or_buffer_obj_to_string(): def test_get_vsi_path_or_buffer_fixtures_to_string(tmp_path): path = tmp_path / "test.gpkg" assert get_vsi_path_or_buffer(path) == str(path) - - -@pytest.mark.parametrize( - "raw_path", ["/vsimem/test.shp.zip", "/vsizip//vsimem/test.shp.zip"] -) -def test_vsimem_path_exception(raw_path): - with pytest.raises(ValueError, match=""): - vsi_path(raw_path) diff --git a/pyogrio/util.py b/pyogrio/util.py index 6bf04693..4721ea94 100644 --- a/pyogrio/util.py +++ b/pyogrio/util.py @@ -52,11 +52,6 @@ def vsi_path(path: str) -> str: """ - if "/vsimem/" in path: - raise ValueError( - "path cannot contain /vsimem/ directly; to use an in-memory dataset a bytes object must be passed instead" - ) - # path is already in GDAL format if path.startswith("/vsi"): return path From 0c1458bc0aff7136531603add06a41859e7ba549 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Fri, 9 Aug 2024 22:55:08 +0200 Subject: [PATCH 02/33] ENH: make it possible to use /vsimem files --- docs/source/api.rst | 2 +- pyogrio/__init__.py | 10 +- pyogrio/_io.pyx | 55 ++++---- pyogrio/_ogr.pxd | 7 + pyogrio/_vsi.pxd | 2 +- pyogrio/_vsi.pyx | 176 ++++++++++++++++++++--- pyogrio/core.py | 81 +++++++++++ pyogrio/raw.py | 2 +- pyogrio/tests/conftest.py | 25 ++++ pyogrio/tests/test_arrow.py | 19 +++ pyogrio/tests/test_core.py | 220 +++++++++++++++++++++++------ pyogrio/tests/test_geopandas_io.py | 43 ++++-- pyogrio/tests/test_path.py | 15 +- pyogrio/util.py | 14 +- 14 files changed, 562 insertions(+), 109 deletions(-) diff --git a/docs/source/api.rst b/docs/source/api.rst index 007470d7..105fbb3f 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -5,7 +5,7 @@ Core ---- .. automodule:: pyogrio - :members: list_drivers, detect_write_driver, list_layers, read_bounds, read_info, set_gdal_config_options, get_gdal_config_option, __gdal_version__, __gdal_version_string__ + :members: list_drivers, detect_write_driver, list_layers, read_bounds, read_info, set_gdal_config_options, get_gdal_config_option, vsi_listtree, vsi_rmtree, vsi_unlink, __gdal_version__, __gdal_version_string__ GeoPandas integration --------------------- diff --git a/pyogrio/__init__.py b/pyogrio/__init__.py index 66fbbcf7..97311dc2 100644 --- a/pyogrio/__init__.py +++ b/pyogrio/__init__.py @@ -15,6 +15,9 @@ set_gdal_config_options, get_gdal_config_option, get_gdal_data_path, + vsi_listtree, + vsi_rmtree, + vsi_unlink, __gdal_version__, __gdal_version_string__, __gdal_geos_version__, @@ -36,10 +39,13 @@ "set_gdal_config_options", "get_gdal_config_option", "get_gdal_data_path", - "read_arrow", "open_arrow", - "write_arrow", + "read_arrow", "read_dataframe", + "vsi_listtree", + "vsi_rmtree", + "vsi_unlink", + "write_arrow", "write_dataframe", "__gdal_version__", "__gdal_version_string__", diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 126ceff6..82e664e9 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -21,6 +21,7 @@ from libc.math cimport isnan from cpython.pycapsule cimport PyCapsule_GetPointer cimport cython from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer +from pathlib import Path import numpy as np @@ -1184,7 +1185,7 @@ def ogr_read( ): cdef int err = 0 - cdef bint is_vsimem = isinstance(path_or_buffer, bytes) + cdef bint is_tmp_vsimem = isinstance(path_or_buffer, bytes) cdef const char *path_c = NULL cdef char **dataset_options = NULL cdef const char *where_c = NULL @@ -1224,7 +1225,7 @@ def ogr_read( raise ValueError("'max_features' must be >= 0") try: - path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem else path_or_buffer + path = read_buffer_to_vsimem(path_or_buffer) if is_tmp_vsimem else path_or_buffer if encoding: # for shapefiles, SHAPE_ENCODING must be set before opening the file @@ -1362,8 +1363,8 @@ def ogr_read( CPLFree(prev_shape_encoding) prev_shape_encoding = NULL - if is_vsimem: - delete_vsimem_file(path) + if is_tmp_vsimem: + vsimem_rmtree_toplevel(path) return ( meta, @@ -1424,7 +1425,7 @@ def ogr_open_arrow( ): cdef int err = 0 - cdef bint is_vsimem = isinstance(path_or_buffer, bytes) + cdef bint is_tmp_vsimem = isinstance(path_or_buffer, bytes) cdef const char *path_c = NULL cdef char **dataset_options = NULL cdef const char *where_c = NULL @@ -1480,7 +1481,7 @@ def ogr_open_arrow( reader = None try: - path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem else path_or_buffer + path = read_buffer_to_vsimem(path_or_buffer) if is_tmp_vsimem else path_or_buffer if encoding: override_shape_encoding = True @@ -1679,8 +1680,8 @@ def ogr_open_arrow( CPLFree(prev_shape_encoding) prev_shape_encoding = NULL - if is_vsimem: - delete_vsimem_file(path) + if is_tmp_vsimem: + vsimem_rmtree_toplevel(path) def ogr_read_bounds( @@ -1697,7 +1698,7 @@ def ogr_read_bounds( object mask=None): cdef int err = 0 - cdef bint is_vsimem = isinstance(path_or_buffer, bytes) + cdef bint is_tmp_vsimem = isinstance(path_or_buffer, bytes) cdef const char *path_c = NULL cdef const char *where_c = NULL cdef OGRDataSourceH ogr_dataset = NULL @@ -1715,7 +1716,7 @@ def ogr_read_bounds( raise ValueError("'max_features' must be >= 0") try: - path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem else path_or_buffer + path = read_buffer_to_vsimem(path_or_buffer) if is_tmp_vsimem else path_or_buffer ogr_dataset = ogr_open(path.encode('UTF-8'), 0, NULL) if layer is None: @@ -1744,8 +1745,8 @@ def ogr_read_bounds( GDALClose(ogr_dataset) ogr_dataset = NULL - if is_vsimem: - delete_vsimem_file(path) + if is_tmp_vsimem: + vsimem_rmtree_toplevel(path) return bounds @@ -1758,7 +1759,7 @@ def ogr_read_info( int force_feature_count=False, int force_total_bounds=False): - cdef bint is_vsimem = isinstance(path_or_buffer, bytes) + cdef bint is_tmp_vsimem = isinstance(path_or_buffer, bytes) cdef const char *path_c = NULL cdef char **dataset_options = NULL cdef OGRDataSourceH ogr_dataset = NULL @@ -1767,7 +1768,7 @@ def ogr_read_info( cdef bint override_shape_encoding = False try: - path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem else path_or_buffer + path = read_buffer_to_vsimem(path_or_buffer) if is_tmp_vsimem else path_or_buffer if encoding: override_shape_encoding = True @@ -1826,19 +1827,19 @@ def ogr_read_info( if prev_shape_encoding != NULL: CPLFree(prev_shape_encoding) - if is_vsimem: - delete_vsimem_file(path) + if is_tmp_vsimem: + vsimem_rmtree_toplevel(path) return meta def ogr_list_layers(object path_or_buffer): - cdef bint is_vsimem = isinstance(path_or_buffer, bytes) + cdef bint is_tmp_vsimem = isinstance(path_or_buffer, bytes) cdef const char *path_c = NULL cdef OGRDataSourceH ogr_dataset = NULL try: - path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem else path_or_buffer + path = read_buffer_to_vsimem(path_or_buffer) if is_tmp_vsimem else path_or_buffer ogr_dataset = ogr_open(path.encode('UTF-8'), 0, NULL) layers = get_layer_names(ogr_dataset) @@ -1847,8 +1848,8 @@ def ogr_list_layers(object path_or_buffer): GDALClose(ogr_dataset) ogr_dataset = NULL - if is_vsimem: - delete_vsimem_file(path) + if is_tmp_vsimem: + vsimem_rmtree_toplevel(path) return layers @@ -2014,7 +2015,7 @@ cdef infer_field_types(list dtypes): cdef create_ogr_dataset_layer( str path, - bint is_vsi, + bint is_tmp_vsimem, str layer, str driver, str crs, @@ -2048,6 +2049,8 @@ cdef create_ogr_dataset_layer( encoding : str Only used if `driver` is "ESRI Shapefile". If not None, it overrules the default shapefile encoding, which is "UTF-8" in pyogrio. + is_tmp_vsimem : bool + Whether the file path is meant to save a temporary memory file to. Returns ------- @@ -2075,8 +2078,8 @@ cdef create_ogr_dataset_layer( driver_b = driver.encode('UTF-8') driver_c = driver_b - # in-memory dataset is always created from scratch - path_exists = os.path.exists(path) if not is_vsi else False + # temporary in-memory dataset is always created from scratch + path_exists = os.path.exists(path) if not is_tmp_vsimem else False if not layer: layer = os.path.splitext(os.path.split(path)[1])[0] @@ -2112,7 +2115,7 @@ cdef create_ogr_dataset_layer( raise exc # otherwise create from scratch - if is_vsi: + if is_tmp_vsimem: VSIUnlink(path_c) else: os.unlink(path) @@ -2528,7 +2531,7 @@ def ogr_write( ogr_close(ogr_dataset) if is_tmp_vsi: - delete_vsimem_file(path) + vsimem_rmtree_toplevel(path) def ogr_write_arrow( @@ -2652,7 +2655,7 @@ def ogr_write_arrow( ogr_close(ogr_dataset) if is_tmp_vsi: - delete_vsimem_file(path) + vsimem_rmtree_toplevel(path) cdef get_arrow_extension_metadata(const ArrowSchema* schema): diff --git a/pyogrio/_ogr.pxd b/pyogrio/_ogr.pxd index 9369ba71..7b321e20 100644 --- a/pyogrio/_ogr.pxd +++ b/pyogrio/_ogr.pxd @@ -36,6 +36,10 @@ cdef extern from "cpl_error.h" nogil: void CPLPopErrorHandler() +cdef extern from "cpl_port.h": + ctypedef char **CSLConstList + + cdef extern from "cpl_string.h": char** CSLAddNameValue(char **list, const char *name, const char *value) char** CSLSetNameValue(char **list, const char *name, const char *value) @@ -53,6 +57,9 @@ cdef extern from "cpl_vsi.h" nogil: long st_mode int st_mtime + int VSIStatL(const char *pszFilename, VSIStatBufL *psStatBuf) + int VSI_ISDIR(int mode) + char** VSIReadDirRecursive(const char *path) int VSIFCloseL(VSILFILE *fp) int VSIFFlushL(VSILFILE *fp) int VSIUnlink(const char *path) diff --git a/pyogrio/_vsi.pxd b/pyogrio/_vsi.pxd index afa2633a..a4960f25 100644 --- a/pyogrio/_vsi.pxd +++ b/pyogrio/_vsi.pxd @@ -1,4 +1,4 @@ cdef str get_ogr_vsimem_write_path(object path_or_fp, str driver) cdef str read_buffer_to_vsimem(bytes bytes_buffer) cdef read_vsimem_to_buffer(str path, object out_buffer) -cdef delete_vsimem_file(str path) \ No newline at end of file +cpdef vsimem_rmtree_toplevel(str path) \ No newline at end of file diff --git a/pyogrio/_vsi.pyx b/pyogrio/_vsi.pyx index 47b8c11d..5d4500cd 100644 --- a/pyogrio/_vsi.pyx +++ b/pyogrio/_vsi.pyx @@ -1,6 +1,8 @@ +import fnmatch from io import BytesIO from uuid import uuid4 + from libc.stdlib cimport malloc, free from libc.string cimport memcpy @@ -17,7 +19,7 @@ cdef str get_ogr_vsimem_write_path(object path_or_fp, str driver): sibling files (though drivers that create sibling files are not supported for in-memory files). - Caller is responsible for deleting the directory via delete_vsimem_file() + Caller is responsible for deleting the directory via vsimem_rmtree_toplevel() Parameters ---------- @@ -28,8 +30,13 @@ cdef str get_ogr_vsimem_write_path(object path_or_fp, str driver): if not isinstance(path_or_fp, BytesIO): return path_or_fp - # Create in-memory directory to contain auxiliary files - memfilename = uuid4().hex + # check for existing bytes + if path_or_fp.getbuffer().nbytes > 0: + raise NotImplementedError("writing to existing in-memory object is not supported") + + # Create in-memory directory to contain auxiliary files. + # Prefix with "pyogrio_" so it is clear the directory was created by pyogrio. + memfilename = f"pyogrio_{uuid4().hex}" VSIMkdir(f"/vsimem/{memfilename}".encode("UTF-8"), 0666) # file extension is required for some drivers, set it based on driver metadata @@ -40,10 +47,6 @@ cdef str get_ogr_vsimem_write_path(object path_or_fp, str driver): path = f"/vsimem/{memfilename}/{memfilename}{ext}" - # check for existing bytes - if path_or_fp.getbuffer().nbytes > 0: - raise NotImplementedError("writing to existing in-memory object is not supported") - return path @@ -54,7 +57,8 @@ cdef str read_buffer_to_vsimem(bytes bytes_buffer): will be prefixed with /vsizip/ and suffixed with .zip to enable proper reading by GDAL. - Caller is responsible for deleting the in-memory file via delete_vsimem_file(). + Caller is responsible for deleting the in-memory file via + vsimem_rmtree_toplevel(). Parameters ---------- @@ -65,12 +69,15 @@ cdef str read_buffer_to_vsimem(bytes bytes_buffer): is_zipped = len(bytes_buffer) > 4 and bytes_buffer[:4].startswith(b"PK\x03\x04") ext = ".zip" if is_zipped else "" - path = f"/vsimem/{uuid4().hex}{ext}" + # Prefix with "pyogrio_" so it is clear the file was created by pyogrio. + path = f"/vsimem/pyogrio_{uuid4().hex}{ext}" # Create an in-memory object that references bytes_buffer # NOTE: GDAL does not copy the contents of bytes_buffer; it must remain # in scope through the duration of using this file - vsi_handle = VSIFileFromMemBuffer(path.encode("UTF-8"), bytes_buffer, num_bytes, 0) + vsi_handle = VSIFileFromMemBuffer( + path.encode("UTF-8"), bytes_buffer, num_bytes, 0 + ) if vsi_handle == NULL: raise OSError("failed to read buffer into in-memory file") @@ -88,8 +95,8 @@ cdef read_vsimem_to_buffer(str path, object out_buffer): """Copy bytes from in-memory file to buffer This will automatically unlink the in-memory file pointed to by path; caller - is still responsible for calling delete_vsimem_file() to cleanup any other - files contained in the in-memory directory. + is still responsible for calling vsimem_rmtree_toplevel() to cleanup any + other files contained in the in-memory directory. Parameters: ----------- @@ -118,23 +125,154 @@ cdef read_vsimem_to_buffer(str path, object out_buffer): CPLFree(vsi_buffer) -cdef delete_vsimem_file(str path): - """ Recursively delete in-memory path or directory containing path +cpdef vsimem_rmtree_toplevel(str path): + """Remove the toplevel file or toplevel directory containing the file. - This is used for final cleanup of an in-memory dataset, which may have been - created within a directory to contain sibling files. + This is used for final cleanup of an in-memory dataset. The path can point + to either: + - a toplevel file (directly in /vsimem/). + - a file in a directory, with possibly some sibling files. + - a zip file, which apparently is reported as a directory by VSI_ISDIR. + + Except for the first case, the toplevel directory (direct subdirectory of + /vsimem/) will be determined and will be removed recursively. Additional VSI handlers may be chained to the left of /vsimem/ in path and will be ignored. + Even though it is only meant for "internal use", the function is declared + as cpdef, so it can be called from tests as well. + Parameters: ----------- path : str path to in-memory file + """ + cdef VSIStatBufL st_buf if "/vsimem/" not in path: - return + raise ValueError(f"Path is not a /vsimem/ path: '{path}'") + + # Determine the toplevel directory of the file + mempath_parts = path.split("/vsimem/")[1].split("/") + if len(mempath_parts) == 0: + raise OSError("Removing /vsimem/ is not supported") + + toplevel_path = f"/vsimem/{mempath_parts[0]}" + + if not VSIStatL(toplevel_path.encode("UTF-8"), &st_buf) == 0: + raise FileNotFoundError(f"Path does not exist: '{path}'") + if VSI_ISDIR(st_buf.st_mode): + errcode = VSIRmdirRecursive(toplevel_path.encode("UTF-8")) + else: + errcode = VSIUnlink(toplevel_path.encode("UTF-8")) + + if errcode != 0: + raise OSError(f"Error removing '{path}': {errcode=}") + + +def ogr_vsi_listtree(str path, str pattern=None): + """Recursively list the contents in a vsi directory. + + An fnmatch pattern can be specified to filter the directories/files + returned. + + Parameters: + ----------- + path : str + Path to the vsi directory to be listed. + pattern : str + Fnmatch pattern to filter results. - root = "/vsimem/" + path.split("/vsimem/")[1].split("/")[0] - VSIRmdirRecursive(root.encode("UTF-8")) + """ + cdef const char *path_c + cdef int n + cdef char** papszFiles + cdef VSIStatBufL st_buf + + try: + path_b = path.encode("utf-8") + except UnicodeDecodeError: + path_b = path + path_c = path_b + if not VSIStatL(path_c, &st_buf) == 0: + raise FileNotFoundError(f"Path does not exist: '{path}'") + if not VSI_ISDIR(st_buf.st_mode): + raise NotADirectoryError(f"Path is not a directory: '{path}'") + + papszFiles = VSIReadDirRecursive(path_c) + n = CSLCount(papszFiles) + files = [] + for i in range(n): + files.append(papszFiles[i].decode("utf-8")) + CSLDestroy(papszFiles) + + # Apply filter pattern + if pattern is not None: + files = fnmatch.filter(files, pattern) + + # Prepend files with the base path + if not path.endswith("/"): + path = f"{path}/" + files = [f"{path}{file}" for file in files] + + return files + + +def ogr_vsi_rmtree(str path): + """Recursively remove vsi directory. + + Parameters: + ----------- + path : str + path to the vsi directory to be removed. + + """ + cdef const char *path_c + cdef VSIStatBufL st_buf + + try: + path_b = path.encode("utf-8") + except UnicodeDecodeError: + path_b = path + path_c = path_b + if not VSIStatL(path_c, &st_buf) == 0: + raise FileNotFoundError(f"Path does not exist: '{path}'") + if not VSI_ISDIR(st_buf.st_mode): + raise NotADirectoryError(f"Path is not a directory: '{path}'") + if path.endswith("/vsimem") or path.endswith("/vsimem/"): + raise OSError("Removing /vsimem/ is not supported") + + errcode = VSIRmdirRecursive(path_c) + + if errcode != 0: + raise OSError(f"Error in rmtree of '{path}': {errcode=}") + + +def ogr_vsi_unlink(str path): + """Remove vsi file. + + Parameters: + ----------- + path : str + path to the vsi file to be removed. + + """ + cdef const char *path_c + cdef VSIStatBufL st_buf + + try: + path_b = path.encode("utf-8") + except UnicodeDecodeError: + path_b = path + path_c = path_b + if not VSIStatL(path_c, &st_buf) == 0: + raise FileNotFoundError(f"Path does not exist: '{path}'") + if VSI_ISDIR(st_buf.st_mode): + raise IsADirectoryError(f"Path is a directory: '{path}'") + + errcode = VSIUnlink(path_c) + + if errcode != 0: + raise OSError(f"Error removing '{path}': {errcode=}") diff --git a/pyogrio/core.py b/pyogrio/core.py index a51de125..95ee03f1 100644 --- a/pyogrio/core.py +++ b/pyogrio/core.py @@ -1,3 +1,5 @@ +from pathlib import Path +from typing import Union from pyogrio._env import GDALEnv from pyogrio.util import ( get_vsi_path_or_buffer, @@ -22,6 +24,12 @@ ) from pyogrio._err import _register_error_handler from pyogrio._io import ogr_list_layers, ogr_read_bounds, ogr_read_info + from pyogrio._vsi import ( + ogr_vsi_listtree, + ogr_vsi_rmtree, + ogr_vsi_unlink, + vsimem_rmtree_toplevel, + ) _init_gdal_data() _init_proj_data() @@ -318,3 +326,76 @@ def get_gdal_data_path(): str, or None if data directory was not found """ return _get_gdal_data_path() + + +def vsi_listtree(path: Union[str, Path], pattern: str = None): + """Recursively list the contents in a vsi directory. + + An fnmatch pattern can be specified to filter the directories/files + returned. + + Parameters: + ----------- + path : str or pathlib.Path + Path to the vsi directory to be listed. + pattern : str, optional + Fnmatch pattern to filter results. + + """ + if isinstance(path, Path): + path = path.as_posix() + + return ogr_vsi_listtree(path, pattern=pattern) + + +def vsi_rmtree(path: Union[str, Path]): + """Recursively remove vsi directory. + + Parameters: + ----------- + path : str or pathlib.Path + path to the vsi directory to be removed. + + """ + if isinstance(path, Path): + path = path.as_posix() + + ogr_vsi_rmtree(path) + + +def vsi_unlink(path: Union[str, Path]): + """Remove a vsi file. + + Parameters + ---------- + path : str or pathlib.Path + path to vsimem file to be removed + + """ + if isinstance(path, Path): + path = path.as_posix() + + ogr_vsi_unlink(path) + + +def _vsimem_rmtree_toplevel(path: Union[str, Path]): + """Remove the parent directory of the file path recursively. + + This is used for final cleanup of an in-memory dataset, which may have been + created within a directory to contain sibling files. + + Additional VSI handlers may be chained to the left of /vsimem/ in path and + will be ignored. + + Remark: function is defined here to be able to run tests on it. + + Parameters: + ----------- + path : str or pathlib.Path + path to in-memory file + + """ + if isinstance(path, Path): + path = path.as_posix() + + vsimem_rmtree_toplevel(path) diff --git a/pyogrio/raw.py b/pyogrio/raw.py index a0cb4ee5..1eeab3f9 100644 --- a/pyogrio/raw.py +++ b/pyogrio/raw.py @@ -562,7 +562,7 @@ def _get_write_path_driver(path, driver, append=False): ) else: - path = vsi_path(str(path)) + path = vsi_path(path) if driver is None: driver = detect_write_driver(path) diff --git a/pyogrio/tests/conftest.py b/pyogrio/tests/conftest.py index d562a4d1..871a7847 100644 --- a/pyogrio/tests/conftest.py +++ b/pyogrio/tests/conftest.py @@ -16,6 +16,7 @@ HAS_PYARROW, HAS_SHAPELY, ) +from pyogrio.core import vsi_rmtree from pyogrio.raw import read, write _data_dir = Path(__file__).parent.resolve() / "fixtures" @@ -35,6 +36,15 @@ ALL_EXTS = [".fgb", ".geojson", ".geojsonl", ".gpkg", ".shp"] +START_FID = { + ".fgb": 0, + ".geojson": 0, + ".geojsonl": 0, + ".geojsons": 0, + ".gpkg": 1, + ".shp": 0, +} + def pytest_report_header(config): drivers = ", ".join( @@ -122,6 +132,21 @@ def naturalearth_lowres_vsi(tmp_path, naturalearth_lowres): return path, f"/vsizip/{path}/{naturalearth_lowres.name}" +@pytest.fixture(scope="function") +def naturalearth_lowres_vsimem(naturalearth_lowres): + """Write naturalearth_lowres to a vsimem file for vsi tests""" + + meta, _, geometry, field_data = read(naturalearth_lowres) + dst_path = Path(f"/vsimem/pyogrio_test/{naturalearth_lowres.stem}.gpkg") + meta["spatial_index"] = False + meta["geometry_type"] = "MultiPolygon" + + write(dst_path, geometry, field_data, **meta) + yield dst_path + + vsi_rmtree(dst_path.parent) + + @pytest.fixture(scope="session") def test_fgdb_vsi(): return f"/vsizip/{_data_dir}/test_fgdb.gdb.zip" diff --git a/pyogrio/tests/test_arrow.py b/pyogrio/tests/test_arrow.py index c058af3b..cef98b18 100644 --- a/pyogrio/tests/test_arrow.py +++ b/pyogrio/tests/test_arrow.py @@ -18,6 +18,7 @@ list_layers, get_gdal_config_option, set_gdal_config_options, + vsi_listtree, ) from pyogrio.raw import open_arrow, read_arrow, write, write_arrow from pyogrio.errors import DataSourceError, FieldError, DataLayerError @@ -161,6 +162,9 @@ def test_read_arrow_vsi(naturalearth_lowres_vsi): table = read_arrow(naturalearth_lowres_vsi[1])[1] assert len(table) == 177 + # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. + assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + def test_read_arrow_bytes(geojson_bytes): meta, table = read_arrow(geojson_bytes) @@ -168,12 +172,18 @@ def test_read_arrow_bytes(geojson_bytes): assert meta["fields"].shape == (5,) assert len(table) == 3 + # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. + assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + def test_read_arrow_nonseekable_bytes(nonseekable_bytes): meta, table = read_arrow(nonseekable_bytes) assert meta["fields"].shape == (0,) assert len(table) == 1 + # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. + assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + def test_read_arrow_filelike(geojson_filelike): meta, table = read_arrow(geojson_filelike) @@ -181,6 +191,9 @@ def test_read_arrow_filelike(geojson_filelike): assert meta["fields"].shape == (5,) assert len(table) == 3 + # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. + assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + def test_open_arrow_pyarrow(naturalearth_lowres): with open_arrow(naturalearth_lowres, use_pyarrow=True) as (meta, reader): @@ -884,6 +897,9 @@ def test_write_memory_driver_required(naturalearth_lowres): geometry_name=meta["geometry_name"] or "wkb_geometry", ) + # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. + assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + @requires_arrow_write_api @pytest.mark.parametrize("driver", ["ESRI Shapefile", "OpenFileGDB"]) @@ -990,6 +1006,9 @@ def test_write_open_file_handle(tmp_path, naturalearth_lowres): geometry_name=meta["geometry_name"] or "wkb_geometry", ) + # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. + assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + @requires_arrow_write_api def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text): diff --git a/pyogrio/tests/test_core.py b/pyogrio/tests/test_core.py index ef73ea41..1d87b5ad 100644 --- a/pyogrio/tests/test_core.py +++ b/pyogrio/tests/test_core.py @@ -1,3 +1,5 @@ +from pathlib import Path + import numpy as np from numpy import array_equal, allclose import pytest @@ -5,6 +7,7 @@ from pyogrio import ( __gdal_version__, __gdal_geos_version__, + detect_write_driver, list_drivers, list_layers, read_bounds, @@ -12,11 +15,15 @@ set_gdal_config_options, get_gdal_config_option, get_gdal_data_path, + vsi_listtree, + vsi_rmtree, + vsi_unlink, ) -from pyogrio.core import detect_write_driver from pyogrio._compat import GDAL_GE_38 +from pyogrio.core import _vsimem_rmtree_toplevel from pyogrio.errors import DataSourceError, DataLayerError -from pyogrio.tests.conftest import prepare_testfile, requires_shapely +from pyogrio.raw import read, write +from pyogrio.tests.conftest import START_FID, prepare_testfile, requires_shapely from pyogrio._env import GDALEnv @@ -151,7 +158,12 @@ def test_list_drivers(): assert len(drivers) == len(expected) -def test_list_layers(naturalearth_lowres, naturalearth_lowres_vsi, test_fgdb_vsi): +def test_list_layers( + naturalearth_lowres, + naturalearth_lowres_vsi, + naturalearth_lowres_vsimem, + test_fgdb_vsi, +): assert array_equal( list_layers(naturalearth_lowres), [["naturalearth_lowres", "Polygon"]] ) @@ -160,6 +172,11 @@ def test_list_layers(naturalearth_lowres, naturalearth_lowres_vsi, test_fgdb_vsi list_layers(naturalearth_lowres_vsi[1]), [["naturalearth_lowres", "Polygon"]] ) + assert array_equal( + list_layers(naturalearth_lowres_vsimem), + [["naturalearth_lowres", "MultiPolygon"]], + ) + # Measured 3D is downgraded to plain 3D during read # Make sure this warning is raised with pytest.warns( @@ -198,22 +215,18 @@ def test_list_layers_filelike(geojson_filelike): assert layers[0, 0] == "test" -def test_read_bounds(naturalearth_lowres): - fids, bounds = read_bounds(naturalearth_lowres) - assert fids.shape == (177,) - assert bounds.shape == (4, 177) - - assert fids[0] == 0 - # Fiji; wraps antimeridian - assert allclose(bounds[:, 0], [-180.0, -18.28799, 180.0, -16.02088]) - +@pytest.mark.parametrize( + "testfile", + ["naturalearth_lowres", "naturalearth_lowres_vsimem", "naturalearth_lowres_vsi"], +) +def test_read_bounds(testfile, request): + path = request.getfixturevalue(testfile) + path = path if not isinstance(path, tuple) else path[1] -def test_read_bounds_vsi(naturalearth_lowres_vsi): - fids, bounds = read_bounds(naturalearth_lowres_vsi[1]) + fids, bounds = read_bounds(path) assert fids.shape == (177,) assert bounds.shape == (4, 177) - - assert fids[0] == 0 + assert fids[0] == START_FID[Path(path).suffix] # Fiji; wraps antimeridian assert allclose(bounds[:, 0], [-180.0, -18.28799, 180.0, -16.02088]) @@ -299,12 +312,9 @@ def test_read_bounds_bbox(naturalearth_lowres_all_ext): fids, bounds = read_bounds(naturalearth_lowres_all_ext, bbox=(-85, 8, -80, 10)) assert fids.shape == (2,) - if naturalearth_lowres_all_ext.suffix == ".gpkg": - # fid in gpkg is 1-based - assert array_equal(fids, [34, 35]) # PAN, CRI - else: - # fid in other formats is 0-based - assert array_equal(fids, [33, 34]) # PAN, CRI + fids_expected = np.array([33, 34]) # PAN, CRI + fids_expected += START_FID[naturalearth_lowres_all_ext.suffix] + assert array_equal(fids, fids_expected) assert bounds.shape == (4, 2) assert allclose( @@ -369,12 +379,8 @@ def test_read_bounds_mask(naturalearth_lowres_all_ext, mask, expected): fids = read_bounds(naturalearth_lowres_all_ext, mask=mask)[0] - if naturalearth_lowres_all_ext.suffix == ".gpkg": - # fid in gpkg is 1-based - assert array_equal(fids, np.array(expected) + 1) - else: - # fid in other formats is 0-based - assert array_equal(fids, expected) + fids_expected = np.array(expected) + START_FID[naturalearth_lowres_all_ext.suffix] + assert array_equal(fids, fids_expected) @pytest.mark.skipif( @@ -390,21 +396,15 @@ def test_read_bounds_bbox_intersects_vs_envelope_overlaps(naturalearth_lowres_al if __gdal_geos_version__ is None: # bboxes for CAN, RUS overlap but do not intersect geometries assert fids.shape == (4,) - if naturalearth_lowres_all_ext.suffix == ".gpkg": - # fid in gpkg is 1-based - assert array_equal(fids, [4, 5, 19, 28]) # CAN, USA, RUS, MEX - else: - # fid in other formats is 0-based - assert array_equal(fids, [3, 4, 18, 27]) # CAN, USA, RUS, MEX + fids_expected = np.array([3, 4, 18, 27]) # CAN, USA, RUS, MEX + fids_expected += START_FID[naturalearth_lowres_all_ext.suffix] + assert array_equal(fids, fids_expected) else: assert fids.shape == (2,) - if naturalearth_lowres_all_ext.suffix == ".gpkg": - # fid in gpkg is 1-based - assert array_equal(fids, [5, 28]) # USA, MEX - else: - # fid in other formats is 0-based - assert array_equal(fids, [4, 27]) # USA, MEX + fids_expected = np.array([4, 27]) # USA, MEX + fids_expected += START_FID[naturalearth_lowres_all_ext.suffix] + assert array_equal(fids, fids_expected) @pytest.mark.parametrize("naturalearth_lowres", [".shp", ".gpkg"], indirect=True) @@ -443,8 +443,14 @@ def test_read_info(naturalearth_lowres): raise ValueError(f"test not implemented for ext {naturalearth_lowres.suffix}") -def test_read_info_vsi(naturalearth_lowres_vsi): - meta = read_info(naturalearth_lowres_vsi[1]) +@pytest.mark.parametrize( + "testfile", ["naturalearth_lowres_vsimem", "naturalearth_lowres_vsi"] +) +def test_read_info_vsi(testfile, request): + path = request.getfixturevalue(testfile) + path = path if not isinstance(path, tuple) else path[1] + + meta = read_info(path) assert meta["fields"].shape == (5,) assert meta["features"] == 177 @@ -601,3 +607,133 @@ def test_error_handling_warning(capfd, naturalearth_lowres): read_info(naturalearth_lowres, INVALID="YES") assert capfd.readouterr().err == "" + + +def test_vsimem_listtree(naturalearth_lowres): + # Prepare test data in /vsimem + meta, _, geometry, field_data = read(naturalearth_lowres) + meta["spatial_index"] = False + meta["geometry_type"] = "MultiPolygon" + test_file_path = Path(f"/vsimem/pyogrio_test_{naturalearth_lowres.stem}.gpkg") + test_dir_path = Path(f"/vsimem/pyogrio_dir_test/{naturalearth_lowres.stem}.gpkg") + + write(test_file_path, geometry, field_data, **meta) + write(test_dir_path, geometry, field_data, **meta) + + # Check basic listtree + files = vsi_listtree("/vsimem/") + assert test_file_path.as_posix() in files + assert test_dir_path.as_posix() in files + + # Check listtree with pattern + files = vsi_listtree("/vsimem/", pattern="pyogrio_dir_test*.gpkg") + assert test_file_path.as_posix() not in files + assert test_dir_path.as_posix() in files + + files = vsi_listtree("/vsimem/", pattern="pyogrio_test*.gpkg") + assert test_file_path.as_posix() in files + assert test_dir_path.as_posix() not in files + + # Clean up + vsi_unlink(test_file_path) + vsi_rmtree(test_dir_path.parent) + + +def test_vsimem_rmtree_error(naturalearth_lowres_vsimem): + with pytest.raises(NotADirectoryError, match="Path is not a directory"): + vsi_rmtree(naturalearth_lowres_vsimem) + + with pytest.raises(FileNotFoundError, match="Path does not exist"): + vsi_rmtree("/vsimem/non-existent") + + with pytest.raises(OSError, match="Removing /vsimem/ is not supported"): + vsi_rmtree("/vsimem") + with pytest.raises(OSError, match="Removing /vsimem/ is not supported"): + vsi_rmtree("/vsimem/") + + # Verify that naturalearth_lowres_vsimem still exists. + assert naturalearth_lowres_vsimem.as_posix() in vsi_listtree("/vsimem") + + +def test_vsimem_rmtree_unlink(naturalearth_lowres): + """Test all basic functionalities of file handling in /vsimem/.""" + # Prepare test data in /vsimem + meta, _, geometry, field_data = read(naturalearth_lowres) + meta["spatial_index"] = False + meta["geometry_type"] = "MultiPolygon" + test_file_path = Path(f"/vsimem/{naturalearth_lowres.stem}.gpkg") + test_dir_path = Path(f"/vsimem/test_dir/{naturalearth_lowres.stem}.gpkg") + + write(test_file_path, geometry, field_data, **meta) + write(test_dir_path, geometry, field_data, **meta) + + # Check if everything was created properly with listtree + files = vsi_listtree("/vsimem/") + assert test_file_path.as_posix() in files + assert test_dir_path.as_posix() in files + + # Remove test_dir and its contents + vsi_rmtree(test_dir_path.parent) + files = vsi_listtree("/vsimem/") + assert test_file_path.as_posix() in files + assert test_dir_path.as_posix() not in files + + # Remove test_file + vsi_unlink(test_file_path) + + +def test_vsimem_unlink_error(naturalearth_lowres_vsimem): + with pytest.raises(IsADirectoryError, match="Path is a directory"): + vsi_unlink(naturalearth_lowres_vsimem.parent) + + with pytest.raises(FileNotFoundError, match="Path does not exist"): + vsi_unlink("/vsimem/non-existent.gpkg") + + +def test_vsimem_rmtree_toplevel(naturalearth_lowres): + # Prepare test data in /vsimem/ + meta, _, geometry, field_data = read(naturalearth_lowres) + meta["spatial_index"] = False + meta["geometry_type"] = "MultiPolygon" + test_dir_path = Path(f"/vsimem/test/{naturalearth_lowres.stem}.gpkg") + test_dir2_path = Path(f"/vsimem/test2/test2/{naturalearth_lowres.stem}.gpkg") + + write(test_dir_path, geometry, field_data, **meta) + write(test_dir2_path, geometry, field_data, **meta) + + # Check if everything was created properly with listtree + files = vsi_listtree("/vsimem/") + assert test_dir_path.as_posix() in files + assert test_dir2_path.as_posix() in files + + # Test deleting parent dir of file in single directory + _vsimem_rmtree_toplevel(test_dir_path) + files = vsi_listtree("/vsimem/") + assert test_dir_path.parent.as_posix() not in files + assert test_dir2_path.as_posix() in files + + # Test deleting toplevel dir of file in a subdirectory + _vsimem_rmtree_toplevel(test_dir2_path) + assert test_dir2_path.as_posix() not in vsi_listtree("/vsimem/") + + +def test_vsimem_rmtree_toplevel_error(naturalearth_lowres): + # Prepare test data in /vsimem + meta, _, geometry, field_data = read(naturalearth_lowres) + meta["spatial_index"] = False + meta["geometry_type"] = "MultiPolygon" + test_file_path = Path(f"/vsimem/pyogrio_test_{naturalearth_lowres.stem}.gpkg") + + write(test_file_path, geometry, field_data, **meta) + assert test_file_path.as_posix() in vsi_listtree("/vsimem/") + + # Deleting parent dir of non-existent file should raise an error. + with pytest.raises(FileNotFoundError, match="Path does not exist"): + _vsimem_rmtree_toplevel("/vsimem/test/non-existent.gpkg") + + # File should still be there + assert test_file_path.as_posix() in vsi_listtree("/vsimem/") + + # Cleanup. + vsi_unlink(test_file_path) + assert test_file_path not in vsi_listtree("/vsimem/") diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index e5da2da4..840e0b02 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -8,7 +8,14 @@ import numpy as np import pytest -from pyogrio import list_layers, list_drivers, read_info, __gdal_version__ +from pyogrio import ( + list_layers, + list_drivers, + read_info, + vsi_listtree, + vsi_unlink, + __gdal_version__, +) from pyogrio.errors import DataLayerError, DataSourceError, FeatureError, GeometryError from pyogrio.geopandas import read_dataframe, write_dataframe, PANDAS_GE_20 from pyogrio.raw import ( @@ -18,6 +25,7 @@ from pyogrio.tests.conftest import ( ALL_EXTS, DRIVERS, + START_FID, requires_pyarrow_api, requires_arrow_write_api, requires_gdal_geos, @@ -346,12 +354,9 @@ def test_read_fid_as_index(naturalearth_lowres_all_ext, use_arrow): fid_as_index=True, **kwargs, ) - if naturalearth_lowres_all_ext.suffix in [".gpkg"]: - # File format where fid starts at 1 - assert_index_equal(df.index, pd.Index([3, 4], name="fid")) - else: - # File format where fid starts at 0 - assert_index_equal(df.index, pd.Index([2, 3], name="fid")) + fids_expected = pd.Index([2, 3], name="fid") + fids_expected += START_FID[naturalearth_lowres_all_ext.suffix] + assert_index_equal(df.index, fids_expected) def test_read_fid_as_index_only(naturalearth_lowres, use_arrow): @@ -1536,7 +1541,7 @@ def test_write_read_null(tmp_path, use_arrow): def test_write_read_vsimem(naturalearth_lowres_vsi, use_arrow): path, _ = naturalearth_lowres_vsi - mem_path = f"/vsimem/{str(path)}" + mem_path = f"/vsimem/{path.name}" input = read_dataframe(path, use_arrow=use_arrow) assert len(input) == 177 @@ -1546,9 +1551,7 @@ def test_write_read_vsimem(naturalearth_lowres_vsi, use_arrow): result = read_dataframe(mem_path, use_arrow=use_arrow) assert len(result) == 177 finally: - # TODO: delete_vsimem_file isn't public (yet) - # delete_vsimem_file(mem_path) - pass + vsi_unlink(mem_path) @pytest.mark.parametrize( @@ -1938,6 +1941,9 @@ def test_write_memory(naturalearth_lowres, driver): check_dtype=not is_json, ) + # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. + assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + def test_write_memory_driver_required(naturalearth_lowres): df = read_dataframe(naturalearth_lowres) @@ -1950,6 +1956,9 @@ def test_write_memory_driver_required(naturalearth_lowres): ): write_dataframe(df.head(1), buffer, driver=None, layer="test") + # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. + assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + @pytest.mark.parametrize("driver", ["ESRI Shapefile", "OpenFileGDB"]) def test_write_memory_unsupported_driver(naturalearth_lowres, driver): @@ -1965,6 +1974,9 @@ def test_write_memory_unsupported_driver(naturalearth_lowres, driver): ): write_dataframe(df, buffer, driver=driver, layer="test") + # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. + assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + @pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"]) def test_write_memory_append_unsupported(naturalearth_lowres, driver): @@ -1977,6 +1989,9 @@ def test_write_memory_append_unsupported(naturalearth_lowres, driver): ): write_dataframe(df.head(1), buffer, driver=driver, layer="test", append=True) + # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. + assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + def test_write_memory_existing_unsupported(naturalearth_lowres): df = read_dataframe(naturalearth_lowres) @@ -1988,6 +2003,9 @@ def test_write_memory_existing_unsupported(naturalearth_lowres): ): write_dataframe(df.head(1), buffer, driver="GeoJSON", layer="test") + # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. + assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + def test_write_open_file_handle(tmp_path, naturalearth_lowres): """Verify that writing to an open file handle is not currently supported""" @@ -2009,6 +2027,9 @@ def test_write_open_file_handle(tmp_path, naturalearth_lowres): with z.open("test.geojson", "w") as f: write_dataframe(df.head(1), f) + # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. + assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + @pytest.mark.parametrize("ext", ["gpkg", "geojson"]) def test_non_utf8_encoding_io(tmp_path, ext, encoded_text): diff --git a/pyogrio/tests/test_path.py b/pyogrio/tests/test_path.py index 99c9a2cd..095114c4 100644 --- a/pyogrio/tests/test_path.py +++ b/pyogrio/tests/test_path.py @@ -32,6 +32,7 @@ def change_cwd(path): [ # local file paths that should be passed through as is ("data.gpkg", "data.gpkg"), + (Path("data.gpkg"), "data.gpkg"), ("/home/user/data.gpkg", "/home/user/data.gpkg"), (r"C:\User\Documents\data.gpkg", r"C:\User\Documents\data.gpkg"), ("file:///home/user/data.gpkg", "/home/user/data.gpkg"), @@ -84,6 +85,8 @@ def change_cwd(path): "s3://testing/test.zip!a/b/item.shp", "/vsizip/vsis3/testing/test.zip/a/b/item.shp", ), + ("/vsimem/data.gpkg", "/vsimem/data.gpkg"), + (Path("/vsimem/data.gpkg"), "/vsimem/data.gpkg"), ], ) def test_vsi_path(path, expected): @@ -335,9 +338,15 @@ def test_uri_s3_dataframe(aws_env_setup): assert len(df) == 67 -def test_get_vsi_path_or_buffer_obj_to_string(): - path = Path("/tmp/test.gpkg") - assert get_vsi_path_or_buffer(path) == str(path) +@pytest.mark.parametrize( + "path, expected", + [ + (Path("/tmp/test.gpkg"), str(Path("/tmp/test.gpkg"))), + (Path("/vsimem/test.gpkg"), "/vsimem/test.gpkg"), + ], +) +def test_get_vsi_path_or_buffer_obj_to_string(path, expected): + assert get_vsi_path_or_buffer(path) == expected def test_get_vsi_path_or_buffer_fixtures_to_string(tmp_path): diff --git a/pyogrio/util.py b/pyogrio/util.py index 4721ea94..1337c17a 100644 --- a/pyogrio/util.py +++ b/pyogrio/util.py @@ -1,6 +1,7 @@ from pathlib import Path import re import sys +from typing import Union from urllib.parse import urlparse from packaging.version import Version @@ -27,9 +28,10 @@ def get_vsi_path_or_buffer(path_or_buffer): str or bytes """ - # force path objects to string to specifically ignore their read method + # treat Path objects here already to ignore their read method + to avoid backslashes + # on Windows. if isinstance(path_or_buffer, Path): - return vsi_path(str(path_or_buffer)) + return vsi_path(path_or_buffer) if isinstance(path_or_buffer, bytes): return path_or_buffer @@ -46,11 +48,17 @@ def get_vsi_path_or_buffer(path_or_buffer): return vsi_path(str(path_or_buffer)) -def vsi_path(path: str) -> str: +def vsi_path(path: Union[str, Path]) -> str: """ Ensure path is a local path or a GDAL-compatible vsi path. """ + # Convert Path objects to string, but for vsi paths, keep posix style path. + if isinstance(path, Path): + if sys.platform == "win32" and path.as_posix().startswith("/vsi"): + path = path.as_posix() + else: + path = str(path) # path is already in GDAL format if path.startswith("/vsi"): From 8f7733d069271a373f36a9a179f0794e07884431 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Fri, 9 Aug 2024 23:41:48 +0200 Subject: [PATCH 03/33] Update CHANGES.md --- CHANGES.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 99c837ae..9a151a89 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,11 @@ # CHANGELOG +## 0.10.0 (yyyy-mm-dd) + +### Improvements + +- Add support to read, write, list and remove `/vsimem/` files (#457) + ## 0.9.1 (yyyy-mm-dd) ### Bug fixes From 949e30f2238a0cf49301730edd47eb6d154b1536 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Fri, 9 Aug 2024 23:42:24 +0200 Subject: [PATCH 04/33] temporarily add pyproj as dependency in CI --- ci/envs/latest.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/envs/latest.yml b/ci/envs/latest.yml index 0643a67f..00f41faf 100644 --- a/ci/envs/latest.yml +++ b/ci/envs/latest.yml @@ -8,4 +8,5 @@ dependencies: - shapely>=2 - geopandas-base - pyarrow + - pyproj From 72c5248ac050ca23f2dfcae7458af5387613b685 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sat, 10 Aug 2024 01:53:06 +0200 Subject: [PATCH 05/33] Try to fix directory creation issue for gdal <3.8 --- pyogrio/_io.pyx | 5 +++++ pyogrio/_ogr.pxd | 1 + 2 files changed, 6 insertions(+) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 82e664e9..968219ca 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1932,6 +1932,11 @@ cdef void * ogr_create(const char* path_c, const char* driver_c, char** options) except CPLE_BaseError as exc: raise DataSourceError(str(exc)) + # If it is a vsimem file, create directories first to support GDAL < 3.8. Starting + # from GDAL 3.8 directories are created automatically. + if "/vsimem/" in path_c: + VSIMkdirRecursive(Path(path_c).parent.as_posix().encode("utf-8"), 0666) + # Create the dataset try: ogr_dataset = exc_wrap_pointer(GDALCreate(ogr_driver, path_c, 0, 0, 0, GDT_Unknown, options)) diff --git a/pyogrio/_ogr.pxd b/pyogrio/_ogr.pxd index 7b321e20..75f999d3 100644 --- a/pyogrio/_ogr.pxd +++ b/pyogrio/_ogr.pxd @@ -68,6 +68,7 @@ cdef extern from "cpl_vsi.h" nogil: unsigned char *VSIGetMemFileBuffer(const char *path, vsi_l_offset *data_len, int take_ownership) int VSIMkdir(const char *path, long mode) + int VSIMkdirRecursive(const char *path, long mode) int VSIRmdirRecursive(const char *pszDirname) From 4786a2bc460ad7ac7186ccfb57491192eec7ee94 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sat, 10 Aug 2024 02:03:21 +0200 Subject: [PATCH 06/33] Update _io.pyx --- pyogrio/_io.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 968219ca..880e31f6 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1934,8 +1934,8 @@ cdef void * ogr_create(const char* path_c, const char* driver_c, char** options) # If it is a vsimem file, create directories first to support GDAL < 3.8. Starting # from GDAL 3.8 directories are created automatically. - if "/vsimem/" in path_c: - VSIMkdirRecursive(Path(path_c).parent.as_posix().encode("utf-8"), 0666) + if "/vsimem/" in str(path_c): + VSIMkdirRecursive(Path(str(path_c)).parent.as_posix().encode("utf-8"), 0666) # Create the dataset try: From c7c7a2cc695dda00e87ce868b6adece266c17d27 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sat, 10 Aug 2024 02:33:31 +0200 Subject: [PATCH 07/33] Update _io.pyx --- pyogrio/_io.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 880e31f6..4fcb872b 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1935,7 +1935,9 @@ cdef void * ogr_create(const char* path_c, const char* driver_c, char** options) # If it is a vsimem file, create directories first to support GDAL < 3.8. Starting # from GDAL 3.8 directories are created automatically. if "/vsimem/" in str(path_c): - VSIMkdirRecursive(Path(str(path_c)).parent.as_posix().encode("utf-8"), 0666) + parent = Path(str(path_c)).parent.as_posix() + if not parent.endswith("/vsimem"): + VSIMkdirRecursive(parent.encode("utf-8"), 0666) # Create the dataset try: From 4d69b9acd08a45cf87fda29919262918b877c8bd Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sat, 10 Aug 2024 14:54:35 +0200 Subject: [PATCH 08/33] Support vsimem files in a directory for gdal<3.8 --- pyogrio/_io.pyx | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 4fcb872b..e7beb331 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1932,12 +1932,15 @@ cdef void * ogr_create(const char* path_c, const char* driver_c, char** options) except CPLE_BaseError as exc: raise DataSourceError(str(exc)) - # If it is a vsimem file, create directories first to support GDAL < 3.8. Starting - # from GDAL 3.8 directories are created automatically. - if "/vsimem/" in str(path_c): - parent = Path(str(path_c)).parent.as_posix() - if not parent.endswith("/vsimem"): - VSIMkdirRecursive(parent.encode("utf-8"), 0666) + # For /vsimem/ files, with GDAL >= 3.8 parent directories are created automatically. + IF CTE_GDAL_VERSION < (3, 8, 0): + path = path_c.decode("UTF-8") + if "/vsimem/" in path: + parent = f"{Path(path).parent.as_posix()}" + if not parent.endswith("/vsimem"): + retcode = VSIMkdirRecursive(parent.encode("UTF-8"), 0666) + if retcode != 0: + raise OSError(f"Could not create parent directory '{parent}'") # Create the dataset try: From 82b08d4b6e1ce2c687534d40ba1177a1f221ef34 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sat, 10 Aug 2024 14:58:11 +0200 Subject: [PATCH 09/33] Textual improvements --- pyogrio/_vsi.pyx | 14 ++++++-------- pyogrio/core.py | 2 +- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/pyogrio/_vsi.pyx b/pyogrio/_vsi.pyx index 5d4500cd..5386452b 100644 --- a/pyogrio/_vsi.pyx +++ b/pyogrio/_vsi.pyx @@ -172,7 +172,7 @@ cpdef vsimem_rmtree_toplevel(str path): raise OSError(f"Error removing '{path}': {errcode=}") -def ogr_vsi_listtree(str path, str pattern=None): +def ogr_vsi_listtree(str path, str pattern): """Recursively list the contents in a vsi directory. An fnmatch pattern can be specified to filter the directories/files @@ -183,7 +183,7 @@ def ogr_vsi_listtree(str path, str pattern=None): path : str Path to the vsi directory to be listed. pattern : str - Fnmatch pattern to filter results. + Pattern to filter results, in fnmatch format. """ cdef const char *path_c @@ -192,7 +192,7 @@ def ogr_vsi_listtree(str path, str pattern=None): cdef VSIStatBufL st_buf try: - path_b = path.encode("utf-8") + path_b = path.encode("UTF-8") except UnicodeDecodeError: path_b = path path_c = path_b @@ -205,7 +205,7 @@ def ogr_vsi_listtree(str path, str pattern=None): n = CSLCount(papszFiles) files = [] for i in range(n): - files.append(papszFiles[i].decode("utf-8")) + files.append(papszFiles[i].decode("UTF-8")) CSLDestroy(papszFiles) # Apply filter pattern @@ -233,7 +233,7 @@ def ogr_vsi_rmtree(str path): cdef VSIStatBufL st_buf try: - path_b = path.encode("utf-8") + path_b = path.encode("UTF-8") except UnicodeDecodeError: path_b = path path_c = path_b @@ -245,7 +245,6 @@ def ogr_vsi_rmtree(str path): raise OSError("Removing /vsimem/ is not supported") errcode = VSIRmdirRecursive(path_c) - if errcode != 0: raise OSError(f"Error in rmtree of '{path}': {errcode=}") @@ -263,7 +262,7 @@ def ogr_vsi_unlink(str path): cdef VSIStatBufL st_buf try: - path_b = path.encode("utf-8") + path_b = path.encode("UTF-8") except UnicodeDecodeError: path_b = path path_c = path_b @@ -273,6 +272,5 @@ def ogr_vsi_unlink(str path): raise IsADirectoryError(f"Path is a directory: '{path}'") errcode = VSIUnlink(path_c) - if errcode != 0: raise OSError(f"Error removing '{path}': {errcode=}") diff --git a/pyogrio/core.py b/pyogrio/core.py index 95ee03f1..c83b96ca 100644 --- a/pyogrio/core.py +++ b/pyogrio/core.py @@ -339,7 +339,7 @@ def vsi_listtree(path: Union[str, Path], pattern: str = None): path : str or pathlib.Path Path to the vsi directory to be listed. pattern : str, optional - Fnmatch pattern to filter results. + Pattern to filter results, in fnmatch format. """ if isinstance(path, Path): From 33f55a6dff1d5ab7095315621e281c8b984823ed Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sat, 10 Aug 2024 16:39:27 +0200 Subject: [PATCH 10/33] improve directory name for vsimem fixture --- pyogrio/tests/conftest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyogrio/tests/conftest.py b/pyogrio/tests/conftest.py index 871a7847..bafeba0e 100644 --- a/pyogrio/tests/conftest.py +++ b/pyogrio/tests/conftest.py @@ -137,7 +137,8 @@ def naturalearth_lowres_vsimem(naturalearth_lowres): """Write naturalearth_lowres to a vsimem file for vsi tests""" meta, _, geometry, field_data = read(naturalearth_lowres) - dst_path = Path(f"/vsimem/pyogrio_test/{naturalearth_lowres.stem}.gpkg") + name = f"pyogrio_fixture_{naturalearth_lowres.stem}" + dst_path = Path(f"/vsimem/{name}/{name}.gpkg") meta["spatial_index"] = False meta["geometry_type"] = "MultiPolygon" From 8d281cfa21c11ce8acc79b412808e6d8daf256fa Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sat, 10 Aug 2024 16:41:01 +0200 Subject: [PATCH 11/33] Skip arrow write test for gdal < 3.8 --- pyogrio/tests/test_geopandas_io.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index 840e0b02..c55b8086 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -65,6 +65,17 @@ def use_arrow(request): return request.param +@pytest.fixture( + scope="session", + params=[ + False, + pytest.param(True, marks=requires_arrow_write_api), + ], +) +def use_arrow_write(request): + return request.param + + @pytest.fixture(autouse=True) def skip_if_no_arrow_write_api(request): # automatically skip tests with use_arrow=True and that require Arrow write @@ -1539,16 +1550,16 @@ def test_write_read_null(tmp_path, use_arrow): assert result_gdf["object_str"][2] is None -def test_write_read_vsimem(naturalearth_lowres_vsi, use_arrow): +def test_write_read_vsimem(naturalearth_lowres_vsi, use_arrow_write): path, _ = naturalearth_lowres_vsi mem_path = f"/vsimem/{path.name}" - input = read_dataframe(path, use_arrow=use_arrow) + input = read_dataframe(path, use_arrow=use_arrow_write) assert len(input) == 177 try: - write_dataframe(input, mem_path, use_arrow=use_arrow) - result = read_dataframe(mem_path, use_arrow=use_arrow) + write_dataframe(input, mem_path, use_arrow=use_arrow_write) + result = read_dataframe(mem_path, use_arrow=use_arrow_write) assert len(result) == 177 finally: vsi_unlink(mem_path) From d9ad3d70b23a849bc7aef528bac5f1c23a76b98f Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sat, 10 Aug 2024 17:05:20 +0200 Subject: [PATCH 12/33] fix vsimem listlayers test --- pyogrio/tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyogrio/tests/conftest.py b/pyogrio/tests/conftest.py index bafeba0e..0e814c1f 100644 --- a/pyogrio/tests/conftest.py +++ b/pyogrio/tests/conftest.py @@ -142,7 +142,7 @@ def naturalearth_lowres_vsimem(naturalearth_lowres): meta["spatial_index"] = False meta["geometry_type"] = "MultiPolygon" - write(dst_path, geometry, field_data, **meta) + write(dst_path, geometry, field_data, layer="naturalearth_lowres", **meta) yield dst_path vsi_rmtree(dst_path.parent) From b728332000192d21909aab6180053f6db7d9b648 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sat, 10 Aug 2024 17:18:12 +0200 Subject: [PATCH 13/33] rename is_tmp_vsimem to use_tmp_vsimem --- pyogrio/_io.pyx | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index e7beb331..e6793e63 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1185,7 +1185,7 @@ def ogr_read( ): cdef int err = 0 - cdef bint is_tmp_vsimem = isinstance(path_or_buffer, bytes) + cdef bint use_tmp_vsimem = isinstance(path_or_buffer, bytes) cdef const char *path_c = NULL cdef char **dataset_options = NULL cdef const char *where_c = NULL @@ -1225,7 +1225,7 @@ def ogr_read( raise ValueError("'max_features' must be >= 0") try: - path = read_buffer_to_vsimem(path_or_buffer) if is_tmp_vsimem else path_or_buffer + path = read_buffer_to_vsimem(path_or_buffer) if use_tmp_vsimem else path_or_buffer if encoding: # for shapefiles, SHAPE_ENCODING must be set before opening the file @@ -1363,7 +1363,7 @@ def ogr_read( CPLFree(prev_shape_encoding) prev_shape_encoding = NULL - if is_tmp_vsimem: + if use_tmp_vsimem: vsimem_rmtree_toplevel(path) return ( @@ -1425,7 +1425,7 @@ def ogr_open_arrow( ): cdef int err = 0 - cdef bint is_tmp_vsimem = isinstance(path_or_buffer, bytes) + cdef bint use_tmp_vsimem = isinstance(path_or_buffer, bytes) cdef const char *path_c = NULL cdef char **dataset_options = NULL cdef const char *where_c = NULL @@ -1481,7 +1481,7 @@ def ogr_open_arrow( reader = None try: - path = read_buffer_to_vsimem(path_or_buffer) if is_tmp_vsimem else path_or_buffer + path = read_buffer_to_vsimem(path_or_buffer) if use_tmp_vsimem else path_or_buffer if encoding: override_shape_encoding = True @@ -1680,7 +1680,7 @@ def ogr_open_arrow( CPLFree(prev_shape_encoding) prev_shape_encoding = NULL - if is_tmp_vsimem: + if use_tmp_vsimem: vsimem_rmtree_toplevel(path) @@ -1698,7 +1698,7 @@ def ogr_read_bounds( object mask=None): cdef int err = 0 - cdef bint is_tmp_vsimem = isinstance(path_or_buffer, bytes) + cdef bint use_tmp_vsimem = isinstance(path_or_buffer, bytes) cdef const char *path_c = NULL cdef const char *where_c = NULL cdef OGRDataSourceH ogr_dataset = NULL @@ -1716,7 +1716,7 @@ def ogr_read_bounds( raise ValueError("'max_features' must be >= 0") try: - path = read_buffer_to_vsimem(path_or_buffer) if is_tmp_vsimem else path_or_buffer + path = read_buffer_to_vsimem(path_or_buffer) if use_tmp_vsimem else path_or_buffer ogr_dataset = ogr_open(path.encode('UTF-8'), 0, NULL) if layer is None: @@ -1745,7 +1745,7 @@ def ogr_read_bounds( GDALClose(ogr_dataset) ogr_dataset = NULL - if is_tmp_vsimem: + if use_tmp_vsimem: vsimem_rmtree_toplevel(path) return bounds @@ -1759,7 +1759,7 @@ def ogr_read_info( int force_feature_count=False, int force_total_bounds=False): - cdef bint is_tmp_vsimem = isinstance(path_or_buffer, bytes) + cdef bint use_tmp_vsimem = isinstance(path_or_buffer, bytes) cdef const char *path_c = NULL cdef char **dataset_options = NULL cdef OGRDataSourceH ogr_dataset = NULL @@ -1768,7 +1768,7 @@ def ogr_read_info( cdef bint override_shape_encoding = False try: - path = read_buffer_to_vsimem(path_or_buffer) if is_tmp_vsimem else path_or_buffer + path = read_buffer_to_vsimem(path_or_buffer) if use_tmp_vsimem else path_or_buffer if encoding: override_shape_encoding = True @@ -1827,19 +1827,19 @@ def ogr_read_info( if prev_shape_encoding != NULL: CPLFree(prev_shape_encoding) - if is_tmp_vsimem: + if use_tmp_vsimem: vsimem_rmtree_toplevel(path) return meta def ogr_list_layers(object path_or_buffer): - cdef bint is_tmp_vsimem = isinstance(path_or_buffer, bytes) + cdef bint use_tmp_vsimem = isinstance(path_or_buffer, bytes) cdef const char *path_c = NULL cdef OGRDataSourceH ogr_dataset = NULL try: - path = read_buffer_to_vsimem(path_or_buffer) if is_tmp_vsimem else path_or_buffer + path = read_buffer_to_vsimem(path_or_buffer) if use_tmp_vsimem else path_or_buffer ogr_dataset = ogr_open(path.encode('UTF-8'), 0, NULL) layers = get_layer_names(ogr_dataset) @@ -1848,7 +1848,7 @@ def ogr_list_layers(object path_or_buffer): GDALClose(ogr_dataset) ogr_dataset = NULL - if is_tmp_vsimem: + if use_tmp_vsimem: vsimem_rmtree_toplevel(path) return layers @@ -2025,7 +2025,7 @@ cdef infer_field_types(list dtypes): cdef create_ogr_dataset_layer( str path, - bint is_tmp_vsimem, + bint use_tmp_vsimem, str layer, str driver, str crs, @@ -2059,7 +2059,7 @@ cdef create_ogr_dataset_layer( encoding : str Only used if `driver` is "ESRI Shapefile". If not None, it overrules the default shapefile encoding, which is "UTF-8" in pyogrio. - is_tmp_vsimem : bool + use_tmp_vsimem : bool Whether the file path is meant to save a temporary memory file to. Returns @@ -2089,7 +2089,7 @@ cdef create_ogr_dataset_layer( driver_c = driver_b # temporary in-memory dataset is always created from scratch - path_exists = os.path.exists(path) if not is_tmp_vsimem else False + path_exists = os.path.exists(path) if not use_tmp_vsimem else False if not layer: layer = os.path.splitext(os.path.split(path)[1])[0] @@ -2125,7 +2125,7 @@ cdef create_ogr_dataset_layer( raise exc # otherwise create from scratch - if is_tmp_vsimem: + if use_tmp_vsimem: VSIUnlink(path_c) else: os.unlink(path) From 36212182073f421f79fad8b1f392affd44a3019c Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sat, 10 Aug 2024 17:24:19 +0200 Subject: [PATCH 14/33] Rename is_tmp_vsi to use_tmp_vsimem --- pyogrio/_io.pyx | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index e6793e63..09ebac10 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -12,16 +12,16 @@ import math import os import sys import warnings +from pathlib import Path -from io import BytesIO from libc.stdint cimport uint8_t, uintptr_t from libc.stdlib cimport malloc, free from libc.string cimport strlen from libc.math cimport isnan from cpython.pycapsule cimport PyCapsule_GetPointer + cimport cython from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer -from pathlib import Path import numpy as np @@ -2263,7 +2263,7 @@ def ogr_write( cdef int num_records = -1 cdef int num_field_data = len(field_data) if field_data is not None else 0 cdef int num_fields = len(fields) if fields is not None else 0 - cdef bint is_tmp_vsi = False + cdef bint use_tmp_vsimem = False if num_fields != num_field_data: raise ValueError("field_data array needs to be same length as fields array") @@ -2306,14 +2306,14 @@ def ogr_write( # Setup in-memory handler if needed if isinstance(path_or_fp, str): path = path_or_fp - is_tmp_vsi = False + use_tmp_vsimem = False else: path = get_ogr_vsimem_write_path(path_or_fp, driver) - is_tmp_vsi = path.startswith('/vsimem/') + use_tmp_vsimem = path.startswith('/vsimem/') # Setup dataset and layer layer_created = create_ogr_dataset_layer( - path, is_tmp_vsi, layer, driver, crs, geometry_type, encoding, + path, use_tmp_vsimem, layer, driver, crs, geometry_type, encoding, dataset_kwargs, layer_kwargs, append, dataset_metadata, layer_metadata, &ogr_dataset, &ogr_layer, @@ -2518,7 +2518,7 @@ def ogr_write( raise DataSourceError(f"Failed to write features to dataset {path}; {exc}") # copy in-memory file back to path_or_fp object - if is_tmp_vsi: + if use_tmp_vsimem: read_vsimem_to_buffer(path, path_or_fp) finally: @@ -2540,7 +2540,7 @@ def ogr_write( if ogr_dataset != NULL: ogr_close(ogr_dataset) - if is_tmp_vsi: + if use_tmp_vsimem: vsimem_rmtree_toplevel(path) @@ -2565,7 +2565,7 @@ def ogr_write_arrow( cdef OGRDataSourceH ogr_dataset = NULL cdef OGRLayerH ogr_layer = NULL cdef char **options = NULL - cdef bint is_tmp_vsi = False + cdef bint use_tmp_vsimem = False cdef ArrowArrayStream* stream = NULL cdef ArrowSchema schema cdef ArrowArray array @@ -2577,13 +2577,13 @@ def ogr_write_arrow( # Setup in-memory handler if needed if isinstance(path_or_fp, str): path = path_or_fp - is_tmp_vsi = False + use_tmp_vsimem = False else: path = get_ogr_vsimem_write_path(path_or_fp, driver) - is_tmp_vsi = path.startswith('/vsimem/') + use_tmp_vsimem = path.startswith('/vsimem/') layer_created = create_ogr_dataset_layer( - path, is_tmp_vsi, layer, driver, crs, geometry_type, encoding, + path, use_tmp_vsimem, layer, driver, crs, geometry_type, encoding, dataset_kwargs, layer_kwargs, append, dataset_metadata, layer_metadata, &ogr_dataset, &ogr_layer, @@ -2644,7 +2644,7 @@ def ogr_write_arrow( raise DataSourceError(f"Failed to write features to dataset {path}; {exc}") # copy in-memory file back to path_or_fp object - if is_tmp_vsi: + if use_tmp_vsimem: read_vsimem_to_buffer(path, path_or_fp) finally: @@ -2664,7 +2664,7 @@ def ogr_write_arrow( if ogr_dataset != NULL: ogr_close(ogr_dataset) - if is_tmp_vsi: + if use_tmp_vsimem: vsimem_rmtree_toplevel(path) From 1b150047644cabbf9882ec519774b8f9d74aac9a Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sat, 10 Aug 2024 17:27:59 +0200 Subject: [PATCH 15/33] Rename input params in _ogr.pxd to path --- pyogrio/_ogr.pxd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyogrio/_ogr.pxd b/pyogrio/_ogr.pxd index 75f999d3..8ce6a578 100644 --- a/pyogrio/_ogr.pxd +++ b/pyogrio/_ogr.pxd @@ -57,7 +57,7 @@ cdef extern from "cpl_vsi.h" nogil: long st_mode int st_mtime - int VSIStatL(const char *pszFilename, VSIStatBufL *psStatBuf) + int VSIStatL(const char *path, VSIStatBufL *psStatBuf) int VSI_ISDIR(int mode) char** VSIReadDirRecursive(const char *path) int VSIFCloseL(VSILFILE *fp) @@ -69,7 +69,7 @@ cdef extern from "cpl_vsi.h" nogil: int VSIMkdir(const char *path, long mode) int VSIMkdirRecursive(const char *path, long mode) - int VSIRmdirRecursive(const char *pszDirname) + int VSIRmdirRecursive(const char *path) cdef extern from "ogr_core.h": From 149a0ad77583b5817f1cdded9f02d649d680cb1e Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Wed, 21 Aug 2024 16:07:54 +0200 Subject: [PATCH 16/33] sort dependencies (to trigger tests) --- environment-dev.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/environment-dev.yml b/environment-dev.yml index c6de8e69..d92ac999 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -3,13 +3,13 @@ channels: - conda-forge dependencies: # Required - - numpy - libgdal-core + - numpy - shapely>=2 # Optional - geopandas-base - - pyproj - pyarrow + - pyproj # Specific for dev - cython - pre-commit From 9870fdfcebe68ed74f97c64cdf3abed57306129c Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Wed, 21 Aug 2024 16:18:02 +0200 Subject: [PATCH 17/33] Update lint.yml --- .github/workflows/lint.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 1d6e80f1..07bab840 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -5,6 +5,7 @@ on: branches: - main pull_request: + types: [opened, edited] workflow_dispatch: jobs: From e4ab48d062e886652ee3c34d2a6aa71ffdede344 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Tue, 10 Sep 2024 08:51:19 +0200 Subject: [PATCH 18/33] Apply feedback: small changes --- CHANGES.md | 2 +- pyogrio/_io.pyx | 2 +- pyogrio/_vsi.pyx | 33 +++++++++++++++++---------------- pyogrio/tests/test_core.py | 2 +- 4 files changed, 20 insertions(+), 19 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index e2e323b5..e7c805fa 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,7 +4,7 @@ ### Improvements -- Add support to read, write, list and remove `/vsimem/` files (#457) +- Add support to read, write, list, and remove `/vsimem/` files (#457) ## 0.9.1 (yyyy-mm-dd) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 09ebac10..b7d8b84e 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1936,7 +1936,7 @@ cdef void * ogr_create(const char* path_c, const char* driver_c, char** options) IF CTE_GDAL_VERSION < (3, 8, 0): path = path_c.decode("UTF-8") if "/vsimem/" in path: - parent = f"{Path(path).parent.as_posix()}" + parent = str(Path(path).parent.as_posix()) if not parent.endswith("/vsimem"): retcode = VSIMkdirRecursive(parent.encode("UTF-8"), 0666) if retcode != 0: diff --git a/pyogrio/_vsi.pyx b/pyogrio/_vsi.pyx index 5386452b..ff0c5cd6 100644 --- a/pyogrio/_vsi.pyx +++ b/pyogrio/_vsi.pyx @@ -126,15 +126,15 @@ cdef read_vsimem_to_buffer(str path, object out_buffer): cpdef vsimem_rmtree_toplevel(str path): - """Remove the toplevel file or toplevel directory containing the file. + """Remove the top-level file or top-level directory containing the file. This is used for final cleanup of an in-memory dataset. The path can point to either: - - a toplevel file (directly in /vsimem/). + - a top-level file (directly in /vsimem/). - a file in a directory, with possibly some sibling files. - a zip file, which apparently is reported as a directory by VSI_ISDIR. - Except for the first case, the toplevel directory (direct subdirectory of + Except for the first case, the top-level directory (direct subdirectory of /vsimem/) will be determined and will be removed recursively. Additional VSI handlers may be chained to the left of /vsimem/ in path and @@ -154,15 +154,16 @@ cpdef vsimem_rmtree_toplevel(str path): if "/vsimem/" not in path: raise ValueError(f"Path is not a /vsimem/ path: '{path}'") - # Determine the toplevel directory of the file + # Determine the top-level directory of the file mempath_parts = path.split("/vsimem/")[1].split("/") if len(mempath_parts) == 0: - raise OSError("Removing /vsimem/ is not supported") + raise OSError("path to in-memory file or directory is required") toplevel_path = f"/vsimem/{mempath_parts[0]}" if not VSIStatL(toplevel_path.encode("UTF-8"), &st_buf) == 0: raise FileNotFoundError(f"Path does not exist: '{path}'") + if VSI_ISDIR(st_buf.st_mode): errcode = VSIRmdirRecursive(toplevel_path.encode("UTF-8")) else: @@ -191,22 +192,22 @@ def ogr_vsi_listtree(str path, str pattern): cdef char** papszFiles cdef VSIStatBufL st_buf - try: - path_b = path.encode("UTF-8") - except UnicodeDecodeError: - path_b = path + path_b = path.encode("UTF-8") path_c = path_b + if not VSIStatL(path_c, &st_buf) == 0: raise FileNotFoundError(f"Path does not exist: '{path}'") if not VSI_ISDIR(st_buf.st_mode): raise NotADirectoryError(f"Path is not a directory: '{path}'") - papszFiles = VSIReadDirRecursive(path_c) - n = CSLCount(papszFiles) - files = [] - for i in range(n): - files.append(papszFiles[i].decode("UTF-8")) - CSLDestroy(papszFiles) + try: + papszFiles = VSIReadDirRecursive(path_c) + n = CSLCount(papszFiles) + files = [] + for i in range(n): + files.append(papszFiles[i].decode("UTF-8")) + finally: + CSLDestroy(papszFiles) # Apply filter pattern if pattern is not None: @@ -242,7 +243,7 @@ def ogr_vsi_rmtree(str path): if not VSI_ISDIR(st_buf.st_mode): raise NotADirectoryError(f"Path is not a directory: '{path}'") if path.endswith("/vsimem") or path.endswith("/vsimem/"): - raise OSError("Removing /vsimem/ is not supported") + raise OSError("path to in-memory file or directory is required") errcode = VSIRmdirRecursive(path_c) if errcode != 0: diff --git a/pyogrio/tests/test_core.py b/pyogrio/tests/test_core.py index a1f8f35e..0e69bc54 100644 --- a/pyogrio/tests/test_core.py +++ b/pyogrio/tests/test_core.py @@ -718,7 +718,7 @@ def test_vsimem_rmtree_toplevel(naturalearth_lowres): assert test_dir_path.parent.as_posix() not in files assert test_dir2_path.as_posix() in files - # Test deleting toplevel dir of file in a subdirectory + # Test deleting top-level dir of file in a subdirectory _vsimem_rmtree_toplevel(test_dir2_path) assert test_dir2_path.as_posix() not in vsi_listtree("/vsimem/") From 68a595a559097ba8d2e136489690ca2944040915 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Tue, 10 Sep 2024 08:53:35 +0200 Subject: [PATCH 19/33] Apply feedback: small change --- pyogrio/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyogrio/core.py b/pyogrio/core.py index fdbf42d5..e974d18d 100644 --- a/pyogrio/core.py +++ b/pyogrio/core.py @@ -338,7 +338,7 @@ def get_gdal_data_path(): def vsi_listtree(path: Union[str, Path], pattern: Optional[str] = None): - """Recursively list the contents in a vsi directory. + """Recursively list the contents of a VSI directory. An fnmatch pattern can be specified to filter the directories/files returned. From cfdfd7c2bf57fc2ebdd32c9748128f3850ca16c7 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Tue, 10 Sep 2024 13:01:18 +0200 Subject: [PATCH 20/33] Fix test --- pyogrio/tests/test_core.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pyogrio/tests/test_core.py b/pyogrio/tests/test_core.py index 0e69bc54..72906a2b 100644 --- a/pyogrio/tests/test_core.py +++ b/pyogrio/tests/test_core.py @@ -652,9 +652,13 @@ def test_vsimem_rmtree_error(naturalearth_lowres_vsimem): with pytest.raises(FileNotFoundError, match="Path does not exist"): vsi_rmtree("/vsimem/non-existent") - with pytest.raises(OSError, match="Removing /vsimem/ is not supported"): + with pytest.raises( + OSError, match="path to in-memory file or directory is required" + ): vsi_rmtree("/vsimem") - with pytest.raises(OSError, match="Removing /vsimem/ is not supported"): + with pytest.raises( + OSError, match="path to in-memory file or directory is required" + ): vsi_rmtree("/vsimem/") # Verify that naturalearth_lowres_vsimem still exists. From a997cefe3bab9c8e9544d59d7e267998c8857987 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Wed, 11 Sep 2024 00:11:03 +0200 Subject: [PATCH 21/33] Let get_ogr_vsimem_write_path return if a tmp vsimem file is used --- pyogrio/_io.pyx | 14 ++------------ pyogrio/_vsi.pxd | 2 +- pyogrio/_vsi.pyx | 38 ++++++++++++++++++++++++-------------- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index b7d8b84e..b4156681 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -2304,12 +2304,7 @@ def ogr_write( try: # Setup in-memory handler if needed - if isinstance(path_or_fp, str): - path = path_or_fp - use_tmp_vsimem = False - else: - path = get_ogr_vsimem_write_path(path_or_fp, driver) - use_tmp_vsimem = path.startswith('/vsimem/') + path, use_tmp_vsimem = get_ogr_vsimem_write_path(path_or_fp, driver) # Setup dataset and layer layer_created = create_ogr_dataset_layer( @@ -2575,12 +2570,7 @@ def ogr_write_arrow( try: # Setup in-memory handler if needed - if isinstance(path_or_fp, str): - path = path_or_fp - use_tmp_vsimem = False - else: - path = get_ogr_vsimem_write_path(path_or_fp, driver) - use_tmp_vsimem = path.startswith('/vsimem/') + path, use_tmp_vsimem = get_ogr_vsimem_write_path(path_or_fp, driver) layer_created = create_ogr_dataset_layer( path, use_tmp_vsimem, layer, driver, crs, geometry_type, encoding, diff --git a/pyogrio/_vsi.pxd b/pyogrio/_vsi.pxd index a4960f25..1c464489 100644 --- a/pyogrio/_vsi.pxd +++ b/pyogrio/_vsi.pxd @@ -1,4 +1,4 @@ -cdef str get_ogr_vsimem_write_path(object path_or_fp, str driver) +cdef tuple get_ogr_vsimem_write_path(object path_or_fp, str driver) cdef str read_buffer_to_vsimem(bytes bytes_buffer) cdef read_vsimem_to_buffer(str path, object out_buffer) cpdef vsimem_rmtree_toplevel(str path) \ No newline at end of file diff --git a/pyogrio/_vsi.pyx b/pyogrio/_vsi.pyx index ff0c5cd6..76ed8464 100644 --- a/pyogrio/_vsi.pyx +++ b/pyogrio/_vsi.pyx @@ -10,27 +10,37 @@ from pyogrio._ogr cimport * from pyogrio._ogr import _get_driver_metadata_item -cdef str get_ogr_vsimem_write_path(object path_or_fp, str driver): - """ Return the original path or a /vsimem/ path - - If passed a io.BytesIO object, this will return a /vsimem/ path that can be - used to create a new in-memory file with an extension inferred from the driver - if possible. Path will be contained in an in-memory directory to contain - sibling files (though drivers that create sibling files are not supported for - in-memory files). - - Caller is responsible for deleting the directory via vsimem_rmtree_toplevel() +cdef tuple get_ogr_vsimem_write_path(object path_or_fp, str driver): + """ Return the path to write to and whether it is a tmp vsimem filepath. + + If passed a io.BytesIO object to write to, a temporary vsimem file will be + used to be able to write the data directly to memory. + Hence, a tuple will be returned with a /vsimem/ path and True to indicate + the path will be to a tmp vsimem file. + The path will have an extension inferred from the driver if possible. Path + will be contained in an in-memory directory to contain sibling files + (though drivers that create sibling files are not supported for in-memory + files). + Caller is responsible for deleting the directory via + vsimem_rmtree_toplevel(). Parameters ---------- path_or_fp : str or io.BytesIO object driver : str - """ + Returns + ------- + tuple of (path, use_tmp_vsimem) + Tuple of the path to write to and a bool indicating if the path is a + temporary vsimem filepath. + + """ + # The write path is not a BytesIO object, so return path as-is if not isinstance(path_or_fp, BytesIO): - return path_or_fp + return (path_or_fp, False) - # check for existing bytes + # Check for existing bytes if path_or_fp.getbuffer().nbytes > 0: raise NotImplementedError("writing to existing in-memory object is not supported") @@ -47,7 +57,7 @@ cdef str get_ogr_vsimem_write_path(object path_or_fp, str driver): path = f"/vsimem/{memfilename}/{memfilename}{ext}" - return path + return (path, True) cdef str read_buffer_to_vsimem(bytes bytes_buffer): From d610f00fda3011c80707d30e1f80ea16d75d12e8 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sun, 15 Sep 2024 09:27:01 +0200 Subject: [PATCH 22/33] Apply feedback, textual changes --- pyogrio/_vsi.pyx | 18 ++++++++++-------- pyogrio/core.py | 8 ++++---- pyogrio/tests/conftest.py | 4 ++-- pyogrio/tests/test_arrow.py | 3 ++- pyogrio/tests/test_core.py | 2 +- pyogrio/util.py | 6 +++--- 6 files changed, 22 insertions(+), 19 deletions(-) diff --git a/pyogrio/_vsi.pyx b/pyogrio/_vsi.pyx index 76ed8464..c2c9f5e2 100644 --- a/pyogrio/_vsi.pyx +++ b/pyogrio/_vsi.pyx @@ -14,7 +14,7 @@ cdef tuple get_ogr_vsimem_write_path(object path_or_fp, str driver): """ Return the path to write to and whether it is a tmp vsimem filepath. If passed a io.BytesIO object to write to, a temporary vsimem file will be - used to be able to write the data directly to memory. + used to write the data directly to memory. Hence, a tuple will be returned with a /vsimem/ path and True to indicate the path will be to a tmp vsimem file. The path will have an extension inferred from the driver if possible. Path @@ -141,7 +141,7 @@ cpdef vsimem_rmtree_toplevel(str path): This is used for final cleanup of an in-memory dataset. The path can point to either: - a top-level file (directly in /vsimem/). - - a file in a directory, with possibly some sibling files. + - a file in a directory, which may include sibling files. - a zip file, which apparently is reported as a directory by VSI_ISDIR. Except for the first case, the top-level directory (direct subdirectory of @@ -184,7 +184,7 @@ cpdef vsimem_rmtree_toplevel(str path): def ogr_vsi_listtree(str path, str pattern): - """Recursively list the contents in a vsi directory. + """Recursively list the contents in a VSI directory. An fnmatch pattern can be specified to filter the directories/files returned. @@ -192,7 +192,7 @@ def ogr_vsi_listtree(str path, str pattern): Parameters: ----------- path : str - Path to the vsi directory to be listed. + Path to the VSI directory to be listed. pattern : str Pattern to filter results, in fnmatch format. @@ -232,12 +232,12 @@ def ogr_vsi_listtree(str path, str pattern): def ogr_vsi_rmtree(str path): - """Recursively remove vsi directory. + """Recursively remove VSI directory. Parameters: ----------- path : str - path to the vsi directory to be removed. + path to the VSI directory to be removed. """ cdef const char *path_c @@ -261,12 +261,12 @@ def ogr_vsi_rmtree(str path): def ogr_vsi_unlink(str path): - """Remove vsi file. + """Remove VSI file. Parameters: ----------- path : str - path to the vsi file to be removed. + path to the VSI file to be removed. """ cdef const char *path_c @@ -277,8 +277,10 @@ def ogr_vsi_unlink(str path): except UnicodeDecodeError: path_b = path path_c = path_b + if not VSIStatL(path_c, &st_buf) == 0: raise FileNotFoundError(f"Path does not exist: '{path}'") + if VSI_ISDIR(st_buf.st_mode): raise IsADirectoryError(f"Path is a directory: '{path}'") diff --git a/pyogrio/core.py b/pyogrio/core.py index e974d18d..5fcc8c93 100644 --- a/pyogrio/core.py +++ b/pyogrio/core.py @@ -346,7 +346,7 @@ def vsi_listtree(path: Union[str, Path], pattern: Optional[str] = None): Parameters ---------- path : str or pathlib.Path - Path to the vsi directory to be listed. + Path to the VSI directory to be listed. pattern : str, optional Pattern to filter results, in fnmatch format. @@ -358,12 +358,12 @@ def vsi_listtree(path: Union[str, Path], pattern: Optional[str] = None): def vsi_rmtree(path: Union[str, Path]): - """Recursively remove vsi directory. + """Recursively remove VSI directory. Parameters ---------- path : str or pathlib.Path - path to the vsi directory to be removed. + path to the VSI directory to be removed. """ if isinstance(path, Path): @@ -373,7 +373,7 @@ def vsi_rmtree(path: Union[str, Path]): def vsi_unlink(path: Union[str, Path]): - """Remove a vsi file. + """Remove a VSI file. Parameters ---------- diff --git a/pyogrio/tests/conftest.py b/pyogrio/tests/conftest.py index dd880829..d6bea86b 100644 --- a/pyogrio/tests/conftest.py +++ b/pyogrio/tests/conftest.py @@ -126,7 +126,7 @@ def naturalearth_lowres_all_ext(tmp_path, naturalearth_lowres, request): @pytest.fixture(scope="function") def naturalearth_lowres_vsi(tmp_path, naturalearth_lowres): - """Wrap naturalearth_lowres as a zip file for vsi tests""" + """Wrap naturalearth_lowres as a zip file for VSI tests""" path = tmp_path / f"{naturalearth_lowres.name}.zip" with ZipFile(path, mode="w", compression=ZIP_DEFLATED, compresslevel=5) as out: @@ -139,7 +139,7 @@ def naturalearth_lowres_vsi(tmp_path, naturalearth_lowres): @pytest.fixture(scope="function") def naturalearth_lowres_vsimem(naturalearth_lowres): - """Write naturalearth_lowres to a vsimem file for vsi tests""" + """Write naturalearth_lowres to a vsimem file for VSI tests""" meta, _, geometry, field_data = read(naturalearth_lowres) name = f"pyogrio_fixture_{naturalearth_lowres.stem}" diff --git a/pyogrio/tests/test_arrow.py b/pyogrio/tests/test_arrow.py index 6ce0176c..0a89a92a 100644 --- a/pyogrio/tests/test_arrow.py +++ b/pyogrio/tests/test_arrow.py @@ -163,7 +163,8 @@ def test_read_arrow_vsi(naturalearth_lowres_vsi): table = read_arrow(naturalearth_lowres_vsi[1])[1] assert len(table) == 177 - # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. + # Check temp file was cleaned up. Filter to files created by pyogrio, as GDAL keeps + # cache files in /vsimem/. assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] diff --git a/pyogrio/tests/test_core.py b/pyogrio/tests/test_core.py index 72906a2b..dfa44aa5 100644 --- a/pyogrio/tests/test_core.py +++ b/pyogrio/tests/test_core.py @@ -620,7 +620,7 @@ def test_vsimem_listtree(naturalearth_lowres): meta, _, geometry, field_data = read(naturalearth_lowres) meta["spatial_index"] = False meta["geometry_type"] = "MultiPolygon" - test_file_path = Path(f"/vsimem/pyogrio_test_{naturalearth_lowres.stem}.gpkg") + test_file_path = Path("/vsimem/pyogrio_test_naturalearth_lowres.gpkg") test_dir_path = Path(f"/vsimem/pyogrio_dir_test/{naturalearth_lowres.stem}.gpkg") write(test_file_path, geometry, field_data, **meta) diff --git a/pyogrio/util.py b/pyogrio/util.py index cafc4307..fd227622 100644 --- a/pyogrio/util.py +++ b/pyogrio/util.py @@ -9,7 +9,7 @@ def get_vsi_path_or_buffer(path_or_buffer): - """Get vsi-prefixed path or bytes buffer depending on type of path_or_buffer. + """Get VSI-prefixed path or bytes buffer depending on type of path_or_buffer. If path_or_buffer is a bytes object, it will be returned directly and will be read into an in-memory dataset when passed to one of the Cython functions. @@ -51,8 +51,8 @@ def get_vsi_path_or_buffer(path_or_buffer): def vsi_path(path: Union[str, Path]) -> str: - """Ensure path is a local path or a GDAL-compatible vsi path.""" - # Convert Path objects to string, but for vsi paths, keep posix style path. + """Ensure path is a local path or a GDAL-compatible VSI path.""" + # Convert Path objects to string, but for VSI paths, keep posix style path. if isinstance(path, Path): if sys.platform == "win32" and path.as_posix().startswith("/vsi"): path = path.as_posix() From 63a8cb6fa5fd993cf2544fbe8f56054bd99ba1b6 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sun, 15 Sep 2024 09:29:41 +0200 Subject: [PATCH 23/33] Apply feedback: textual change --- pyogrio/_vsi.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyogrio/_vsi.pyx b/pyogrio/_vsi.pyx index c2c9f5e2..3982bc9d 100644 --- a/pyogrio/_vsi.pyx +++ b/pyogrio/_vsi.pyx @@ -142,7 +142,7 @@ cpdef vsimem_rmtree_toplevel(str path): to either: - a top-level file (directly in /vsimem/). - a file in a directory, which may include sibling files. - - a zip file, which apparently is reported as a directory by VSI_ISDIR. + - a zip file (reported as a directory by VSI_ISDIR). Except for the first case, the top-level directory (direct subdirectory of /vsimem/) will be determined and will be removed recursively. From 2c11d77a846d7d31f25cbc76802f07b510c6f595 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sun, 15 Sep 2024 10:38:13 +0200 Subject: [PATCH 24/33] Parametrize test_vsimem_rmtree_error --- pyogrio/tests/test_core.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/pyogrio/tests/test_core.py b/pyogrio/tests/test_core.py index dfa44aa5..c34e6190 100644 --- a/pyogrio/tests/test_core.py +++ b/pyogrio/tests/test_core.py @@ -645,24 +645,25 @@ def test_vsimem_listtree(naturalearth_lowres): vsi_rmtree(test_dir_path.parent) -def test_vsimem_rmtree_error(naturalearth_lowres_vsimem): - with pytest.raises(NotADirectoryError, match="Path is not a directory"): - vsi_rmtree(naturalearth_lowres_vsimem) - - with pytest.raises(FileNotFoundError, match="Path does not exist"): - vsi_rmtree("/vsimem/non-existent") +@pytest.mark.parametrize( + "path, exception, error_match", + [ + ("naturalearth_lowres_vsimem", NotADirectoryError, "Path is not a directory"), + ("/vsimem/non-existent", FileNotFoundError, "Path does not exist"), + ("/vsimem", OSError, "path to in-memory file or directory is required"), + ("/vsimem/", OSError, "path to in-memory file or directory is required"), + ], +) +def test_vsimem_rmtree_error(path, exception, error_match, request): + if not path.startswith("/vsimem"): + path = request.getfixturevalue(path) - with pytest.raises( - OSError, match="path to in-memory file or directory is required" - ): - vsi_rmtree("/vsimem") - with pytest.raises( - OSError, match="path to in-memory file or directory is required" - ): - vsi_rmtree("/vsimem/") + with pytest.raises(exception, match=error_match): + vsi_rmtree(path) # Verify that naturalearth_lowres_vsimem still exists. - assert naturalearth_lowres_vsimem.as_posix() in vsi_listtree("/vsimem") + if not path.startswith("/vsimem"): + assert path.as_posix() in vsi_listtree("/vsimem") def test_vsimem_rmtree_unlink(naturalearth_lowres): From 07b500a29e547b4ea3c21e5965c7207af7813ba7 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sun, 15 Sep 2024 10:55:19 +0200 Subject: [PATCH 25/33] Add doc to test_get_vsi_path_or_buffer_obj_to_string --- pyogrio/tests/test_path.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pyogrio/tests/test_path.py b/pyogrio/tests/test_path.py index 923588c8..9cc7943c 100644 --- a/pyogrio/tests/test_path.py +++ b/pyogrio/tests/test_path.py @@ -350,6 +350,12 @@ def test_uri_s3_dataframe(aws_env_setup): ], ) def test_get_vsi_path_or_buffer_obj_to_string(path, expected): + """Verify that get_vsi_path_or_buffer retains forward slashes in /vsimem paths. + + The /vsimem paths should keep forward slashes for GDAL to recognize them as such. + However, on Windows systems, forward slashes are by default replaced by backslashes, + so this test verifies that this doesn't happen for /vsimem paths. + """ assert get_vsi_path_or_buffer(path) == expected From 88066123099e283bf5c1a960f618e943e2397570 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sun, 15 Sep 2024 11:09:59 +0200 Subject: [PATCH 26/33] Move vsimem_rmtree_toplevel to util + tests to new test_util.py --- pyogrio/core.py | 24 ---------------- pyogrio/tests/test_core.py | 50 ---------------------------------- pyogrio/tests/test_util.py | 56 ++++++++++++++++++++++++++++++++++++++ pyogrio/util.py | 25 +++++++++++++++++ 4 files changed, 81 insertions(+), 74 deletions(-) create mode 100644 pyogrio/tests/test_util.py diff --git a/pyogrio/core.py b/pyogrio/core.py index 5fcc8c93..1fa18fa4 100644 --- a/pyogrio/core.py +++ b/pyogrio/core.py @@ -30,7 +30,6 @@ ogr_vsi_listtree, ogr_vsi_rmtree, ogr_vsi_unlink, - vsimem_rmtree_toplevel, ) _init_gdal_data() @@ -385,26 +384,3 @@ def vsi_unlink(path: Union[str, Path]): path = path.as_posix() ogr_vsi_unlink(path) - - -def _vsimem_rmtree_toplevel(path: Union[str, Path]): - """Remove the parent directory of the file path recursively. - - This is used for final cleanup of an in-memory dataset, which may have been - created within a directory to contain sibling files. - - Additional VSI handlers may be chained to the left of /vsimem/ in path and - will be ignored. - - Remark: function is defined here to be able to run tests on it. - - Parameters - ---------- - path : str or pathlib.Path - path to in-memory file - - """ - if isinstance(path, Path): - path = path.as_posix() - - vsimem_rmtree_toplevel(path) diff --git a/pyogrio/tests/test_core.py b/pyogrio/tests/test_core.py index c34e6190..ebac0484 100644 --- a/pyogrio/tests/test_core.py +++ b/pyogrio/tests/test_core.py @@ -20,7 +20,6 @@ ) from pyogrio._compat import GDAL_GE_38 from pyogrio._env import GDALEnv -from pyogrio.core import _vsimem_rmtree_toplevel from pyogrio.errors import DataLayerError, DataSourceError from pyogrio.raw import read, write from pyogrio.tests.conftest import START_FID, prepare_testfile, requires_shapely @@ -699,52 +698,3 @@ def test_vsimem_unlink_error(naturalearth_lowres_vsimem): with pytest.raises(FileNotFoundError, match="Path does not exist"): vsi_unlink("/vsimem/non-existent.gpkg") - - -def test_vsimem_rmtree_toplevel(naturalearth_lowres): - # Prepare test data in /vsimem/ - meta, _, geometry, field_data = read(naturalearth_lowres) - meta["spatial_index"] = False - meta["geometry_type"] = "MultiPolygon" - test_dir_path = Path(f"/vsimem/test/{naturalearth_lowres.stem}.gpkg") - test_dir2_path = Path(f"/vsimem/test2/test2/{naturalearth_lowres.stem}.gpkg") - - write(test_dir_path, geometry, field_data, **meta) - write(test_dir2_path, geometry, field_data, **meta) - - # Check if everything was created properly with listtree - files = vsi_listtree("/vsimem/") - assert test_dir_path.as_posix() in files - assert test_dir2_path.as_posix() in files - - # Test deleting parent dir of file in single directory - _vsimem_rmtree_toplevel(test_dir_path) - files = vsi_listtree("/vsimem/") - assert test_dir_path.parent.as_posix() not in files - assert test_dir2_path.as_posix() in files - - # Test deleting top-level dir of file in a subdirectory - _vsimem_rmtree_toplevel(test_dir2_path) - assert test_dir2_path.as_posix() not in vsi_listtree("/vsimem/") - - -def test_vsimem_rmtree_toplevel_error(naturalearth_lowres): - # Prepare test data in /vsimem - meta, _, geometry, field_data = read(naturalearth_lowres) - meta["spatial_index"] = False - meta["geometry_type"] = "MultiPolygon" - test_file_path = Path(f"/vsimem/pyogrio_test_{naturalearth_lowres.stem}.gpkg") - - write(test_file_path, geometry, field_data, **meta) - assert test_file_path.as_posix() in vsi_listtree("/vsimem/") - - # Deleting parent dir of non-existent file should raise an error. - with pytest.raises(FileNotFoundError, match="Path does not exist"): - _vsimem_rmtree_toplevel("/vsimem/test/non-existent.gpkg") - - # File should still be there - assert test_file_path.as_posix() in vsi_listtree("/vsimem/") - - # Cleanup. - vsi_unlink(test_file_path) - assert test_file_path not in vsi_listtree("/vsimem/") diff --git a/pyogrio/tests/test_util.py b/pyogrio/tests/test_util.py new file mode 100644 index 00000000..52ef2a83 --- /dev/null +++ b/pyogrio/tests/test_util.py @@ -0,0 +1,56 @@ +from pathlib import Path + +from pyogrio import vsi_listtree, vsi_unlink +from pyogrio.raw import read, write +from pyogrio.util import vsimem_rmtree_toplevel + +import pytest + + +def test_vsimem_rmtree_toplevel(naturalearth_lowres): + # Prepare test data in /vsimem/ + meta, _, geometry, field_data = read(naturalearth_lowres) + meta["spatial_index"] = False + meta["geometry_type"] = "MultiPolygon" + test_dir_path = Path(f"/vsimem/test/{naturalearth_lowres.stem}.gpkg") + test_dir2_path = Path(f"/vsimem/test2/test2/{naturalearth_lowres.stem}.gpkg") + + write(test_dir_path, geometry, field_data, **meta) + write(test_dir2_path, geometry, field_data, **meta) + + # Check if everything was created properly with listtree + files = vsi_listtree("/vsimem/") + assert test_dir_path.as_posix() in files + assert test_dir2_path.as_posix() in files + + # Test deleting parent dir of file in single directory + vsimem_rmtree_toplevel(test_dir_path) + files = vsi_listtree("/vsimem/") + assert test_dir_path.parent.as_posix() not in files + assert test_dir2_path.as_posix() in files + + # Test deleting top-level dir of file in a subdirectory + vsimem_rmtree_toplevel(test_dir2_path) + assert test_dir2_path.as_posix() not in vsi_listtree("/vsimem/") + + +def test_vsimem_rmtree_toplevel_error(naturalearth_lowres): + # Prepare test data in /vsimem + meta, _, geometry, field_data = read(naturalearth_lowres) + meta["spatial_index"] = False + meta["geometry_type"] = "MultiPolygon" + test_file_path = Path(f"/vsimem/pyogrio_test_{naturalearth_lowres.stem}.gpkg") + + write(test_file_path, geometry, field_data, **meta) + assert test_file_path.as_posix() in vsi_listtree("/vsimem/") + + # Deleting parent dir of non-existent file should raise an error. + with pytest.raises(FileNotFoundError, match="Path does not exist"): + vsimem_rmtree_toplevel("/vsimem/test/non-existent.gpkg") + + # File should still be there + assert test_file_path.as_posix() in vsi_listtree("/vsimem/") + + # Cleanup. + vsi_unlink(test_file_path) + assert test_file_path not in vsi_listtree("/vsimem/") diff --git a/pyogrio/util.py b/pyogrio/util.py index fd227622..b018ad79 100644 --- a/pyogrio/util.py +++ b/pyogrio/util.py @@ -7,6 +7,8 @@ from typing import Union from urllib.parse import urlparse +from pyogrio._vsi import vsimem_rmtree_toplevel as _vsimem_rmtree_toplevel + def get_vsi_path_or_buffer(path_or_buffer): """Get VSI-prefixed path or bytes buffer depending on type of path_or_buffer. @@ -220,3 +222,26 @@ def _mask_to_wkb(mask): raise ValueError("'mask' parameter must be a Shapely geometry") return shapely.to_wkb(mask) + + +def vsimem_rmtree_toplevel(path: Union[str, Path]): + """Remove the parent directory of the file path recursively. + + This is used for final cleanup of an in-memory dataset, which may have been + created within a directory to contain sibling files. + + Additional VSI handlers may be chained to the left of /vsimem/ in path and + will be ignored. + + Remark: function is defined here to be able to run tests on it. + + Parameters + ---------- + path : str or pathlib.Path + path to in-memory file + + """ + if isinstance(path, Path): + path = path.as_posix() + + _vsimem_rmtree_toplevel(path) From c3a01e5cbf206c837e42a39da2fb29a1488f2f71 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sun, 15 Sep 2024 11:23:11 +0200 Subject: [PATCH 27/33] Fix test_vsimem_rmtree_error --- pyogrio/tests/test_core.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/pyogrio/tests/test_core.py b/pyogrio/tests/test_core.py index ebac0484..94ed43cc 100644 --- a/pyogrio/tests/test_core.py +++ b/pyogrio/tests/test_core.py @@ -645,23 +645,28 @@ def test_vsimem_listtree(naturalearth_lowres): @pytest.mark.parametrize( - "path, exception, error_match", + "path, is_fixture, exception, error_match", [ - ("naturalearth_lowres_vsimem", NotADirectoryError, "Path is not a directory"), - ("/vsimem/non-existent", FileNotFoundError, "Path does not exist"), - ("/vsimem", OSError, "path to in-memory file or directory is required"), - ("/vsimem/", OSError, "path to in-memory file or directory is required"), + ( + "naturalearth_lowres_vsimem", + True, + NotADirectoryError, + "Path is not a directory", + ), + ("/vsimem/non-existent", False, FileNotFoundError, "Path does not exist"), + ("/vsimem", False, OSError, "path to in-memory file or directory is required"), + ("/vsimem/", False, OSError, "path to in-memory file or directory is required"), ], ) -def test_vsimem_rmtree_error(path, exception, error_match, request): - if not path.startswith("/vsimem"): +def test_vsimem_rmtree_error(path, is_fixture, exception, error_match, request): + if is_fixture: path = request.getfixturevalue(path) with pytest.raises(exception, match=error_match): vsi_rmtree(path) - # Verify that naturalearth_lowres_vsimem still exists. - if not path.startswith("/vsimem"): + # If path was a fixture, the temp file should still exists. + if is_fixture: assert path.as_posix() in vsi_listtree("/vsimem") From 774a6e8abba48035146313da3bc10962a444a667 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Mon, 16 Sep 2024 18:58:58 +0200 Subject: [PATCH 28/33] Remove 0.9.1 in changelog --- CHANGES.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index e7c805fa..1bf5855b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -6,8 +6,6 @@ - Add support to read, write, list, and remove `/vsimem/` files (#457) -## 0.9.1 (yyyy-mm-dd) - ### Bug fixes - Silence warning from `write_dataframe` with `GeoSeries.notna()` (#435). From bf2b45517f518409c95858bfd8f27fa387dc6dd7 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Mon, 16 Sep 2024 20:09:02 +0200 Subject: [PATCH 29/33] Feedback: textual changes + merge some vsimem tests --- .github/workflows/lint.yml | 1 - pyogrio/_vsi.pyx | 4 ++-- pyogrio/tests/test_core.py | 45 ++++++++++---------------------------- 3 files changed, 14 insertions(+), 36 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index c71acbee..a5eb8080 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -5,7 +5,6 @@ on: branches: - main pull_request: - types: [opened, edited] workflow_dispatch: jobs: diff --git a/pyogrio/_vsi.pyx b/pyogrio/_vsi.pyx index 3982bc9d..757c2c78 100644 --- a/pyogrio/_vsi.pyx +++ b/pyogrio/_vsi.pyx @@ -2,7 +2,6 @@ import fnmatch from io import BytesIO from uuid import uuid4 - from libc.stdlib cimport malloc, free from libc.string cimport memcpy @@ -11,7 +10,7 @@ from pyogrio._ogr import _get_driver_metadata_item cdef tuple get_ogr_vsimem_write_path(object path_or_fp, str driver): - """ Return the path to write to and whether it is a tmp vsimem filepath. + """Return the path to write to and whether it is a tmp vsimem filepath. If passed a io.BytesIO object to write to, a temporary vsimem file will be used to write the data directly to memory. @@ -21,6 +20,7 @@ cdef tuple get_ogr_vsimem_write_path(object path_or_fp, str driver): will be contained in an in-memory directory to contain sibling files (though drivers that create sibling files are not supported for in-memory files). + Caller is responsible for deleting the directory via vsimem_rmtree_toplevel(). diff --git a/pyogrio/tests/test_core.py b/pyogrio/tests/test_core.py index 94ed43cc..a210e06f 100644 --- a/pyogrio/tests/test_core.py +++ b/pyogrio/tests/test_core.py @@ -614,36 +614,6 @@ def test_error_handling_warning(capfd, naturalearth_lowres): assert capfd.readouterr().err == "" -def test_vsimem_listtree(naturalearth_lowres): - # Prepare test data in /vsimem - meta, _, geometry, field_data = read(naturalearth_lowres) - meta["spatial_index"] = False - meta["geometry_type"] = "MultiPolygon" - test_file_path = Path("/vsimem/pyogrio_test_naturalearth_lowres.gpkg") - test_dir_path = Path(f"/vsimem/pyogrio_dir_test/{naturalearth_lowres.stem}.gpkg") - - write(test_file_path, geometry, field_data, **meta) - write(test_dir_path, geometry, field_data, **meta) - - # Check basic listtree - files = vsi_listtree("/vsimem/") - assert test_file_path.as_posix() in files - assert test_dir_path.as_posix() in files - - # Check listtree with pattern - files = vsi_listtree("/vsimem/", pattern="pyogrio_dir_test*.gpkg") - assert test_file_path.as_posix() not in files - assert test_dir_path.as_posix() in files - - files = vsi_listtree("/vsimem/", pattern="pyogrio_test*.gpkg") - assert test_file_path.as_posix() in files - assert test_dir_path.as_posix() not in files - - # Clean up - vsi_unlink(test_file_path) - vsi_rmtree(test_dir_path.parent) - - @pytest.mark.parametrize( "path, is_fixture, exception, error_match", [ @@ -670,14 +640,14 @@ def test_vsimem_rmtree_error(path, is_fixture, exception, error_match, request): assert path.as_posix() in vsi_listtree("/vsimem") -def test_vsimem_rmtree_unlink(naturalearth_lowres): +def test_vsimem_listtree_rmtree_unlink(naturalearth_lowres): """Test all basic functionalities of file handling in /vsimem/.""" # Prepare test data in /vsimem meta, _, geometry, field_data = read(naturalearth_lowres) meta["spatial_index"] = False meta["geometry_type"] = "MultiPolygon" - test_file_path = Path(f"/vsimem/{naturalearth_lowres.stem}.gpkg") - test_dir_path = Path(f"/vsimem/test_dir/{naturalearth_lowres.stem}.gpkg") + test_file_path = Path("/vsimem/pyogrio_test_naturalearth_lowres.gpkg") + test_dir_path = Path(f"/vsimem/pyogrio_dir_test/{naturalearth_lowres.stem}.gpkg") write(test_file_path, geometry, field_data, **meta) write(test_dir_path, geometry, field_data, **meta) @@ -687,6 +657,15 @@ def test_vsimem_rmtree_unlink(naturalearth_lowres): assert test_file_path.as_posix() in files assert test_dir_path.as_posix() in files + # Check listtree with pattern + files = vsi_listtree("/vsimem/", pattern="pyogrio_dir_test*.gpkg") + assert test_file_path.as_posix() not in files + assert test_dir_path.as_posix() in files + + files = vsi_listtree("/vsimem/", pattern="pyogrio_test*.gpkg") + assert test_file_path.as_posix() in files + assert test_dir_path.as_posix() not in files + # Remove test_dir and its contents vsi_rmtree(test_dir_path.parent) files = vsi_listtree("/vsimem/") From bc8edd2fcd611b8a883aced0cc927eaea3181c5d Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Mon, 16 Sep 2024 20:20:05 +0200 Subject: [PATCH 30/33] Rollback changes to test_vsimem_rmtree_error --- pyogrio/tests/test_core.py | 45 ++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/pyogrio/tests/test_core.py b/pyogrio/tests/test_core.py index a210e06f..37b82057 100644 --- a/pyogrio/tests/test_core.py +++ b/pyogrio/tests/test_core.py @@ -614,32 +614,6 @@ def test_error_handling_warning(capfd, naturalearth_lowres): assert capfd.readouterr().err == "" -@pytest.mark.parametrize( - "path, is_fixture, exception, error_match", - [ - ( - "naturalearth_lowres_vsimem", - True, - NotADirectoryError, - "Path is not a directory", - ), - ("/vsimem/non-existent", False, FileNotFoundError, "Path does not exist"), - ("/vsimem", False, OSError, "path to in-memory file or directory is required"), - ("/vsimem/", False, OSError, "path to in-memory file or directory is required"), - ], -) -def test_vsimem_rmtree_error(path, is_fixture, exception, error_match, request): - if is_fixture: - path = request.getfixturevalue(path) - - with pytest.raises(exception, match=error_match): - vsi_rmtree(path) - - # If path was a fixture, the temp file should still exists. - if is_fixture: - assert path.as_posix() in vsi_listtree("/vsimem") - - def test_vsimem_listtree_rmtree_unlink(naturalearth_lowres): """Test all basic functionalities of file handling in /vsimem/.""" # Prepare test data in /vsimem @@ -675,6 +649,25 @@ def test_vsimem_listtree_rmtree_unlink(naturalearth_lowres): # Remove test_file vsi_unlink(test_file_path) +def test_vsimem_rmtree_error(naturalearth_lowres_vsimem): + with pytest.raises(NotADirectoryError, match="Path is not a directory"): + vsi_rmtree(naturalearth_lowres_vsimem) + + with pytest.raises(FileNotFoundError, match="Path does not exist"): + vsi_rmtree("/vsimem/non-existent") + + with pytest.raises( + OSError, match="path to in-memory file or directory is required" + ): + vsi_rmtree("/vsimem") + with pytest.raises( + OSError, match="path to in-memory file or directory is required" + ): + vsi_rmtree("/vsimem/") + + # Verify that naturalearth_lowres_vsimem still exists. + assert naturalearth_lowres_vsimem.as_posix() in vsi_listtree("/vsimem") + def test_vsimem_unlink_error(naturalearth_lowres_vsimem): with pytest.raises(IsADirectoryError, match="Path is a directory"): From 6436be120e4c26ade29977b607c9e4c51ef979b3 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Mon, 16 Sep 2024 20:20:27 +0200 Subject: [PATCH 31/33] Update test_core.py --- pyogrio/tests/test_core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyogrio/tests/test_core.py b/pyogrio/tests/test_core.py index 37b82057..e0ff6d49 100644 --- a/pyogrio/tests/test_core.py +++ b/pyogrio/tests/test_core.py @@ -649,6 +649,7 @@ def test_vsimem_listtree_rmtree_unlink(naturalearth_lowres): # Remove test_file vsi_unlink(test_file_path) + def test_vsimem_rmtree_error(naturalearth_lowres_vsimem): with pytest.raises(NotADirectoryError, match="Path is not a directory"): vsi_rmtree(naturalearth_lowres_vsimem) From bd51c707f7d6a7551cbd67d6a820ab4ead9a9c71 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Mon, 16 Sep 2024 20:50:26 +0200 Subject: [PATCH 32/33] Use requires_arrow_write_api to skip write arrow test --- pyogrio/tests/test_geopandas_io.py | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index 810b0c27..c70ac820 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -66,17 +66,6 @@ def use_arrow(request): return request.param -@pytest.fixture( - scope="session", - params=[ - False, - pytest.param(True, marks=requires_arrow_write_api), - ], -) -def use_arrow_write(request): - return request.param - - @pytest.fixture(autouse=True) def skip_if_no_arrow_write_api(request): # automatically skip tests with use_arrow=True and that require Arrow write @@ -1584,16 +1573,17 @@ def test_write_read_null(tmp_path, use_arrow): assert result_gdf["object_str"][2] is None -def test_write_read_vsimem(naturalearth_lowres_vsi, use_arrow_write): +@pytest.mark.requires_arrow_write_api +def test_write_read_vsimem(naturalearth_lowres_vsi, use_arrow): path, _ = naturalearth_lowres_vsi mem_path = f"/vsimem/{path.name}" - input = read_dataframe(path, use_arrow=use_arrow_write) + input = read_dataframe(path, use_arrow=use_arrow) assert len(input) == 177 try: - write_dataframe(input, mem_path, use_arrow=use_arrow_write) - result = read_dataframe(mem_path, use_arrow=use_arrow_write) + write_dataframe(input, mem_path, use_arrow=use_arrow) + result = read_dataframe(mem_path, use_arrow=use_arrow) assert len(result) == 177 finally: vsi_unlink(mem_path) From 76cf286a98222a792fd4cc9a0fa40ab567e838f9 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Tue, 17 Sep 2024 00:18:28 +0200 Subject: [PATCH 33/33] Remove unreachable code regarding use_tmp_vsimem --- pyogrio/_io.pyx | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index b4156681..a9c934e5 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -2125,10 +2125,7 @@ cdef create_ogr_dataset_layer( raise exc # otherwise create from scratch - if use_tmp_vsimem: - VSIUnlink(path_c) - else: - os.unlink(path) + os.unlink(path) ogr_dataset = NULL