Skip to content

DOC/TST: document and test read_dataframe with on_invalid=fix #532

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyogrio/_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,4 @@
HAS_GDAL_GEOS = __gdal_geos_version__ is not None

HAS_SHAPELY = shapely is not None and Version(shapely.__version__) >= Version("2.0.0")
SHAPELY_GE_21 = shapely is not None and Version(shapely.__version__) >= Version("2.1.0")
3 changes: 3 additions & 0 deletions pyogrio/geopandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,9 @@ def read_dataframe(
warning will be raised.
- **ignore**: invalid WKB geometries will be returned as ``None``
without a warning.
- **fix**: an effort is made to fix invalid input geometries (currently
just unclosed rings). If this is not possible, they are returned as
``None`` without a warning. Requires GEOS >= 3.11 and shapely >= 2.1.

arrow_to_pandas_kwargs : dict, optional (default: None)
When `use_arrow` is True, these kwargs will be passed to the `to_pandas`_
Expand Down
26 changes: 18 additions & 8 deletions pyogrio/tests/test_geopandas_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
HAS_ARROW_WRITE_API,
HAS_PYPROJ,
PANDAS_GE_15,
SHAPELY_GE_21,
)
from pyogrio.errors import DataLayerError, DataSourceError, FeatureError, GeometryError
from pyogrio.geopandas import PANDAS_GE_20, read_dataframe, write_dataframe
Expand Down Expand Up @@ -1803,23 +1804,29 @@ def test_write_geometry_z_types_auto(


@pytest.mark.parametrize(
"on_invalid, message",
"on_invalid, message, expected_wkt",
[
(
"warn",
"Invalid WKB: geometry is returned as None. IllegalArgumentException: "
"Invalid number of points in LinearRing found 2 - must be 0 or >=",
"Points of LinearRing do not form a closed linestring",
None,
),
("raise", "Invalid number of points in LinearRing found 2 - must be 0 or >="),
("ignore", None),
("raise", "Points of LinearRing do not form a closed linestring", None),
("ignore", None, None),
("fix", None, "POLYGON ((0 0, 0 1, 0 0))"),
],
)
def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message):
@pytest.mark.filterwarnings("ignore:Non closed ring detected:RuntimeWarning")
def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message, expected_wkt):
if on_invalid == "fix" and not SHAPELY_GE_21:
pytest.skip("on_invalid=fix not available for Shapely < 2.1")

if on_invalid == "raise":
handler = pytest.raises(shapely.errors.GEOSException, match=message)
elif on_invalid == "warn":
handler = pytest.warns(match=message)
elif on_invalid == "ignore":
elif on_invalid in ("fix", "ignore"):
handler = contextlib.nullcontext()
else:
raise ValueError(f"unknown value for on_invalid: {on_invalid}")
Expand All @@ -1833,7 +1840,7 @@ def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message):
"properties": {},
"geometry": {
"type": "Polygon",
"coordinates": [ [ [0, 0], [0, 0] ] ]
"coordinates": [ [ [0, 0], [0, 1] ] ]
}
}
]
Expand All @@ -1849,7 +1856,10 @@ def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message):
use_arrow=use_arrow,
on_invalid=on_invalid,
)
df.geometry.isnull().all()
if expected_wkt is None:
assert df.geometry.iloc[0] is None
else:
assert df.geometry.iloc[0].wkt == expected_wkt


def test_read_multisurface(multisurface_file, use_arrow):
Expand Down