Skip to content

Commit 86c5884

Browse files
authored
feat: change minimal duckdb version (#153)
* feat: bump duckdb version
* chore: change readme
* chore: change string to constant
* ci: removed conftest.py from build process
* chore: replace unary_union calls and silence warnings
* feat: refactor duckdb parquet operations
* chore: remove loading parquet extension
* fix: change geometry reading for empty relations
* chore: change lock hash
* chore: add explicit geometry casting
* chore: simplify tags loading with new duckdb engine
1 parent fea4cfd commit 86c5884

File tree

12 files changed: 158 additions and 121 deletions

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Changed
11+
12+
- Bumped minimum required DuckDB version to `1.1.0`
13+
- Refactored geoparquet operations for compatibility with new DuckDB version
14+
- Excluded `conftest.py` file from the final library build
15+
- Replaced `unary_union` calls with `union_all()` on all GeoDataFrames
16+
- Silenced `pooch` library warnings regarding empty SHA hash
17+
1018
## [0.10.0] - 2024-09-23
1119

1220
### Changed

README.md

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ QuackOSM supports **Python >= 3.9**
7070

7171
Required:
7272

73-
- `duckdb (>=0.10.2, <1.1.0)`: For all DuckDB operations on PBF files
73+
- `duckdb (>=1.1.0)`: For all DuckDB operations on PBF files
7474

7575
- `pyarrow (>=16.0.0)`: For parquet files wrangling
7676

@@ -148,9 +148,7 @@ way/993121275 {'building': 'yes', 'name': ... POLYGON ((7.43214 43.7481...
148148
```python
149149
>>> import duckdb
150150
>>> duckdb.load_extension('spatial')
151-
>>> duckdb.read_parquet(str(gpq_path)).project(
152-
... "* REPLACE (ST_GeomFromWKB(geometry) AS geometry)"
153-
... ).order("feature_id")
151+
>>> duckdb.read_parquet(str(gpq_path)).order("feature_id")
154152
┌──────────────────┬──────────────────────┬──────────────────────────────────────────────┐
155153
│ feature_id │ tags │ geometry │
156154
│ varchar │ map(varchar, varch… │ geometry │
@@ -270,9 +268,7 @@ way/998561139 {'barrier': 'bollard', 'bicyc... LINESTRING (12.45828 41.9...
270268
```python
271269
>>> import duckdb
272270
>>> duckdb.load_extension('spatial')
273-
>>> duckdb.read_parquet(str(gpq_path)).project(
274-
... "* REPLACE (ST_GeomFromWKB(geometry) AS geometry)"
275-
... ).order("feature_id")
271+
>>> duckdb.read_parquet(str(gpq_path)).order("feature_id")
276272
┌──────────────────┬──────────────────────┬──────────────────────────────────────────────┐
277273
│ feature_id │ tags │ geometry │
278274
│ varchar │ map(varchar, varch… │ geometry │
@@ -388,9 +384,7 @@ relation/3256168 {'building': 'yes', 'type': ... POLYGON ((12.46061 41.907...
388384
```python
389385
>>> import duckdb
390386
>>> duckdb.load_extension('spatial')
391-
>>> duckdb.read_parquet(str(gpq_path)).project(
392-
... "* REPLACE (ST_GeomFromWKB(geometry) AS geometry)"
393-
... ).order("feature_id")
387+
>>> duckdb.read_parquet(str(gpq_path)).order("feature_id")
394388
┌──────────────────┬────────────────────────────┬──────────────────────────────┐
395389
│ feature_id │ tags │ geometry │
396390
│ varchar │ map(varchar, varchar) │ geometry │

pdm.lock

Lines changed: 35 additions & 35 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ dependencies = [
77
"geopandas>=0.6",
88
"shapely>=2",
99
"pyarrow>=16.0.0",
10-
"duckdb>=0.10.2,<1.1.0",
10+
"duckdb>=1.1.0",
1111
"geoarrow-pyarrow>=0.1.2",
1212
"geoarrow-pandas>=0.1.1",
1313
"typeguard>=3.0.0",
@@ -109,6 +109,9 @@ cli-dev = ["ipywidgets", "folium", "matplotlib>=3.2.0", "mapclassify"]
109109
[tool.pdm.scripts]
110110
post_install = "pre-commit install"
111111

112+
[tool.pdm.build]
113+
excludes = ["quackosm/conftest.py"]
114+
112115
[tool.black]
113116
line-length = 100
114117
target-version = ["py39", "py310", "py311", "py312"]

quackosm/cli.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def convert(self, value, param=None, ctx=None): # type: ignore
9696
import geopandas as gpd
9797

9898
gdf = gpd.read_file(value)
99-
return gdf.unary_union
99+
return gdf.union_all()
100100
except Exception:
101101
raise typer.BadParameter("Cannot parse provided geo file") from None
102102

@@ -140,7 +140,7 @@ def convert(self, value, param=None, ctx=None): # type: ignore
140140
geometries.append(
141141
box(minx=bounds["w"], miny=bounds["s"], maxx=bounds["e"], maxy=bounds["n"])
142142
)
143-
return gpd.GeoSeries(geometries).unary_union
143+
return gpd.GeoSeries(geometries).union_all()
144144
except Exception:
145145
raise typer.BadParameter(f"Cannot parse provided Geohash value: {geohash}") from None
146146

@@ -165,7 +165,7 @@ def convert(self, value, param=None, ctx=None): # type: ignore
165165
geometries.append(
166166
Polygon([coords[::-1] for coords in h3.cell_to_boundary(h3_cell.strip())])
167167
)
168-
return gpd.GeoSeries(geometries).unary_union
168+
return gpd.GeoSeries(geometries).union_all()
169169
except Exception as ex:
170170
raise typer.BadParameter(f"Cannot parse provided H3 values: {value}") from ex
171171

@@ -190,7 +190,7 @@ def convert(self, value, param=None, ctx=None): # type: ignore
190190
geometries.append(
191191
Polygon(s2.s2_to_geo_boundary(s2_index.strip(), geo_json_conformant=True))
192192
)
193-
return gpd.GeoSeries(geometries).unary_union
193+
return gpd.GeoSeries(geometries).union_all()
194194
except Exception:
195195
raise typer.BadParameter(f"Cannot parse provided S2 value: {s2_index}") from None
196196

quackosm/conftest.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import duckdb
1010
import pandas
1111
import pytest
12+
from pooch import get_logger as get_pooch_logger
1213
from pooch import retrieve
1314

1415
from quackosm.osm_extracts.extract import OsmExtractSource
@@ -53,13 +54,15 @@ def add_pbf_files(doctest_namespace): # type: ignore
5354
shutil.copy(pbf_file_path, geofabrik_pbf_file_path)
5455

5556

56-
5757
@pytest.fixture(autouse=True, scope="session")
5858
def download_osm_extracts_indexes(): # type: ignore
5959
"""Download OSM extract indexes files to cache."""
6060
download_directory = Path("cache")
6161
download_directory.mkdir(parents=True, exist_ok=True)
6262

63+
logger = get_pooch_logger()
64+
logger.setLevel("WARNING")
65+
6366
for osm_extract in OsmExtractSource:
6467
if osm_extract == OsmExtractSource.any:
6568
continue
@@ -81,6 +84,7 @@ def install_spatial_extension(): # type: ignore
8184
"""Install duckdb spatial extension."""
8285
duckdb.install_extension("spatial")
8386

87+
8488
@pytest.fixture(autouse=True, scope="session") # type: ignore
8589
def pandas_terminal_width() -> None:
8690
"""Change pandas dataframe display options."""

0 commit comments

Comments
 (0)