Skip to content

Commit d2d8bf7

Browse files
authored
Package rasterization (#25)
1 parent 609af0f commit d2d8bf7

File tree

6 files changed

+310
-261
lines changed

6 files changed

+310
-261
lines changed

pyproject.toml

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,16 +34,23 @@ dependencies = [
3434
]
3535
dynamic=["version"]
3636
[project.optional-dependencies]
37+
dask = ["dask-geopandas"]
3738
rasterize = [
38-
"dask-geopandas",
3939
"odc-geo",
4040
"rasterio",
41-
"exactextract",
4241
"rioxarray",
4342
]
44-
io = [
45-
"xarray[io]"
43+
exactextract = ["exactextract"]
44+
test = [
45+
"geodatasets",
46+
"dask-geopandas",
47+
"odc-geo",
48+
"rasterio",
49+
"rioxarray",
50+
"exactextract",
51+
"netCDF4"
4652
]
53+
4754
[tool.hatch]
4855
version.source = "vcs"
4956

@@ -64,19 +71,14 @@ dependencies = [
6471
"coverage",
6572
"pytest",
6673
"pytest-cov",
67-
"geodatasets",
68-
]
69-
features = [
70-
"rasterize",
71-
"io",
7274
]
75+
features = ["test"]
7376

7477
[[tool.hatch.envs.test.matrix]]
7578
python = ["3.10", "3.13"]
7679

7780
[tool.hatch.envs.test.scripts]
7881
run-coverage = "pytest --cov-config=pyproject.toml --cov=pkg --cov-report xml --cov=src --junitxml=junit.xml -o junit_family=legacy"
79-
run-coverage-gpu = "pip install cupy-cuda12x && pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov-report xml --cov=src --junitxml=junit.xml -o junit_family=legacy"
8082
run-coverage-html = "pytest --cov-config=pyproject.toml --cov=pkg --cov-report html --cov=src"
8183
run-pytest = "run-coverage --no-cov"
8284
run-verbose = "run-coverage --verbose"
@@ -126,6 +128,6 @@ ignore-words-list = "nd,nax,coo"
126128
skip = "*.html"
127129

128130
[tool.mypy]
129-
python_version = "3.11"
131+
python_version = "3.12"
130132
ignore_missing_imports = true
131133
namespace_packages = false

src/rasterix/rasterize/__init__.py

Whitespace-only changes.

src/rasterix/rasterize/exact.py

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
# exactexact wrappers
2+
from __future__ import annotations
3+
4+
from typing import TYPE_CHECKING, Any, Literal
5+
6+
import geopandas as gpd
7+
import numpy as np
8+
import xarray as xr
9+
from exactextract import exact_extract
10+
from exactextract.raster import NumPyRasterSource
11+
12+
from .utils import geometries_as_dask_array, is_in_memory
13+
14+
if TYPE_CHECKING:
15+
import dask.array
16+
import dask_geopandas
17+
18+
MIN_CHUNK_SIZE = 2 # exactextract cannot handle arrays of size 1.
19+
20+
__all__ = [
21+
"coverage",
22+
]
23+
24+
25+
def get_dtype(coverage_weight, geometries):
26+
if coverage_weight.lower() == "fraction":
27+
dtype = "float64"
28+
elif coverage_weight.lower() == "none":
29+
dtype = np.min_scalar_type(len(geometries))
30+
else:
31+
raise NotImplementedError
32+
return dtype
33+
34+
35+
def np_coverage(
36+
x: np.ndarray,
37+
y: np.ndarray,
38+
*,
39+
geometries: gpd.GeoDataFrame,
40+
coverage_weight: Literal["fraction", "none"] = "fraction",
41+
) -> np.ndarray[Any, Any]:
42+
"""
43+
Parameters
44+
----------
45+
46+
"""
47+
assert x.ndim == 1
48+
assert y.ndim == 1
49+
50+
dtype = get_dtype(coverage_weight, geometries)
51+
52+
xsize = x.size
53+
ysize = y.size
54+
55+
# we need the top left corner, and the bottom right corner
56+
dx0 = (x[1] - x[0]) / 2
57+
dx1 = (x[-1] - x[-2]) / 2
58+
dy0 = np.abs(y[1] - y[0]) / 2
59+
dy1 = np.abs(y[-1] - y[-2]) / 2
60+
if y[0] > y[-1]:
61+
dy0, dy1 = dy1, dy0
62+
63+
shape = (ysize, xsize)
64+
raster = NumPyRasterSource(
65+
np.broadcast_to([1], shape),
66+
xmin=x.min() - dx0,
67+
xmax=x.max() + dx1,
68+
ymin=y.min() - dy0,
69+
ymax=y.max() + dy1,
70+
srs_wkt=geometries.crs.to_wkt(),
71+
)
72+
result = exact_extract(
73+
rast=raster,
74+
vec=geometries,
75+
ops=["cell_id", f"coverage(coverage_weight={coverage_weight})"],
76+
output="pandas",
77+
# max_cells_in_memory=2*x.size * y.size
78+
)
79+
out = np.zeros((len(geometries), *shape), dtype=dtype)
80+
# TODO: vectorized assignment?
81+
for i in range(len(geometries)):
82+
res = result.loc[i]
83+
# indices = np.unravel_index(res.cell_id, shape=shape)
84+
# out[(i, *indices)] = offset + i + 1 # 0 is the fill value
85+
out[i, ...].flat[res.cell_id] = res.coverage
86+
return out
87+
88+
89+
def coverage_np_dask_wrapper(
90+
x: np.ndarray, y: np.ndarray, geom_array: np.ndarray, coverage_weight, crs
91+
) -> np.ndarray:
92+
return np_coverage(
93+
x=x,
94+
y=y,
95+
geometries=gpd.GeoDataFrame(geometry=geom_array, crs=crs),
96+
coverage_weight=coverage_weight,
97+
)
98+
99+
100+
def dask_coverage(
101+
x: dask.array.Array,
102+
y: dask.array.Array,
103+
*,
104+
geom_array: dask.array.Array,
105+
coverage_weight: Literal["fraction", "none"] = "fraction",
106+
crs: Any,
107+
) -> dask.array.Array:
108+
import dask.array
109+
110+
if any(c == 1 for c in x.chunks) or any(c == 1 for c in y.chunks):
111+
raise ValueError("exactextract does not support a chunksize of 1. Please rechunk to avoid this")
112+
113+
return dask.array.blockwise(
114+
coverage_np_dask_wrapper,
115+
"gji",
116+
x,
117+
"i",
118+
y,
119+
"j",
120+
geom_array,
121+
"g",
122+
crs=crs,
123+
coverage_weight=coverage_weight,
124+
dtype=get_dtype(coverage_weight, geom_array),
125+
)
126+
127+
128+
def coverage(
129+
obj: xr.Dataset | xr.DataArray,
130+
geometries: gpd.GeoDataFrame | dask_geopandas.GeoDataFrame,
131+
*,
132+
xdim="x",
133+
ydim="y",
134+
coverage_weight="fraction",
135+
) -> xr.DataArray:
136+
"""
137+
Returns "coverage" fractions for each pixel for each geometry calculated using exactextract.
138+
139+
Parameters
140+
----------
141+
obj : xr.DataArray | xr.Dataset
142+
Xarray object used to extract the grid
143+
geometries: GeoDataFrame | DaskGeoDataFrame
144+
Geometries used for to calculate coverage
145+
xdim: str
146+
Name of the "x" dimension on ``obj``.
147+
ydim: str
148+
Name of the "y" dimension on ``obj``.
149+
coverage_weight: {"fraction", "none", "area_cartesian", "area_spherical_m2", "area_spherical_km2"}
150+
Weights to estimate, passed directly to exactextract.
151+
152+
Returns
153+
-------
154+
DataArray
155+
3D dataarray with coverage fraction. The additional dimension is "geometry".
156+
"""
157+
if "spatial_ref" not in obj.coords:
158+
raise ValueError("Xarray object must contain the `spatial_ref` variable.")
159+
# FIXME: assert obj.crs == geometries.crs
160+
if is_in_memory(obj=obj, geometries=geometries):
161+
out = np_coverage(
162+
x=obj[xdim].data,
163+
y=obj[ydim].data,
164+
geometries=geometries,
165+
coverage_weight=coverage_weight,
166+
)
167+
geom_array = geometries.to_numpy().squeeze(axis=1)
168+
else:
169+
from dask.array import from_array
170+
171+
geom_dask_array = geometries_as_dask_array(geometries)
172+
out = dask_coverage(
173+
x=from_array(obj[xdim].data, chunks=obj.chunksizes.get(xdim, -1)),
174+
y=from_array(obj[ydim].data, chunks=obj.chunksizes.get(ydim, -1)),
175+
geom_array=geom_dask_array,
176+
crs=geometries.crs,
177+
coverage_weight=coverage_weight,
178+
)
179+
if isinstance(geometries, gpd.GeoDataFrame):
180+
geom_array = geometries.to_numpy().squeeze(axis=1)
181+
else:
182+
geom_array = geom_dask_array
183+
184+
coverage = xr.DataArray(
185+
dims=("geometry", ydim, xdim),
186+
data=out,
187+
coords=xr.Coordinates(
188+
coords={
189+
xdim: obj.coords[xdim],
190+
ydim: obj.coords[ydim],
191+
"spatial_ref": obj.spatial_ref,
192+
"geometry": geom_array,
193+
},
194+
indexes={xdim: obj.xindexes[xdim], ydim: obj.xindexes[ydim]},
195+
),
196+
)
197+
return coverage

0 commit comments

Comments
 (0)