|
| 1 | +# exactexact wrappers |
| 2 | +from __future__ import annotations |
| 3 | + |
| 4 | +from typing import TYPE_CHECKING, Any, Literal |
| 5 | + |
| 6 | +import geopandas as gpd |
| 7 | +import numpy as np |
| 8 | +import xarray as xr |
| 9 | +from exactextract import exact_extract |
| 10 | +from exactextract.raster import NumPyRasterSource |
| 11 | + |
| 12 | +from .utils import geometries_as_dask_array, is_in_memory |
| 13 | + |
| 14 | +if TYPE_CHECKING: |
| 15 | + import dask.array |
| 16 | + import dask_geopandas |
| 17 | + |
| 18 | +MIN_CHUNK_SIZE = 2 # exactextract cannot handle arrays of size 1. |
| 19 | + |
| 20 | +__all__ = [ |
| 21 | + "coverage", |
| 22 | +] |
| 23 | + |
| 24 | + |
| 25 | +def get_dtype(coverage_weight, geometries): |
| 26 | + if coverage_weight.lower() == "fraction": |
| 27 | + dtype = "float64" |
| 28 | + elif coverage_weight.lower() == "none": |
| 29 | + dtype = np.min_scalar_type(len(geometries)) |
| 30 | + else: |
| 31 | + raise NotImplementedError |
| 32 | + return dtype |
| 33 | + |
| 34 | + |
| 35 | +def np_coverage( |
| 36 | + x: np.ndarray, |
| 37 | + y: np.ndarray, |
| 38 | + *, |
| 39 | + geometries: gpd.GeoDataFrame, |
| 40 | + coverage_weight: Literal["fraction", "none"] = "fraction", |
| 41 | +) -> np.ndarray[Any, Any]: |
| 42 | + """ |
| 43 | + Parameters |
| 44 | + ---------- |
| 45 | +
|
| 46 | + """ |
| 47 | + assert x.ndim == 1 |
| 48 | + assert y.ndim == 1 |
| 49 | + |
| 50 | + dtype = get_dtype(coverage_weight, geometries) |
| 51 | + |
| 52 | + xsize = x.size |
| 53 | + ysize = y.size |
| 54 | + |
| 55 | + # we need the top left corner, and the bottom right corner |
| 56 | + dx0 = (x[1] - x[0]) / 2 |
| 57 | + dx1 = (x[-1] - x[-2]) / 2 |
| 58 | + dy0 = np.abs(y[1] - y[0]) / 2 |
| 59 | + dy1 = np.abs(y[-1] - y[-2]) / 2 |
| 60 | + if y[0] > y[-1]: |
| 61 | + dy0, dy1 = dy1, dy0 |
| 62 | + |
| 63 | + shape = (ysize, xsize) |
| 64 | + raster = NumPyRasterSource( |
| 65 | + np.broadcast_to([1], shape), |
| 66 | + xmin=x.min() - dx0, |
| 67 | + xmax=x.max() + dx1, |
| 68 | + ymin=y.min() - dy0, |
| 69 | + ymax=y.max() + dy1, |
| 70 | + srs_wkt=geometries.crs.to_wkt(), |
| 71 | + ) |
| 72 | + result = exact_extract( |
| 73 | + rast=raster, |
| 74 | + vec=geometries, |
| 75 | + ops=["cell_id", f"coverage(coverage_weight={coverage_weight})"], |
| 76 | + output="pandas", |
| 77 | + # max_cells_in_memory=2*x.size * y.size |
| 78 | + ) |
| 79 | + out = np.zeros((len(geometries), *shape), dtype=dtype) |
| 80 | + # TODO: vectorized assignment? |
| 81 | + for i in range(len(geometries)): |
| 82 | + res = result.loc[i] |
| 83 | + # indices = np.unravel_index(res.cell_id, shape=shape) |
| 84 | + # out[(i, *indices)] = offset + i + 1 # 0 is the fill value |
| 85 | + out[i, ...].flat[res.cell_id] = res.coverage |
| 86 | + return out |
| 87 | + |
| 88 | + |
| 89 | +def coverage_np_dask_wrapper( |
| 90 | + x: np.ndarray, y: np.ndarray, geom_array: np.ndarray, coverage_weight, crs |
| 91 | +) -> np.ndarray: |
| 92 | + return np_coverage( |
| 93 | + x=x, |
| 94 | + y=y, |
| 95 | + geometries=gpd.GeoDataFrame(geometry=geom_array, crs=crs), |
| 96 | + coverage_weight=coverage_weight, |
| 97 | + ) |
| 98 | + |
| 99 | + |
| 100 | +def dask_coverage( |
| 101 | + x: dask.array.Array, |
| 102 | + y: dask.array.Array, |
| 103 | + *, |
| 104 | + geom_array: dask.array.Array, |
| 105 | + coverage_weight: Literal["fraction", "none"] = "fraction", |
| 106 | + crs: Any, |
| 107 | +) -> dask.array.Array: |
| 108 | + import dask.array |
| 109 | + |
| 110 | + if any(c == 1 for c in x.chunks) or any(c == 1 for c in y.chunks): |
| 111 | + raise ValueError("exactextract does not support a chunksize of 1. Please rechunk to avoid this") |
| 112 | + |
| 113 | + return dask.array.blockwise( |
| 114 | + coverage_np_dask_wrapper, |
| 115 | + "gji", |
| 116 | + x, |
| 117 | + "i", |
| 118 | + y, |
| 119 | + "j", |
| 120 | + geom_array, |
| 121 | + "g", |
| 122 | + crs=crs, |
| 123 | + coverage_weight=coverage_weight, |
| 124 | + dtype=get_dtype(coverage_weight, geom_array), |
| 125 | + ) |
| 126 | + |
| 127 | + |
| 128 | +def coverage( |
| 129 | + obj: xr.Dataset | xr.DataArray, |
| 130 | + geometries: gpd.GeoDataFrame | dask_geopandas.GeoDataFrame, |
| 131 | + *, |
| 132 | + xdim="x", |
| 133 | + ydim="y", |
| 134 | + coverage_weight="fraction", |
| 135 | +) -> xr.DataArray: |
| 136 | + """ |
| 137 | + Returns "coverage" fractions for each pixel for each geometry calculated using exactextract. |
| 138 | +
|
| 139 | + Parameters |
| 140 | + ---------- |
| 141 | + obj : xr.DataArray | xr.Dataset |
| 142 | + Xarray object used to extract the grid |
| 143 | + geometries: GeoDataFrame | DaskGeoDataFrame |
| 144 | + Geometries used for to calculate coverage |
| 145 | + xdim: str |
| 146 | + Name of the "x" dimension on ``obj``. |
| 147 | + ydim: str |
| 148 | + Name of the "y" dimension on ``obj``. |
| 149 | + coverage_weight: {"fraction", "none", "area_cartesian", "area_spherical_m2", "area_spherical_km2"} |
| 150 | + Weights to estimate, passed directly to exactextract. |
| 151 | +
|
| 152 | + Returns |
| 153 | + ------- |
| 154 | + DataArray |
| 155 | + 3D dataarray with coverage fraction. The additional dimension is "geometry". |
| 156 | + """ |
| 157 | + if "spatial_ref" not in obj.coords: |
| 158 | + raise ValueError("Xarray object must contain the `spatial_ref` variable.") |
| 159 | + # FIXME: assert obj.crs == geometries.crs |
| 160 | + if is_in_memory(obj=obj, geometries=geometries): |
| 161 | + out = np_coverage( |
| 162 | + x=obj[xdim].data, |
| 163 | + y=obj[ydim].data, |
| 164 | + geometries=geometries, |
| 165 | + coverage_weight=coverage_weight, |
| 166 | + ) |
| 167 | + geom_array = geometries.to_numpy().squeeze(axis=1) |
| 168 | + else: |
| 169 | + from dask.array import from_array |
| 170 | + |
| 171 | + geom_dask_array = geometries_as_dask_array(geometries) |
| 172 | + out = dask_coverage( |
| 173 | + x=from_array(obj[xdim].data, chunks=obj.chunksizes.get(xdim, -1)), |
| 174 | + y=from_array(obj[ydim].data, chunks=obj.chunksizes.get(ydim, -1)), |
| 175 | + geom_array=geom_dask_array, |
| 176 | + crs=geometries.crs, |
| 177 | + coverage_weight=coverage_weight, |
| 178 | + ) |
| 179 | + if isinstance(geometries, gpd.GeoDataFrame): |
| 180 | + geom_array = geometries.to_numpy().squeeze(axis=1) |
| 181 | + else: |
| 182 | + geom_array = geom_dask_array |
| 183 | + |
| 184 | + coverage = xr.DataArray( |
| 185 | + dims=("geometry", ydim, xdim), |
| 186 | + data=out, |
| 187 | + coords=xr.Coordinates( |
| 188 | + coords={ |
| 189 | + xdim: obj.coords[xdim], |
| 190 | + ydim: obj.coords[ydim], |
| 191 | + "spatial_ref": obj.spatial_ref, |
| 192 | + "geometry": geom_array, |
| 193 | + }, |
| 194 | + indexes={xdim: obj.xindexes[xdim], ydim: obj.xindexes[ydim]}, |
| 195 | + ), |
| 196 | + ) |
| 197 | + return coverage |
0 commit comments