Skip to content

[Proposal] Allow to filter for xarray coordinates in EDR data sets #2006

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions pygeoapi/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1282,6 +1282,17 @@ def describe_collections(api: API, request: APIRequest,
}
}

filter_dims = p.get_dims()
if filter_dims:
collection['filter_dims'] = {}
for key, value in filter_dims.items():
collection['filter_dims'][key] = {
'id': key,
'type': 'Dimension',
'name': value['title'],
'values': value['values']
}

for qt in p.get_query_types():
data_query = {
'link': {
Expand Down Expand Up @@ -1500,6 +1511,38 @@ def validate_bbox(value=None) -> list:
return bbox


def validate_filter_dims(query_string=None) -> dict:
    """
    Helper function to validate the dimension filter parameter

    :param query_string: `str` of comma-separated `dimension:value` pairs
                         (e.g. `dim1:value1,dim2:value2`)

    :returns: `dict` of dimension name -> value, both as whitespace-stripped
              strings (empty `dict` if `query_string` is `None`)

    :raises ValueError: if `query_string` is not a string, a pair has no
                        colon, a key or value is empty, or a key is repeated
    """

    if query_string is None:
        LOGGER.debug('dims is empty')
        return {}

    if not isinstance(query_string, str):
        msg = 'dimension query must be string'
        LOGGER.debug(msg)
        raise ValueError(msg)

    checked = {}
    for pair in query_string.split(','):
        # Split on the first colon only so that values may themselves
        # contain colons (e.g. ISO 8601 timestamps)
        key, colon, value = pair.partition(':')
        if not colon:
            msg = "filter dimension and value must be separated by a colon ':' "  # noqa
            LOGGER.debug(msg)
            raise ValueError(msg)

        key = key.strip()
        value = value.strip()
        if not key or not value:
            msg = f"Empty key or value in pair: '{pair}'"
            LOGGER.debug(msg)
            raise ValueError(msg)

        # A dimension can only be pinned to a single value per request
        if key in checked:
            msg = f"Duplicate key found: '{key}'"
            LOGGER.debug(msg)
            raise ValueError(msg)

        checked[key] = value

    return checked


def validate_datetime(resource_def, datetime_=None) -> str:
"""
Helper function to validate temporal parameter
Expand Down
8 changes: 7 additions & 1 deletion pygeoapi/api/environmental_data_retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
)

from . import (APIRequest, API, F_COVERAGEJSON, F_HTML, F_JSON, F_JSONLD,
validate_datetime, validate_bbox)
validate_datetime, validate_bbox, validate_filter_dims)

LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -298,6 +298,11 @@ def get_collection_edr_query(api: API, request: APIRequest,
if isinstance(parameternames, str):
parameternames = parameternames.split(',')

LOGGER.debug('Processing dims parameter')
dims = request.params.get('dims')
if dims:
dims = validate_filter_dims(dims)

bbox = None
if query_type in ['cube', 'locations']:
LOGGER.debug('Processing cube bbox')
Expand Down Expand Up @@ -364,6 +369,7 @@ def get_collection_edr_query(api: API, request: APIRequest,
format_=request.format,
datetime_=datetime_,
select_properties=parameternames,
dims=dims,
wkt=wkt,
z=z,
bbox=bbox,
Expand Down
15 changes: 15 additions & 0 deletions pygeoapi/openapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,21 @@ def get_oas_30_parameters(cfg: dict, locale_: str):
'type': 'string'
}
},
'dims': {
'name': 'dims',
'in': 'query',
'description': 'Allows to select dims from multi dimensional EDR',
'required': False,
'style': 'form',
'explode': False,
'schema': {
'type': 'string',
'items': {
'type': 'string'
},
'format': 'dim1:value1,dim2:value2'
}
},
'bbox': {
'name': 'bbox',
'in': 'query',
Expand Down
1 change: 1 addition & 0 deletions pygeoapi/provider/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def __init__(self, provider_def):
self.properties = provider_def.get('properties', [])
self.file_types = provider_def.get('file_types', [])
self._fields = {}
self._dims = {}
self.filename = None

# for coverage providers
Expand Down
1 change: 1 addition & 0 deletions pygeoapi/provider/base_edr.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def query(self, **kwargs):
:param wkt: `shapely.geometry` WKT geometry
:param datetime_: temporal (datestamp or extent)
:param select_properties: list of parameters
:param dims: dims to select data from
:param z: vertical level(s)
:param format_: data format of output
:param bbox: bbox geometry (for cube queries)
Expand Down
23 changes: 23 additions & 0 deletions pygeoapi/provider/xarray_.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ def __init__(self, provider_def):
self.axes = self._coverage_properties['axes']

self.get_fields()
self.get_dims()
except Exception as err:
LOGGER.warning(err)
raise ProviderConnectionError(err)
Expand All @@ -123,6 +124,28 @@ def get_fields(self):

return self._fields

def get_dims(self):
    """
    Get the filterable dimensions of the dataset, i.e. every coordinate
    that is not the configured time, x or y field

    The result is stored on `self._dims` and returned as-is on later calls
    once it is non-empty.

    :returns: `dict` keyed by coordinate name; each entry holds
              `type` (Python type of the first coordinate value),
              `title` (the `long_name` attribute, if any),
              `x-ogc-unit` (the `units` attribute, if any) and
              `values` (`list` of the coordinate values)
    """

    if self._dims:
        return self._dims

    reserved = (self.time_field, self.x_field, self.y_field)
    for key, value in self._data.coords.items():
        if key in reserved:
            continue

        # Convert once; tolist() yields native Python values
        values = value.values.tolist()
        if not isinstance(values, list):
            # 0-d (scalar) coordinates come back as a bare scalar
            values = [values]
        if not values:
            # Nothing to filter on, and no first element to take a type from
            LOGGER.debug(f'Skipping empty coordinate: {key}')
            continue

        value_type = type(values[0])
        LOGGER.debug('Adding filterable dim')
        LOGGER.debug(f'key: {key} with type: {value_type}')
        self._dims[key] = {
            'type': value_type,
            'title': value.attrs.get('long_name'),
            'x-ogc-unit': value.attrs.get('units'),
            'values': values
        }

    return self._dims

def query(self, properties=[], subsets={}, bbox=[], bbox_crs=4326,
datetime_=None, format_='json', **kwargs):
"""
Expand Down
57 changes: 56 additions & 1 deletion pygeoapi/provider/xarray_edr.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@

import numpy as np

from pygeoapi.provider.base import ProviderNoDataError, ProviderQueryError
from pygeoapi.provider.base import (ProviderNoDataError, ProviderQueryError,
ProviderInvalidQueryError)
from pygeoapi.provider.base_edr import BaseEDRProvider
from pygeoapi.provider.xarray_ import (
_to_datetime_string,
Expand Down Expand Up @@ -66,6 +67,7 @@ def position(self, **kwargs):
:param wkt: `shapely.geometry` WKT geometry
:param datetime_: temporal (datestamp or extent)
:param select_properties: list of parameters
:param dims: dict of dimensions to filter
:param z: vertical level(s)
:param format_: data format of output

Expand Down Expand Up @@ -96,6 +98,8 @@ def position(self, **kwargs):
LOGGER.debug('Processing parameter-name')
select_properties = kwargs.get('select_properties')

dims = kwargs.get('dims')

# example of fetching instance passed
# TODO: apply accordingly
instance = kwargs.get('instance')
Expand All @@ -114,6 +118,30 @@ def position(self, **kwargs):
else:
data = self._data

if dims:
string_query = {}
if isinstance(dims, dict):
for coord, level in dims.items():
if coord in self._dims:
if self._dims[coord]['type'](level) in self._dims[coord]['values']: # noqa
if self._dims[coord]['type'] == str:
string_query[coord] = self._dims[coord]['type'](level) # noqa
else:
query_params[coord] = self._dims[coord]['type'](level) # noqa
else:
raise ProviderInvalidQueryError(
user_msg=(
f"Invalid Value '{level}' for Dimension Parameter '{coord}'. " # noqa
f"Valid Values are '{self._dims[coord]['values']}'" # noqa
)
)

data = data.sel(string_query)
else:
raise ProviderInvalidQueryError(user_msg=f"""Invalid Dimension Parameter '{coord}'""") # noqa

LOGGER.debug(query_params)

if self.time_field in query_params:
remaining_query = {
key: val for key, val in query_params.items()
Expand Down Expand Up @@ -150,6 +178,7 @@ def position(self, **kwargs):
bbox = wkt.bounds
out_meta = {
'bbox': [bbox[0], bbox[1], bbox[2], bbox[3]],
'dims': dims,
"time": time,
"driver": "xarray",
"height": height,
Expand Down Expand Up @@ -203,13 +232,38 @@ def cube(self, **kwargs):
if datetime_ is not None:
query_params[self.time_field] = self._make_datetime(datetime_)

dims = kwargs.get('dims')

LOGGER.debug(f'query parameters: {query_params}')
try:
if select_properties:
self._fields = {k: v for k, v in self._fields.items() if k in select_properties} # noqa
data = self._data[[*select_properties]]
else:
data = self._data

if dims:
string_query = {}
if isinstance(dims, dict):
for coord, level in dims.items():
if coord in self._dims:
if self._dims[coord]['type'](level) in self._dims[coord]['values']: # noqa
if self._dims[coord]['type'] == str:
string_query[coord] = self._dims[coord]['type'](level) # noqa
else:
query_params[coord] = self._dims[coord]['type'](level) # noqa
else:
raise ProviderInvalidQueryError(
user_msg=(
f"Invalid Value '{level}' for Dimension Parameter '{coord}'. " # noqa
f"Valid Values are '{self._dims[coord]['values']}'" # noqa
)
)

data = data.sel(string_query)
else:
raise ProviderInvalidQueryError(user_msg=f"""Invalid Dimension Parameter '{coord}'""") # noqa

data = data.sel(query_params)
data = _convert_float32_to_float64(data)
except KeyError:
Expand All @@ -226,6 +280,7 @@ def cube(self, **kwargs):
data.coords[self.x_field].values[-1],
data.coords[self.y_field].values[-1]
],
'dims': dims,
"time": time,
"driver": "xarray",
"height": height,
Expand Down
39 changes: 36 additions & 3 deletions tests/api/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@

from pygeoapi.api import (
API, APIRequest, FORMAT_TYPES, F_HTML, F_JSON, F_JSONLD, F_GZIP,
__version__, validate_bbox, validate_datetime, evaluate_limit,
validate_subset, landing_page, openapi_, conformance, describe_collections,
get_collection_schema,
__version__, validate_bbox, validate_filter_dims, validate_datetime,
evaluate_limit, validate_subset, landing_page, openapi_, conformance,
describe_collections, get_collection_schema,
)
from pygeoapi.util import yaml_load, get_api_rules, get_base_url

Expand Down Expand Up @@ -778,6 +778,39 @@ def test_validate_bbox():
validate_bbox('1,2,6,4,5,3')


def test_validate_filter_dims():
    # NOTE: assertions on `error` must come after the `with` block; anything
    # placed after the raising call inside the block is never executed.

    # Non-string input is rejected
    with pytest.raises(ValueError) as error:
        validate_filter_dims(123)
    assert error.type == ValueError
    assert error.match('dimension query must be string')

    # Single well-formed pair
    assert validate_filter_dims('key1:val1') == {'key1': 'val1'}

    # Missing colon separator
    with pytest.raises(ValueError) as error:
        validate_filter_dims('key1val1')
    assert error.type == ValueError
    assert error.match("filter dimension and value must be separated by a colon ':' ")  # noqa

    # Multiple well-formed pairs
    assert validate_filter_dims('key1:val1,key2:val2') == {'key1': 'val1',
                                                           'key2': 'val2'}

    # Surrounding whitespace is stripped; only the first colon separates
    assert validate_filter_dims(' key1 : val1 ') == {'key1': 'val1'}
    assert validate_filter_dims('time:00:30') == {'time': '00:30'}

    # Same dimension given twice
    with pytest.raises(ValueError) as error:
        validate_filter_dims('key1:val1,key1:val2')
    assert error.match("""Duplicate key found: 'key1'""")

    # Empty key
    with pytest.raises(ValueError) as error:
        validate_filter_dims(':val1,key1:val2')
    assert error.match("Empty key or value in pair: ':val1'")

    # Empty value
    with pytest.raises(ValueError) as error:
        validate_filter_dims('key1:,key1:val2')
    assert error.match("Empty key or value in pair: 'key1:'")

    # Empty string has no colon at all
    with pytest.raises(ValueError) as error:
        validate_filter_dims('')
    assert error.match("filter dimension and value must be separated by a colon ':' ")  # noqa

    # Absent parameter yields an empty filter
    assert validate_filter_dims(None) == {}


def test_validate_datetime():
config = yaml_load('''
temporal:
Expand Down