Skip to content

Commit 7a01cdb

Browse files
committed
abstract repository object in OGC API
1 parent 9c19fc1 commit 7a01cdb

File tree

4 files changed

+109
-118
lines changed

4 files changed

+109
-118
lines changed

pycsw/core/repository.py

Lines changed: 62 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# Angelos Tzotsos <tzotsos@gmail.com>
66
# Ricardo Garcia Silva <ricardo.garcia.silva@gmail.com>
77
#
8-
# Copyright (c) 2024 Tom Kralidis
8+
# Copyright (c) 2025 Tom Kralidis
99
# Copyright (c) 2015 Angelos Tzotsos
1010
# Copyright (c) 2017 Ricardo Garcia Silva
1111
#
@@ -34,6 +34,7 @@
3434

3535
import inspect
3636
import logging
37+
from operator import itemgetter
3738
import os
3839
from time import sleep
3940

@@ -49,6 +50,7 @@
4950
from pycsw.core import util
5051
from pycsw.core.etree import etree
5152
from pycsw.core.etree import PARSER
53+
from pycsw.core.pygeofilter_evaluate import to_filter
5254

5355
LOGGER = logging.getLogger(__name__)
5456

@@ -409,17 +411,26 @@ def query_source(self, source):
409411
query = self.session.query(self.dataset).filter(column == source)
410412
return self._get_repo_filter(query).all()
411413

412-
def query(self, constraint, sortby=None, typenames=None,
414+
def query(self, ast=None, sortby=None, typenames=None,
413415
maxrecords=10, startposition=0):
414416
''' Query records from underlying repository '''
415417

416418
# run the raw query and get total
417-
if 'where' in constraint: # GetRecords with constraint
418-
LOGGER.debug('constraint detected')
419-
query = self.session.query(self.dataset).filter(
420-
text(constraint['where'])).params(self._create_values(constraint['values']))
421-
else: # GetRecords sans constraint
422-
LOGGER.debug('No constraint detected')
419+
if ast is not None: # GetRecords with pygeofilter AST
420+
LOGGER.debug('pygeofilter AST detected')
421+
LOGGER.debug('Transforming AST into filters')
422+
try:
423+
filters = to_filter(ast, self.dbtype, self.query_mappings)
424+
LOGGER.debug(f'Filter: {filters}')
425+
except Exception as err:
426+
msg = f'AST evaluator error: {str(err)}'
427+
LOGGER.exception(msg)
428+
raise RuntimeError(msg)
429+
430+
query = self.session.query(self.dataset).filter(filters)
431+
432+
else: # GetRecords sans pygeofilter AST
433+
LOGGER.debug('No pygeofilter AST detected')
423434
query = self.session.query(self.dataset)
424435

425436
total = self._get_repo_filter(query).count()
@@ -451,9 +462,51 @@ def query(self, constraint, sortby=None, typenames=None,
451462
query = query.order_by(sortby_column)
452463

453464
# always apply limit and offset
454-
return [str(total), self._get_repo_filter(query).limit(
465+
return [total, self._get_repo_filter(query).limit(
455466
maxrecords).offset(startposition).all()]
456467

468+
def get_facets(self, facets=[], ast=None) -> dict:
469+
"""
470+
Gets all facets for a given query
471+
472+
:returns: `dict` of facets
473+
"""
474+
475+
facets_results = {}
476+
477+
for facet in facets:
478+
LOGGER.debug(f'Running facet for {facet}')
479+
facetq = self.session.query(self.query_mappings[facet], self.func.count(facet)).group_by(facet)
480+
481+
if ast is not None:
482+
try:
483+
filters = to_filter(ast, self.dbtype, self.query_mappings)
484+
LOGGER.debug(f'Filter: {filters}')
485+
except Exception as err:
486+
msg = f'AST evaluator error: {str(err)}'
487+
LOGGER.exception(msg)
488+
raise RuntimeError(msg)
489+
490+
facetq = facetq.filter(filters)
491+
492+
LOGGER.debug('Writing facet query results')
493+
facets_results[facet] = {
494+
'type': 'terms',
495+
'property': facet,
496+
'buckets': []
497+
}
498+
499+
for fq in facetq.all():
500+
facets_results[facet]['buckets'].append({
501+
'value': fq[0],
502+
'count': fq[1]
503+
})
504+
505+
facets_results[facet]['buckets'].sort(key=itemgetter('count'), reverse=True)
506+
507+
return facets_results
508+
509+
457510
def insert(self, record, source, insert_date):
458511
''' Insert a record into the repository '''
459512

pycsw/ogc/api/records.py

Lines changed: 31 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131

3232
import json
3333
import logging
34-
from operator import itemgetter
3534
import os
3635
from urllib.parse import urlencode, quote
3736

@@ -134,8 +133,6 @@ def __init__(self, config: dict):
134133
rs_mod = __import__(rs_modname, globals(), locals(), [rs_clsname])
135134
rs_cls = getattr(rs_mod, rs_clsname)
136135

137-
print("JJJ", rs_cls)
138-
139136
try:
140137
connection_done = False
141138
max_attempts = 0
@@ -146,8 +143,6 @@ def __init__(self, config: dict):
146143
LOGGER.debug('Custom repository %s loaded (%s)', rs, self.repository.dbtype)
147144
connection_done = True
148145
except Exception as err:
149-
import traceback
150-
print(traceback.format_exc())
151146
LOGGER.debug(f'Repository not loaded retry connection {max_attempts}: {err}')
152147
max_attempts += 1
153148
except Exception as err:
@@ -572,7 +567,6 @@ def items(self, headers_, json_post_data, args, collection='metadata:main'):
572567

573568
response = {
574569
'type': 'FeatureCollection',
575-
'facets': [],
576570
'features': [],
577571
'links': []
578572
}
@@ -723,60 +717,24 @@ def items(self, headers_, json_post_data, args, collection='metadata:main'):
723717
LOGGER.debug('Detected CQL JSON; ignoring all other query predicates')
724718
query_parser = parse_cql2_json
725719

726-
LOGGER.debug(f'query parser: {query_parser}')
727-
728-
if query_parser is not None and cql_query != {}:
729-
LOGGER.debug('Parsing CQL into AST')
730-
LOGGER.debug(json_post_data)
731-
LOGGER.debug(cql_query)
732-
try:
733-
ast = query_parser(cql_query)
734-
LOGGER.debug(f'Abstract syntax tree: {ast}')
735-
except Exception as err:
736-
msg = f'CQL parsing error: {str(err)}'
737-
LOGGER.exception(msg)
738-
return self.get_exception(400, headers_, 'InvalidParameterValue', msg)
739-
740-
LOGGER.debug('Transforming AST into filters')
741-
try:
742-
filters = to_filter(ast, self.repository.dbtype, self.repository.query_mappings)
743-
LOGGER.debug(f'Filter: {filters}')
744-
except Exception as err:
745-
msg = f'CQL evaluator error: {str(err)}'
746-
LOGGER.exception(msg)
747-
return self.get_exception(400, headers_, 'InvalidParameterValue', msg)
748-
749-
query = self.repository.session.query(self.repository.dataset).filter(filters)
750-
if facets_requested:
751-
LOGGER.debug('Running facet query')
752-
facets_results = self.get_facets(filters)
753-
else:
754-
query = self.repository.session.query(self.repository.dataset)
755-
facets_results = self.get_facets()
756-
757-
if facets_requested:
758-
response['facets'] = facets_results
759-
else:
760-
response.pop('facets')
761-
762-
if 'sortby' in args:
720+
if 'sortby' in args and args['sortby'] is not None:
763721
LOGGER.debug('sortby specified')
764-
sortby = args['sortby']
722+
sortby = {
723+
'order': 'ASC',
724+
'propertyname': None
725+
}
765726

766-
if sortby is not None:
767727
LOGGER.debug('processing sortby')
768-
if sortby.startswith('-'):
769-
sortby = sortby.lstrip('-')
728+
if args['sortby'].startswith('-'):
729+
sortby['order'] = 'DESC'
730+
sortby['propertyname'] = args['sortby'].lstrip('-')
770731

771-
if sortby not in list(self.repository.query_mappings.keys()):
732+
if sortby['propertyname'] not in list(self.repository.query_mappings.keys()):
772733
msg = 'Invalid sortby property'
773734
LOGGER.exception(msg)
774735
return self.get_exception(400, headers_, 'InvalidParameterValue', msg)
775-
776-
if args['sortby'].startswith('-'):
777-
query = query.order_by(self.repository.query_mappings[sortby].desc())
778-
else:
779-
query = query.order_by(self.repository.query_mappings[sortby])
736+
else:
737+
sortby = None
780738

781739
if limit is None and 'limit' in args:
782740
limit = int(args['limit'])
@@ -794,13 +752,26 @@ def items(self, headers_, json_post_data, args, collection='metadata:main'):
794752

795753
offset = int(args.get('offset', 0))
796754

797-
LOGGER.debug(f'Query: {query}')
798-
LOGGER.debug('Querying repository')
799-
count = query.count()
800-
LOGGER.debug(f'count: {count}')
801-
LOGGER.debug(f'limit: {limit}')
802-
LOGGER.debug(f'offset: {offset}')
803-
records = query.limit(limit).offset(offset).all()
755+
if query_parser is not None and cql_query != {}:
756+
LOGGER.debug('Parsing CQL into AST')
757+
LOGGER.debug(json_post_data)
758+
LOGGER.debug(cql_query)
759+
try:
760+
ast = query_parser(cql_query)
761+
LOGGER.debug(f'Abstract syntax tree: {ast}')
762+
except Exception as err:
763+
msg = f'CQL parsing error: {str(err)}'
764+
LOGGER.exception(msg)
765+
return self.get_exception(400, headers_, 'InvalidParameterValue', msg)
766+
767+
else:
768+
ast = None
769+
770+
count, records = self.repository.query(ast=ast, sortby=sortby, maxrecords=limit, startposition=offset)
771+
772+
if facets_requested:
773+
LOGGER.debug('Running facet query')
774+
response['facets'] = self.repository.get_facets(self.facets, ast)
804775

805776
returned = len(records)
806777

@@ -1135,39 +1106,6 @@ def get_all_collections(self) -> list:
11351106

11361107
return [default_collection] + [vc.identifier for vc in virtual_collections]
11371108

1138-
def get_facets(self, filters=None) -> dict:
1139-
"""
1140-
Gets all facets for a given query
1141-
1142-
:returns: `dict` of facets
1143-
"""
1144-
1145-
facets_results = {}
1146-
1147-
for facet in self.facets:
1148-
LOGGER.debug(f'Running facet for {facet}')
1149-
facetq = self.repository.session.query(self.repository.query_mappings[facet], self.repository.func.count(facet)).group_by(facet)
1150-
1151-
if filters is not None:
1152-
facetq = facetq.filter(filters)
1153-
1154-
LOGGER.debug('Writing facet query results')
1155-
facets_results[facet] = {
1156-
'type': 'terms',
1157-
'property': facet,
1158-
'buckets': []
1159-
}
1160-
1161-
for fq in facetq.all():
1162-
facets_results[facet]['buckets'].append({
1163-
'value': fq[0],
1164-
'count': fq[1]
1165-
})
1166-
1167-
facets_results[facet]['buckets'].sort(key=itemgetter('count'), reverse=True)
1168-
1169-
return facets_results
1170-
11711109

11721110
def record2json(record, url, collection, mode='ogcapi-records'):
11731111
"""

pycsw/ogc/csw/csw2.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# Authors: Tom Kralidis <tomkralidis@gmail.com>
55
# Angelos Tzotsos <tzotsos@gmail.com>
66
#
7-
# Copyright (c) 2024 Tom Kralidis
7+
# Copyright (c) 2025 Tom Kralidis
88
# Copyright (c) 2015 Angelos Tzotsos
99
#
1010
# Permission is hereby granted, free of charge, to any person
@@ -852,7 +852,7 @@ def getrecords(self):
852852
remotecsw_matches = int(remotecsw.results['matches'])
853853
plural = 's' if remotecsw_matches != 1 else ''
854854
if remotecsw_matches > 0:
855-
matched = str(int(matched) + remotecsw_matches)
855+
matched = str(matched + remotecsw_matches)
856856
dsresults.append(etree.Comment(
857857
' %d result%s from %s ' %
858858
(remotecsw_matches, plural, fedcat)))
@@ -869,22 +869,22 @@ def getrecords(self):
869869
' %s\n\n%s ' % (error_string, err)))
870870
LOGGER.exception(error_string)
871871

872-
if int(matched) == 0:
872+
if matched == 0:
873873
returned = nextrecord = '0'
874874
elif int(self.parent.kvp['maxrecords']) == 0:
875875
returned = '0'
876876
nextrecord = '1'
877-
elif int(matched) < int(self.parent.kvp['startposition']):
877+
elif matched < int(self.parent.kvp['startposition']):
878878
returned = '0'
879879
nextrecord = '1'
880-
elif int(matched) <= int(self.parent.kvp['startposition']) + int(self.parent.kvp['maxrecords']) - 1:
881-
returned = str(int(matched) - int(self.parent.kvp['startposition']) + 1)
880+
elif matched <= int(self.parent.kvp['startposition']) + int(self.parent.kvp['maxrecords']) - 1:
881+
returned = str(matched - int(self.parent.kvp['startposition']) + 1)
882882
nextrecord = '0'
883883
else:
884884
returned = str(self.parent.kvp['maxrecords'])
885885
nextrecord = str(int(self.parent.kvp['startposition']) + int(self.parent.kvp['maxrecords']))
886886

887-
LOGGER.debug('Results: matched: %s, returned: %s, next: %s',
887+
LOGGER.debug('Results: matched: %d, returned: %s, next: %s',
888888
matched, returned, nextrecord)
889889

890890
node = etree.Element(util.nspath_eval('csw:GetRecordsResponse',
@@ -909,7 +909,7 @@ def getrecords(self):
909909

910910
searchresults = etree.SubElement(node,
911911
util.nspath_eval('csw:SearchResults', self.parent.context.namespaces),
912-
numberOfRecordsMatched=matched, numberOfRecordsReturned=returned,
912+
numberOfRecordsMatched=str(matched), numberOfRecordsReturned=returned,
913913
nextRecord=nextrecord, recordSchema=self.parent.kvp['outputschema'])
914914

915915
if self.parent.kvp['elementsetname'] is not None:

pycsw/ogc/csw/csw3.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44
# Authors: Tom Kralidis <tomkralidis@gmail.com>
55
#
6-
# Copyright (c) 2024 Tom Kralidis
6+
# Copyright (c) 2025 Tom Kralidis
77
#
88
# Permission is hereby granted, free of charge, to any person
99
# obtaining a copy of this software and associated documentation
@@ -836,7 +836,7 @@ def getrecords(self):
836836
# query repository
837837
LOGGER.info('Querying repository with RECORD ids: %s', self.parent.kvp['recordids'])
838838
results = self.parent.repository.query_ids(self.parent.kvp['recordids'].split(','))
839-
matched = str(len(results))
839+
matched = len(results)
840840
if len(results) == 0:
841841
return self.exceptionreport('NotFound', 'recordids',
842842
'No records found for \'%s\'' % self.parent.kvp['recordids'])
@@ -859,20 +859,20 @@ def getrecords(self):
859859
return self.exceptionreport('InvalidParameterValue', 'constraint',
860860
'Invalid query syntax')
861861

862-
if int(matched) == 0:
862+
if matched == 0:
863863
returned = nextrecord = '0'
864864
elif int(self.parent.kvp['maxrecords']) == 0:
865865
returned = nextrecord = '0'
866-
elif int(matched) < int(self.parent.kvp['startposition']):
866+
elif matched < int(self.parent.kvp['startposition']):
867867
returned = nextrecord = '0'
868-
elif int(matched) <= int(self.parent.kvp['startposition']) + int(self.parent.kvp['maxrecords']) - 1:
869-
returned = str(int(matched) - int(self.parent.kvp['startposition']) + 1)
868+
elif matched <= int(self.parent.kvp['startposition']) + int(self.parent.kvp['maxrecords']) - 1:
869+
returned = str(matched - int(self.parent.kvp['startposition']) + 1)
870870
nextrecord = '0'
871871
else:
872872
returned = str(self.parent.kvp['maxrecords'])
873873
nextrecord = str(int(self.parent.kvp['startposition']) + int(self.parent.kvp['maxrecords']))
874874

875-
LOGGER.debug('Results: matched: %s, returned: %s, next: %s',
875+
LOGGER.debug('Results: matched: %d, returned: %s, next: %s',
876876
matched, returned, nextrecord)
877877

878878
node = etree.Element(util.nspath_eval('csw30:GetRecordsResponse',
@@ -897,7 +897,7 @@ def getrecords(self):
897897

898898
searchresults = etree.SubElement(node,
899899
util.nspath_eval('csw30:SearchResults', self.parent.context.namespaces),
900-
numberOfRecordsMatched=matched, numberOfRecordsReturned=returned,
900+
numberOfRecordsMatched=str(matched), numberOfRecordsReturned=returned,
901901
nextRecord=nextrecord, recordSchema=self.parent.kvp['outputschema'],
902902
expires=timestamp, status=get_resultset_status(matched, nextrecord))
903903

0 commit comments

Comments
 (0)