diff --git a/CHANGELOG.md b/CHANGELOG.md index 421e8315..780386b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Added the ability to set timeout for Opensearch and Elasticsearch clients by setting the environmental variable `ES_TIMEOUT` [#408](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/408) +### Changed + +- Updated collection to index logic to support searching a large amount of indices [#412](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/412) + ## [v6.0.0] - 2025-06-22 ### Added diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index 195950f3..16a8a83d 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -43,6 +43,10 @@ return_date, validate_refresh, ) +from stac_fastapi.sfeos_helpers.database.query import ( + ES_MAX_URL_LENGTH, + add_collections_to_body, +) from stac_fastapi.sfeos_helpers.database.utils import ( merge_to_operations, operations_to_script, @@ -520,6 +524,9 @@ async def execute_search( query = search.query.to_dict() if search.query else None index_param = indices(collection_ids) + if len(index_param) > ES_MAX_URL_LENGTH - 300: + index_param = ITEM_INDICES + query = add_collections_to_body(collection_ids, query) max_result_window = MAX_LIMIT diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index e4c88d85..c323b307 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -42,6 +42,10 @@ return_date, validate_refresh, ) +from stac_fastapi.sfeos_helpers.database.query import ( + ES_MAX_URL_LENGTH, + add_collections_to_body, +) from stac_fastapi.sfeos_helpers.database.utils import ( merge_to_operations, operations_to_script, @@ -532,6 +536,12 @@ async def execute_search( """ search_body: Dict[str, Any] = {} query = search.query.to_dict() if search.query else None + + index_param = indices(collection_ids) + if len(index_param) > ES_MAX_URL_LENGTH - 300: + index_param = ITEM_INDICES + query = add_collections_to_body(collection_ids, query) + if query: search_body["query"] = query @@ -544,8 +554,6 @@ async def execute_search( search_body["sort"] = sort if sort else DEFAULT_SORT - index_param = indices(collection_ids) - max_result_window = MAX_LIMIT size_limit = min(limit + 1, max_result_window) diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py index dacbb590..97df5703 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py @@ -7,6 +7,8 @@ from stac_fastapi.sfeos_helpers.mappings import Geometry +ES_MAX_URL_LENGTH = 4096 + def apply_free_text_filter_shared( search: Any, free_text_queries: Optional[List[str]] @@ -83,3 +85,33 @@ def populate_sort_shared(sortby: List) -> Optional[Dict[str, Dict[str, str]]]: return {s.field: {"order": s.direction} for s in sortby} else: return None + + +def add_collections_to_body( + collection_ids: List[str], query: Optional[Dict[str, Any]] +) -> Dict[str, Any]: + """Add a list of collection ids to the body of a query. + + Args: + collection_ids (List[str]): A list of collections ids. + query (Optional[Dict[str, Any]]): The query to add collections to. If none, create a query that filters + the collection ids. + + Returns: + Dict[str, Any]: A query that contains a filter on the given collection ids. + + Notes: + This function is needed in the execute_search function when the size of the URL path will exceed the maximum of ES. + """ + index_filter = {"terms": {"collection": collection_ids}} + if query is None: + query = {"query": {}} + if "bool" not in query: + query["bool"] = {} + if "filter" not in query["bool"]: + query["bool"]["filter"] = [] + + filters = query["bool"]["filter"] + if index_filter not in filters: + filters.append(index_filter) + return query