From 506a186e326352a42f87d2af2581abe93fb125da Mon Sep 17 00:00:00 2001 From: Simon Van Baelen Date: Wed, 16 Jul 2025 13:16:02 +0200 Subject: [PATCH 1/7] Added support for searching a large number of indices by moving the indices from the request URL to the body of the request when the index list would exceed the 4096-byte URL limit. --- .../stac_fastapi/opensearch/database_logic.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index e4c88d85..d677adf3 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -63,7 +63,7 @@ from stac_fastapi.types.stac import Collection, Item logger = logging.getLogger(__name__) - +ES_MAX_URL_LENGTH = 4096 async def create_index_templates() -> None: """ @@ -546,6 +546,17 @@ async def execute_search( index_param = indices(collection_ids) + if len(index_param) > ES_MAX_URL_LENGTH-300: + index_param = ITEM_INDICES + index_filter = {"terms": {"collection": collection_ids}} + if not "bool" in search_body["query"]: + search_body["query"]["bool"] = {} + if not "filter" in search_body["query"]["bool"]: + search_body["query"]["bool"]["filter"] = [index_filter] + filters = search_body["query"]["bool"]["filter"] + if not index_filter in filters: + filters.append(index_filter) + max_result_window = MAX_LIMIT size_limit = min(limit + 1, max_result_window) From bd84128c90358e55bbbc50ec41599492b4d88cce Mon Sep 17 00:00:00 2001 From: Simon Van Baelen Date: Wed, 16 Jul 2025 13:44:40 +0200 Subject: [PATCH 2/7] not x in -> x not in --- .../opensearch/stac_fastapi/opensearch/database_logic.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index 
d677adf3..d79b2d63 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -65,6 +65,7 @@ logger = logging.getLogger(__name__) ES_MAX_URL_LENGTH = 4096 + async def create_index_templates() -> None: """ Create index templates for the Collection and Item indices. @@ -546,15 +547,15 @@ async def execute_search( index_param = indices(collection_ids) - if len(index_param) > ES_MAX_URL_LENGTH-300: + if len(index_param) > ES_MAX_URL_LENGTH - 300: index_param = ITEM_INDICES index_filter = {"terms": {"collection": collection_ids}} - if not "bool" in search_body["query"]: + if "bool" not in search_body["query"]: search_body["query"]["bool"] = {} - if not "filter" in search_body["query"]["bool"]: + if "filter" not in search_body["query"]["bool"]: search_body["query"]["bool"]["filter"] = [index_filter] filters = search_body["query"]["bool"]["filter"] - if not index_filter in filters: + if index_filter not in filters: filters.append(index_filter) max_result_window = MAX_LIMIT From ab4f56e64f55519fff2abd7890ddd94aade8a0ba Mon Sep 17 00:00:00 2001 From: Stijn Caerts Date: Thu, 17 Jul 2025 13:40:14 +0200 Subject: [PATCH 3/7] update CHANGELOG.md --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 421e8315..780386b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
- Added the ability to set timeout for Opensearch and Elasticsearch clients by setting the environmental variable `ES_TIMEOUT` [#408](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/408) +### Changed + +- Updated collection to index logic to support searching a large number of indices [#412](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/412) + ## [v6.0.0] - 2025-06-22 ### Added From 06999c9f1ec4251817a77f08dca330d0b02ce5d7 Mon Sep 17 00:00:00 2001 From: Simon Van Baelen Date: Fri, 18 Jul 2025 09:26:21 +0200 Subject: [PATCH 4/7] Create add_collections_to_body in sfeos_helpers and add search support for a large number of indices to the Elasticsearch database logic. --- .../elasticsearch/database_logic.py | 7 +++++ .../stac_fastapi/opensearch/database_logic.py | 23 ++++++--------- .../sfeos_helpers/database/query.py | 29 +++++++++++++++++++ 3 files changed, 45 insertions(+), 14 deletions(-) diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index 195950f3..ed3e3080 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -27,6 +27,8 @@ PartialItem, PatchOperation, ) + +from stac_fastapi.opensearch.stac_fastapi.opensearch.database_logic import ES_MAX_URL_LENGTH from stac_fastapi.sfeos_helpers import filter from stac_fastapi.sfeos_helpers.database import ( apply_free_text_filter_shared, @@ -60,6 +62,8 @@ from stac_fastapi.types.rfc3339 import DateTimeType from stac_fastapi.types.stac import Collection, Item +from stac_fastapi.sfeos_helpers.stac_fastapi.sfeos_helpers.database.query import add_collections_to_body + logger = logging.getLogger(__name__) @@ -520,6 +524,9 @@ async def execute_search( query = search.query.to_dict() if search.query else None index_param = indices(collection_ids) + if len(index_param) 
> ES_MAX_URL_LENGTH - 300: + index_param = ITEM_INDICES + query = add_collections_to_body(collection_ids, query) max_result_window = MAX_LIMIT diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index d79b2d63..35f70fe3 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -62,8 +62,10 @@ from stac_fastapi.types.rfc3339 import DateTimeType from stac_fastapi.types.stac import Collection, Item +from stac_fastapi.sfeos_helpers.stac_fastapi.sfeos_helpers.database.query import add_collections_to_body, \ + ES_MAX_URL_LENGTH + logger = logging.getLogger(__name__) -ES_MAX_URL_LENGTH = 4096 async def create_index_templates() -> None: @@ -533,6 +535,12 @@ async def execute_search( """ search_body: Dict[str, Any] = {} query = search.query.to_dict() if search.query else None + + index_param = indices(collection_ids) + if len(index_param) > ES_MAX_URL_LENGTH - 300: + index_param = ITEM_INDICES + query = add_collections_to_body(collection_ids, query) + if query: search_body["query"] = query @@ -545,19 +553,6 @@ async def execute_search( search_body["sort"] = sort if sort else DEFAULT_SORT - index_param = indices(collection_ids) - - if len(index_param) > ES_MAX_URL_LENGTH - 300: - index_param = ITEM_INDICES - index_filter = {"terms": {"collection": collection_ids}} - if "bool" not in search_body["query"]: - search_body["query"]["bool"] = {} - if "filter" not in search_body["query"]["bool"]: - search_body["query"]["bool"]["filter"] = [index_filter] - filters = search_body["query"]["bool"]["filter"] - if index_filter not in filters: - filters.append(index_filter) - max_result_window = MAX_LIMIT size_limit = min(limit + 1, max_result_window) diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py 
b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py index dacbb590..81409b65 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py @@ -7,6 +7,7 @@ from stac_fastapi.sfeos_helpers.mappings import Geometry +ES_MAX_URL_LENGTH = 4096 def apply_free_text_filter_shared( search: Any, free_text_queries: Optional[List[str]] @@ -83,3 +84,31 @@ def populate_sort_shared(sortby: List) -> Optional[Dict[str, Dict[str, str]]]: return {s.field: {"order": s.direction} for s in sortby} else: return None + + +def add_collections_to_body(collection_ids: List[str], query: Optional[Dict[str, Any]]) -> Dict[str, Any]: + """Adds a list of collection ids to the body of a query. + + Args: + collection_ids (List[str]): A list of collections ids. + query (Optional[Dict[str, Any]]): The query to add collections to. If none, create a query that filters + the collection ids. + + Returns: + Dict[str, Any]: A query that contains a filter on the given collection ids. + + Notes: + This function is needed in the execute_search function when the size of the URL path will exceed the maximum of ES. + """ + index_filter = {"terms": {"collection": collection_ids}} + if query is None: + query = {"query": {}} + if "bool" not in query: + query["bool"] = {} + if "filter" not in query["bool"]: + query["bool"]["filter"] = [] + + filters = query["bool"]["filter"] + if index_filter not in filters: + filters.append(index_filter) + return query \ No newline at end of file From be48bd89e7ed9eba70d00156ee9c9e52cf8f0302 Mon Sep 17 00:00:00 2001 From: Simon Van Baelen Date: Fri, 18 Jul 2025 09:33:48 +0200 Subject: [PATCH 5/7] Fixed import. 
--- .../elasticsearch/stac_fastapi/elasticsearch/database_logic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index ed3e3080..98178b50 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -62,7 +62,8 @@ from stac_fastapi.types.rfc3339 import DateTimeType from stac_fastapi.types.stac import Collection, Item -from stac_fastapi.sfeos_helpers.stac_fastapi.sfeos_helpers.database.query import add_collections_to_body +from stac_fastapi.sfeos_helpers.stac_fastapi.sfeos_helpers.database.query import add_collections_to_body, \ + ES_MAX_URL_LENGTH logger = logging.getLogger(__name__) From 51a7a76c80cf48b934938edd0c07eac1e6823e22 Mon Sep 17 00:00:00 2001 From: Simon Van Baelen Date: Fri, 18 Jul 2025 09:36:50 +0200 Subject: [PATCH 6/7] Final import fix. Tests are passing again. 
--- .../elasticsearch/stac_fastapi/elasticsearch/database_logic.py | 3 +-- .../opensearch/stac_fastapi/opensearch/database_logic.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index 98178b50..d4a8c8c4 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -28,7 +28,6 @@ PatchOperation, ) -from stac_fastapi.opensearch.stac_fastapi.opensearch.database_logic import ES_MAX_URL_LENGTH from stac_fastapi.sfeos_helpers import filter from stac_fastapi.sfeos_helpers.database import ( apply_free_text_filter_shared, @@ -62,7 +61,7 @@ from stac_fastapi.types.rfc3339 import DateTimeType from stac_fastapi.types.stac import Collection, Item -from stac_fastapi.sfeos_helpers.stac_fastapi.sfeos_helpers.database.query import add_collections_to_body, \ +from stac_fastapi.sfeos_helpers.database.query import add_collections_to_body, \ ES_MAX_URL_LENGTH logger = logging.getLogger(__name__) diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index 35f70fe3..f3c925d8 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -62,7 +62,7 @@ from stac_fastapi.types.rfc3339 import DateTimeType from stac_fastapi.types.stac import Collection, Item -from stac_fastapi.sfeos_helpers.stac_fastapi.sfeos_helpers.database.query import add_collections_to_body, \ +from stac_fastapi.sfeos_helpers.database.query import add_collections_to_body, \ ES_MAX_URL_LENGTH logger = logging.getLogger(__name__) From 552b483fa40ec72801403ba72a82d0f1b15cb560 Mon Sep 17 00:00:00 2001 From: Simon Van Baelen Date: Fri, 18 Jul 2025 09:51:29 +0200 Subject: 
[PATCH 7/7] Precommit. --- .../stac_fastapi/elasticsearch/database_logic.py | 8 ++++---- .../opensearch/stac_fastapi/opensearch/database_logic.py | 7 ++++--- .../stac_fastapi/sfeos_helpers/database/query.py | 9 ++++++--- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index d4a8c8c4..16a8a83d 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -27,7 +27,6 @@ PartialItem, PatchOperation, ) - from stac_fastapi.sfeos_helpers import filter from stac_fastapi.sfeos_helpers.database import ( apply_free_text_filter_shared, @@ -44,6 +43,10 @@ return_date, validate_refresh, ) +from stac_fastapi.sfeos_helpers.database.query import ( + ES_MAX_URL_LENGTH, + add_collections_to_body, +) from stac_fastapi.sfeos_helpers.database.utils import ( merge_to_operations, operations_to_script, @@ -61,9 +64,6 @@ from stac_fastapi.types.rfc3339 import DateTimeType from stac_fastapi.types.stac import Collection, Item -from stac_fastapi.sfeos_helpers.database.query import add_collections_to_body, \ - ES_MAX_URL_LENGTH - logger = logging.getLogger(__name__) diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index f3c925d8..c323b307 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -42,6 +42,10 @@ return_date, validate_refresh, ) +from stac_fastapi.sfeos_helpers.database.query import ( + ES_MAX_URL_LENGTH, + add_collections_to_body, +) from stac_fastapi.sfeos_helpers.database.utils import ( merge_to_operations, operations_to_script, @@ -62,9 +66,6 @@ from stac_fastapi.types.rfc3339 import DateTimeType from 
stac_fastapi.types.stac import Collection, Item -from stac_fastapi.sfeos_helpers.database.query import add_collections_to_body, \ - ES_MAX_URL_LENGTH - logger = logging.getLogger(__name__) diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py index 81409b65..97df5703 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py @@ -9,6 +9,7 @@ ES_MAX_URL_LENGTH = 4096 + def apply_free_text_filter_shared( search: Any, free_text_queries: Optional[List[str]] ) -> Any: @@ -86,8 +87,10 @@ def populate_sort_shared(sortby: List) -> Optional[Dict[str, Dict[str, str]]]: return None -def add_collections_to_body(collection_ids: List[str], query: Optional[Dict[str, Any]]) -> Dict[str, Any]: - """Adds a list of collection ids to the body of a query. +def add_collections_to_body( + collection_ids: List[str], query: Optional[Dict[str, Any]] +) -> Dict[str, Any]: + """Add a list of collection ids to the body of a query. Args: collection_ids (List[str]): A list of collections ids. @@ -111,4 +114,4 @@ def add_collections_to_body(collection_ids: List[str], query: Optional[Dict[str, filters = query["bool"]["filter"] if index_filter not in filters: filters.append(index_filter) - return query \ No newline at end of file + return query