Skip to content

Commit a89bf54

Browse files
author
Grzegorz Pustulka
committed
added tests for inserts, fixed product deletion, improved time functions, fixed bugs in index_insertion_strategies
1 parent 0c2d8e6 commit a89bf54

File tree

8 files changed

+109
-86
lines changed

8 files changed

+109
-86
lines changed

compose.yml

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -22,8 +22,6 @@ services:
2222
- ES_VERIFY_CERTS=false
2323
- BACKEND=elasticsearch
2424
- ENABLE_DATETIME_INDEX_FILTERING=true
25-
- DATETIME_INDEX_MAX_SIZE_GB=0
26-
- DATETIME_INDEX_MAX_SIZE_GB=0.00002
2725
ports:
2826
- "8080:8080"
2927
volumes:
@@ -59,7 +57,6 @@ services:
5957
- BACKEND=opensearch
6058
- STAC_FASTAPI_RATE_LIMIT=200/minute
6159
- ENABLE_DATETIME_INDEX_FILTERING=true
62-
- DATETIME_INDEX_MAX_SIZE_GB=0.00002
6360
ports:
6461
- "8082:8082"
6562
volumes:

stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -888,10 +888,16 @@ async def delete_item(self, item_id: str, collection_id: str, **kwargs: Any):
888888

889889
try:
890890
# Perform the delete operation
891-
await self.client.delete(
891+
await self.client.delete_by_query(
892892
index=index_alias_by_collection_id(collection_id),
893-
id=mk_item_id(item_id, collection_id),
894-
refresh=refresh,
893+
body={
894+
"query": {
895+
"term": {
896+
"_id": mk_item_id(item_id, collection_id)
897+
}
898+
}
899+
},
900+
refresh=refresh
895901
)
896902
except ESNotFoundError:
897903
# Raise a custom NotFoundError if the item does not exist

stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -873,10 +873,16 @@ async def delete_item(self, item_id: str, collection_id: str, **kwargs: Any):
873873
)
874874

875875
try:
876-
await self.client.delete(
876+
await self.client.delete_by_query(
877877
index=index_alias_by_collection_id(collection_id),
878-
id=mk_item_id(item_id, collection_id),
879-
refresh=refresh,
878+
body={
879+
"query": {
880+
"term": {
881+
"_id": mk_item_id(item_id, collection_id)
882+
}
883+
}
884+
},
885+
refresh=refresh
880886
)
881887
except exceptions.NotFoundError:
882888
raise NotFoundError(

stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@
3030
"""
3131

3232
# Re-export all functions for backward compatibility
33-
from .datetime import extract_date, extract_date_from_index, return_date
33+
from .datetime import extract_date, return_date, extract_first_date_from_index
3434
from .document import mk_item_id
3535
from .index import (
3636
create_index_templates_shared,
@@ -108,5 +108,5 @@
108108
# Datetime utilities
109109
"return_date",
110110
"extract_date",
111-
"extract_date_from_index",
111+
"extract_first_date_from_index",
112112
]

stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/datetime.py

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -76,17 +76,19 @@ def extract_date(date_str: str) -> date:
7676
return datetime_type.fromisoformat(date_str).date()
7777

7878

79-
def extract_date_from_index(index: str) -> date:
80-
"""Extract date from index string containing date pattern.
79+
def extract_first_date_from_index(index_name: str) -> date:
80+
"""Extract the first date from an index name containing date patterns.
8181
82-
Searches for a date pattern (YYYY-MM-DD) within the index string
83-
and returns it as a date object.
82+
Searches for date patterns (YYYY-MM-DD) within the index name string
83+
and returns the first found date as a date object.
8484
8585
Args:
86-
index: Index string containing a date pattern.
86+
index_name: Index name containing date patterns.
8787
8888
Returns:
89-
A date object extracted from the index string.
89+
A date object extracted from the first date pattern found in the index name.
90+
9091
"""
91-
match = re.search(r"\d{4}-\d{2}-\d{2}", index)
92-
return datetime_type.strptime(match.group(0), "%Y-%m-%d").date()
92+
date_pattern = r'\d{4}-\d{2}-\d{2}'
93+
match = re.search(date_pattern, index_name)
94+
return datetime_type.strptime(match.group(0), "%Y-%m-%d").date()

stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/index_insertion_strategies.py

Lines changed: 52 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -9,10 +9,10 @@
99
from stac_fastapi.core.utilities import get_bool_env
1010
from stac_fastapi.sfeos_helpers.database import (
1111
extract_date,
12-
extract_date_from_index,
1312
index_alias_by_collection_id,
1413
index_by_collection_id,
1514
mk_item_id,
15+
extract_first_date_from_index,
1616
)
1717
from stac_fastapi.sfeos_helpers.mappings import (
1818
_ES_INDEX_NAME_UNSUPPORTED_CHARS_TABLE,
@@ -54,19 +54,27 @@ def create_datetime_index_sync(
5454
) -> str:
5555
pass
5656

57-
async def update_index_alias(self, client: Any, collection_id: str, end_date: str):
58-
index = index_alias_by_collection_id(collection_id)
59-
await client.indices.put_alias(
60-
index=index, name=self.alias_by_index_and_end_date(index, end_date)
61-
)
62-
63-
def update_index_alias_sync(
64-
self, sync_client: Any, collection_id: str, end_date: str
65-
):
66-
index = index_alias_by_collection_id(collection_id)
67-
sync_client.indices.put_alias(
68-
index=index, name=self.alias_by_index_and_end_date(index, end_date)
69-
)
57+
async def update_index_alias(self, client: Any, end_date: str, old_alias: str):
58+
index = ITEMS_INDEX_PREFIX + old_alias
59+
new_alias = self.alias_by_index_and_end_date(old_alias, end_date)
60+
await client.indices.update_aliases(body={
61+
"actions": [
62+
{"remove": {"index": index, "alias": old_alias}},
63+
{"add": {"index": index, "alias": new_alias}}
64+
]
65+
})
66+
return new_alias
67+
68+
def update_index_alias_sync(self, client: Any, end_date: str, old_alias: str):
69+
index = ITEMS_INDEX_PREFIX + old_alias
70+
new_alias = self.alias_by_index_and_end_date(old_alias, end_date)
71+
client.indices.update_aliases(body={
72+
"actions": [
73+
{"remove": {"index": index, "alias": old_alias}},
74+
{"add": {"index": index, "alias": new_alias}}
75+
]
76+
})
77+
return new_alias
7078

7179
@staticmethod
7280
def index_by_collection_id_and_date(collection_id: str, start_date: str) -> str:
@@ -247,19 +255,29 @@ async def _get_target_index_base(
247255
return target_index
248256

249257
all_indexes.sort()
258+
259+
if (start_date := extract_date(product_datetime)) < (end_date := extract_first_date_from_index(all_indexes[0])):
260+
target_index = await self.search_adapter.create_datetime_index(
261+
self.client, collection_id, str(start_date)
262+
)
263+
alias = await self.search_adapter.update_index_alias(
264+
self.client, str(end_date - timedelta(days=1)), target_index
265+
)
266+
await index_selector.refresh_cache()
267+
return alias
268+
250269
if target_index != all_indexes[-1]:
251270
return target_index
252271

253272
if check_size:
254-
breakpoint()
255273
index_size_gb = await self.get_index_size_in_gb(target_index)
256274
max_size_gb = float(os.getenv("DATETIME_INDEX_MAX_SIZE_GB", 20))
257275

258276
if index_size_gb > max_size_gb:
259277
end_date = extract_date(product_datetime)
260-
if end_date != extract_date_from_index(all_indexes[-1]):
278+
if end_date != extract_first_date_from_index(all_indexes[-1]):
261279
await self.search_adapter.update_index_alias(
262-
self.client, collection_id, str(end_date)
280+
self.client, str(end_date), target_index
263281
)
264282
target_index = await self.search_adapter.create_datetime_index(
265283
self.client, collection_id, (end_date + timedelta(days=1))
@@ -308,12 +326,12 @@ async def prepare_bulk_actions(
308326
max_size_gb = float(os.getenv("DATETIME_INDEX_MAX_SIZE_GB", 20))
309327

310328
if index_size_gb > max_size_gb:
311-
current_index_end_date = extract_date_from_index(first_item_index)
329+
current_index_end_date = extract_first_date_from_index(first_item_index)
312330
first_item_date = extract_date(first_item["properties"]["datetime"])
313331

314332
if first_item_date != current_index_end_date:
315333
await self.search_adapter.update_index_alias(
316-
self.client, collection_id, str(current_index_end_date)
334+
self.client, str(current_index_end_date), latest_index
317335
)
318336
next_day_start = current_index_end_date + timedelta(days=1)
319337
new_index = await self.search_adapter.create_datetime_index(
@@ -374,6 +392,16 @@ def _get_target_index_base(
374392
index_selector.refresh_cache()
375393
return target_index
376394

395+
if (start_date := extract_date(product_datetime)) < (end_date := extract_first_date_from_index(all_indexes[0])):
396+
target_index = self.search_adapter.create_datetime_index_sync(
397+
self.sync_client, collection_id, str(start_date)
398+
)
399+
alias = self.search_adapter.update_index_alias_sync(
400+
self.sync_client, str(end_date - timedelta(days=1)), target_index
401+
)
402+
index_selector.refresh_cache()
403+
return alias
404+
377405
all_indexes.sort()
378406
if target_index != all_indexes[-1]:
379407
return target_index
@@ -384,9 +412,9 @@ def _get_target_index_base(
384412

385413
if index_size_gb > max_size_gb:
386414
end_date = extract_date(product_datetime)
387-
if end_date != extract_date_from_index(all_indexes[-1]):
388-
self.search_adapter.update_index_alias_sync(
389-
self.sync_client, collection_id, str(end_date)
415+
if end_date != extract_first_date_from_index(all_indexes[-1]):
416+
self.search_adapter.update_index_alias(
417+
self.sync_client, str(end_date), target_index
390418
)
391419
target_index = self.search_adapter.create_datetime_index_sync(
392420
self.sync_client, collection_id, (end_date + timedelta(days=1))
@@ -432,12 +460,12 @@ def prepare_bulk_actions(
432460
index_size_gb = self.get_index_size_in_gb(first_item_index)
433461
max_size_gb = float(os.getenv("DATETIME_INDEX_MAX_SIZE_GB", 20))
434462
if index_size_gb > max_size_gb:
435-
current_index_end_date = extract_date_from_index(first_item_index)
463+
current_index_end_date = extract_first_date_from_index(first_item_index)
436464
first_item_date = extract_date(first_item["properties"]["datetime"])
437465

438466
if first_item_date != current_index_end_date:
439467
self.search_adapter.update_index_alias_sync(
440-
self.sync_client, collection_id, str(current_index_end_date)
468+
self.sync_client, str(current_index_end_date), latest_index
441469
)
442470
next_day_start = current_index_end_date + timedelta(days=1)
443471
new_index = self.search_adapter.create_datetime_index_sync(

stac_fastapi/tests/api/test_api.py

Lines changed: 27 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
import uuid
55
from copy import deepcopy
66
from datetime import datetime, timedelta
7+
from unittest.mock import patch
78

89
import pytest
910

@@ -703,7 +704,7 @@ async def test_big_int_eo_search(
703704

704705

705706
@pytest.mark.asyncio
706-
async def test_dodaj_item(app_client, load_test_data, txn_client, ctx):
707+
async def test_create_item_uses_existing_datetime_index(app_client, load_test_data, txn_client, ctx):
707708
if not os.getenv("ENABLE_DATETIME_INDEX_FILTERING"):
708709
pytest.skip()
709710

@@ -717,7 +718,7 @@ async def test_dodaj_item(app_client, load_test_data, txn_client, ctx):
717718

718719

719720
@pytest.mark.asyncio
720-
async def test_dodaj_item_2(app_client, load_test_data, txn_client, ctx):
721+
async def test_create_item_with_different_date_same_index(app_client, load_test_data, txn_client, ctx):
721722
if not os.getenv("ENABLE_DATETIME_INDEX_FILTERING"):
722723
pytest.skip()
723724

@@ -732,15 +733,35 @@ async def test_dodaj_item_2(app_client, load_test_data, txn_client, ctx):
732733

733734

734735
@pytest.mark.asyncio
735-
async def test_dodaj_item_3(app_client, load_test_data, txn_client, ctx):
736+
async def test_create_new_index_when_size_limit_exceeded(app_client, load_test_data, txn_client, ctx):
736737
if not os.getenv("ENABLE_DATETIME_INDEX_FILTERING"):
737738
pytest.skip()
738739

739740
item = load_test_data("test_item.json")
740741
item["id"] = str(uuid.uuid4())
741-
item["properties"]["datetime"] = "202-02-12T12:30:22Z"
742-
created_item = await app_client.post(f"/collections/{item['collection']}/items", json=item)
742+
item["properties"]["datetime"] = "2024-02-12T12:30:22Z"
743+
744+
with patch('stac_fastapi.sfeos_helpers.database.AsyncIndexInserter.get_index_size_in_gb') as mock_get_size:
745+
mock_get_size.return_value = 21.0
746+
created_item = await app_client.post(f"/collections/{item['collection']}/items", json=item)
747+
743748
assert created_item.status_code == 201
744749
indices = await txn_client.database.client.indices.get_alias(index="*")
745-
assert 'items_test-collection_2020-02-12' in indices.keys()
750+
assert 'items_test-collection_2020-02-12' and "items_test-collection_2024-02-13" in indices.keys()
746751
await app_client.delete(f"/collections/{item['collection']}/items/{item['id']}")
752+
753+
754+
@pytest.mark.asyncio
755+
async def test_create_item_in_past_date_creates_separate_index(app_client, load_test_data, txn_client, ctx):
756+
if not os.getenv("ENABLE_DATETIME_INDEX_FILTERING"):
757+
pytest.skip()
758+
759+
item = load_test_data("test_item.json")
760+
item["id"] = str(uuid.uuid4())
761+
item["properties"]["datetime"] = "2012-02-12T12:30:22Z"
762+
created_item = await app_client.post(f"/collections/{item['collection']}/items", json=item)
763+
breakpoint()
764+
assert created_item.status_code == 201
765+
indices = await txn_client.database.client.indices.get_alias(index="*")
766+
assert 'items_test-collection_2012-02-12' and 'items_test-collection_2020-02-12' in indices.keys()
767+
await app_client.delete(f"/collections/{item['collection']}/items/{item['id']}")

stac_fastapi/tests/index/test_index_insertion.py

Lines changed: 0 additions & 37 deletions
This file was deleted.

0 commit comments

Comments
 (0)