Skip to content

Commit 3b1bb7f

Browse files
authored
ref(deletions): Simplify seer deletion tests (#95267)
This simplifies the tests for deletions of hashes from Seer and it also adds a test for #95156.
1 parent 1814b53 commit 3b1bb7f

File tree

8 files changed

+93
-77
lines changed

8 files changed

+93
-77
lines changed

src/sentry/api/helpers/group_index/delete.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from sentry.models.groupinbox import GroupInbox
2020
from sentry.models.project import Project
2121
from sentry.signals import issue_deleted
22-
from sentry.tasks.delete_seer_grouping_records import call_delete_seer_grouping_records_by_hash
22+
from sentry.tasks.delete_seer_grouping_records import may_schedule_task_to_delete_hashes_from_seer
2323
from sentry.utils.audit import create_audit_entry
2424

2525
from . import BULK_MUTATION_LIMIT, SearchFunction
@@ -85,7 +85,7 @@ def delete_group_list(
8585
create_audit_entries(request, project, group_list, delete_type, transaction_id)
8686

8787
# Tell seer to delete grouping records for these groups
88-
call_delete_seer_grouping_records_by_hash(error_ids)
88+
may_schedule_task_to_delete_hashes_from_seer(error_ids)
8989

9090
# Removing GroupHash rows prevents new events from associating to the groups
9191
# we just deleted.

src/sentry/deletions/defaults/group.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from sentry.models.rulefirehistory import RuleFireHistory
1616
from sentry.notifications.models.notificationmessage import NotificationMessage
1717
from sentry.snuba.dataset import Dataset
18-
from sentry.tasks.delete_seer_grouping_records import call_delete_seer_grouping_records_by_hash
18+
from sentry.tasks.delete_seer_grouping_records import may_schedule_task_to_delete_hashes_from_seer
1919
from sentry.utils.snuba import bulk_snuba_queries
2020

2121
from ..base import BaseDeletionTask, BaseRelation, ModelDeletionTask, ModelRelation
@@ -273,7 +273,7 @@ def delete_bulk(self, instance_list: Sequence[Group]) -> bool:
273273
except InvalidGroupTypeError:
274274
pass
275275
# Tell seer to delete grouping records with these group hashes
276-
call_delete_seer_grouping_records_by_hash(error_group_ids)
276+
may_schedule_task_to_delete_hashes_from_seer(error_group_ids)
277277

278278
self._delete_children(instance_list)
279279

src/sentry/seer/similarity/grouping_records.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def post_bulk_grouping_records(
8585
return {"success": False, "reason": response.reason}
8686

8787

88-
def delete_project_grouping_records(
88+
def call_seer_to_delete_project_grouping_records(
8989
project_id: int,
9090
) -> bool:
9191
try:

src/sentry/tasks/delete_seer_grouping_records.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
from sentry.models.group import Group
77
from sentry.models.grouphash import GroupHash
88
from sentry.seer.similarity.grouping_records import (
9+
call_seer_to_delete_project_grouping_records,
910
call_seer_to_delete_these_hashes,
10-
delete_project_grouping_records,
1111
)
1212
from sentry.seer.similarity.utils import ReferrerOptions, killswitch_enabled
1313
from sentry.silo.base import SiloMode
@@ -69,7 +69,7 @@ def delete_seer_grouping_records_by_hash(
6969
delete_seer_grouping_records_by_hash.apply_async(args=[project_id, chunked_hashes, 0])
7070

7171

72-
def call_delete_seer_grouping_records_by_hash(
72+
def may_schedule_task_to_delete_hashes_from_seer(
7373
group_ids: Sequence[int],
7474
) -> None:
7575
project = None
@@ -124,4 +124,4 @@ def call_seer_delete_project_grouping_records(
124124
return
125125

126126
logger.info("calling seer delete records by project", extra={"project_id": project_id})
127-
delete_project_grouping_records(project_id)
127+
call_seer_to_delete_project_grouping_records(project_id)

src/sentry/tasks/embeddings_grouping/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
BulkCreateGroupingRecordsResponse,
2323
CreateGroupingRecordData,
2424
CreateGroupingRecordsRequest,
25-
delete_project_grouping_records,
25+
call_seer_to_delete_project_grouping_records,
2626
post_bulk_grouping_records,
2727
)
2828
from sentry.seer.similarity.types import (
@@ -770,7 +770,7 @@ def delete_seer_grouping_records(
770770
"backfill_seer_grouping_records.delete_all_seer_records",
771771
extra={"project_id": project_id},
772772
)
773-
delete_project_grouping_records(project_id)
773+
call_seer_to_delete_project_grouping_records(project_id)
774774

775775
for groups in chunked(
776776
RangeQuerySetWrapper(

tests/sentry/tasks/test_backfill_seer_grouping_records.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -868,7 +868,7 @@ def test_multiple_batches(
868868
]
869869
assert mock_utils_logger.info.call_args_list == expected_utils_call_args_list
870870

871-
@patch("sentry.tasks.embeddings_grouping.utils.delete_project_grouping_records")
871+
@patch("sentry.tasks.embeddings_grouping.utils.call_seer_to_delete_project_grouping_records")
872872
def test_only_delete(self, mock_project_delete_grouping_records):
873873
"""
874874
Test that when the only_delete flag is on, seer_similarity is deleted from the metadata
@@ -906,7 +906,7 @@ def test_only_delete(self, mock_project_delete_grouping_records):
906906
for group in groups:
907907
assert group.data["metadata"] == default_metadata
908908

909-
@patch("sentry.tasks.embeddings_grouping.utils.delete_project_grouping_records")
909+
@patch("sentry.tasks.embeddings_grouping.utils.call_seer_to_delete_project_grouping_records")
910910
@patch("sentry.tasks.embeddings_grouping.utils.post_bulk_grouping_records")
911911
def test_cohort_only_delete(
912912
self, mock_post_bulk_grouping_records, mock_delete_grouping_records
Lines changed: 80 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,74 +1,52 @@
11
from time import time
22
from unittest.mock import MagicMock, patch
33

4-
from sentry import options
54
from sentry.models.grouphash import GroupHash
65
from sentry.tasks.delete_seer_grouping_records import (
7-
call_delete_seer_grouping_records_by_hash,
86
delete_seer_grouping_records_by_hash,
7+
may_schedule_task_to_delete_hashes_from_seer,
98
)
109
from sentry.testutils.cases import TestCase
1110
from sentry.testutils.pytest.fixtures import django_db_all
1211

1312

1413
@django_db_all
1514
class TestDeleteSeerGroupingRecordsByHash(TestCase):
16-
@patch("sentry.tasks.delete_seer_grouping_records.call_seer_to_delete_these_hashes")
17-
@patch(
18-
"sentry.tasks.delete_seer_grouping_records.delete_seer_grouping_records_by_hash.apply_async"
19-
)
20-
def test_delete_seer_grouping_records_by_hash_batches(
21-
self,
22-
mock_delete_seer_grouping_records_by_hash_apply_async: MagicMock,
23-
mock_call_seer_to_delete_these_hashes: MagicMock,
24-
) -> None:
25-
"""
26-
Test that when delete_seer_grouping_records_by_hash is called with more hashes than the batch size, it spawns
27-
another task with the end index of the previous batch.
28-
"""
29-
batch_size = options.get("embeddings-grouping.seer.delete-record-batch-size") or 100
30-
mock_call_seer_to_delete_these_hashes.return_value = True
31-
project_id, hashes = 1, [str(i) for i in range(batch_size + 1)]
32-
# We call it as a function and will schedule a task for the extra hash
33-
delete_seer_grouping_records_by_hash(project_id, hashes, 0)
34-
assert mock_delete_seer_grouping_records_by_hash_apply_async.call_args[1] == {
35-
# We do not schedule the task with all the hashes, but only the extra ones
36-
"args": [project_id, hashes[batch_size:], 0]
37-
}
38-
39-
@patch(
40-
"sentry.tasks.delete_seer_grouping_records.delete_seer_grouping_records_by_hash.apply_async"
41-
)
42-
def test_call_delete_seer_grouping_records_by_hash_simple(
43-
self, mock_apply_async: MagicMock
44-
) -> None:
45-
"""
46-
Test that call_delete_seer_grouping_records_by_hash correctly collects hashes
47-
and calls the deletion task with the expected parameters.
48-
"""
15+
def setUp(self) -> None:
16+
super().setUp()
17+
# Needed for may_schedule_task_to_delete_hashes_from_seer to allow the task to be scheduled
4918
self.project.update_option("sentry:similarity_backfill_completed", int(time()))
5019

20+
def _setup_groups_and_hashes(self, number_of_groups: int = 5) -> tuple[list[int], list[str]]:
5121
group_ids, expected_hashes = [], []
52-
for i in range(5):
22+
for i in range(number_of_groups):
5323
group = self.create_group(project=self.project)
5424
group_ids.append(group.id)
5525
group_hash = GroupHash.objects.create(
5626
project=self.project, hash=f"{i:032d}", group=group
5727
)
5828
expected_hashes.append(group_hash.hash)
29+
return group_ids, expected_hashes
30+
31+
@patch(
32+
"sentry.tasks.delete_seer_grouping_records.delete_seer_grouping_records_by_hash.apply_async"
33+
)
34+
def test_simple(self, mock_apply_async: MagicMock) -> None:
35+
"""
36+
Test that it correctly collects hashes and schedules a task.
37+
"""
38+
group_ids, expected_hashes = self._setup_groups_and_hashes(number_of_groups=5)
5939

60-
call_delete_seer_grouping_records_by_hash(group_ids)
40+
may_schedule_task_to_delete_hashes_from_seer(group_ids)
6141

6242
# Verify that the task was called with the correct parameters
6343
mock_apply_async.assert_called_once_with(args=[self.project.id, expected_hashes, 0])
6444

65-
def test_call_delete_seer_grouping_records_by_hash_chunked(self) -> None:
45+
def test_chunked(self) -> None:
6646
"""
67-
Test that call_delete_seer_grouping_records_by_hash chunks large numbers of hashes
47+
Test that it chunks large numbers of hashes
6848
into separate tasks with a maximum of batch_size hashes per task.
6949
"""
70-
self.project.update_option("sentry:similarity_backfill_completed", int(time()))
71-
7250
batch_size = 10
7351
with (
7452
patch(
@@ -77,16 +55,9 @@ def test_call_delete_seer_grouping_records_by_hash_chunked(self) -> None:
7755
self.options({"embeddings-grouping.seer.delete-record-batch-size": batch_size}),
7856
):
7957
# Create 15 group hashes to test chunking (10 + 5 with batch size of 10)
80-
group_ids, expected_hashes = [], []
81-
for i in range(batch_size + 5):
82-
group = self.create_group(project=self.project)
83-
group_ids.append(group.id)
84-
group_hash = GroupHash.objects.create(
85-
project=self.project, hash=f"{i:032d}", group=group
86-
)
87-
expected_hashes.append(group_hash.hash)
58+
group_ids, expected_hashes = self._setup_groups_and_hashes(batch_size + 5)
8859

89-
call_delete_seer_grouping_records_by_hash(group_ids)
60+
may_schedule_task_to_delete_hashes_from_seer(group_ids)
9061

9162
# Verify that the task was called 2 times (15 hashes / 10 per chunk = 2 chunks)
9263
assert mock_apply_async.call_count == 2
@@ -108,23 +79,68 @@ def test_call_delete_seer_grouping_records_by_hash_chunked(self) -> None:
10879
@patch(
10980
"sentry.tasks.delete_seer_grouping_records.delete_seer_grouping_records_by_hash.apply_async"
11081
)
111-
def test_call_delete_seer_grouping_records_by_hash_no_hashes(
112-
self, mock_apply_async: MagicMock
113-
) -> None:
114-
self.project.update_option("sentry:similarity_backfill_completed", int(time()))
115-
116-
group_ids = []
117-
for _ in range(5):
118-
group = self.create_group(project=self.project)
119-
group_ids.append(group.id)
120-
call_delete_seer_grouping_records_by_hash(group_ids)
82+
def test_group_without_hashes(self, mock_apply_async: MagicMock) -> None:
83+
group = self.create_group(project=self.project)
84+
may_schedule_task_to_delete_hashes_from_seer([group.id])
12185
mock_apply_async.assert_not_called()
12286

12387
@patch(
12488
"sentry.tasks.delete_seer_grouping_records.delete_seer_grouping_records_by_hash.apply_async"
12589
)
126-
def test_call_delete_seer_grouping_records_by_hash_no_group_ids(
127-
self, mock_apply_async: MagicMock
128-
) -> None:
129-
call_delete_seer_grouping_records_by_hash([])
90+
def test_no_group_ids(self, mock_apply_async: MagicMock) -> None:
91+
"""
92+
Test that when no group ids are provided, the task is not scheduled.
93+
"""
94+
may_schedule_task_to_delete_hashes_from_seer([])
13095
mock_apply_async.assert_not_called()
96+
97+
@patch(
98+
"sentry.tasks.delete_seer_grouping_records.delete_seer_grouping_records_by_hash.apply_async"
99+
)
100+
def test_called_task_with_too_many_hashes(self, mock_apply_async: MagicMock) -> None:
101+
"""This tests the built-in logic of spreading hashes across multiple tasks."""
102+
batch_size = 5
103+
with self.options({"embeddings-grouping.seer.delete-record-batch-size": batch_size}):
104+
# Create 11 group hashes to test chunking (5 + 5 + 1 with batch size of 5)
105+
_, expected_hashes = self._setup_groups_and_hashes(batch_size + batch_size + 1)
106+
# Call function directly rather than scheduling a task
107+
delete_seer_grouping_records_by_hash(self.project.id, expected_hashes, 0)
108+
109+
# Verify the first chunk has batch_size hashes
110+
first_call_args = mock_apply_async.call_args_list[0][1]["args"]
111+
assert len(first_call_args[1]) == batch_size
112+
assert first_call_args[0] == self.project.id
113+
first_chunk = expected_hashes[0:batch_size]
114+
assert first_call_args[1] == first_chunk
115+
assert first_call_args[2] == 0
116+
117+
# Verify the second chunk has batch_size hashes
118+
second_call_args = mock_apply_async.call_args_list[1][1]["args"]
119+
assert len(second_call_args[1]) == batch_size
120+
assert second_call_args[0] == self.project.id
121+
second_chunk = expected_hashes[batch_size : (batch_size * 2)]
122+
assert second_call_args[1] == second_chunk
123+
assert second_call_args[2] == 0
124+
125+
# Verify the third chunk has 1 hash (remainder)
126+
third_call_args = mock_apply_async.call_args_list[2][1]["args"]
127+
assert len(third_call_args[1]) == 1
128+
assert third_call_args[0] == self.project.id
129+
third_chunk = expected_hashes[(batch_size * 2) :]
130+
assert third_call_args[1] == third_chunk
131+
assert third_call_args[2] == 0
132+
133+
# Make sure the hashes add up to the expected hashes
134+
assert first_chunk + second_chunk + third_chunk == expected_hashes
135+
136+
@patch(
137+
"sentry.tasks.delete_seer_grouping_records.delete_seer_grouping_records_by_hash.apply_async"
138+
)
139+
def test_does_not_schedule_task_if_missing_option(self, mock_apply_async: MagicMock) -> None:
140+
"""
141+
Test that when the project option is not set, the task is not scheduled.
142+
"""
143+
self.project.delete_option("sentry:similarity_backfill_completed")
144+
group_ids, _ = self._setup_groups_and_hashes(number_of_groups=5)
145+
may_schedule_task_to_delete_hashes_from_seer(group_ids)
146+
assert mock_apply_async.call_count == 0

tests/snuba/api/endpoints/test_project_group_index.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1636,7 +1636,7 @@ def test_bulk_delete_performance_issues(self):
16361636
assert response.status_code == 204
16371637
self.assert_groups_are_gone(groups)
16381638

1639-
@patch("sentry.api.helpers.group_index.delete.call_delete_seer_grouping_records_by_hash")
1639+
@patch("sentry.api.helpers.group_index.delete.may_schedule_task_to_delete_hashes_from_seer")
16401640
@patch("sentry.utils.audit.log_service.record_audit_log")
16411641
def test_audit_log_even_if_exception_raised(
16421642
self, mock_record_audit_log: Mock, mock_seer_delete: Mock

0 commit comments

Comments
 (0)