1
1
from time import time
2
2
from unittest .mock import MagicMock , patch
3
3
4
- from sentry import options
5
4
from sentry .models .grouphash import GroupHash
6
5
from sentry .tasks .delete_seer_grouping_records import (
7
- call_delete_seer_grouping_records_by_hash ,
8
6
delete_seer_grouping_records_by_hash ,
7
+ may_schedule_task_to_delete_hashes_from_seer ,
9
8
)
10
9
from sentry .testutils .cases import TestCase
11
10
from sentry .testutils .pytest .fixtures import django_db_all
12
11
13
12
14
13
@django_db_all
15
14
class TestDeleteSeerGroupingRecordsByHash (TestCase ):
16
- @patch ("sentry.tasks.delete_seer_grouping_records.call_seer_to_delete_these_hashes" )
17
- @patch (
18
- "sentry.tasks.delete_seer_grouping_records.delete_seer_grouping_records_by_hash.apply_async"
19
- )
20
- def test_delete_seer_grouping_records_by_hash_batches (
21
- self ,
22
- mock_delete_seer_grouping_records_by_hash_apply_async : MagicMock ,
23
- mock_call_seer_to_delete_these_hashes : MagicMock ,
24
- ) -> None :
25
- """
26
- Test that when delete_seer_grouping_records_by_hash is called with more hashes than the batch size, it spawns
27
- another task with the end index of the previous batch.
28
- """
29
- batch_size = options .get ("embeddings-grouping.seer.delete-record-batch-size" ) or 100
30
- mock_call_seer_to_delete_these_hashes .return_value = True
31
- project_id , hashes = 1 , [str (i ) for i in range (batch_size + 1 )]
32
- # We call it as a function and will schedule a task for the extra hash
33
- delete_seer_grouping_records_by_hash (project_id , hashes , 0 )
34
- assert mock_delete_seer_grouping_records_by_hash_apply_async .call_args [1 ] == {
35
- # We do not schedule the task with all the hashes, but only the extra ones
36
- "args" : [project_id , hashes [batch_size :], 0 ]
37
- }
38
-
39
- @patch (
40
- "sentry.tasks.delete_seer_grouping_records.delete_seer_grouping_records_by_hash.apply_async"
41
- )
42
- def test_call_delete_seer_grouping_records_by_hash_simple (
43
- self , mock_apply_async : MagicMock
44
- ) -> None :
45
- """
46
- Test that call_delete_seer_grouping_records_by_hash correctly collects hashes
47
- and calls the deletion task with the expected parameters.
48
- """
15
def setUp(self) -> None:
    """Prepare the default project so the Seer deletion task may be scheduled.

    The ``sentry:similarity_backfill_completed`` project option is set to the
    current Unix timestamp; ``may_schedule_task_to_delete_hashes_from_seer``
    needs it to allow the task to be scheduled.
    """
    super().setUp()
    backfill_completed_at = int(time())
    self.project.update_option(
        "sentry:similarity_backfill_completed", backfill_completed_at
    )
50
19
20
def _setup_groups_and_hashes(self, number_of_groups: int = 5) -> tuple[list[int], list[str]]:
    """Create groups in ``self.project``, each with exactly one ``GroupHash``.

    Args:
        number_of_groups: How many group / hash pairs to create.

    Returns:
        A ``(group_ids, expected_hashes)`` tuple. Both lists are in creation
        order, so ``expected_hashes[i]`` is the hash of ``group_ids[i]``.
    """
    group_ids: list[int] = []
    expected_hashes: list[str] = []
    for i in range(number_of_groups):
        group = self.create_group(project=self.project)
        group_ids.append(group.id)
        # Zero-pad the loop index to exactly 32 digits; no trailing
        # whitespace, so the value is a clean 32-character hash string.
        group_hash = GroupHash.objects.create(
            project=self.project, hash=f"{i:032d}", group=group
        )
        expected_hashes.append(group_hash.hash)
    return group_ids, expected_hashes
30
+
31
@patch(
    "sentry.tasks.delete_seer_grouping_records.delete_seer_grouping_records_by_hash.apply_async"
)
def test_simple(self, mock_apply_async: MagicMock) -> None:
    """
    Hashes of the given groups are collected and a single task is scheduled.
    """
    ids, hashes = self._setup_groups_and_hashes(number_of_groups=5)
    # Positional task arguments: project id, the hashes to delete, and a
    # trailing 0 (presumably a starting index -- confirm against the task).
    expected_task_args = [self.project.id, hashes, 0]

    may_schedule_task_to_delete_hashes_from_seer(ids)

    # Exactly one task must be scheduled, carrying every hash.
    mock_apply_async.assert_called_once_with(args=expected_task_args)
64
44
65
- def test_call_delete_seer_grouping_records_by_hash_chunked (self ) -> None :
45
+ def test_chunked (self ) -> None :
66
46
"""
67
- Test that call_delete_seer_grouping_records_by_hash chunks large numbers of hashes
47
+ Test that it chunks large numbers of hashes
68
48
into separate tasks with a maximum of batch_size hashes per task.
69
49
"""
70
- self .project .update_option ("sentry:similarity_backfill_completed" , int (time ()))
71
-
72
50
batch_size = 10
73
51
with (
74
52
patch (
@@ -77,16 +55,9 @@ def test_call_delete_seer_grouping_records_by_hash_chunked(self) -> None:
77
55
self .options ({"embeddings-grouping.seer.delete-record-batch-size" : batch_size }),
78
56
):
79
57
# Create 15 group hashes to test chunking (10 + 5 with batch size of 10)
80
- group_ids , expected_hashes = [], []
81
- for i in range (batch_size + 5 ):
82
- group = self .create_group (project = self .project )
83
- group_ids .append (group .id )
84
- group_hash = GroupHash .objects .create (
85
- project = self .project , hash = f"{ i :032d} " , group = group
86
- )
87
- expected_hashes .append (group_hash .hash )
58
+ group_ids , expected_hashes = self ._setup_groups_and_hashes (batch_size + 5 )
88
59
89
- call_delete_seer_grouping_records_by_hash (group_ids )
60
+ may_schedule_task_to_delete_hashes_from_seer (group_ids )
90
61
91
62
# Verify that the task was called 2 times (15 hashes / 10 per chunk = 2 chunks)
92
63
assert mock_apply_async .call_count == 2
@@ -108,23 +79,68 @@ def test_call_delete_seer_grouping_records_by_hash_chunked(self) -> None:
108
79
@patch(
    "sentry.tasks.delete_seer_grouping_records.delete_seer_grouping_records_by_hash.apply_async"
)
def test_group_without_hashes(self, mock_apply_async: MagicMock) -> None:
    """
    A group created without any GroupHash must not cause a task to be scheduled.
    """
    hashless_group = self.create_group(project=self.project)
    hashless_group_ids = [hashless_group.id]

    may_schedule_task_to_delete_hashes_from_seer(hashless_group_ids)

    mock_apply_async.assert_not_called()
122
86
123
87
@patch(
    "sentry.tasks.delete_seer_grouping_records.delete_seer_grouping_records_by_hash.apply_async"
)
def test_no_group_ids(self, mock_apply_async: MagicMock) -> None:
    """
    An empty list of group ids must not result in any task being scheduled.
    """
    no_group_ids: list[int] = []

    may_schedule_task_to_delete_hashes_from_seer(no_group_ids)

    mock_apply_async.assert_not_called()
96
+
97
@patch(
    "sentry.tasks.delete_seer_grouping_records.delete_seer_grouping_records_by_hash.apply_async"
)
def test_called_task_with_too_many_hashes(self, mock_apply_async: MagicMock) -> None:
    """This tests the built-in logic of spreading hashes across multiple tasks.

    With a batch size of 5 and 11 hashes, the directly-invoked task is
    expected to schedule follow-up tasks carrying 5, 5 and 1 hashes.
    """
    batch_size = 5
    with self.options({"embeddings-grouping.seer.delete-record-batch-size": batch_size}):
        # 11 hashes in total: two full batches plus a remainder of one.
        _, expected_hashes = self._setup_groups_and_hashes(batch_size + batch_size + 1)
        # Invoke the task body directly rather than scheduling it.
        delete_seer_grouping_records_by_hash(self.project.id, expected_hashes, 0)

        # (expected chunk, expected chunk length) for each scheduled call, in order.
        expected_calls = [
            (expected_hashes[0:batch_size], batch_size),
            (expected_hashes[batch_size : (batch_size * 2)], batch_size),
            (expected_hashes[(batch_size * 2) :], 1),
        ]
        for call_index, (chunk, chunk_size) in enumerate(expected_calls):
            scheduled_args = mock_apply_async.call_args_list[call_index][1]["args"]
            assert len(scheduled_args[1]) == chunk_size
            assert scheduled_args[0] == self.project.id
            assert scheduled_args[1] == chunk
            assert scheduled_args[2] == 0

        # The chunks must add up to the full list of hashes, in order.
        first_chunk, second_chunk, third_chunk = (chunk for chunk, _ in expected_calls)
        assert first_chunk + second_chunk + third_chunk == expected_hashes
135
+
136
@patch(
    "sentry.tasks.delete_seer_grouping_records.delete_seer_grouping_records_by_hash.apply_async"
)
def test_does_not_schedule_task_if_missing_option(self, mock_apply_async: MagicMock) -> None:
    """
    Without the ``sentry:similarity_backfill_completed`` project option,
    no task is scheduled even though the groups have hashes.
    """
    # Remove the project option so the scheduling precondition is not met.
    self.project.delete_option("sentry:similarity_backfill_completed")
    ids, _unused_hashes = self._setup_groups_and_hashes(number_of_groups=5)

    may_schedule_task_to_delete_hashes_from_seer(ids)

    mock_apply_async.assert_not_called()
0 commit comments