Commit da61979

Merge branch 'master' into constantinius/ref/analytics/tet-831
2 parents 7961fcb + 130345e commit da61979

150 files changed (+643, -810 lines)

src/sentry/api/endpoints/project_details.py

Lines changed: 1 addition & 24 deletions
@@ -36,11 +36,7 @@
 from sentry.deletions.models.scheduleddeletion import RegionScheduledDeletion
 from sentry.dynamic_sampling import get_supported_biases_ids, get_user_biases
 from sentry.dynamic_sampling.types import DynamicSamplingMode
-from sentry.dynamic_sampling.utils import (
-    has_custom_dynamic_sampling,
-    has_dynamic_sampling,
-    has_dynamic_sampling_minimum_sample_rate,
-)
+from sentry.dynamic_sampling.utils import has_custom_dynamic_sampling, has_dynamic_sampling
 from sentry.grouping.enhancer import Enhancements
 from sentry.grouping.enhancer.exceptions import InvalidEnhancerConfig
 from sentry.grouping.fingerprinting import FingerprintingRules, InvalidFingerprintingConfig
@@ -131,7 +127,6 @@ class ProjectMemberSerializer(serializers.Serializer):
         "copy_from_project",
         "targetSampleRate",
         "dynamicSamplingBiases",
-        "dynamicSamplingMinimumSampleRate",
         "tempestFetchScreenshots",
         "tempestFetchDumps",
         "autofixAutomationTuning",
@@ -225,7 +220,6 @@ class ProjectAdminSerializer(ProjectMemberSerializer):
     copy_from_project = serializers.IntegerField(required=False)
     targetSampleRate = serializers.FloatField(required=False, min_value=0, max_value=1)
     dynamicSamplingBiases = DynamicSamplingBiasSerializer(required=False, many=True)
-    dynamicSamplingMinimumSampleRate = serializers.BooleanField(required=False)
     tempestFetchScreenshots = serializers.BooleanField(required=False)
     tempestFetchDumps = serializers.BooleanField(required=False)
     autofixAutomationTuning = serializers.ChoiceField(
@@ -434,15 +428,6 @@ def validate_targetSampleRate(self, value):

         return value

-    def validate_dynamicSamplingMinimumSampleRate(self, value):
-        organization = self.context["project"].organization
-        actor = self.context["request"].user
-        if not has_dynamic_sampling_minimum_sample_rate(organization, actor=actor):
-            raise serializers.ValidationError(
-                "Organization does not have the dynamic sampling minimum sample rate feature enabled."
-            )
-        return value
-
     def validate_tempestFetchScreenshots(self, value):
         organization = self.context["project"].organization
         actor = self.context["request"].user
@@ -773,14 +758,6 @@ def put(self, request: Request, project) -> Response:
                 changed_proj_settings["sentry:dynamic_sampling_biases"] = result[
                     "dynamicSamplingBiases"
                 ]
-        if result.get("dynamicSamplingMinimumSampleRate") is not None:
-            if project.update_option(
-                "sentry:dynamic_sampling_minimum_sample_rate",
-                result["dynamicSamplingMinimumSampleRate"],
-            ):
-                changed_proj_settings["sentry:dynamic_sampling_minimum_sample_rate"] = result[
-                    "dynamicSamplingMinimumSampleRate"
-                ]

         if result.get("autofixAutomationTuning") is not None:
             if project.update_option(
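
For context, the removed validate_dynamicSamplingMinimumSampleRate followed the same DRF per-field pattern as the validators that remain in this serializer (for example validate_tempestFetchScreenshots): read the organization and actor from the serializer context, check a feature flag, and reject the value when the flag is off. A minimal sketch of that pattern, with a hypothetical field and flag name (not part of this commit):

from rest_framework import serializers

from sentry import features


class ExampleProjectSerializer(serializers.Serializer):
    exampleGatedSetting = serializers.BooleanField(required=False)

    def validate_exampleGatedSetting(self, value):
        # DRF calls validate_<fieldName> for each submitted field; raising
        # ValidationError here rejects the PUT request with a 400 response.
        organization = self.context["project"].organization
        actor = self.context["request"].user
        if not features.has("organizations:example-feature", organization, actor=actor):
            raise serializers.ValidationError(
                "Organization does not have the example feature enabled."
            )
        return value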

src/sentry/api/serializers/models/project.py

Lines changed: 0 additions & 10 deletions
@@ -25,7 +25,6 @@
 from sentry.dynamic_sampling.utils import (
     has_custom_dynamic_sampling,
     has_dynamic_sampling,
-    has_dynamic_sampling_minimum_sample_rate,
     is_project_mode_sampling,
 )
 from sentry.eventstore.models import DEFAULT_SUBJECT_TEMPLATE
@@ -950,7 +949,6 @@ class DetailedProjectResponse(ProjectWithTeamResponseDict):
     relayPiiConfig: str | None
     builtinSymbolSources: list[str]
     dynamicSamplingBiases: list[dict[str, str | bool]]
-    dynamicSamplingMinimumSampleRate: bool
     eventProcessing: dict[str, bool]
     symbolSources: str
     isDynamicallySampled: bool
@@ -1101,9 +1099,6 @@ def serialize(
             "dynamicSamplingBiases": self.get_value_with_default(
                 attrs, "sentry:dynamic_sampling_biases"
             ),
-            "dynamicSamplingMinimumSampleRate": self.get_value_with_default(
-                attrs, "sentry:dynamic_sampling_minimum_sample_rate"
-            ),
             "eventProcessing": {
                 "symbolicationDegraded": False,
             },
@@ -1123,11 +1118,6 @@ def serialize(
         )
         data["tempestFetchDumps"] = attrs["options"].get("sentry:tempest_fetch_dumps", False)

-        if has_dynamic_sampling_minimum_sample_rate(obj.organization, user):
-            data["dynamicSamplingMinimumSampleRate"] = bool(
-                obj.get_option("sentry:dynamic_sampling_minimum_sample_rate")
-            )
-
         return data

     def format_options(self, attrs: Mapping[str, Any]) -> dict[str, Any]:

src/sentry/apidocs/examples/project_examples.py

Lines changed: 0 additions & 2 deletions
@@ -152,7 +152,6 @@
             "filters:releases": "",
             "filters:error_messages": "",
             "feedback:branding": True,
-            "sentry:dynamic_sampling_minimum_sample_rate": True,
         },
         "digestsMinDelay": 180,
         "digestsMaxDelay": 600,
@@ -260,7 +259,6 @@
             {"id": "boostReplayId", "active": True},
             {"id": "recalibrationRule", "active": True},
         ],
-        "dynamicSamplingMinimumSampleRate": True,
         "eventProcessing": {"symbolicationDegraded": False},
         "symbolSources": "[]",
         "tempestFetchScreenshots": False,

src/sentry/dynamic_sampling/utils.py

Lines changed: 0 additions & 12 deletions
@@ -25,18 +25,6 @@ def has_custom_dynamic_sampling(
     )


-def has_dynamic_sampling_minimum_sample_rate(
-    organization: Organization | None, actor: User | RpcUser | AnonymousUser | None = None
-) -> bool:
-    return (
-        organization is not None
-        and features.has(
-            "organizations:dynamic-sampling-minimum-sample-rate", organization, actor=actor
-        )
-        and has_custom_dynamic_sampling(organization, actor=actor)
-    )
-
-
 def is_project_mode_sampling(organization: Organization | None) -> bool:
     return (
         organization is not None

src/sentry/models/options/project_option.py

Lines changed: 0 additions & 1 deletion
@@ -59,7 +59,6 @@
     "sentry:relay_pii_config",
     "sentry:dynamic_sampling",
     "sentry:dynamic_sampling_biases",
-    "sentry:dynamic_sampling_minimum_sample_rate",
     "sentry:target_sample_rate",
     "sentry:tempest_fetch_screenshots",
     "sentry:tempest_fetch_dumps",

src/sentry/projectoptions/defaults.py

Lines changed: 0 additions & 3 deletions
@@ -193,9 +193,6 @@
 # Dynamic sampling rate in project-level "manual" configuration mode
 register(key="sentry:target_sample_rate", default=TARGET_SAMPLE_RATE_DEFAULT)

-# Dynamic sampling minimum sample rate
-register(key="sentry:dynamic_sampling_minimum_sample_rate", default=False)
-
 # Should tempest fetch screenshots for this project
 register(key="sentry:tempest_fetch_screenshots", default=False)

src/sentry/seer/similarity/grouping_records.py

Lines changed: 9 additions & 3 deletions
@@ -21,6 +21,7 @@
 logger = logging.getLogger(__name__)

 POST_BULK_GROUPING_RECORDS_TIMEOUT = 10000
+DELETE_HASH_METRIC = "grouping.similarity.delete_records_by_hash"


 class CreateGroupingRecordData(TypedDict):
@@ -125,7 +126,7 @@ def delete_project_grouping_records(
     return False


-def delete_grouping_records_by_hash(project_id: int, hashes: Sequence[str]) -> bool:
+def call_seer_to_delete_these_hashes(project_id: int, hashes: Sequence[str]) -> bool:
     extra = {"project_id": project_id, "hashes": hashes}
     try:
         body = {"project_id": project_id, "hash_list": hashes}
@@ -142,6 +143,11 @@ def delete_grouping_records_by_hash(project_id: int, hashes: Sequence[str]) -> bool:
             "seer.delete_grouping_records.hashes.timeout",
             extra=extra,
         )
+        metrics.incr(
+            DELETE_HASH_METRIC,
+            sample_rate=options.get("seer.similarity.metrics_sample_rate"),
+            tags={"success": False, "reason": "ReadTimeoutError"},
+        )
         return False

     if response.status >= 200 and response.status < 300:
@@ -150,15 +156,15 @@ def delete_grouping_records_by_hash(project_id: int, hashes: Sequence[str]) -> bool:
             extra=extra,
         )
         metrics.incr(
-            "grouping.similarity.delete_records_by_hash",
+            DELETE_HASH_METRIC,
             sample_rate=options.get("seer.similarity.metrics_sample_rate"),
             tags={"success": True},
         )
         return True
     else:
         logger.error("seer.delete_grouping_records.hashes.failure", extra=extra)
         metrics.incr(
-            "grouping.similarity.delete_records_by_hash",
+            DELETE_HASH_METRIC,
             sample_rate=options.get("seer.similarity.metrics_sample_rate"),
             tags={"success": False},
         )
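
The new DELETE_HASH_METRIC constant keeps the timeout, success, and failure counters under one metric name, differentiated only by tags. A rough sketch of the resulting emission pattern; record_delete_outcome is a hypothetical helper used here for illustration, not part of the commit:

from sentry import options
from sentry.utils import metrics

DELETE_HASH_METRIC = "grouping.similarity.delete_records_by_hash"


def record_delete_outcome(success: bool, reason: str | None = None) -> None:
    # A single metric name with success/reason tags lets dashboards plot
    # successes, plain failures, and timeout failures against each other.
    tags: dict[str, bool | str] = {"success": success}
    if reason is not None:
        tags["reason"] = reason
    metrics.incr(
        DELETE_HASH_METRIC,
        sample_rate=options.get("seer.similarity.metrics_sample_rate"),
        tags=tags,
    )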

src/sentry/tasks/activity.py

Lines changed: 2 additions & 1 deletion
@@ -29,7 +29,8 @@ def get_activity_notifiers(project):
     queue="activity.notify",
     silo_mode=SiloMode.REGION,
     taskworker_config=TaskworkerConfig(
-        namespace=notifications_tasks, processing_deadline_duration=60
+        namespace=notifications_tasks,
+        processing_deadline_duration=120,
     ),
 )
 def send_activity_notifications(activity_id: int) -> None:

src/sentry/tasks/collect_project_platforms.py

Lines changed: 1 addition & 0 deletions
@@ -31,6 +31,7 @@ def paginate_project_ids(paginate):
     silo_mode=SiloMode.REGION,
     taskworker_config=TaskworkerConfig(
         namespace=issues_tasks,
+        processing_deadline_duration=30,
     ),
 )
 def collect_project_platforms(paginate=1000, **kwargs):
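
Both task changes above tune processing_deadline_duration on TaskworkerConfig (raised to 120 for activity notifications, set explicitly to 30 here). A sketch of the decorator shape these diffs modify, with a hypothetical task name and illustrative values:

from sentry.silo.base import SiloMode
from sentry.tasks.base import instrumented_task
from sentry.taskworker.config import TaskworkerConfig
from sentry.taskworker.namespaces import issues_tasks


@instrumented_task(
    name="sentry.tasks.example_task",  # hypothetical name, for illustration only
    queue="example.queue",
    silo_mode=SiloMode.REGION,
    taskworker_config=TaskworkerConfig(
        namespace=issues_tasks,
        # Per-run processing deadline; this commit uses values of 30 and 120.
        processing_deadline_duration=30,
    ),
)
def example_task(**kwargs) -> None:
    ...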

src/sentry/tasks/delete_seer_grouping_records.py

Lines changed: 26 additions & 13 deletions
@@ -6,25 +6,24 @@
 from sentry.models.group import Group
 from sentry.models.grouphash import GroupHash
 from sentry.seer.similarity.grouping_records import (
-    delete_grouping_records_by_hash,
+    call_seer_to_delete_these_hashes,
     delete_project_grouping_records,
 )
 from sentry.seer.similarity.utils import ReferrerOptions, killswitch_enabled
 from sentry.silo.base import SiloMode
 from sentry.tasks.base import instrumented_task
 from sentry.taskworker.config import TaskworkerConfig
 from sentry.taskworker.namespaces import seer_tasks
+from sentry.utils import metrics
 from sentry.utils.query import RangeQuerySetWrapper

-BATCH_SIZE = 1000
-
 logger = logging.getLogger(__name__)


 @instrumented_task(
     name="sentry.tasks.delete_seer_grouping_records_by_hash",
     queue="delete_seer_grouping_records_by_hash",
-    max_retries=0,
+    max_retries=0,  # XXX: Why do we not retry?
     silo_mode=SiloMode.REGION,
     soft_time_limit=60 * 15,
     time_limit=60 * (15 + 5),
@@ -42,19 +41,32 @@ def delete_seer_grouping_records_by_hash(
 ) -> None:
     """
     Task to delete seer grouping records by hash list.
-    Calls the seer delete by hash endpoint with batches of hashes of size `BATCH_SIZE`.
+    Calls the seer delete by hash endpoint with batches of hashes of size `batch_size`.
     """
     if killswitch_enabled(project_id, ReferrerOptions.DELETION) or options.get(
         "seer.similarity-embeddings-delete-by-hash-killswitch.enabled"
     ):
         return

-    batch_size = options.get("embeddings-grouping.seer.delete-record-batch-size")
+    batch_size = options.get("embeddings-grouping.seer.delete-record-batch-size") or 100
     len_hashes = len(hashes)
-    end_index = min(last_deleted_index + batch_size, len_hashes)
-    delete_grouping_records_by_hash(project_id, hashes[last_deleted_index:end_index])
-    if end_index < len_hashes:
-        delete_seer_grouping_records_by_hash.apply_async(args=[project_id, hashes, end_index])
+    if len_hashes <= batch_size:  # Base case
+        call_seer_to_delete_these_hashes(project_id, hashes)
+    else:
+        if last_deleted_index != 0:
+            # This tracks which tasks are still being scheduled with the whole list of hashes
+            metrics.incr(
+                "grouping.similarity.delete_seer_grouping_records_by_hash.batch_size_exceeded",
+                sample_rate=options.get("seer.similarity.metrics_sample_rate"),
+            )
+
+        # Iterate through hashes in chunks and schedule a task for each chunk
+        # There are tasks passing last_deleted_index, thus, we need to start from that index
+        # Eventually all tasks will pass 0
+        for i in range(last_deleted_index, len_hashes, batch_size):
+            # Slice operations are safe and will not raise IndexError
+            chunked_hashes = hashes[i : i + batch_size]
+            delete_seer_grouping_records_by_hash.apply_async(args=[project_id, chunked_hashes, 0])


 def call_delete_seer_grouping_records_by_hash(
@@ -71,15 +83,16 @@ def call_delete_seer_grouping_records_by_hash(
         and not options.get("seer.similarity-embeddings-delete-by-hash-killswitch.enabled")
     ):
         group_hashes = []
+        batch_size = options.get("embeddings-grouping.seer.delete-record-batch-size") or 100

         for group_hash in RangeQuerySetWrapper(
             GroupHash.objects.filter(project_id=project.id, group__id__in=group_ids),
-            step=BATCH_SIZE,
+            step=batch_size,
         ):
             group_hashes.append(group_hash.hash)

-            # Schedule task when we reach BATCH_SIZE
-            if len(group_hashes) >= BATCH_SIZE:
+            # Schedule task when we reach batch_size
+            if len(group_hashes) >= batch_size:
                 delete_seer_grouping_records_by_hash.apply_async(args=[project.id, group_hashes, 0])
                 group_hashes = []