13
13
from sentry_redis_tools .retrying_cluster import RetryingRedisCluster
14
14
15
15
from sentry import features
16
+ from sentry .api .exceptions import ResourceDoesNotExist
16
17
from sentry .constants import ObjectStatus
17
18
from sentry .incidents .logic import (
18
19
CRITICAL_TRIGGER_LABEL ,
50
51
)
51
52
from sentry .models .project import Project
52
53
from sentry .seer .anomaly_detection .get_anomaly_data import get_anomaly_data_from_seer_legacy
54
+ from sentry .seer .anomaly_detection .get_historical_anomalies import (
55
+ get_anomaly_evaluation_from_workflow_engine ,
56
+ )
53
57
from sentry .seer .anomaly_detection .utils import anomaly_has_confidence , has_anomaly
54
58
from sentry .snuba .dataset import Dataset
55
59
from sentry .snuba .models import QuerySubscription
@@ -253,6 +257,40 @@ def get_aggregation_value(
253
257
254
258
return aggregation_value
255
259
260
def handle_trigger_anomalies(
    self,
    has_anomaly: bool,
    trigger: AlertRuleTrigger,
    aggregation_value: float,
    fired_incident_triggers: list[IncidentTrigger],
) -> list[IncidentTrigger]:
    """
    Apply one anomaly verdict to a single trigger, firing or resolving it as needed.

    Shared by both anomaly-detection code paths in ``process_update`` so the
    alert/resolve bookkeeping lives in one place.

    :param has_anomaly: whether the current update was judged anomalous for
        this trigger. NOTE: inside this method the name shadows the
        module-level ``has_anomaly`` helper imported from
        ``sentry.seer.anomaly_detection.utils``.
    :param trigger: the trigger being evaluated.
    :param aggregation_value: forwarded unchanged to
        ``trigger_alert_threshold`` / ``trigger_resolve_threshold``.
    :param fired_incident_triggers: accumulator of incident triggers produced
        so far; it is appended to in place.
    :return: the same ``fired_incident_triggers`` list that was passed in.
    """
    # The status is read once up front. The alert and resolve branches below
    # are mutually exclusive on ``has_anomaly``, so a single read serves both.
    is_active = self.check_trigger_matches_status(trigger, TriggerStatus.ACTIVE)

    # --- alert side -------------------------------------------------------
    if is_active or not has_anomaly:
        # Not an alert-worthy update for this trigger: reset its alert counter.
        self.trigger_alert_counts[trigger.id] = 0
    else:
        metrics.incr(
            "incidents.alert_rules.threshold.alert",
            tags={"detection_type": self.alert_rule.detection_type},
        )
        alert_result = self.trigger_alert_threshold(trigger, aggregation_value)
        if alert_result is not None:
            fired_incident_triggers.append(alert_result)

    # --- resolve side -----------------------------------------------------
    # ``self.active_incident`` is deliberately only touched when there is no
    # anomaly, matching the short-circuit order of the original condition.
    should_resolve = not has_anomaly and self.active_incident and is_active
    if not should_resolve:
        # Not a resolve-worthy update for this trigger: reset its resolve counter.
        self.trigger_resolve_counts[trigger.id] = 0
    else:
        metrics.incr(
            "incidents.alert_rules.threshold.resolve",
            tags={"detection_type": self.alert_rule.detection_type},
        )
        resolve_result = self.trigger_resolve_threshold(trigger, aggregation_value)
        if resolve_result is not None:
            fired_incident_triggers.append(resolve_result)

    return fired_incident_triggers
293
+
256
294
def process_update (self , subscription_update : QuerySubscriptionUpdate ) -> None :
257
295
"""
258
296
This is the core processing method utilized when Query Subscription Consumer fetches updates from kafka
@@ -311,12 +349,14 @@ def process_update(self, subscription_update: QuerySubscriptionUpdate) -> None:
311
349
has_metric_alert_processing = features .has (
312
350
"organizations:workflow-engine-metric-alert-processing" , organization
313
351
)
352
+ has_anomaly_detection = features .has (
353
+ "organizations:anomaly-detection-alerts" , organization
354
+ ) and features .has ("organizations:anomaly-detection-rollout" , organization )
355
+
314
356
comparison_delta = None
357
+ detector = None
315
358
316
- if (
317
- has_metric_alert_processing
318
- and not self .alert_rule .detection_type == AlertRuleDetectionType .DYNAMIC
319
- ):
359
+ if has_metric_alert_processing :
320
360
try :
321
361
detector = Detector .objects .get (
322
362
data_sources__source_id = str (self .subscription .id ),
@@ -335,51 +375,51 @@ def process_update(self, subscription_update: QuerySubscriptionUpdate) -> None:
335
375
336
376
if aggregation_value is not None :
337
377
if has_metric_alert_processing :
338
- packet = QuerySubscriptionUpdate (
339
- entity = subscription_update .get ("entity" , "" ),
340
- subscription_id = subscription_update ["subscription_id" ],
341
- values = {"value" : aggregation_value },
342
- timestamp = self .last_update ,
343
- )
378
+ if self .alert_rule .detection_type == AlertRuleDetectionType .DYNAMIC :
379
+ packet = QuerySubscriptionUpdate (
380
+ entity = subscription_update .get ("entity" , "" ),
381
+ subscription_id = subscription_update ["subscription_id" ],
382
+ values = {
383
+ "values" : {
384
+ "value" : aggregation_value ,
385
+ "source_id" : str (self .subscription .id ),
386
+ "subscription_id" : subscription_update ["subscription_id" ],
387
+ "timestamp" : self .last_update ,
388
+ },
389
+ },
390
+ timestamp = self .last_update ,
391
+ )
392
+ else :
393
+ packet = QuerySubscriptionUpdate (
394
+ entity = subscription_update .get ("entity" , "" ),
395
+ subscription_id = subscription_update ["subscription_id" ],
396
+ values = {"value" : aggregation_value },
397
+ timestamp = self .last_update ,
398
+ )
344
399
data_packet = DataPacket [QuerySubscriptionUpdate ](
345
400
source_id = str (self .subscription .id ), packet = packet
346
401
)
347
- # temporarily skip processing any anomaly detection alerts
348
- if self .alert_rule .detection_type != AlertRuleDetectionType .DYNAMIC :
349
- results = process_data_packets (
350
- [data_packet ], DATA_SOURCE_SNUBA_QUERY_SUBSCRIPTION
402
+ results = process_data_packets ([data_packet ], DATA_SOURCE_SNUBA_QUERY_SUBSCRIPTION )
403
+ if features .has (
404
+ "organizations:workflow-engine-metric-alert-dual-processing-logs" ,
405
+ self .alert_rule .organization ,
406
+ ):
407
+ logger .info (
408
+ "dual processing results for alert rule" ,
409
+ extra = {
410
+ "results" : results ,
411
+ "num_results" : len (results ),
412
+ "value" : aggregation_value ,
413
+ "rule_id" : self .alert_rule .id ,
414
+ },
351
415
)
352
- if features .has (
353
- "organizations:workflow-engine-metric-alert-dual-processing-logs" ,
354
- self .alert_rule .organization ,
355
- ):
356
- logger .info (
357
- "dual processing results for alert rule" ,
358
- extra = {
359
- "results" : results ,
360
- "num_results" : len (results ),
361
- "value" : aggregation_value ,
362
- "rule_id" : self .alert_rule .id ,
363
- },
364
- )
365
-
366
- has_anomaly_detection = features .has (
367
- "organizations:anomaly-detection-alerts" , organization
368
- ) and features .has ("organizations:anomaly-detection-rollout" , organization )
369
416
370
417
potential_anomalies = None
371
418
if (
372
419
has_anomaly_detection
373
420
and self .alert_rule .detection_type == AlertRuleDetectionType .DYNAMIC
421
+ and not has_metric_alert_processing
374
422
):
375
- logger .info (
376
- "Raw subscription update" ,
377
- extra = {
378
- "result" : subscription_update ,
379
- "aggregation_value" : aggregation_value ,
380
- "rule_id" : self .alert_rule .id ,
381
- },
382
- )
383
423
with metrics .timer (
384
424
"incidents.subscription_processor.process_update.get_anomaly_data_from_seer_legacy"
385
425
):
@@ -390,28 +430,37 @@ def process_update(self, subscription_update: QuerySubscriptionUpdate) -> None:
390
430
aggregation_value = aggregation_value ,
391
431
)
392
432
if potential_anomalies is None :
393
- logger .info (
394
- "No potential anomalies found" ,
395
- extra = {
396
- "subscription_id" : self .subscription .id ,
397
- "dataset" : self .alert_rule .snuba_query .dataset ,
398
- "organization_id" : self .subscription .project .organization .id ,
399
- "project_id" : self .subscription .project_id ,
400
- "alert_rule_id" : self .alert_rule .id ,
401
- },
402
- )
403
433
return
404
434
405
435
if aggregation_value is None :
406
436
metrics .incr ("incidents.alert_rules.skipping_update_invalid_aggregation_value" )
407
437
return
408
438
409
- fired_incident_triggers = []
439
+ fired_incident_triggers : list [ IncidentTrigger ] = []
410
440
with transaction .atomic (router .db_for_write (AlertRule )):
411
441
# Triggers is the threshold - NOT an instance of a trigger
412
442
metrics_incremented = False
413
443
for trigger in self .triggers :
414
- if potential_anomalies :
444
+ # dual processing of anomaly detection alerts
445
+ if (
446
+ has_anomaly_detection
447
+ and has_metric_alert_processing
448
+ and self .alert_rule .detection_type == AlertRuleDetectionType .DYNAMIC
449
+ ):
450
+ if not detector :
451
+ raise ResourceDoesNotExist ("Detector not found, cannot evaluate anomaly" )
452
+
453
+ is_anomalous = get_anomaly_evaluation_from_workflow_engine (detector , results )
454
+ if is_anomalous is None :
455
+ # we only care about True and False — None indicates no change
456
+ continue
457
+
458
+ assert isinstance (is_anomalous , bool )
459
+ fired_incident_triggers = self .handle_trigger_anomalies (
460
+ is_anomalous , trigger , aggregation_value , fired_incident_triggers
461
+ )
462
+
463
+ elif potential_anomalies :
415
464
# NOTE: There should only be one anomaly in the list
416
465
for potential_anomaly in potential_anomalies :
417
466
# check to see if we have enough data for the dynamic alert rule now
@@ -425,38 +474,10 @@ def process_update(self, subscription_update: QuerySubscriptionUpdate) -> None:
425
474
# we don't need to check if the alert should fire if the alert can't fire yet
426
475
continue
427
476
428
- if has_anomaly (
429
- potential_anomaly , trigger .label
430
- ) and not self .check_trigger_matches_status (trigger , TriggerStatus .ACTIVE ):
431
- metrics .incr (
432
- "incidents.alert_rules.threshold.alert" ,
433
- tags = {"detection_type" : self .alert_rule .detection_type },
434
- )
435
- incident_trigger = self .trigger_alert_threshold (
436
- trigger , aggregation_value
437
- )
438
- if incident_trigger is not None :
439
- fired_incident_triggers .append (incident_trigger )
440
- else :
441
- self .trigger_alert_counts [trigger .id ] = 0
442
-
443
- if (
444
- not has_anomaly (potential_anomaly , trigger .label )
445
- and self .active_incident
446
- and self .check_trigger_matches_status (trigger , TriggerStatus .ACTIVE )
447
- ):
448
- metrics .incr (
449
- "incidents.alert_rules.threshold.resolve" ,
450
- tags = {"detection_type" : self .alert_rule .detection_type },
451
- )
452
- incident_trigger = self .trigger_resolve_threshold (
453
- trigger , aggregation_value
454
- )
455
-
456
- if incident_trigger is not None :
457
- fired_incident_triggers .append (incident_trigger )
458
- else :
459
- self .trigger_resolve_counts [trigger .id ] = 0
477
+ is_anomalous = has_anomaly (potential_anomaly , trigger .label )
478
+ fired_incident_triggers = self .handle_trigger_anomalies (
479
+ is_anomalous , trigger , aggregation_value , fired_incident_triggers
480
+ )
460
481
else :
461
482
# OVER/UNDER value trigger
462
483
alert_operator , resolve_operator = self .THRESHOLD_TYPE_OPERATORS [
0 commit comments