Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions keep/api/alert_deduplicator/alert_deduplicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def _apply_deduplication_rule(
alert: AlertDto,
rule: DeduplicationRuleDto,
last_alert_fingerprint_to_hash: dict[str, str] | None = None,
) -> bool:
) -> AlertDto:
"""
Apply a deduplication rule to an alert.

Expand All @@ -63,9 +63,10 @@ def _apply_deduplication_rule(
for field in rule.ignore_fields:
alert_copy = self._remove_field(field, alert_copy)

# calculate the hash
# calculate the hash for the alert
# we use the alert dict to get the fields in the correct order
alert_hash = hashlib.sha256(
json.dumps(alert_copy.dict(), default=str).encode()
json.dumps(alert_copy.dict(), default=str, sort_keys=True).encode()
).hexdigest()
alert.alert_hash = alert_hash
# Check if the hash is already in the database.
Expand Down
4 changes: 3 additions & 1 deletion keep/api/routes/preset.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,10 @@ def pull_data_from_providers(
provider_id=provider.id,
provider_type=provider.type,
)


sorted_provider_alerts_by_fingerprint = (
provider_class.get_alerts_by_fingerprint(tenant_id=tenant_id)
provider_class.get_alerts_by_fingerprint_without_enrich(tenant_id=tenant_id)
)
logger.info(
f"Pulling alerts from provider {provider.type} ({provider.id}) completed",
Expand Down
13 changes: 11 additions & 2 deletions keep/api/tasks/process_event_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -685,7 +685,15 @@ def process_event(
except Exception:
provider_class = ProvidersFactory.get_provider_class("keep")

if isinstance(event, list):
if isinstance(event, list) and all(isinstance(e, AlertDto) for e in event):
# set fingerprints on alerts pulled via the provider's _get_alerts() (flow started from preset.py)
event = provider_class.format_alert_fingerprint(
tenant_id=tenant_id,
formatted_alerts=event,
provider_id=provider_id,
provider_type=provider_type
)
elif isinstance(event, list):
event_list = []
for event_item in event:
if not isinstance(event_item, AlertDto):
Expand All @@ -698,7 +706,8 @@ def process_event(
)
)
else:
event_list.append(event_item)
# this should never happen, but handle it just in case
event_list.append(event_item)
event = event_list
else:
event = provider_class.format_alert(
Expand Down
106 changes: 63 additions & 43 deletions keep/providers/base/base_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,14 +413,61 @@ def _format_alert(
"""
raise NotImplementedError("format_alert() method not implemented")


@classmethod
def format_alert_fingerprint(
    cls,
    formatted_alerts: list[AlertDto],
    tenant_id: str | None,
    provider_type: str | None,
    provider_id: str | None,
) -> list[AlertDto] | None:
    """
    Apply the custom deduplication rule, if one exists, to formatted alerts.

    Looks up the custom deduplication rule for the given tenant/provider and,
    when one is found, overwrites each alert's ``fingerprint`` with the value
    computed by ``cls.get_alert_fingerprint`` from the rule's
    ``fingerprint_fields``. The alerts are modified in place.

    Args:
        formatted_alerts (list[AlertDto]): Alerts that were already formatted,
            or None when the provider decided not to format the alert.
        tenant_id (str | None): Tenant used to look up the deduplication rule.
        provider_type (str | None): Provider type used to look up the rule.
        provider_id (str | None): Provider id used to look up the rule.

    Returns:
        list[AlertDto] | None: The same alerts that were passed in (with
        updated fingerprints when a custom rule exists), or None when
        ``formatted_alerts`` is None.
    """
    logger = logging.getLogger(__name__)

    if formatted_alerts is None:
        logger.debug(
            "Provider returned None, which means it decided not to format the alert"
        )
        return None
    logger.debug("Alert formatted")

    # after the provider calculated the default fingerprint,
    # check if there is a custom deduplication rule to apply
    custom_deduplication_rule = get_custom_deduplication_rule(
        tenant_id=tenant_id,
        provider_id=provider_id,
        provider_type=provider_type,
    )

    # no custom deduplication rule: keep the fingerprints as they are
    if not custom_deduplication_rule:
        return formatted_alerts

    # the rule's fields are the same for every alert, so read them once
    fingerprint_fields = custom_deduplication_rule.fingerprint_fields
    for alert in formatted_alerts:
        logger.info(
            "Applying custom deduplication rule",
            extra={
                "tenant_id": tenant_id,
                "provider_id": provider_id,
                "alert_id": alert.id,
            },
        )
        alert.fingerprint = cls.get_alert_fingerprint(alert, fingerprint_fields)

    # return the parameter itself; the previous code returned the undefined
    # name `formatted_alert`, which raised NameError on this path
    return formatted_alerts


@classmethod
def format_alert(
cls,
event: dict | list[dict],
tenant_id: str | None,
provider_type: str | None,
provider_id: str | None,
) -> AlertDto | list[AlertDto] | None:
) -> list[AlertDto] | None:
logger = logging.getLogger(__name__)

provider_instance: BaseProvider | None = None
Expand Down Expand Up @@ -451,19 +498,6 @@ def format_alert(
)
logger.debug("Formatting alert")
formatted_alert = cls._format_alert(event, provider_instance)
if formatted_alert is None:
logger.debug(
"Provider returned None, which means it decided not to format the alert"
)
return None
logger.debug("Alert formatted")
# after the provider calculated the default fingerprint
# check if there is a custom deduplication rule and apply
custom_deduplication_rule = get_custom_deduplication_rule(
tenant_id=tenant_id,
provider_id=provider_id,
provider_type=provider_type,
)

if not isinstance(formatted_alert, list):
formatted_alert.providerId = provider_id
Expand All @@ -475,39 +509,20 @@ def format_alert(
alert.providerId = provider_id
alert.providerType = provider_type

# if there is no custom deduplication rule, return the formatted alert
if not custom_deduplication_rule:
return formatted_alert
# if there is a custom deduplication rule, apply it
# apply the custom deduplication rule to calculate the fingerprint
for alert in formatted_alert:
logger.info(
"Applying custom deduplication rule",
extra={
"tenant_id": tenant_id,
"provider_id": provider_id,
"alert_id": alert.id,
},
)
alert.fingerprint = cls.get_alert_fingerprint(
alert, custom_deduplication_rule.fingerprint_fields
)
return formatted_alert
return cls.format_alert_fingerprint(
formatted_alert,
tenant_id,
provider_type,
provider_id,
)


@staticmethod
def get_alert_fingerprint(alert: AlertDto, fingerprint_fields: list = []) -> str:
"""
Get the fingerprint of an alert.

Args:
event (AlertDto): The alert to get the fingerprint of.
fingerprint_fields (list, optional): The fields we calculate the fingerprint upon. Defaults to [].

Returns:
str: hexdigest of the fingerprint or the event.name if no fingerprint_fields were given.
"""
logger = logging.getLogger(__name__)
if not fingerprint_fields:
return alert.name

fingerprint = hashlib.sha256()
event_dict = alert.dict()
for fingerprint_field in fingerprint_fields:
Expand Down Expand Up @@ -563,7 +578,7 @@ def get_alerts(self) -> list[AlertDto]:
alert.providerType = self.provider_type
return alerts

def get_alerts_by_fingerprint(self, tenant_id: str) -> dict[str, list[AlertDto]]:
def get_alerts_by_fingerprint_without_enrich(self, tenant_id: str) -> dict[str, list[AlertDto]]:
"""
Get alerts from the provider grouped by fingerprint, sorted by lastReceived.

Expand Down Expand Up @@ -591,7 +606,12 @@ def get_alerts_by_fingerprint(self, tenant_id: str) -> dict[str, list[AlertDto]]
get_attr,
)
}
return grouped_alerts

def get_alerts_by_fingerprint(self, tenant_id: str) -> dict[str, list[AlertDto]]:

grouped_alerts = self.get_alerts_by_fingerprint_without_enrich(tenant_id)

# enrich alerts
with tracer.start_as_current_span(f"{self.__class__.__name__}-enrich_alerts"):
pulled_alerts_enrichments = get_enrichments(
Expand Down
28 changes: 28 additions & 0 deletions tests/deduplication/test_deduplications.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from keep.api.core.dependencies import SINGLE_TENANT_UUID
from keep.api.models.alert import DeduplicationRuleDto, AlertStatus
from keep.api.models.db.alert import AlertDeduplicationRule, AlertDeduplicationEvent, Alert
from keep.api.alert_deduplicator.alert_deduplicator import AlertDeduplicator
from keep.api.utils.enrichment_helpers import convert_db_alerts_to_dto_alerts
from keep.providers.providers_factory import ProvidersFactory
from tests.fixtures.client import client, setup_api_key, test_app # noqa
Expand Down Expand Up @@ -906,3 +907,30 @@ def test_full_deduplication_last_received(db_session, create_alert):
alerts_dto = convert_db_alerts_to_dto_alerts(alerts)

assert alerts_dto[0].lastReceived == dt2.astimezone(pytz.UTC).strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"


def test_alert_deduplicator_hash_consistency():
    """
    Check that the alert hash does not depend on the order in which the
    alert's fields were supplied.
    """
    # NOTE(review): AlertDto must be importable in this module - confirm it is
    # imported at the top of the file.
    deduplicator = AlertDeduplicator(tenant_id="test-tenant")
    rule = DeduplicationRuleDto(id="dummy-rule", ignore_fields=[])

    # First alert: fields given in the "normal" order
    alert1 = AlertDto(
        id=1,
        message="Test",
        timestamp="2025-06-30T12:00:00Z",
    )
    # Second alert: same values, keyword arguments given in a different order
    alert2 = AlertDto(
        message="Test",
        id=1,
        timestamp="2025-06-30T12:00:00Z",
    )

    result1 = deduplicator._apply_deduplication_rule(
        alert1, rule, last_alert_fingerprint_to_hash={}
    )
    result2 = deduplicator._apply_deduplication_rule(
        alert2, rule, last_alert_fingerprint_to_hash={}
    )

    assert (
        result1.alert_hash == result2.alert_hash
    ), f"Hashes differ: {result1.alert_hash} != {result2.alert_hash}"