From 2f2f20f2bd77c4c51b48b6f25fc5e695f85c3dfe Mon Sep 17 00:00:00 2001
From: shahargl <shaharglazner@gmail.com>
Date: Sat, 28 Jun 2025 18:08:40 +0300
Subject: [PATCH] fix: for https://github.com/keephq/keep/issues/5107

---
 keep/rulesengine/rulesengine.py |  87 +++++++++++++++++--
 pyproject.toml                  |   2 +-
 tests/test_alert_evaluation.py  | 143 ++++++++++++++++++++++++++++++++
 3 files changed, 225 insertions(+), 7 deletions(-)

diff --git a/keep/rulesengine/rulesengine.py b/keep/rulesengine/rulesengine.py
index e13623b0af..c428748719 100644
--- a/keep/rulesengine/rulesengine.py
+++ b/keep/rulesengine/rulesengine.py
@@ -443,6 +443,9 @@ def _sanitize_dict(d):
         return sanitized
 
     def _check_if_rule_apply(self, rule: Rule, event: AlertDto) -> List[str]:
+        """
+        Evaluates if a rule applies to an event using CEL. Handles type coercion for ==/!= between int and str.
+        """
         sub_rules = self._extract_subrules(rule.definition_cel)
         payload = event.dict()
         # workaround since source is a list
@@ -474,12 +477,77 @@ def _check_if_rule_apply(self, rule: Rule, event: AlertDto) -> List[str]:
                 if "no such member" in str(e):
                     continue
                 # unknown
+                # --- Fix for https://github.com/keephq/keep/issues/5107 ---
+                if "no such overload" in str(e) or "found no matching overload" in str(
+                    e
+                ):
+                    try:
+                        coerced = self._coerce_eq_type_error(
+                            sub_rule, prgm, activation, event
+                        )
+                        if coerced:
+                            sub_rules_matched.append(sub_rule)
+                            continue
+                    except Exception:
+                        pass
                 raise
             if r:
                 sub_rules_matched.append(sub_rule)
         # no subrules matched
         return sub_rules_matched
 
+    def _coerce_eq_type_error(self, cel, prgm, activation, alert):
+        """
+        Helper for type coercion fallback for ==/!= between int and str in CEL.
+        Fixes https://github.com/keephq/keep/issues/5107
+        """
+        import re
+
+        m = re.match(r"([a-zA-Z0-9_\.]+)\s*([!=]=)\s*(.+)", cel)
+        if not m:
+            return False
+        left, op, right = m.groups()
+        left = left.strip()
+        right = (
+            right.strip().strip('"')
+            if right.strip().startswith('"') and right.strip().endswith('"')
+            else right.strip()
+        )
+        try:
+
+            def get_nested(d, path):
+                for part in path.split("."):
+                    if isinstance(d, dict):
+                        d = d.get(part)
+                    else:
+                        return None
+                return d
+
+            left_val = get_nested(activation, left)
+            try:
+                right_val = int(right)
+            except Exception:
+                try:
+                    right_val = float(right)
+                except Exception:
+                    right_val = right
+            # If one is str and the other is int/float, compare as str
+            if (isinstance(left_val, (int, float)) and isinstance(right_val, str)) or (
+                isinstance(left_val, str) and isinstance(right_val, (int, float))
+            ):
+                if op == "==":
+                    return str(left_val) == str(right_val)
+                else:
+                    return str(left_val) != str(right_val)
+            # Also handle both as str for robustness
+            if op == "==":
+                return str(left_val) == str(right_val)
+            else:
+                return str(left_val) != str(right_val)
+        except Exception:
+            pass
+        return False
+
     def _calc_rule_fingerprint(self, event: AlertDto, rule: Rule) -> list[list[str]]:
         # extract all the grouping criteria from the event
         # e.g. if the grouping criteria is ["event.labels.queue", "event.labels.cluster"]
@@ -639,12 +707,19 @@ def filter_alerts(
                 if "no such member" in str(e):
                     continue
                 # unknown
-                elif "no such overload" in str(e):
-                    logger.debug(
-                        f"Type mismtach between operator and operand in the CEL expression {cel} for alert {alert.id}"
-                    )
-                    continue
-                elif "found no matching overload" in str(e):
+                elif "no such overload" in str(
+                    e
+                ) or "found no matching overload" in str(e):
+                    # Try type coercion for == and !=
+                    try:
+                        coerced = self._coerce_eq_type_error(
+                            cel, prgm, activation, alert
+                        )
+                        if coerced:
+                            filtered_alerts.append(alert)
+                            continue
+                    except Exception:
+                        pass
                     logger.debug(
                         f"Type mismtach between operator and operand in the CEL expression {cel} for alert {alert.id}"
                     )
diff --git a/pyproject.toml b/pyproject.toml
index 87077da0d2..7eeb726254 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "keep"
-version = "0.45.8"
+version = "0.45.9"
 description = "Alerting. for developers, by developers."
 authors = ["Keep Alerting LTD"]
 packages = [{include = "keep"}]
diff --git a/tests/test_alert_evaluation.py b/tests/test_alert_evaluation.py
index 766a8ed1ac..81b9ab94c4 100644
--- a/tests/test_alert_evaluation.py
+++ b/tests/test_alert_evaluation.py
@@ -931,3 +931,146 @@ def test_state_alerts_flapping(db_session):
                 assert alert.status == AlertStatus.PENDING
             else:
                 assert alert.status == AlertStatus.FIRING
+
+
+def test_cel_equality_int_str_type_coercion(db_session):
+    """
+    Reproduce the bug: CEL 'field == "2"' should match payload {"field": 2} and vice versa.
+    """
+    from keep.api.models.alert import AlertDto
+    from keep.rulesengine.rulesengine import RulesEngine
+
+    # Case 1: field is int, CEL checks for string
+    alert1 = AlertDto(id="a1", name="test", field=2, fingerprint="fp1")
+    cel1 = 'field == "2"'
+    engine = RulesEngine()
+    result1 = engine.filter_alerts([alert1], cel1)
+    print(f"Case 1 result: {result1}")
+    assert len(result1) == 1, "CEL 'field == \"2\"' should match payload {field: 2}"
+
+    # Case 2: field is str, CEL checks for int
+    alert2 = AlertDto(id="a2", name="test", field="2", fingerprint="fp2")
+    cel2 = "field == 2"
+    result2 = engine.filter_alerts([alert2], cel2)
+    print(f"Case 2 result: {result2}")
+    assert len(result2) == 1, "CEL 'field == 2' should match payload {field: '2'}"
+
+    # Case 3: field is int, CEL checks for int (should match)
+    alert3 = AlertDto(id="a3", name="test", field=2, fingerprint="fp3")
+    cel3 = "field == 2"
+    result3 = engine.filter_alerts([alert3], cel3)
+    assert len(result3) == 1
+
+    # Case 4: field is str, CEL checks for str (should match)
+    alert4 = AlertDto(id="a4", name="test", field="2", fingerprint="fp4")
+    cel4 = 'field == "2"'
+    result4 = engine.filter_alerts([alert4], cel4)
+    assert len(result4) == 1
+
+
+def test_check_if_rule_apply_int_str_type_coercion(db_session):
+    """
+    Test that _check_if_rule_apply handles type coercion between int and str in CEL expressions.
+    This reproduces the same bug as test_cel_equality_int_str_type_coercion but for rule evaluation.
+    """
+    from datetime import datetime
+
+    from keep.api.core.dependencies import SINGLE_TENANT_UUID
+    from keep.api.models.alert import AlertDto
+    from keep.api.models.db.rule import Rule
+    from keep.rulesengine.rulesengine import RulesEngine
+
+    # Create a test rule with CEL expression that checks for string equality with int payload
+    rule = Rule(
+        id="test-rule-1",
+        tenant_id=SINGLE_TENANT_UUID,
+        name="Test Rule - Int Str Coercion",
+        definition_cel='field == "2"',  # CEL checks for string "2"
+        definition={},
+        timeframe=60,
+        timeunit="seconds",
+        created_by="test@keephq.dev",
+        creation_time=datetime.utcnow(),
+        grouping_criteria=[],
+        threshold=1,
+    )
+
+    engine = RulesEngine(tenant_id=SINGLE_TENANT_UUID)
+
+    # Case 1: field is int (2), CEL checks for string ("2") - should match
+    alert1 = AlertDto(id="a1", name="test", field=2, fingerprint="fp1", source=["test"])
+    matched_rules1 = engine._check_if_rule_apply(rule, alert1)
+    print(f"Case 1 - field=2, CEL='field == \"2\"': matched_rules={matched_rules1}")
+    assert (
+        len(matched_rules1) == 1
+    ), "Rule with 'field == \"2\"' should match alert with field=2"
+
+    # Case 2: field is string ("2"), CEL checks for int (2) - should match
+    rule2 = Rule(
+        id="test-rule-2",
+        tenant_id=SINGLE_TENANT_UUID,
+        name="Test Rule - Str Int Coercion",
+        definition_cel="field == 2",  # CEL checks for int 2
+        definition={},
+        timeframe=60,
+        timeunit="seconds",
+        created_by="test@keephq.dev",
+        creation_time=datetime.utcnow(),
+        grouping_criteria=[],
+        threshold=1,
+    )
+
+    alert2 = AlertDto(
+        id="a2", name="test", field="2", fingerprint="fp2", source=["test"]
+    )
+    matched_rules2 = engine._check_if_rule_apply(rule2, alert2)
+    print(f"Case 2 - field='2', CEL='field == 2': matched_rules={matched_rules2}")
+    assert (
+        len(matched_rules2) == 1
+    ), "Rule with 'field == 2' should match alert with field='2'"
+
+    # Case 3: field is int (2), CEL checks for int (2) - should match
+    rule3 = Rule(
+        id="test-rule-3",
+        tenant_id=SINGLE_TENANT_UUID,
+        name="Test Rule - Int Int",
+        definition_cel="field == 2",  # CEL checks for int 2
+        definition={},
+        timeframe=60,
+        timeunit="seconds",
+        created_by="test@keephq.dev",
+        creation_time=datetime.utcnow(),
+        grouping_criteria=[],
+        threshold=1,
+    )
+
+    alert3 = AlertDto(id="a3", name="test", field=2, fingerprint="fp3", source=["test"])
+    matched_rules3 = engine._check_if_rule_apply(rule3, alert3)
+    print(f"Case 3 - field=2, CEL='field == 2': matched_rules={matched_rules3}")
+    assert (
+        len(matched_rules3) == 1
+    ), "Rule with 'field == 2' should match alert with field=2"
+
+    # Case 4: field is string ("2"), CEL checks for string ("2") - should match
+    rule4 = Rule(
+        id="test-rule-4",
+        tenant_id=SINGLE_TENANT_UUID,
+        name="Test Rule - Str Str",
+        definition_cel='field == "2"',  # CEL checks for string "2"
+        definition={},
+        timeframe=60,
+        timeunit="seconds",
+        created_by="test@keephq.dev",
+        creation_time=datetime.utcnow(),
+        grouping_criteria=[],
+        threshold=1,
+    )
+
+    alert4 = AlertDto(
+        id="a4", name="test", field="2", fingerprint="fp4", source=["test"]
+    )
+    matched_rules4 = engine._check_if_rule_apply(rule4, alert4)
+    print(f"Case 4 - field='2', CEL='field == \"2\"': matched_rules={matched_rules4}")
+    assert (
+        len(matched_rules4) == 1
+    ), "Rule with 'field == \"2\"' should match alert with field='2'"