Feature/datadog metrics (#1)

jitapichab · web-flow · commit f6dec88062d4 · 2023-06-08T14:55:02.000+02:00
* metrics initial implementation

Signed-off-by: Jorge Tapicha <jitapichab@gmail.com>
Signed-off-by: jorge tapicha <jorge.tapicha@rappi.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 ## [Unreleased][]
 
+### Added
+
+* Metrics probe `chaosdatadog.metrics.probes.get_metrics_state`
+
 [Unreleased]: https://github.com/chaostoolkit-incubator/chaostoolkit-datadog/compare/0.1.1...HEAD
 
 ## [0.1.1][]
diff --git a/chaosdatadog/__init__.py b/chaosdatadog/__init__.py
@@ -63,5 +63,6 @@ def load_exported_activities() -> List[DiscoveredActivities]:
     activities = []  # type: ignore
 
     activities.extend(discover_probes("chaosdatadog.slo.probes"))
+    activities.extend(discover_probes("chaosdatadog.metrics.probes"))
 
     return activities
diff --git a/chaosdatadog/metrics/__init__.py b/chaosdatadog/metrics/__init__.py
diff --git a/chaosdatadog/metrics/probes.py b/chaosdatadog/metrics/probes.py
@@ -0,0 +1,92 @@
+from datetime import datetime
+
+from chaoslib.exceptions import ActivityFailed
+from chaoslib.types import Configuration, Secrets
+from datadog_api_client.exceptions import ApiTypeError, NotFoundException
+from datadog_api_client.v1.api.metrics_api import MetricsApi
+from dateutil.relativedelta import relativedelta
+from logzero import logger
+
+from chaosdatadog import get_client
+from chaosdatadog.metrics.utils import (
+    check_comparison_values,
+    extract_metric_name,
+    get_comparison_operator,
+)
+
+__all__ = ["get_metrics_state"]
+
+
+def get_metrics_state(
+    query: str,
+    comparison: str,
+    threshold: float,
+    minutes_before: int,
+    configuration: Configuration = None,
+    secrets: Secrets = None,
+) -> bool:
+    """
+    Check that every datapoint returned by a Datadog metrics query over
+    the last `minutes_before` minutes satisfies `comparison` against
+    `threshold`.
+
+    Typical uses:
+
+    * Compare a metric (CPU, memory, network) to a threshold over time.
+    * Check whether a sum of datapoints (requests, errors, custom
+      metrics) stays on the expected side of a value.
+
+    Ex. cumsum(sum:istio.mesh.request.count.total{kube_service:test,
+               response_code:500}.as_count())
+
+    The query above is a cumulative sum of all requests answered with a
+    500. If more than 30 such errors deviate from your hypothesis, use
+    comparison "<" with threshold 30: any value below 30 is steady.
+
+    The allowed comparison values are [">", "<", ">=", "<=", "==", "!="].
+
+    Only the first series of the response is evaluated; an empty series
+    list counts as one point of value 0. Points whose value is None are
+    skipped. Raises ActivityFailed for a disallowed comparison or when
+    the metric metadata lookup rejects the name extracted from `query`.
+    """
+
+    try:
+        check_comparison_values(comparison)
+    except ValueError as e:
+        raise ActivityFailed(e) from e
+
+    with get_client(configuration, secrets) as c:
+        api = MetricsApi(c)
+
+        metric_name = extract_metric_name(query)
+
+        try:
+            api.get_metric_metadata(metric_name)
+        except NotFoundException as e:
+            logger.debug(e)
+            raise ActivityFailed("The metric name doesn't exist !") from e
+        except ApiTypeError as e:
+            logger.debug(e)
+            raise ActivityFailed(
+                "The metric name wasn't in datadog format!"
+            ) from e
+
+        # One clock reading so both bounds of the window share a reference.
+        now = datetime.now()
+        window_start = now + relativedelta(minutes=-minutes_before)
+        metrics = api.query_metrics(
+            _from=int(window_start.timestamp()),
+            to=int(now.timestamp()),
+            query=query,
+        )
+
+        series = metrics.to_dict().get("series", [{}])
+        if not series:
+            series = [{"pointlist": [[now.timestamp(), 0]]}]
+        points = series[0].get("pointlist", [])
+        # Gaps may come back as None values; comparing None would raise.
+        values = [point[1] for point in points if point[1] is not None]
+        compare = get_comparison_operator(comparison)
+        # all() is True for an empty list: no datapoints means steady.
+        return all(compare(value, threshold) for value in values)
diff --git a/chaosdatadog/metrics/utils.py b/chaosdatadog/metrics/utils.py
@@ -0,0 +1,28 @@
+import re
+
+COMPARISON_VALUES = [">", "<", ">=", "<=", "==", "!="]
+
+
+def extract_metric_name(query):
+    """Return the name between ':' (or start) and '{' (or end), else None."""
+    found = re.search(r"(?::|^)([^{}:]+)(?:{|$)", query)
+    return found.group(1) if found else None
+
+
+def check_comparison_values(comparison):
+    """Raise ValueError unless comparison is listed in COMPARISON_VALUES."""
+    msg = "Invalid value. Expected one of: '>', '<', '>=', '<=', '==', '!='"
+    if comparison not in COMPARISON_VALUES:
+        raise ValueError(msg)
+
+
+def get_comparison_operator(comparison):
+    """Return a two-argument predicate for comparison, or None if unknown."""
+    return {
+        ">": lambda left, right: left > right,
+        "<": lambda left, right: left < right,
+        ">=": lambda left, right: left >= right,
+        "<=": lambda left, right: left <= right,
+        "==": lambda left, right: left == right,
+        "!=": lambda left, right: left != right,
+    }.get(comparison)
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
@@ -0,0 +1,43 @@
+from datetime import datetime
+from unittest.mock import MagicMock, patch
+
+from dateutil.relativedelta import relativedelta
+
+from chaosdatadog.metrics.probes import get_metrics_state
+
+
+@patch("datadog_api_client.api_client.rest", autospec=False)
+def test_get_metrics_state(mock_get_client):
+    """The probe is False when any point fails the comparison."""
+    query = "query"
+    comparison = ">"
+    threshold = 40
+    minutes_before = 1
+    window = relativedelta(minutes=-minutes_before)
+
+    with patch("chaosdatadog.metrics.probes.MetricsApi") as MockMetricsApi:
+        api_mock = MagicMock()
+        MockMetricsApi.return_value = api_mock
+
+        # Only the first value is above the threshold.
+        stamp = datetime.now().timestamp()
+        point_list = [[stamp, value] for value in (51, 35, 20, 10)]
+        api_mock.query_metrics.return_value.to_dict.return_value = {
+            "series": [{"pointlist": point_list}]
+        }
+
+        # Bracket the call: the probe reads the clock itself, so comparing
+        # against a later datetime.now() fails whenever a second ticks over.
+        before = datetime.now()
+        result = get_metrics_state(query, comparison, threshold, minutes_before)
+        after = datetime.now()
+
+    assert result is False
+
+    api_mock.query_metrics.assert_called_once()
+    _, kwargs = api_mock.query_metrics.call_args
+    assert set(kwargs) == {"_from", "to", "query"}
+    assert kwargs["query"] == query
+    assert int(before.timestamp()) <= kwargs["to"] <= int(after.timestamp())
+    start_low = int((before + window).timestamp())
+    assert start_low <= kwargs["_from"] <= int((after + window).timestamp())