Skip to content

Commit eb9a0fb

Browse files
authored
feat(ai): Automatically fetch billing info for LLM models (#94026)
Automatically fetches pricing info for LLM models. Until now, that info was hardcoded into the Sentry codebase, and each pricing update required manual work. This PR introduces an automatic way of doing it by fetching that info from the OpenRouter public API. Closes: [TET-645: Automate cost/pricing updating](https://linear.app/getsentry/issue/TET-645/automate-costpricing-updating)
1 parent ea5a84a commit eb9a0fb

File tree

4 files changed

+455
-4
lines changed

4 files changed

+455
-4
lines changed

src/sentry/conf/server.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -811,6 +811,7 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str:
811811
"sentry.tasks.process_buffer",
812812
"sentry.tasks.relay",
813813
"sentry.tasks.release_registry",
814+
"sentry.tasks.ai_agent_monitoring",
814815
"sentry.tasks.summaries.weekly_reports",
815816
"sentry.tasks.summaries.daily_summary",
816817
"sentry.tasks.reprocessing2",
@@ -1323,6 +1324,12 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str:
13231324
"task": "sentry.relocation.transfer.find_relocation_transfer_region",
13241325
"schedule": crontab(minute="*/5"),
13251326
},
1327+
"fetch-ai-model-costs": {
1328+
"task": "sentry.tasks.ai_agent_monitoring.fetch_ai_model_costs",
1329+
# Run every 30 minutes
1330+
"schedule": crontab(minute="*/30"),
1331+
"options": {"expires": 25 * 60}, # 25 minutes
1332+
},
13261333
}
13271334

13281335
# Assign the configuration keys celery uses based on our silo mode.
@@ -1487,6 +1494,7 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str:
14871494
"sentry.tasks.post_process",
14881495
"sentry.tasks.process_buffer",
14891496
"sentry.tasks.relay",
1497+
"sentry.tasks.ai_agent_monitoring",
14901498
"sentry.tasks.release_registry",
14911499
"sentry.tasks.repository",
14921500
"sentry.tasks.reprocessing2",
@@ -1680,6 +1688,10 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str:
16801688
"task": "relocation:sentry.relocation.transfer.find_relocation_transfer_region",
16811689
"schedule": task_crontab("*/5", "*", "*", "*", "*"),
16821690
},
1691+
"fetch-ai-model-costs": {
1692+
"task": "ai_agent_monitoring:sentry.tasks.ai_agent_monitoring.fetch_ai_model_costs",
1693+
"schedule": task_crontab("*/30", "*", "*", "*", "*"),
1694+
},
16831695
"sync_options_trial": {
16841696
"schedule": timedelta(minutes=5),
16851697
"task": "options:sentry.tasks.options.sync_options",

src/sentry/relay/config/ai_model_costs.py

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1-
from typing import TypedDict
1+
from typing import NotRequired, Required, TypedDict
2+
3+
from sentry.utils.cache import cache
4+
5+
type ModelId = str
26

37

48
class AIModelCost(TypedDict):
@@ -7,9 +11,23 @@ class AIModelCost(TypedDict):
711
costPer1kTokens: float
812

913

10-
class AIModelCosts(TypedDict):
11-
version: int
12-
costs: list[AIModelCost]
14+
# Cache key for storing AI model costs
15+
AI_MODEL_COSTS_CACHE_KEY = "ai-model-costs:v2"
16+
# Cache timeout: 30 days (we re-fetch every 30 minutes, so this provides more than enough overlap)
17+
AI_MODEL_COSTS_CACHE_TTL = 30 * 24 * 60 * 60
18+
19+
20+
class AIModelCostV2(TypedDict):
    """Per-token cost entry for a single model (v2 cost format).

    Populated from OpenRouter pricing data by the
    ``fetch_ai_model_costs`` task; missing pricing fields default to 0.0.
    """

    # Cost per input (prompt) token.
    inputPerToken: float
    # Cost per output (completion) token.
    outputPerToken: float
    # Cost per internal-reasoning output token.
    outputReasoningPerToken: float
    # Cost per cached-input-read token.
    inputCachedPerToken: float
25+
26+
27+
class AIModelCosts(TypedDict, total=False):
    """Top-level AI model costs payload.

    Supports both schema versions during the v1 -> v2 transition:
    v1 carries a flat ``costs`` list, v2 carries a ``models`` mapping.
    Exactly one of ``costs``/``models`` is expected per payload.
    """

    # Schema version of this payload (1 for `costs`, 2 for `models`).
    version: Required[int]
    # v1 format: flat list of per-model cost entries.
    costs: NotRequired[list[AIModelCost]]
    # v2 format: mapping of model ID (provider prefix stripped) to its costs.
    models: NotRequired[dict[ModelId, AIModelCostV2]]
1331

1432

1533
_AI_MODEL_COST_DATA = [
@@ -363,4 +381,21 @@ class AIModelCosts(TypedDict):
363381

364382

365383
def ai_model_costs_config() -> AIModelCosts:
    """
    Return the AI model costs configuration.

    Prefers the up-to-date costs stored in cache (periodically fetched
    from OpenRouter); when the cache is empty, falls back to the
    precomputed v1 cost table shipped with the codebase.

    Returns:
        AIModelCosts object containing cost information for AI models
    """
    # NOTE (vgrozdanic): in the transition period from v1 to v2, we need to
    # support both versions of the AI model costs config.
    # Once we've fully migrated to v2, we can remove the v1 config.
    if (cached_costs := cache.get(AI_MODEL_COSTS_CACHE_KEY)) is not None:
        return cached_costs

    # Fall back to precomputed costs (v1)
    return _PRECOMPUTED_AI_MODEL_COSTS
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
import logging
2+
from typing import Any
3+
4+
import sentry_sdk
5+
6+
from sentry.http import safe_urlopen
7+
from sentry.relay.config.ai_model_costs import (
8+
AI_MODEL_COSTS_CACHE_KEY,
9+
AI_MODEL_COSTS_CACHE_TTL,
10+
AIModelCosts,
11+
AIModelCostV2,
12+
ModelId,
13+
)
14+
from sentry.tasks.base import instrumented_task
15+
from sentry.taskworker.config import TaskworkerConfig
16+
from sentry.taskworker.namespaces import ai_agent_monitoring_tasks
17+
from sentry.utils.cache import cache
18+
19+
logger = logging.getLogger(__name__)
20+
21+
22+
# OpenRouter API endpoint
23+
OPENROUTER_MODELS_API_URL = "https://openrouter.ai/api/v1/models"
24+
25+
26+
@instrumented_task(
    name="sentry.tasks.ai_agent_monitoring.fetch_ai_model_costs",
    queue="ai_agent_monitoring",
    default_retry_delay=5,
    max_retries=3,
    soft_time_limit=30,  # 30 seconds
    time_limit=35,  # 35 seconds
    taskworker_config=TaskworkerConfig(
        namespace=ai_agent_monitoring_tasks,
        processing_deadline_duration=35,
        expires=30,
    ),
)
def fetch_ai_model_costs() -> None:
    """
    Fetch AI model costs from the OpenRouter API and store them in cache.

    This task fetches model pricing data from OpenRouter and converts it to
    the AIModelCostV2 format for use by Sentry's LLM cost tracking. Malformed
    individual entries are skipped; a malformed top-level response is logged
    and the task returns without touching the cache.
    """

    # Fetch data from OpenRouter API; raise_for_status lets HTTP errors
    # propagate so the task's retry policy kicks in.
    response = safe_urlopen(OPENROUTER_MODELS_API_URL)
    response.raise_for_status()

    # Parse the response
    data = response.json()

    if not isinstance(data, dict) or "data" not in data:
        logger.error(
            "fetch_ai_model_costs.invalid_response_format",
            extra={"response_keys": list(data.keys()) if isinstance(data, dict) else "not_dict"},
        )
        return

    models_data = data["data"]
    if not isinstance(models_data, list):
        logger.error(
            "fetch_ai_model_costs.invalid_models_data_format",
            extra={"type": type(models_data).__name__},
        )
        return

    # Convert to AIModelCostV2 format
    models_dict: dict[ModelId, AIModelCostV2] = {}

    for model_data in models_data:
        if not isinstance(model_data, dict):
            continue

        model_id = model_data.get("id")
        if not model_id:
            continue

        # OpenRouter includes provider name in the model ID, e.g. openai/gpt-4o-mini.
        # We need to extract the model name, since our SDKs only send the model name
        # (e.g. gpt-4o-mini)
        if "/" in model_id:
            model_id = model_id.split("/", maxsplit=1)[1]

        pricing = model_data.get("pricing", {})
        # Bug fix: a present-but-null (or otherwise non-dict) "pricing" field
        # previously raised an uncaught AttributeError on `.get()`, failing the
        # whole task for a single bad entry. Skip such entries instead.
        if not isinstance(pricing, dict):
            continue

        # Convert pricing data to AIModelCostV2 format.
        # OpenRouter provides costs as strings, we need to convert to float.
        try:
            ai_model_cost = AIModelCostV2(
                inputPerToken=safe_float_conversion(pricing.get("prompt")),
                outputPerToken=safe_float_conversion(pricing.get("completion")),
                outputReasoningPerToken=safe_float_conversion(pricing.get("internal_reasoning")),
                inputCachedPerToken=safe_float_conversion(pricing.get("input_cache_read")),
            )
            models_dict[model_id] = ai_model_cost
        except (ValueError, TypeError) as e:
            # Report but keep processing the remaining models.
            sentry_sdk.capture_exception(e)
            continue

    ai_model_costs: AIModelCosts = {"version": 2, "models": models_dict}
    cache.set(AI_MODEL_COSTS_CACHE_KEY, ai_model_costs, AI_MODEL_COSTS_CACHE_TTL)
108+
109+
110+
def safe_float_conversion(value: Any) -> float:
    """
    Safely convert a value to float, handling string inputs and None values.

    Args:
        value: The value to convert (could be string, float, int, or None)

    Returns:
        The float value, or 0.0 if the value is None or cannot be converted
    """
    # Numeric inputs convert directly (bool is an int subclass -> 0.0/1.0).
    if isinstance(value, (int, float)):
        return float(value)

    # Strings are parsed; unparseable text falls back to 0.0.
    if isinstance(value, str):
        try:
            return float(value)
        except ValueError:
            return 0.0

    # None and every other type default to 0.0.
    return 0.0

0 commit comments

Comments
 (0)