Skip to content

Commit eb9a0fb

Browse files
authored
feat(ai): Automatically fetch billing info for LLM models (#94026)
Automatically fetches pricing info for LLM models. Until now, that info was hardcoded into the Sentry codebase, and each pricing update required manual work. This PR introduces an automatic way of doing it by fetching that info from the OpenRouter public API. Closes: [TET-645: Automate cost/pricing updating](https://linear.app/getsentry/issue/TET-645/automate-costpricing-updating)
1 parent ea5a84a commit eb9a0fb

File tree

4 files changed

+455
-4
lines changed

4 files changed

+455
-4
lines changed

src/sentry/conf/server.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -811,6 +811,7 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str:
811811
"sentry.tasks.process_buffer",
812812
"sentry.tasks.relay",
813813
"sentry.tasks.release_registry",
814+
"sentry.tasks.ai_agent_monitoring",
814815
"sentry.tasks.summaries.weekly_reports",
815816
"sentry.tasks.summaries.daily_summary",
816817
"sentry.tasks.reprocessing2",
@@ -1323,6 +1324,12 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str:
13231324
"task": "sentry.relocation.transfer.find_relocation_transfer_region",
13241325
"schedule": crontab(minute="*/5"),
13251326
},
1327+
"fetch-ai-model-costs": {
1328+
"task": "sentry.tasks.ai_agent_monitoring.fetch_ai_model_costs",
1329+
# Run every 30 minutes
1330+
"schedule": crontab(minute="*/30"),
1331+
"options": {"expires": 25 * 60}, # 25 minutes
1332+
},
13261333
}
13271334

13281335
# Assign the configuration keys celery uses based on our silo mode.
@@ -1487,6 +1494,7 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str:
14871494
"sentry.tasks.post_process",
14881495
"sentry.tasks.process_buffer",
14891496
"sentry.tasks.relay",
1497+
"sentry.tasks.ai_agent_monitoring",
14901498
"sentry.tasks.release_registry",
14911499
"sentry.tasks.repository",
14921500
"sentry.tasks.reprocessing2",
@@ -1680,6 +1688,10 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str:
16801688
"task": "relocation:sentry.relocation.transfer.find_relocation_transfer_region",
16811689
"schedule": task_crontab("*/5", "*", "*", "*", "*"),
16821690
},
1691+
"fetch-ai-model-costs": {
1692+
"task": "ai_agent_monitoring:sentry.tasks.ai_agent_monitoring.fetch_ai_model_costs",
1693+
"schedule": task_crontab("*/30", "*", "*", "*", "*"),
1694+
},
16831695
"sync_options_trial": {
16841696
"schedule": timedelta(minutes=5),
16851697
"task": "options:sentry.tasks.options.sync_options",

src/sentry/relay/config/ai_model_costs.py

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1-
from typing import TypedDict
1+
from typing import NotRequired, Required, TypedDict
2+
3+
from sentry.utils.cache import cache
4+
5+
type ModelId = str
26

37

48
class AIModelCost(TypedDict):
@@ -7,9 +11,23 @@ class AIModelCost(TypedDict):
711
costPer1kTokens: float
812

913

10-
class AIModelCosts(TypedDict):
11-
version: int
12-
costs: list[AIModelCost]
14+
# Cache key for storing AI model costs
15+
AI_MODEL_COSTS_CACHE_KEY = "ai-model-costs:v2"
16+
# Cache timeout: 30 days (we re-fetch every 30 minutes, so this provides more than enough overlap)
17+
AI_MODEL_COSTS_CACHE_TTL = 30 * 24 * 60 * 60
18+
19+
20+
class AIModelCostV2(TypedDict):
    """Per-token cost entry for a single model (v2 cost format).

    Populated from OpenRouter pricing data by the
    ``fetch_ai_model_costs`` task; missing pricing fields default to 0.0.
    """

    # Cost per input (prompt) token.
    inputPerToken: float
    # Cost per output (completion) token.
    outputPerToken: float
    # Cost per internal-reasoning output token.
    outputReasoningPerToken: float
    # Cost per cached-input-read token.
    inputCachedPerToken: float
25+
26+
27+
class AIModelCosts(TypedDict, total=False):
    """Top-level AI model costs payload.

    Supports both schema versions during the v1 -> v2 transition:
    v1 carries a flat ``costs`` list, v2 carries a ``models`` mapping.
    Exactly one of ``costs``/``models`` is expected per payload.
    """

    # Schema version of this payload (1 for `costs`, 2 for `models`).
    version: Required[int]
    # v1 format: flat list of per-model cost entries.
    costs: NotRequired[list[AIModelCost]]
    # v2 format: mapping of model ID (provider prefix stripped) to its costs.
    models: NotRequired[dict[ModelId, AIModelCostV2]]
1331

1432

1533
_AI_MODEL_COST_DATA = [
@@ -363,4 +381,21 @@ class AIModelCosts(TypedDict):
363381

364382

365383
def ai_model_costs_config() -> AIModelCosts:
    """
    Return the AI model costs configuration.

    Prefers the up-to-date costs stored in cache (periodically fetched
    from OpenRouter); when the cache is empty, falls back to the
    precomputed v1 cost table shipped with the codebase.

    Returns:
        AIModelCosts object containing cost information for AI models
    """
    # NOTE (vgrozdanic): in the transition period from v1 to v2, we need to
    # support both versions of the AI model costs config.
    # Once we've fully migrated to v2, we can remove the v1 config.
    if (cached_costs := cache.get(AI_MODEL_COSTS_CACHE_KEY)) is not None:
        return cached_costs

    # Fall back to precomputed costs (v1)
    return _PRECOMPUTED_AI_MODEL_COSTS
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
import logging
2+
from typing import Any
3+
4+
import sentry_sdk
5+
6+
from sentry.http import safe_urlopen
7+
from sentry.relay.config.ai_model_costs import (
8+
AI_MODEL_COSTS_CACHE_KEY,
9+
AI_MODEL_COSTS_CACHE_TTL,
10+
AIModelCosts,
11+
AIModelCostV2,
12+
ModelId,
13+
)
14+
from sentry.tasks.base import instrumented_task
15+
from sentry.taskworker.config import TaskworkerConfig
16+
from sentry.taskworker.namespaces import ai_agent_monitoring_tasks
17+
from sentry.utils.cache import cache
18+
19+
logger = logging.getLogger(__name__)
20+
21+
22+
# OpenRouter API endpoint
23+
OPENROUTER_MODELS_API_URL = "https://openrouter.ai/api/v1/models"
24+
25+
26+
@instrumented_task(
    name="sentry.tasks.ai_agent_monitoring.fetch_ai_model_costs",
    queue="ai_agent_monitoring",
    default_retry_delay=5,
    max_retries=3,
    soft_time_limit=30,  # 30 seconds
    time_limit=35,  # 35 seconds
    taskworker_config=TaskworkerConfig(
        namespace=ai_agent_monitoring_tasks,
        processing_deadline_duration=35,
        expires=30,
    ),
)
def fetch_ai_model_costs() -> None:
    """
    Fetch AI model costs from the OpenRouter API and store them in cache.

    This task fetches model pricing data from OpenRouter and converts it to
    the AIModelCostV2 format for use by Sentry's LLM cost tracking. Malformed
    individual entries are skipped; a malformed top-level response is logged
    and the task returns without touching the cache.
    """

    # Fetch data from OpenRouter API; raise_for_status lets HTTP errors
    # propagate so the task's retry policy kicks in.
    response = safe_urlopen(OPENROUTER_MODELS_API_URL)
    response.raise_for_status()

    # Parse the response
    data = response.json()

    if not isinstance(data, dict) or "data" not in data:
        logger.error(
            "fetch_ai_model_costs.invalid_response_format",
            extra={"response_keys": list(data.keys()) if isinstance(data, dict) else "not_dict"},
        )
        return

    models_data = data["data"]
    if not isinstance(models_data, list):
        logger.error(
            "fetch_ai_model_costs.invalid_models_data_format",
            extra={"type": type(models_data).__name__},
        )
        return

    # Convert to AIModelCostV2 format
    models_dict: dict[ModelId, AIModelCostV2] = {}

    for model_data in models_data:
        if not isinstance(model_data, dict):
            continue

        model_id = model_data.get("id")
        if not model_id:
            continue

        # OpenRouter includes provider name in the model ID, e.g. openai/gpt-4o-mini.
        # We need to extract the model name, since our SDKs only send the model name
        # (e.g. gpt-4o-mini)
        if "/" in model_id:
            model_id = model_id.split("/", maxsplit=1)[1]

        pricing = model_data.get("pricing", {})
        # Bug fix: a present-but-null (or otherwise non-dict) "pricing" field
        # previously raised an uncaught AttributeError on `.get()`, failing the
        # whole task for a single bad entry. Skip such entries instead.
        if not isinstance(pricing, dict):
            continue

        # Convert pricing data to AIModelCostV2 format.
        # OpenRouter provides costs as strings, we need to convert to float.
        try:
            ai_model_cost = AIModelCostV2(
                inputPerToken=safe_float_conversion(pricing.get("prompt")),
                outputPerToken=safe_float_conversion(pricing.get("completion")),
                outputReasoningPerToken=safe_float_conversion(pricing.get("internal_reasoning")),
                inputCachedPerToken=safe_float_conversion(pricing.get("input_cache_read")),
            )
            models_dict[model_id] = ai_model_cost
        except (ValueError, TypeError) as e:
            # Report but keep processing the remaining models.
            sentry_sdk.capture_exception(e)
            continue

    ai_model_costs: AIModelCosts = {"version": 2, "models": models_dict}
    cache.set(AI_MODEL_COSTS_CACHE_KEY, ai_model_costs, AI_MODEL_COSTS_CACHE_TTL)
108+
109+
110+
def safe_float_conversion(value: Any) -> float:
    """
    Safely convert a value to float, handling string inputs and None values.

    Args:
        value: The value to convert (could be string, float, int, or None)

    Returns:
        The float value, or 0.0 if the value is None or cannot be converted
    """
    # Numeric inputs convert directly (bool is an int subclass -> 0.0/1.0).
    if isinstance(value, (int, float)):
        return float(value)

    # Strings are parsed; unparseable text falls back to 0.0.
    if isinstance(value, str):
        try:
            return float(value)
        except ValueError:
            return 0.0

    # None and every other type default to 0.0.
    return 0.0

0 commit comments

Comments
 (0)