Skip to content

Commit 07f27ea

Browse files
authored
fix(trace): Handle too many errors (#95004)
- When there's over 10k errors and the id we want isn't in that 10k we aren't rendering it. This biasess the error query so that we'll return the requested eventId when requested
1 parent 10d26f3 commit 07f27ea

File tree

2 files changed

+73
-19
lines changed

2 files changed

+73
-19
lines changed

src/sentry/api/endpoints/organization_trace.py

Lines changed: 38 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import sentry_sdk
77
from django.http import HttpRequest, HttpResponse
8+
from rest_framework.exceptions import ParseError
89
from rest_framework.request import Request
910
from rest_framework.response import Response
1011
from snuba_sdk import Column, Function
@@ -27,9 +28,12 @@
2728
from sentry.snuba.referrer import Referrer
2829
from sentry.snuba.spans_rpc import run_trace_query
2930
from sentry.utils.numbers import base32_encode
31+
from sentry.utils.validators import is_event_id
3032

3133
# 1 worker each for spans, errors, performance issues
3234
_query_thread_pool = ThreadPoolExecutor(max_workers=3)
35+
# Mostly here for testing
36+
ERROR_LIMIT = 10_000
3337

3438

3539
class SerializedEvent(TypedDict):
@@ -204,30 +208,39 @@ def serialize_rpc_event(
204208
else:
205209
return self.serialize_rpc_issue(event, group_cache)
206210

207-
def errors_query(self, snuba_params: SnubaParams, trace_id: str) -> DiscoverQueryBuilder:
211+
def errors_query(
212+
self, snuba_params: SnubaParams, trace_id: str, error_id: str | None
213+
) -> DiscoverQueryBuilder:
208214
"""Run an error query, getting all the errors for a given trace id"""
209215
# TODO: replace this with EAP calls, this query is copied from the old trace view
216+
columns = [
217+
"id",
218+
"project.name",
219+
"project.id",
220+
"timestamp",
221+
"timestamp_ms",
222+
"trace.span",
223+
"transaction",
224+
"issue",
225+
"title",
226+
"message",
227+
"tags[level]",
228+
]
229+
orderby = ["id"]
230+
# If there's an error_id included in the request, bias the orderby to try to return that error_id over others so
231+
# that we can render it in the trace view, even if we hit the error_limit
232+
if error_id is not None:
233+
columns.append(f'to_other(id, "{error_id}", 0, 1) AS target')
234+
orderby.insert(0, "-target")
210235
return DiscoverQueryBuilder(
211236
Dataset.Events,
212237
params={},
213238
snuba_params=snuba_params,
214239
query=f"trace:{trace_id}",
215-
selected_columns=[
216-
"id",
217-
"project.name",
218-
"project.id",
219-
"timestamp",
220-
"timestamp_ms",
221-
"trace.span",
222-
"transaction",
223-
"issue",
224-
"title",
225-
"message",
226-
"tags[level]",
227-
],
240+
selected_columns=columns,
228241
# Don't add timestamp to this orderby as snuba will have to split the time range up and make multiple queries
229-
orderby=["id"],
230-
limit=10_000,
242+
orderby=orderby,
243+
limit=ERROR_LIMIT,
231244
config=QueryBuilderConfig(
232245
auto_fields=True,
233246
),
@@ -292,7 +305,9 @@ def run_perf_issues_query(self, occurrence_query: DiscoverQueryBuilder):
292305
return result
293306

294307
@sentry_sdk.tracing.trace
295-
def query_trace_data(self, snuba_params: SnubaParams, trace_id: str) -> list[SerializedEvent]:
308+
def query_trace_data(
309+
self, snuba_params: SnubaParams, trace_id: str, error_id: str | None = None
310+
) -> list[SerializedEvent]:
296311
"""Queries span/error data for a given trace"""
297312
# This is a hack, long term EAP will store both errors and performance_issues eventually but is not ready
298313
# currently. But we want to move performance data off the old tables immediately. To keep the code simpler I'm
@@ -302,7 +317,7 @@ def query_trace_data(self, snuba_params: SnubaParams, trace_id: str) -> list[Ser
302317
# the thread pool, database connections can hang around as the threads are not cleaned
303318
# up. Because of that, tests can fail during tear down as there are active connections
304319
# to the database preventing a DROP.
305-
errors_query = self.errors_query(snuba_params, trace_id)
320+
errors_query = self.errors_query(snuba_params, trace_id, error_id)
306321
occurrence_query = self.perf_issues_query(snuba_params, trace_id)
307322

308323
spans_future = _query_thread_pool.submit(
@@ -380,10 +395,14 @@ def get(self, request: Request, organization: Organization, trace_id: str) -> Ht
380395

381396
update_snuba_params_with_timestamp(request, snuba_params)
382397

398+
error_id = request.GET.get("errorId")
399+
if error_id is not None and not is_event_id(error_id):
400+
raise ParseError(f"eventId: {error_id} needs to be a valid uuid")
401+
383402
def data_fn(offset: int, limit: int) -> list[SerializedEvent]:
384403
"""offset and limit don't mean anything on this endpoint currently"""
385404
with handle_query_errors():
386-
spans = self.query_trace_data(snuba_params, trace_id)
405+
spans = self.query_trace_data(snuba_params, trace_id, error_id)
387406
return spans
388407

389408
return self.paginate(

tests/snuba/api/endpoints/test_organization_trace.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
from unittest import mock
23
from uuid import uuid4
34

45
from django.urls import reverse
@@ -232,3 +233,37 @@ def test_with_only_errors(self):
232233
data = response.data
233234
assert len(data) == 1
234235
assert data[0]["event_id"] == error.event_id
236+
237+
def test_with_target_error(self):
238+
start, _ = self.get_start_end_from_day_ago(1000)
239+
error_data = load_data(
240+
"javascript",
241+
timestamp=start,
242+
)
243+
error_data["contexts"]["trace"] = {
244+
"type": "trace",
245+
"trace_id": self.trace_id,
246+
"span_id": "a" * 16,
247+
}
248+
error_data["tags"] = [["transaction", "/transaction/gen1-0"]]
249+
error = self.store_event(error_data, project_id=self.project.id)
250+
for _ in range(5):
251+
self.store_event(error_data, project_id=self.project.id)
252+
253+
with mock.patch("sentry.api.endpoints.organization_trace.ERROR_LIMIT", 1):
254+
with self.feature(self.FEATURES):
255+
response = self.client_get(
256+
data={"timestamp": self.day_ago, "errorId": error.event_id},
257+
)
258+
assert response.status_code == 200, response.content
259+
data = response.data
260+
assert len(data) == 1
261+
assert data[0]["event_id"] == error.event_id
262+
263+
def test_with_invalid_error_id(self):
264+
with self.feature(self.FEATURES):
265+
response = self.client_get(
266+
data={"timestamp": self.day_ago, "errorId": ",blah blah,"},
267+
)
268+
269+
assert response.status_code == 400, response.content

0 commit comments

Comments
 (0)