Skip to content

Commit 43ce228

Browse files
feat(replay): add feedback to LLM context (#94315)
Closes https://linear.app/getsentry/issue/REPLAY-418/add-error-and-user-feedback-context-to-llm and adds feedback context to the LLM input for AI breadcrumbs. We do this somewhat differently from how we fetch errors for error context: feedback breadcrumbs are readily available in the rrweb payloads, so we can get the feedback (event) ID from the breadcrumb. From there, we fetch the feedback event and use the info in the event (timestamp, message) to populate the log message.
1 parent 4954bdc commit 43ce228

File tree

4 files changed

+201
-20
lines changed

4 files changed

+201
-20
lines changed

src/sentry/replays/endpoints/project_replay_summarize_breadcrumbs.py

Lines changed: 66 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
logger = logging.getLogger(__name__)
3535

3636

37-
class ErrorEvent(TypedDict):
37+
class GroupEvent(TypedDict):
3838
id: str
3939
title: str
4040
message: str
@@ -97,23 +97,24 @@ def get(self, request: Request, project: Project, replay_id: str) -> Response:
9797
error_events = []
9898
else:
9999
error_events = fetch_error_details(project_id=project.id, error_ids=error_ids)
100-
101100
return self.paginate(
102101
request=request,
103102
paginator_cls=GenericOffsetPaginator,
104103
data_fn=functools.partial(fetch_segments_metadata, project.id, replay_id),
105-
on_results=functools.partial(analyze_recording_segments, error_events, replay_id),
104+
on_results=functools.partial(
105+
analyze_recording_segments, error_events, replay_id, project.id
106+
),
106107
)
107108

108109

109-
def fetch_error_details(project_id: int, error_ids: list[str]) -> list[ErrorEvent]:
110-
"""Fetch error details given error IDs and return a list of ErrorEvent objects."""
110+
def fetch_error_details(project_id: int, error_ids: list[str]) -> list[GroupEvent]:
111+
"""Fetch error details given error IDs and return a list of GroupEvent objects."""
111112
try:
112113
node_ids = [Event.generate_node_id(project_id, event_id=id) for id in error_ids]
113114
events = nodestore.backend.get_multi(node_ids)
114115

115116
return [
116-
ErrorEvent(
117+
GroupEvent(
117118
category="error",
118119
id=event_id,
119120
title=data.get("title", ""),
@@ -128,24 +129,64 @@ def fetch_error_details(project_id: int, error_ids: list[str]) -> list[ErrorEven
128129
return []
129130

130131

131-
def generate_error_log_message(error: ErrorEvent) -> str:
def fetch_feedback_details(feedback_id: str | None, project_id: int) -> GroupEvent | None:
    """Fetch the feedback event for ``feedback_id`` and convert it to a GroupEvent.

    Returns ``None`` when ``feedback_id`` is ``None``, when nodestore returns a
    falsy payload for the event, or when the lookup or conversion raises. In the
    last case the exception is reported through ``sentry_sdk.capture_exception``
    instead of propagating to the caller.
    """
    if feedback_id is None:
        return None

    try:
        node_id = Event.generate_node_id(project_id, event_id=feedback_id)
        event = nodestore.backend.get(node_id)
        if not event:
            return None

        return GroupEvent(
            category="feedback",
            id=feedback_id,
            title="User Feedback",
            # The feedback timestamp is stored in seconds, hence the * 1000.
            timestamp=event.get("timestamp", 0.0) * 1000,
            message=event.get("contexts", {}).get("feedback", {}).get("message", ""),
        )
    except Exception as e:
        # Best-effort lookup: report the failure and carry on without feedback.
        sentry_sdk.capture_exception(e)
        return None
158+
159+
160+
def generate_error_log_message(error: GroupEvent) -> str:
132161
title = error["title"]
133162
message = error["message"]
134163
timestamp = error["timestamp"]
135164

136165
return f"User experienced an error: '{title}: {message}' at {timestamp}"
137166

138167

def generate_feedback_log_message(feedback: GroupEvent) -> str:
    """Render a feedback event as one log line from its title, message and timestamp."""
    return "User submitted feedback: '{title}: {message}' at {timestamp}".format(
        title=feedback["title"],
        message=feedback["message"],
        timestamp=feedback["timestamp"],
    )
174+
175+
139176
def get_request_data(
140-
iterator: Iterator[tuple[int, memoryview]], error_events: list[ErrorEvent]
177+
iterator: Iterator[tuple[int, memoryview]],
178+
error_events: list[GroupEvent],
179+
project_id: int,
141180
) -> list[str]:
142181
# Sort error events by timestamp
143182
error_events.sort(key=lambda x: x["timestamp"])
144-
return list(gen_request_data(iterator, error_events))
183+
return list(gen_request_data(iterator, error_events, project_id))
145184

146185

147186
def gen_request_data(
148-
iterator: Iterator[tuple[int, memoryview]], error_events: list[ErrorEvent]
187+
iterator: Iterator[tuple[int, memoryview]],
188+
error_events: list[GroupEvent],
189+
project_id,
149190
) -> Generator[str]:
150191
"""Generate log messages from events and errors in chronological order."""
151192
error_idx = 0
@@ -163,7 +204,14 @@ def gen_request_data(
163204
error_idx += 1
164205

165206
# Yield the current event's log message
166-
if message := as_log_message(event):
207+
event_type = which(event)
208+
if event_type == EventType.FEEDBACK:
209+
feedback_id = event["data"]["payload"].get("data", {}).get("feedback_id", None)
210+
feedback = fetch_feedback_details(feedback_id, project_id)
211+
if feedback:
212+
yield generate_feedback_log_message(feedback)
213+
214+
elif message := as_log_message(event):
167215
yield message
168216

169217
# Yield any remaining error messages
@@ -175,12 +223,15 @@ def gen_request_data(
175223

176224
@sentry_sdk.trace
177225
def analyze_recording_segments(
178-
error_events: list[ErrorEvent],
226+
error_events: list[GroupEvent],
179227
replay_id: str,
228+
project_id: int,
180229
segments: list[RecordingSegmentStorageMeta],
181230
) -> dict[str, Any]:
182231
# Combine breadcrumbs and error details
183-
request_data = json.dumps({"logs": get_request_data(iter_segment_data(segments), error_events)})
232+
request_data = json.dumps(
233+
{"logs": get_request_data(iter_segment_data(segments), error_events, project_id)}
234+
)
184235

185236
# Log when the input string is too large. This is potential for timeout.
186237
if len(request_data) > 100000:
@@ -271,6 +322,8 @@ def as_log_message(event: dict[str, Any]) -> str | None:
271322
return None
272323
case EventType.OPTIONS:
273324
return None
325+
case EventType.FEEDBACK:
326+
return None # the log message is processed before this method is called
274327

275328

276329
def make_seer_request(request_data: str) -> bytes:

src/sentry/replays/usecases/ingest/event_parser.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ class EventType(Enum):
7676
UNKNOWN = 13
7777
CANVAS = 14
7878
OPTIONS = 15
79+
FEEDBACK = 16
7980

8081

8182
def which(event: dict[str, Any]) -> EventType:
@@ -135,6 +136,8 @@ def which(event: dict[str, Any]) -> EventType:
135136
return EventType.HYDRATION_ERROR
136137
elif category == "replay.mutations":
137138
return EventType.MUTATIONS
139+
elif category == "sentry.feedback":
140+
return EventType.FEEDBACK
138141
else:
139142
return EventType.UNKNOWN
140143
elif event["data"]["tag"] == "performanceSpan":

tests/sentry/replays/test_project_replay_summarize_breadcrumbs.py

Lines changed: 125 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import uuid
22
import zlib
3-
from datetime import datetime, timezone
3+
from datetime import UTC, datetime, timezone
44
from unittest.mock import patch
55

66
import requests
@@ -11,13 +11,14 @@
1111
from sentry import nodestore
1212
from sentry.eventstore.models import Event
1313
from sentry.replays.endpoints.project_replay_summarize_breadcrumbs import (
14-
ErrorEvent,
14+
GroupEvent,
1515
as_log_message,
1616
get_request_data,
1717
)
1818
from sentry.replays.lib.storage import FilestoreBlob, RecordingSegmentStorageMeta
1919
from sentry.replays.testutils import mock_replay
2020
from sentry.testutils.cases import TransactionTestCase
21+
from sentry.testutils.pytest.fixtures import django_db_all
2122
from sentry.testutils.skips import requires_snuba
2223
from sentry.utils import json
2324

@@ -56,6 +57,42 @@ def save_recording_segment(
5657
)
5758
FilestoreBlob().set(metadata, zlib.compress(data) if compressed else data)
5859

60+
def mock_create_feedback_occurrence(self, project_id: int, replay_id: str | None = None):
61+
dt = datetime.now(UTC)
62+
63+
event = {
64+
"project_id": project_id,
65+
"event_id": "56b08cf7852c42cbb95e4a6998c66ad6",
66+
"timestamp": dt.timestamp(),
67+
"received": dt.isoformat(),
68+
"first_seen": dt.isoformat(),
69+
"user": {
70+
"ip_address": "72.164.175.154",
71+
"email": "josh.ferge@sentry.io",
72+
"id": 880461,
73+
"isStaff": False,
74+
"name": "Josh Ferge",
75+
},
76+
"contexts": {
77+
"feedback": {
78+
"contact_email": "josh.ferge@sentry.io",
79+
"name": "Josh Ferge",
80+
"message": "Great website!",
81+
"replay_id": replay_id,
82+
"url": "https://sentry.sentry.io/feedback/?statsPeriod=14d",
83+
},
84+
},
85+
}
86+
87+
self.store_event(
88+
data={
89+
"event_id": event["event_id"],
90+
"timestamp": event["timestamp"],
91+
"contexts": event["contexts"],
92+
},
93+
project_id=self.project.id,
94+
)
95+
5996
@patch("sentry.replays.endpoints.project_replay_summarize_breadcrumbs.make_seer_request")
6097
def test_get(self, make_seer_request):
6198
return_value = json.dumps({"hello": "world"}).encode()
@@ -215,7 +252,7 @@ def test_get_with_error(self, make_seer_request):
215252
assert response.content == return_value
216253

217254
@patch("sentry.replays.endpoints.project_replay_summarize_breadcrumbs.make_seer_request")
218-
def test_get_with_error_context_disabled(self, make_seer_request):
255+
def test_get_with_error_context_disabled_and_enabled(self, make_seer_request):
219256
"""Test handling of breadcrumbs with error context disabled"""
220257
return_value = json.dumps({"error": "An error happened"}).encode()
221258
make_seer_request.return_value = return_value
@@ -261,6 +298,7 @@ def test_get_with_error_context_disabled(self, make_seer_request):
261298
]
262299
self.save_recording_segment(0, json.dumps(data).encode())
263300

301+
# with error context disabled
264302
with self.feature(
265303
{
266304
"organizations:session-replay": True,
@@ -280,8 +318,88 @@ def test_get_with_error_context_disabled(self, make_seer_request):
280318
assert response.get("Content-Type") == "application/json"
281319
assert response.content == return_value
282320

321+
# with error context enabled
322+
with self.feature(
323+
{
324+
"organizations:session-replay": True,
325+
"organizations:replay-ai-summaries": True,
326+
"organizations:gen-ai-features": True,
327+
}
328+
):
329+
response = self.client.get(self.url, {"enable_error_context": "true"})
330+
331+
call_args = json.loads(make_seer_request.call_args[0][0])
332+
assert "logs" in call_args
333+
assert any("ZeroDivisionError" in log for log in call_args["logs"])
334+
assert any("division by zero" in log for log in call_args["logs"])
335+
336+
assert response.status_code == 200
337+
assert response.get("Content-Type") == "application/json"
338+
assert response.content == return_value
339+
340+
@patch("sentry.replays.endpoints.project_replay_summarize_breadcrumbs.make_seer_request")
341+
def test_get_with_feedback(self, make_seer_request):
342+
"""Test handling of breadcrumbs with user feedback"""
343+
return_value = json.dumps({"feedback": "Feedback was submitted"}).encode()
344+
make_seer_request.return_value = return_value
345+
346+
self.mock_create_feedback_occurrence(self.project.id, replay_id=self.replay_id)
347+
348+
now = datetime.now(timezone.utc)
349+
350+
self.store_replays(
351+
mock_replay(
352+
now,
353+
self.project.id,
354+
self.replay_id,
355+
)
356+
)
357+
358+
data = [
359+
{
360+
"type": 5,
361+
"timestamp": float(now.timestamp()),
362+
"data": {
363+
"tag": "breadcrumb",
364+
"payload": {"category": "console", "message": "hello"},
365+
},
366+
},
367+
{
368+
"type": 5,
369+
"timestamp": float(now.timestamp()),
370+
"data": {
371+
"tag": "breadcrumb",
372+
"payload": {
373+
"category": "sentry.feedback",
374+
"data": {"feedback_id": "56b08cf7852c42cbb95e4a6998c66ad6"},
375+
},
376+
},
377+
},
378+
]
379+
self.save_recording_segment(0, json.dumps(data).encode())
380+
381+
with self.feature(
382+
{
383+
"organizations:session-replay": True,
384+
"organizations:replay-ai-summaries": True,
385+
"organizations:gen-ai-features": True,
386+
}
387+
):
388+
response = self.client.get(self.url)
389+
390+
make_seer_request.assert_called_once()
391+
call_args = json.loads(make_seer_request.call_args[0][0])
392+
assert "logs" in call_args
393+
assert any("Great website!" in log for log in call_args["logs"])
394+
assert any("User submitted feedback" in log for log in call_args["logs"])
395+
396+
assert response.status_code == 200
397+
assert response.get("Content-Type") == "application/json"
398+
assert response.content == return_value
399+
283400

284-
def test_get_request_data():
401+
@django_db_all
402+
def test_get_request_data(default_project):
285403
def _faker():
286404
yield 0, memoryview(
287405
json.dumps(
@@ -307,14 +425,14 @@ def _faker():
307425
)
308426

309427
error_events = [
310-
ErrorEvent(
428+
GroupEvent(
311429
category="error",
312430
id="123",
313431
title="ZeroDivisionError",
314432
timestamp=3.0,
315433
message="division by zero",
316434
),
317-
ErrorEvent(
435+
GroupEvent(
318436
category="error",
319437
id="234",
320438
title="BadError",
@@ -323,7 +441,7 @@ def _faker():
323441
),
324442
]
325443

326-
result = get_request_data(_faker(), error_events=error_events)
444+
result = get_request_data(_faker(), error_events=error_events, project_id=default_project.id)
327445
assert result == [
328446
"User experienced an error: 'BadError: something else bad' at 1.0",
329447
"Logged: hello at 1.5",

tests/sentry/replays/unit/test_event_parser.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,13 @@ def test_which():
590590
}
591591
assert which(event) == EventType.CONSOLE
592592

593+
event = {
594+
"type": 5,
595+
"timestamp": 0.0,
596+
"data": {"tag": "breadcrumb", "payload": {"category": "sentry.feedback"}},
597+
}
598+
assert which(event) == EventType.FEEDBACK
599+
593600
event = {
594601
"type": 5,
595602
"timestamp": 0.0,

0 commit comments

Comments
 (0)