Skip to content

Commit e018642

Browse files
authored
ref(feedback): move userreport ingest and endpoints to feedback module (#95392)
User reports are the same product as feedback and owned by replay team. I'd like to move the code here for organizational purposes. Same goes for the feedback summary endpoint. This PR only moves things around, no implementation changes. Ref [REPLAY-513: \[PR Tracker\] refactor backend user feedback code](https://linear.app/getsentry/issue/REPLAY-513/pr-tracker-refactor-backend-user-feedback-code) I plan to follow up with quality improvements to the ingest functions, making them more composable, so that module's subject to change. Also fixes a flaky test `test_openai` described in #95577
1 parent 77b9e05 commit e018642

33 files changed

+370
-346
lines changed

src/sentry/api/urls.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
from sentry.api.endpoints.organization_events_root_cause_analysis import (
1818
OrganizationEventsRootCauseAnalysisEndpoint,
1919
)
20-
from sentry.api.endpoints.organization_feedback_summary import OrganizationFeedbackSummaryEndpoint
2120
from sentry.api.endpoints.organization_fork import OrganizationForkEndpoint
2221
from sentry.api.endpoints.organization_insights_tree import OrganizationInsightsTreeEndpoint
2322
from sentry.api.endpoints.organization_member_invite.details import (
@@ -94,6 +93,11 @@
9493
from sentry.explore.endpoints.explore_saved_query_starred_order import (
9594
ExploreSavedQueryStarredOrderEndpoint,
9695
)
96+
from sentry.feedback.endpoints.organization_feedback_summary import (
97+
OrganizationFeedbackSummaryEndpoint,
98+
)
99+
from sentry.feedback.endpoints.organization_user_reports import OrganizationUserReportsEndpoint
100+
from sentry.feedback.endpoints.project_user_reports import ProjectUserReportsEndpoint
97101
from sentry.flags.endpoints.hooks import OrganizationFlagsHooksEndpoint
98102
from sentry.flags.endpoints.logs import (
99103
OrganizationFlagLogDetailsEndpoint,
@@ -649,7 +653,6 @@
649653
OrganizationTraceSpansEndpoint,
650654
)
651655
from .endpoints.organization_user_details import OrganizationUserDetailsEndpoint
652-
from .endpoints.organization_user_reports import OrganizationUserReportsEndpoint
653656
from .endpoints.organization_user_teams import OrganizationUserTeamsEndpoint
654657
from .endpoints.organization_users import OrganizationUsersEndpoint
655658
from .endpoints.project_artifact_bundle_file_details import ProjectArtifactBundleFileDetailsEndpoint
@@ -716,7 +719,6 @@
716719
ProjectTransactionThresholdOverrideEndpoint,
717720
)
718721
from .endpoints.project_transfer import ProjectTransferEndpoint
719-
from .endpoints.project_user_reports import ProjectUserReportsEndpoint
720722
from .endpoints.project_user_stats import ProjectUserStatsEndpoint
721723
from .endpoints.project_users import ProjectUsersEndpoint
722724
from .endpoints.prompts_activity import PromptsActivityEndpoint

src/sentry/feedback/endpoints/__init__.py

Whitespace-only changes.

src/sentry/api/endpoints/project_user_reports.py renamed to src/sentry/feedback/endpoints/project_user_reports.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from sentry.api.paginator import DateTimePaginator
1717
from sentry.api.serializers import UserReportWithGroupSerializer, serialize
1818
from sentry.feedback.lib.utils import FeedbackCreationSource
19-
from sentry.ingest.userreport import Conflict, save_userreport
19+
from sentry.feedback.usecases.ingest.userreport import Conflict, save_userreport
2020
from sentry.models.environment import Environment
2121
from sentry.models.userreport import UserReport
2222
from sentry.utils.dates import epoch

src/sentry/feedback/usecases/ingest/__init__.py

Whitespace-only changes.

src/sentry/feedback/usecases/save_feedback_event.py renamed to src/sentry/feedback/usecases/ingest/save_event_feedback.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,24 @@
44
from typing import Any
55

66
from sentry.feedback.lib.utils import FeedbackCreationSource
7-
from sentry.feedback.usecases.create_feedback import create_feedback_issue
8-
from sentry.ingest.userreport import Conflict, save_userreport
7+
from sentry.feedback.usecases.ingest.create_feedback import create_feedback_issue
8+
from sentry.feedback.usecases.ingest.userreport import Conflict, save_userreport
99
from sentry.models.environment import Environment
1010
from sentry.models.project import Project
1111
from sentry.utils import metrics
1212

1313
logger = logging.getLogger(__name__)
1414

1515

16-
def save_feedback_event(event_data: Mapping[str, Any], project_id: int):
17-
"""Saves a feedback from a feedback event envelope.
16+
def save_event_feedback(event_data: Mapping[str, Any], project_id: int):
17+
"""Saves feedback given data in an event format. This function should only
18+
be called by the new feedback consumer's ingest strategy, to process
19+
feedback envelopes (feedback v2). It is currently instrumented as a task in
20+
sentry.tasks.store.
1821
19-
If the save is successful and the `associated_event_id` field is present, this will
20-
also save a UserReport in Postgres. This is to ensure the feedback can be queried by
21-
group_id, which is hard to associate in clickhouse.
22+
If the save is successful and the `associated_event_id` field is present,
23+
this will also save a UserReport in Postgres (shim to v1). This is to allow
24+
queries by the group_id relation, which we don't have in clickhouse.
2225
"""
2326
if not isinstance(event_data, dict):
2427
event_data = dict(event_data)
@@ -32,6 +35,9 @@ def save_feedback_event(event_data: Mapping[str, Any], project_id: int):
3235

3336
try:
3437
# Shim to UserReport
38+
# TODO: this logic should be extracted to a shim_to_userreport function
39+
# which returns a report dict. After that this function can be removed
40+
# and the store task can directly call feedback ingest functions.
3541
feedback_context = fixed_event_data["contexts"]["feedback"]
3642
associated_event_id = feedback_context.get("associated_event_id")
3743

src/sentry/feedback/usecases/shim_to_feedback.py renamed to src/sentry/feedback/usecases/ingest/shim_to_feedback.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from sentry.eventstore.models import Event, GroupEvent
99
from sentry.feedback.lib.types import UserReportDict
1010
from sentry.feedback.lib.utils import FeedbackCreationSource, is_in_feedback_denylist
11-
from sentry.feedback.usecases.create_feedback import create_feedback_issue
11+
from sentry.feedback.usecases.ingest.create_feedback import create_feedback_issue
1212
from sentry.models.project import Project
1313
from sentry.utils import metrics
1414
from sentry.utils.outcomes import Outcome, track_outcome
Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
from __future__ import annotations
2+
3+
import logging
4+
import random
5+
import uuid
6+
from datetime import datetime, timedelta
7+
8+
from django.core.exceptions import PermissionDenied
9+
from django.db import IntegrityError, router
10+
from django.utils import timezone
11+
12+
from sentry import eventstore, options
13+
from sentry.api.exceptions import BadRequest
14+
from sentry.constants import DataCategory
15+
from sentry.eventstore.models import Event, GroupEvent
16+
from sentry.feedback.lib.types import UserReportDict
17+
from sentry.feedback.lib.utils import (
18+
UNREAL_FEEDBACK_UNATTENDED_MESSAGE,
19+
FeedbackCreationSource,
20+
is_in_feedback_denylist,
21+
)
22+
from sentry.feedback.usecases.ingest.shim_to_feedback import shim_to_feedback
23+
from sentry.models.project import Project
24+
from sentry.models.userreport import UserReport
25+
from sentry.signals import user_feedback_received
26+
from sentry.utils import metrics
27+
from sentry.utils.db import atomic_transaction
28+
from sentry.utils.outcomes import Outcome, track_outcome
29+
30+
logger = logging.getLogger(__name__)
31+
32+
33+
class Conflict(Exception):
34+
pass
35+
36+
37+
def save_userreport(
38+
project: Project,
39+
report: UserReportDict,
40+
source: FeedbackCreationSource,
41+
start_time: datetime | None = None,
42+
) -> UserReport | None:
43+
with metrics.timer("userreport.create_user_report", tags={"referrer": source.value}):
44+
if start_time is None:
45+
start_time = timezone.now()
46+
47+
if is_in_feedback_denylist(project.organization):
48+
metrics.incr(
49+
"user_report.create_user_report.filtered",
50+
tags={"reason": "org.denylist", "referrer": source.value},
51+
)
52+
track_outcome(
53+
org_id=project.organization_id,
54+
project_id=project.id,
55+
key_id=None,
56+
outcome=Outcome.RATE_LIMITED,
57+
reason="feedback_denylist",
58+
timestamp=start_time,
59+
event_id=None, # Note report["event_id"] is id of the associated event, not the report itself.
60+
category=DataCategory.USER_REPORT_V2,
61+
quantity=1,
62+
)
63+
if (
64+
source == FeedbackCreationSource.USER_REPORT_DJANGO_ENDPOINT
65+
or source == FeedbackCreationSource.CRASH_REPORT_EMBED_FORM
66+
):
67+
raise PermissionDenied()
68+
return None
69+
70+
should_filter, metrics_reason, display_reason = validate_user_report(
71+
report, project.id, source=source
72+
)
73+
if should_filter:
74+
metrics.incr(
75+
"user_report.create_user_report.filtered",
76+
tags={"reason": metrics_reason, "referrer": source.value},
77+
)
78+
track_outcome(
79+
org_id=project.organization_id,
80+
project_id=project.id,
81+
key_id=None,
82+
outcome=Outcome.INVALID,
83+
reason=display_reason,
84+
timestamp=start_time,
85+
event_id=None, # Note report["event_id"] is id of the associated event, not the report itself.
86+
category=DataCategory.USER_REPORT_V2,
87+
quantity=1,
88+
)
89+
if (
90+
source == FeedbackCreationSource.USER_REPORT_DJANGO_ENDPOINT
91+
or source == FeedbackCreationSource.CRASH_REPORT_EMBED_FORM
92+
):
93+
raise BadRequest(display_reason)
94+
return None
95+
96+
# XXX(dcramer): enforce case insensitivity by coercing this to a lowercase string
97+
report["event_id"] = report["event_id"].lower()
98+
report["project_id"] = project.id
99+
100+
# Use the associated event to validate and update the report.
101+
event: Event | GroupEvent | None = eventstore.backend.get_event_by_id(
102+
project.id, report["event_id"]
103+
)
104+
105+
if event:
106+
# if the event is more than 30 minutes old, we don't allow updates
107+
# as it might be abusive
108+
if event.datetime < start_time - timedelta(minutes=30):
109+
metrics.incr(
110+
"user_report.create_user_report.filtered",
111+
tags={"reason": "event_too_old", "referrer": source.value},
112+
)
113+
track_outcome(
114+
org_id=project.organization_id,
115+
project_id=project.id,
116+
key_id=None,
117+
outcome=Outcome.INVALID,
118+
reason="Associated event is too old",
119+
timestamp=start_time,
120+
event_id=None,
121+
category=DataCategory.USER_REPORT_V2,
122+
quantity=1,
123+
)
124+
raise Conflict("Feedback for this event cannot be modified.")
125+
126+
report["environment_id"] = event.get_environment().id
127+
if event.group_id:
128+
report["group_id"] = event.group_id
129+
130+
# Save the report.
131+
try:
132+
with atomic_transaction(using=router.db_for_write(UserReport)):
133+
report_instance = UserReport.objects.create(**report)
134+
135+
except IntegrityError:
136+
# There was a duplicate, so just overwrite the existing
137+
# row with the new one. The only way this ever happens is
138+
# if someone is messing around with the API, or doing
139+
# something wrong with the SDK, but this behavior is
140+
# more reasonable than just hard erroring and is more
141+
# expected.
142+
existing_report = UserReport.objects.get(
143+
project_id=report["project_id"], event_id=report["event_id"]
144+
)
145+
146+
# if the existing report was submitted more than 5 minutes ago, we dont
147+
# allow updates as it might be abusive (replay attacks)
148+
if existing_report.date_added < timezone.now() - timedelta(minutes=5):
149+
metrics.incr(
150+
"user_report.create_user_report.filtered",
151+
tags={"reason": "duplicate_report", "referrer": source.value},
152+
)
153+
track_outcome(
154+
org_id=project.organization_id,
155+
project_id=project.id,
156+
key_id=None,
157+
outcome=Outcome.INVALID,
158+
reason="Duplicate report",
159+
timestamp=start_time,
160+
event_id=None,
161+
category=DataCategory.USER_REPORT_V2,
162+
quantity=1,
163+
)
164+
raise Conflict("Feedback for this event cannot be modified.")
165+
166+
existing_report.update(
167+
name=report.get("name", ""),
168+
email=report.get("email", ""),
169+
comments=report["comments"],
170+
)
171+
report_instance = existing_report
172+
173+
metrics.incr(
174+
"user_report.create_user_report.overwrite_duplicate",
175+
tags={"referrer": source.value},
176+
)
177+
178+
else:
179+
if report_instance.group_id:
180+
report_instance.notify()
181+
182+
# Additionally processing if save is successful.
183+
user_feedback_received.send_robust(project=project, sender=save_userreport)
184+
185+
metrics.incr(
186+
"user_report.create_user_report.saved",
187+
tags={"has_event": bool(event), "referrer": source.value},
188+
)
189+
if event and source.value in FeedbackCreationSource.old_feedback_category_values():
190+
shim_to_feedback(report, event, project, source)
191+
# XXX(aliu): the update_user_reports task will still try to shim the report after a period, unless group_id or environment is set.
192+
193+
return report_instance
194+
195+
196+
def validate_user_report(
197+
report: UserReportDict,
198+
project_id: int,
199+
source: FeedbackCreationSource = FeedbackCreationSource.USER_REPORT_ENVELOPE,
200+
) -> tuple[bool, str | None, str | None]:
201+
"""
202+
Validates required fields, field lengths, and garbage messages. Also checks that event_id is a valid UUID. Does not raise errors.
203+
204+
Reformatting: strips whitespace from comments and dashes from event_id.
205+
206+
Returns a tuple of (should_filter, metrics_reason, display_reason). XXX: ensure metrics and outcome reasons have bounded cardinality.
207+
"""
208+
if "comments" not in report:
209+
return True, "missing_comments", "Missing comments" # type: ignore[unreachable]
210+
if "event_id" not in report:
211+
return True, "missing_event_id", "Missing event_id" # type: ignore[unreachable]
212+
213+
report["comments"] = report["comments"].strip()
214+
215+
name, email, comments = (
216+
report.get("name", ""),
217+
report.get("email", ""),
218+
report["comments"],
219+
)
220+
221+
if options.get("feedback.filter_garbage_messages"): # Message-based filter kill-switch.
222+
if not comments:
223+
return True, "empty", "Empty Feedback Messsage"
224+
225+
if comments == UNREAL_FEEDBACK_UNATTENDED_MESSAGE:
226+
return True, "unreal.unattended", "Sent in Unreal Unattended Mode"
227+
228+
max_comment_length = UserReport._meta.get_field("comments").max_length
229+
if max_comment_length and len(comments) > max_comment_length:
230+
metrics.distribution(
231+
"feedback.large_message",
232+
len(comments),
233+
tags={
234+
"entrypoint": "save_userreport",
235+
"referrer": source.value,
236+
},
237+
)
238+
if random.random() < 0.1:
239+
logger.info(
240+
"Feedback message exceeds max size.",
241+
extra={
242+
"project_id": project_id,
243+
"entrypoint": "save_userreport",
244+
"referrer": source.value,
245+
"length": len(comments),
246+
"feedback_message": comments[:100],
247+
},
248+
)
249+
return True, "too_large.message", "Message Too Large"
250+
251+
max_name_length = UserReport._meta.get_field("name").max_length
252+
if max_name_length and len(name) > max_name_length:
253+
return True, "too_large.name", "Name Too Large"
254+
255+
max_email_length = UserReport._meta.get_field("email").max_length
256+
if max_email_length and len(email) > max_email_length:
257+
return True, "too_large.email", "Email Too Large"
258+
259+
try:
260+
# Validates UUID and strips dashes.
261+
report["event_id"] = uuid.UUID(report["event_id"].lower()).hex
262+
except ValueError:
263+
return True, "invalid_event_id", "Invalid Event ID"
264+
265+
return False, None, None

0 commit comments

Comments
 (0)