Skip to content

Commit e805e47

Browse files
authored
✨ feat(integration slos): add create_issue parameter to integration SLO metrics (#94140)
1 parent 60cf36f commit e805e47

File tree

8 files changed

+135
-35
lines changed

8 files changed

+135
-35
lines changed

src/sentry/integrations/utils/metrics.py

Lines changed: 45 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,10 @@ def add_extras(self, extras: Mapping[str, Any]) -> None:
123123
self._extra.update(extras)
124124

125125
def record_event(
126-
self, outcome: EventLifecycleOutcome, outcome_reason: BaseException | str | None = None
126+
self,
127+
outcome: EventLifecycleOutcome,
128+
outcome_reason: BaseException | str | None = None,
129+
create_issue: bool = False,
127130
) -> None:
128131
"""Record a starting or halting event.
129132
@@ -146,16 +149,26 @@ def record_event(
146149
}
147150

148151
if isinstance(outcome_reason, BaseException):
149-
# (iamrajjoshi): Log the full exception and the repr of the exception
150-
# This is an experiment to dogfood Sentry logs
151-
# Logs are paid by bytes, we save money by mking optimizations like this - we should try to dogfood from a similar perspective
152-
log_params["exc_info"] = outcome_reason
152+
# Capture exception in Sentry if create_issue is True
153+
if create_issue:
154+
# If the outcome is halted, we want to set the level to warning
155+
if outcome == EventLifecycleOutcome.HALTED:
156+
sentry_sdk.set_level("warning")
157+
158+
event_id = sentry_sdk.capture_exception(
159+
outcome_reason,
160+
)
161+
162+
log_params["extra"]["slo_event_id"] = event_id
163+
164+
# Add exception summary but don't include full stack trace in logs
165+
# TODO(iamrajjoshi): Phase this out once everyone is comfortable with just using the sentry issue
153166
log_params["extra"]["exception_summary"] = repr(outcome_reason)
154167
elif isinstance(outcome_reason, str):
155168
extra["outcome_reason"] = outcome_reason
156169

157170
if outcome == EventLifecycleOutcome.FAILURE:
158-
logger.error(key, **log_params)
171+
logger.warning(key, **log_params)
159172
elif outcome == EventLifecycleOutcome.HALTED:
160173
logger.warning(key, **log_params)
161174

@@ -164,14 +177,17 @@ def _report_flow_error(message) -> None:
164177
logger.error("EventLifecycle flow error: %s", message)
165178

166179
def _terminate(
167-
self, new_state: EventLifecycleOutcome, outcome_reason: BaseException | str | None = None
180+
self,
181+
new_state: EventLifecycleOutcome,
182+
outcome_reason: BaseException | str | None = None,
183+
create_issue: bool = False,
168184
) -> None:
169185
if self._state is None:
170186
self._report_flow_error("The lifecycle has not yet been entered")
171187
if self._state != EventLifecycleOutcome.STARTED:
172188
self._report_flow_error("The lifecycle has already been exited")
173189
self._state = new_state
174-
self.record_event(new_state, outcome_reason)
190+
self.record_event(new_state, outcome_reason, create_issue)
175191

176192
def record_success(self) -> None:
177193
"""Record that the event halted successfully.
@@ -184,14 +200,20 @@ def record_success(self) -> None:
184200
self._terminate(EventLifecycleOutcome.SUCCESS)
185201

186202
def record_failure(
187-
self, failure_reason: BaseException | str | None = None, extra: dict[str, Any] | None = None
203+
self,
204+
failure_reason: BaseException | str | None = None,
205+
extra: dict[str, Any] | None = None,
206+
create_issue: bool = True,
188207
) -> None:
189208
"""Record that the event halted in failure. Additional data may be passed
190209
to be logged.
191210
192211
Calling it means that the feature is broken and requires immediate attention.
193212
194-
An error will be reported to Sentry.
213+
An error will be reported to Sentry if create_issue is True (default).
214+
The default is True because we want to create an issue for all failures
215+
because it will provide a stack trace and help us debug the issue.
216+
There needs to be a compelling reason to not create an issue for a failure
195217
196218
There is no need to call this method directly if an exception is raised from
197219
inside the context. It will be called automatically when exiting the context
@@ -205,20 +227,27 @@ def record_failure(
205227

206228
if extra:
207229
self._extra.update(extra)
208-
self._terminate(EventLifecycleOutcome.FAILURE, failure_reason)
230+
self._terminate(EventLifecycleOutcome.FAILURE, failure_reason, create_issue)
209231

210232
def record_halt(
211-
self, halt_reason: BaseException | str | None = None, extra: dict[str, Any] | None = None
233+
self,
234+
halt_reason: BaseException | str | None = None,
235+
extra: dict[str, Any] | None = None,
236+
create_issue: bool = False,
212237
) -> None:
213238
"""Record that the event halted in an ambiguous state.
214239
215-
It will be logged to GCP but no Sentry error will be reported.
240+
It will be logged to GCP but no Sentry error will be reported by default.
241+
The default is False because we don't want to create an issue for all halts.
242+
However for certain debugging cases, we may want to create an issue.
216243
217244
This method can be called in response to a sufficiently ambiguous exception
218245
or other error condition, where it may have been caused by a user error or
219246
other expected condition, but there is some substantial chance that it
220247
represents a bug.
221248
249+
Set create_issue=True if you want to create a Sentry issue for this halt.
250+
222251
Such cases usually mean that we want to:
223252
(1) document the ambiguity;
224253
(2) monitor it for sudden spikes in frequency; and
@@ -228,7 +257,7 @@ def record_halt(
228257

229258
if extra:
230259
self._extra.update(extra)
231-
self._terminate(EventLifecycleOutcome.HALTED, halt_reason)
260+
self._terminate(EventLifecycleOutcome.HALTED, halt_reason, create_issue)
232261

233262
def __enter__(self) -> Self:
234263
if self._state is not None:
@@ -250,7 +279,8 @@ def __exit__(
250279

251280
if exc_value is not None:
252281
# We were forced to exit the context by a raised exception.
253-
self.record_failure(exc_value)
282+
# Default to creating a Sentry issue for unhandled exceptions
283+
self.record_failure(exc_value, create_issue=True)
254284
else:
255285
# We exited the context without record_success or record_failure being
256286
# called. Assume success if we were told to do so. Else, log a halt

tests/sentry/api/endpoints/test_group_integration_details.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ def test_simple_put(self, mock_record_event):
256256
assert response.status_code == 201
257257
self.assert_correctly_linked(group, "APP-123", integration, org)
258258

259-
mock_record_event.assert_called_with(EventLifecycleOutcome.SUCCESS, None)
259+
mock_record_event.assert_called_with(EventLifecycleOutcome.SUCCESS, None, False)
260260

261261
@mock.patch.object(ExampleIntegration, "get_issue")
262262
@mock.patch("sentry.integrations.utils.metrics.EventLifecycle.record_halt")
@@ -422,7 +422,7 @@ def test_simple_post(self, mock_record_event):
422422
"new": True,
423423
}
424424

425-
mock_record_event.assert_called_with(EventLifecycleOutcome.SUCCESS, None)
425+
mock_record_event.assert_called_with(EventLifecycleOutcome.SUCCESS, None, False)
426426

427427
@mock.patch.object(ExampleIntegration, "create_issue")
428428
@mock.patch("sentry.integrations.utils.metrics.EventLifecycle.record_halt")

tests/sentry/integrations/jira/test_installed.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ def test_with_shared_secret(self, mock_set_tag: MagicMock, mock_record_event):
121121

122122
mock_set_tag.assert_any_call("integration_id", integration.id)
123123
assert integration.status == ObjectStatus.ACTIVE
124-
mock_record_event.assert_called_with(EventLifecycleOutcome.SUCCESS, None)
124+
mock_record_event.assert_called_with(EventLifecycleOutcome.SUCCESS, None, False)
125125

126126
@patch("sentry_sdk.set_tag")
127127
@responses.activate

tests/sentry/integrations/jira/test_ticket_action.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ def test_ticket_rules(self, mock_record_event):
123123

124124
# assert new ticket NOT created in DB
125125
assert ExternalIssue.objects.count() == external_issue_count
126-
mock_record_event.assert_called_with(EventLifecycleOutcome.SUCCESS, None)
126+
mock_record_event.assert_called_with(EventLifecycleOutcome.SUCCESS, None, False)
127127

128128
@mock.patch("sentry.integrations.utils.metrics.EventLifecycle.record_event")
129129
@mock.patch.object(MockJira, "create_issue")

tests/sentry/integrations/tasks/test_sync_assignee_outbound.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def test_syncs_outbound_assignee(self, mock_sync_assignee, mock_record_event):
5454
assert isinstance(user_arg, RpcUser)
5555
assert user_arg.id == self.user.id
5656

57-
mock_record_event.assert_called_with(EventLifecycleOutcome.SUCCESS, None)
57+
mock_record_event.assert_called_with(EventLifecycleOutcome.SUCCESS, None, False)
5858

5959
@mock.patch("sentry.integrations.utils.metrics.EventLifecycle.record_failure")
6060
@mock.patch.object(ExampleIntegration, "sync_assignee_outbound")

tests/sentry/integrations/tasks/test_sync_status_outbound.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def test_successful_outbound_sync(self, mock_sync_status, mock_record_event):
5252

5353
sync_status_outbound(self.group.id, external_issue_id=external_issue.id)
5454
mock_sync_status.assert_called_once_with(external_issue, False, self.group.project_id)
55-
mock_record_event.assert_any_call(EventLifecycleOutcome.SUCCESS, None)
55+
mock_record_event.assert_any_call(EventLifecycleOutcome.SUCCESS, None, False)
5656

5757
@mock.patch("sentry.integrations.utils.metrics.EventLifecycle.record_event")
5858
@mock.patch.object(ExampleIntegration, "sync_status_outbound")
@@ -69,7 +69,7 @@ def test_should_not_sync(self, mock_should_sync, mock_sync_status, mock_record_e
6969
assert mock_record_event.call_count == 2
7070
start, success = mock_record_event.mock_calls
7171
assert start.args == (EventLifecycleOutcome.STARTED,)
72-
assert success.args == (EventLifecycleOutcome.SUCCESS, None)
72+
assert success.args == (EventLifecycleOutcome.SUCCESS, None, False)
7373

7474
@mock.patch.object(ExampleIntegration, "sync_status_outbound")
7575
def test_missing_external_issue(self, mock_sync_status):

tests/sentry/integrations/utils/test_lifecycle_metrics.py

Lines changed: 79 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ def test_recording_success(self, mock_metrics, mock_logger):
6666
pass
6767
self._check_metrics_call_args(mock_metrics, "success")
6868
mock_logger.error.assert_not_called()
69+
mock_logger.warning.assert_not_called()
6970

7071
@mock.patch("sentry.integrations.utils.metrics.logger")
7172
@mock.patch("sentry.integrations.utils.metrics.metrics")
@@ -74,7 +75,14 @@ def test_recording_halt(self, mock_metrics, mock_logger):
7475
with metric_obj.capture(assume_success=False):
7576
pass
7677
self._check_metrics_call_args(mock_metrics, "halted")
77-
mock_logger.error.assert_not_called()
78+
mock_logger.warning.assert_called_once_with(
79+
"integrations.slo.halted",
80+
extra={
81+
"integration_domain": "messaging",
82+
"integration_name": "my_integration",
83+
"interaction_type": "my_interaction",
84+
},
85+
)
7886

7987
@mock.patch("sentry.integrations.utils.metrics.logger")
8088
@mock.patch("sentry.integrations.utils.metrics.metrics")
@@ -95,7 +103,6 @@ def test_recording_explicit_halt_with_exception(self, mock_metrics, mock_logger)
95103
"interaction_type": "my_interaction",
96104
"exception_summary": repr(ExampleException("")),
97105
},
98-
exc_info=mock.ANY,
99106
)
100107

101108
@mock.patch("sentry.integrations.utils.metrics.logger")
@@ -119,31 +126,40 @@ def test_recording_explicit_halt_with_str(self, mock_metrics, mock_logger):
119126
},
120127
)
121128

129+
@mock.patch("sentry.integrations.utils.metrics.sentry_sdk")
122130
@mock.patch("sentry.integrations.utils.metrics.logger")
123131
@mock.patch("sentry.integrations.utils.metrics.metrics")
124-
def test_recording_failure(self, mock_metrics, mock_logger):
132+
def test_recording_failure(self, mock_metrics, mock_logger, mock_sentry_sdk):
133+
mock_sentry_sdk.capture_exception.return_value = "test-event-id"
134+
125135
metric_obj = self.TestLifecycleMetric()
126136
with pytest.raises(ExampleException):
127137
with metric_obj.capture() as lifecycle:
128138
lifecycle.add_extra("extra", "value")
129139
raise ExampleException()
130140

131141
self._check_metrics_call_args(mock_metrics, "failure")
132-
mock_logger.error.assert_called_once_with(
142+
mock_sentry_sdk.capture_exception.assert_called_once()
143+
mock_logger.warning.assert_called_once_with(
133144
"integrations.slo.failure",
134145
extra={
135146
"extra": "value",
136147
"integration_domain": "messaging",
137148
"integration_name": "my_integration",
138149
"interaction_type": "my_interaction",
139150
"exception_summary": repr(ExampleException()),
151+
"slo_event_id": "test-event-id",
140152
},
141-
exc_info=mock.ANY,
142153
)
143154

155+
@mock.patch("sentry.integrations.utils.metrics.sentry_sdk")
144156
@mock.patch("sentry.integrations.utils.metrics.logger")
145157
@mock.patch("sentry.integrations.utils.metrics.metrics")
146-
def test_recording_explicit_failure_with_exception(self, mock_metrics, mock_logger):
158+
def test_recording_explicit_failure_with_exception(
159+
self, mock_metrics, mock_logger, mock_sentry_sdk
160+
):
161+
mock_sentry_sdk.capture_exception.return_value = "test-event-id"
162+
147163
metric_obj = self.TestLifecycleMetric()
148164
with metric_obj.capture() as lifecycle:
149165
try:
@@ -153,7 +169,8 @@ def test_recording_explicit_failure_with_exception(self, mock_metrics, mock_logg
153169
lifecycle.record_failure(exc, extra={"even": "more"})
154170

155171
self._check_metrics_call_args(mock_metrics, "failure")
156-
mock_logger.error.assert_called_once_with(
172+
mock_sentry_sdk.capture_exception.assert_called_once()
173+
mock_logger.warning.assert_called_once_with(
157174
"integrations.slo.failure",
158175
extra={
159176
"extra": "value",
@@ -162,8 +179,8 @@ def test_recording_explicit_failure_with_exception(self, mock_metrics, mock_logg
162179
"integration_name": "my_integration",
163180
"interaction_type": "my_interaction",
164181
"exception_summary": repr(ExampleException()),
182+
"slo_event_id": "test-event-id",
165183
},
166-
exc_info=mock.ANY,
167184
)
168185

169186
@mock.patch("sentry.integrations.utils.metrics.logger")
@@ -175,7 +192,7 @@ def test_recording_explicit_failure_with_str(self, mock_metrics, mock_logger):
175192
lifecycle.record_failure("Integration went boom", extra={"even": "more"})
176193

177194
self._check_metrics_call_args(mock_metrics, "failure")
178-
mock_logger.error.assert_called_once_with(
195+
mock_logger.warning.assert_called_once_with(
179196
"integrations.slo.failure",
180197
extra={
181198
"outcome_reason": "Integration went boom",
@@ -186,3 +203,56 @@ def test_recording_explicit_failure_with_str(self, mock_metrics, mock_logger):
186203
"interaction_type": "my_interaction",
187204
},
188205
)
206+
207+
@mock.patch("sentry.integrations.utils.metrics.sentry_sdk")
208+
@mock.patch("sentry.integrations.utils.metrics.logger")
209+
@mock.patch("sentry.integrations.utils.metrics.metrics")
210+
def test_recording_halt_with_create_issue_true(
211+
self, mock_metrics, mock_logger, mock_sentry_sdk
212+
):
213+
"""
214+
Test that halt can create Sentry issues when create_issue=True
215+
"""
216+
mock_sentry_sdk.capture_exception.return_value = "test-event-id"
217+
218+
metric_obj = self.TestLifecycleMetric()
219+
with metric_obj.capture() as lifecycle:
220+
lifecycle.add_extra("extra", "value")
221+
lifecycle.record_halt(ExampleException("test"), create_issue=True)
222+
223+
self._check_metrics_call_args(mock_metrics, "halted")
224+
mock_sentry_sdk.capture_exception.assert_called_once()
225+
mock_logger.warning.assert_called_once_with(
226+
"integrations.slo.halted",
227+
extra={
228+
"extra": "value",
229+
"integration_domain": "messaging",
230+
"integration_name": "my_integration",
231+
"interaction_type": "my_interaction",
232+
"exception_summary": repr(ExampleException("test")),
233+
"slo_event_id": "test-event-id",
234+
},
235+
)
236+
237+
@mock.patch("sentry.integrations.utils.metrics.logger")
238+
@mock.patch("sentry.integrations.utils.metrics.metrics")
239+
def test_recording_failure_with_create_issue_false(self, mock_metrics, mock_logger):
240+
"""
241+
Test that failure can skip creating Sentry issues when create_issue=False
242+
"""
243+
metric_obj = self.TestLifecycleMetric()
244+
with metric_obj.capture() as lifecycle:
245+
lifecycle.add_extra("extra", "value")
246+
lifecycle.record_failure(ExampleException("test"), create_issue=False)
247+
248+
self._check_metrics_call_args(mock_metrics, "failure")
249+
mock_logger.warning.assert_called_once_with(
250+
"integrations.slo.failure",
251+
extra={
252+
"extra": "value",
253+
"integration_domain": "messaging",
254+
"integration_name": "my_integration",
255+
"interaction_type": "my_interaction",
256+
"exception_summary": repr(ExampleException("test")),
257+
},
258+
)

tests/sentry/integrations/utils/test_sync.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def test_unassign(self, mock_record_event):
7676
)
7777

7878
assert self.group.get_assignee() is None
79-
mock_record_event.assert_called_with(EventLifecycleOutcome.SUCCESS, None)
79+
mock_record_event.assert_called_with(EventLifecycleOutcome.SUCCESS, None, False)
8080

8181
@mock.patch("sentry.integrations.utils.metrics.EventLifecycle.record_event")
8282
def test_assignment(self, mock_record_event):
@@ -99,7 +99,7 @@ def test_assignment(self, mock_record_event):
9999
assert updated_assignee is not None
100100
assert updated_assignee.id == self.test_user.id
101101
assert updated_assignee.email == "test@example.com"
102-
mock_record_event.assert_called_with(EventLifecycleOutcome.SUCCESS, None)
102+
mock_record_event.assert_called_with(EventLifecycleOutcome.SUCCESS, None, False)
103103

104104
@mock.patch("sentry.integrations.utils.metrics.EventLifecycle.record_event")
105105
def test_assign_with_multiple_groups(self, mock_record_event):
@@ -149,7 +149,7 @@ def test_assign_with_multiple_groups(self, mock_record_event):
149149
assert assignee.id == self.test_user.id
150150
assert assignee.email == "test@example.com"
151151

152-
mock_record_event.assert_called_with(EventLifecycleOutcome.SUCCESS, None)
152+
mock_record_event.assert_called_with(EventLifecycleOutcome.SUCCESS, None, False)
153153

154154
@mock.patch("sentry.integrations.utils.metrics.EventLifecycle.record_halt")
155155
def test_assign_with_no_user_found(self, mock_record_halt):
@@ -208,7 +208,7 @@ def raise_exception(*args, **kwargs):
208208
updated_assignee = self.group.get_assignee()
209209
assert updated_assignee is None
210210

211-
mock_record_failure.assert_called_once_with(mock.ANY)
211+
mock_record_failure.assert_called_once_with(mock.ANY, create_issue=True)
212212

213213
exception_param = mock_record_failure.call_args_list[0].args[0]
214214

0 commit comments

Comments
 (0)