Skip to content

Commit 4058f20

Browse files
authored
ref(replays): Remove dead code and reorganize ingest-replay-recordings consumer (#95030)
Problem statement: There were a variety of code paths which were no longer used. These were dropped. Additionally, some of the code was not well factored. Too many concerns were put into one module, which reduced its ability to integrate with multiple systems and also led to weird control flow for cross-module communication. This has been refactored, and unit test and integration test coverage have been added. What changed: - Message parsing, header parsing, and decompression were all moved to the recording consumer module. - The ingest use case focuses on ingesting the data received, not the format it was received in. - 700 lines of unit test coverage added. - A bunch of end-to-end (e2e) test coverage was removed because it was slow and didn't test the behavior as well as the unit tests do.
1 parent 634e1ba commit 4058f20

File tree

9 files changed

+973
-553
lines changed

9 files changed

+973
-553
lines changed

src/sentry/replays/consumers/recording.py

Lines changed: 97 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,41 @@
11
import logging
2+
import zlib
23
from collections.abc import Mapping
4+
from typing import cast
35

46
import sentry_sdk
7+
import sentry_sdk.scope
58
from arroyo.backends.kafka.consumer import KafkaPayload
69
from arroyo.processing.strategies import RunTask, RunTaskInThreads
710
from arroyo.processing.strategies.abstract import ProcessingStrategy, ProcessingStrategyFactory
811
from arroyo.processing.strategies.commit import CommitOffsets
912
from arroyo.types import Commit, FilteredPayload, Message, Partition
1013
from django.conf import settings
14+
from sentry_kafka_schemas.codecs import Codec, ValidationError
15+
from sentry_kafka_schemas.schema_types.ingest_replay_recordings_v1 import ReplayRecording
16+
from sentry_sdk import set_tag
1117

18+
from sentry.conf.types.kafka_definition import Topic, get_topic_codec
1219
from sentry.filestore.gcs import GCS_RETRYABLE_ERRORS
1320
from sentry.replays.usecases.ingest import (
14-
DropSilently,
15-
ProcessedRecordingMessage,
21+
DropEvent,
22+
Event,
23+
ProcessedEvent,
1624
commit_recording_message,
17-
parse_recording_message,
18-
process_recording_message,
25+
process_recording_event,
1926
track_recording_metadata,
2027
)
28+
from sentry.utils import json
29+
30+
RECORDINGS_CODEC: Codec[ReplayRecording] = get_topic_codec(Topic.INGEST_REPLAYS_RECORDINGS)
2131

2232
logger = logging.getLogger(__name__)
2333

2434

35+
class DropSilently(Exception):
    """Signal that the current recording message should be discarded.

    The parsing helpers in this module raise it after logging the failure;
    ``process_message`` catches it and returns a ``FilteredPayload``.
    """
37+
38+
2539
class ProcessReplayRecordingStrategyFactory(ProcessingStrategyFactory[KafkaPayload]):
2640
def __init__(
2741
self,
@@ -61,7 +75,10 @@ def create_with_partitions(
6175
)
6276

6377

64-
def process_message(message: Message[KafkaPayload]) -> ProcessedRecordingMessage | FilteredPayload:
78+
# Processing Task
79+
80+
81+
def process_message(message: Message[KafkaPayload]) -> ProcessedEvent | FilteredPayload:
6582
with sentry_sdk.start_transaction(
6683
name="replays.consumer.recording_buffered.process_message",
6784
op="replays.consumer.recording_buffered.process_message",
@@ -70,15 +87,87 @@ def process_message(message: Message[KafkaPayload]) -> ProcessedRecordingMessage
7087
},
7188
):
7289
try:
73-
return process_recording_message(parse_recording_message(message.payload.value))
90+
recording_event = parse_recording_event(message.payload.value)
91+
set_tag("org_id", recording_event["context"]["org_id"])
92+
set_tag("project_id", recording_event["context"]["project_id"])
93+
return process_recording_event(recording_event)
7494
except DropSilently:
7595
return FilteredPayload()
7696
except Exception:
7797
logger.exception("Failed to process replay recording message.")
7898
return FilteredPayload()
7999

80100

81-
def commit_message(message: Message[ProcessedRecordingMessage]) -> None:
101+
@sentry_sdk.trace
def parse_recording_event(message: bytes) -> Event:
    """Turn a raw recording message into the ``Event`` mapping handed to ingest.

    The message is decoded with ``parse_request_message``, its payload is split
    into a segment id and segment body by ``parse_headers``, and the body is
    passed through ``decompress_segment`` to obtain both the compressed and the
    decompressed form.

    Args:
        message: The encoded recording message bytes.

    Returns:
        A mapping with a ``context`` sub-mapping (ids, timestamps, retention),
        the compressed and decompressed payload, the parsed ``replay_event``
        (or ``None``) and the raw ``replay_video`` bytes (or ``None``).

    Raises:
        DropSilently: Propagated from any of the three helpers above.
    """
    request = parse_request_message(message)
    segment_id, segment = parse_headers(cast(bytes, request["payload"]), request["replay_id"])
    compressed, decompressed = decompress_segment(segment)

    # A missing *or* empty replay event is treated as "no event".
    raw_event = request.get("replay_event")
    replay_event = json.loads(cast(bytes, raw_event)) if raw_event else None

    # Only a missing (None) video is normalised; any other value is passed through.
    raw_video = request.get("replay_video")
    replay_video = cast(bytes, raw_video) if raw_video is not None else None

    return {
        "context": {
            "key_id": request.get("key_id"),
            "org_id": request["org_id"],
            "project_id": request["project_id"],
            "received": request["received"],
            "replay_id": request["replay_id"],
            "retention_days": request["retention_days"],
            "segment_id": segment_id,
        },
        "payload_compressed": compressed,
        "payload": decompressed,
        "replay_event": replay_event,
        "replay_video": replay_video,
    }
134+
135+
136+
@sentry_sdk.trace
def parse_request_message(message: bytes) -> ReplayRecording:
    """Decode *message* with ``RECORDINGS_CODEC``.

    Args:
        message: The encoded recording message bytes.

    Returns:
        The decoded ``ReplayRecording``.

    Raises:
        DropSilently: If the codec raises ``ValidationError``; the failure is
            logged with its traceback before this is raised.
    """
    try:
        decoded = RECORDINGS_CODEC.decode(message)
    except ValidationError:
        logger.exception("Could not decode recording message.")
        raise DropSilently()
    else:
        return decoded
143+
144+
145+
@sentry_sdk.trace
def decompress_segment(segment: bytes) -> tuple[bytes, bytes]:
    """Return the segment as a ``(compressed, decompressed)`` pair.

    Args:
        segment: The segment body, either zlib-compressed or already plain.

    Returns:
        ``(segment, zlib.decompress(segment))`` when the input decompresses.
        When it does not but starts with ``[``, the input is taken to be the
        uncompressed body and ``(zlib.compress(segment), segment)`` is returned.

    Raises:
        DropSilently: If the input neither decompresses nor starts with ``[``
            (this includes empty input); the failure is logged first.
    """
    try:
        inflated = zlib.decompress(segment)
    except zlib.error:
        # Not zlib data: accept it as-is only if it starts with "[".
        if segment.startswith(b"["):
            return (zlib.compress(segment), segment)

        logger.exception("Invalid recording body.")
        raise DropSilently()

    return (segment, inflated)
155+
156+
157+
@sentry_sdk.trace
def parse_headers(recording: bytes, replay_id: str) -> tuple[int, bytes]:
    """Split a recording payload into its segment id and segment body.

    The payload is cut at the first newline: the part before it is parsed as
    JSON and its ``segment_id`` entry is converted to ``int``; the part after
    it is returned untouched.

    Args:
        recording: The header line, a newline, then the segment body.
        replay_id: Only used in the log message when extraction fails.

    Returns:
        ``(segment_id, segment_body)``.

    Raises:
        DropSilently: On any failure (no newline, invalid JSON, missing or
            non-integer ``segment_id``); the failure is logged first.
    """
    try:
        # Unpacking raises ValueError when the payload contains no newline.
        header_line, segment_body = recording.split(b"\n", 1)
        headers = json.loads(header_line)
        return int(headers["segment_id"]), segment_body
    except Exception:
        logger.exception("Recording headers could not be extracted %s", replay_id)
        raise DropSilently()
165+
166+
167+
# I/O Task
168+
169+
170+
def commit_message(message: Message[ProcessedEvent]) -> None:
82171
isolation_scope = sentry_sdk.get_isolation_scope().fork()
83172
with sentry_sdk.scope.use_isolation_scope(isolation_scope):
84173
with sentry_sdk.start_transaction(
@@ -96,7 +185,7 @@ def commit_message(message: Message[ProcessedRecordingMessage]) -> None:
96185
return None
97186
except GCS_RETRYABLE_ERRORS:
98187
raise
99-
except DropSilently:
188+
except DropEvent:
100189
return None
101190
except Exception:
102191
logger.exception("Failed to commit replay recording message.")

0 commit comments

Comments
 (0)