
Commit d1e2fec

Swatinem authored and andrewshie-sentry committed
Add metrics for various file/blob uploads (#95416)
This adds `PUT` size and latency metrics to all the use-cases that we care about:

- Attachments
- Debug Files and Artifact Bundles
- Replays
- Profiles
- Nodestore

The size metric is emitted for both the compressed and the uncompressed payload, tagged with the compression algorithm used. The `latency` metric is only relevant for the compressed payload, however.
1 parent: 5dbd648

7 files changed: +77 −6 lines
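Across all of these use-cases the instrumentation follows one pattern, visible in the hunks below: record the payload size before and after compression as a `storage.put.size` distribution, and wrap only the actual upload in a `storage.put.latency` timer. A minimal sketch assuming Sentry's `sentry.utils.metrics` helpers; `save_compressed` and its arguments are illustrative names, not code from this commit:

from io import BytesIO

import zstandard

from sentry.utils import metrics


def save_compressed(storage, path: str, data: bytes, usecase: str) -> None:
    # Uncompressed payload size, tagged with the use-case.
    metrics.distribution(
        "storage.put.size",
        len(data),
        tags={"usecase": usecase, "compression": "none"},
        unit="byte",
    )

    compressed = zstandard.compress(data)

    # Compressed size, tagged with the algorithm that produced it.
    metrics.distribution(
        "storage.put.size",
        len(compressed),
        tags={"usecase": usecase, "compression": "zstd"},
        unit="byte",
    )

    # Latency only covers the PUT of the compressed payload.
    with metrics.timer("storage.put.latency", tags={"usecase": usecase}):
        storage.save(path, BytesIO(compressed))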

src/sentry/eventstore/processing/base.py

Lines changed: 4 additions & 0 deletions

@@ -45,6 +45,10 @@ def store(self, event: Event, unprocessed: bool = False) -> str:
         if unprocessed:
             key = self.__get_unprocessed_key(key)
         self.inner.set(key, event, self.timeout)
+        # TODO(swatinem): we would like to gather size metrics for things stored
+        # in the processing store, though we need `bytes` for that, and it looks
+        # like the processing store is used with `dict`s directly, for which the
+        # encoding is non-obvious.
         return key
 
     def get(self, key: str, unprocessed: bool = False) -> MutableMapping[str, Any] | None:
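To illustrate the ambiguity the TODO points at: the store receives `dict`s, so any size metric would first have to pick a byte encoding, and that choice is the open question. A purely hypothetical sketch, assuming a JSON encoding and an invented `processing-store` use-case tag (neither is part of this commit):

from sentry.utils import json, metrics

# Hypothetical: approximate the stored size by JSON-encoding the dict.
# The processing store may serialize differently, which is why the
# commit leaves this metric out for now.
event = {"event_id": "abc123", "platform": "python"}  # example payload
approx_size = len(json.dumps(event).encode("utf-8"))
metrics.distribution(
    "storage.put.size",
    approx_size,
    tags={"usecase": "processing-store", "compression": "none"},
    unit="byte",
)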

src/sentry/models/debugfile.py

Lines changed: 8 additions & 1 deletion

@@ -35,7 +35,7 @@
 from sentry.db.models.manager.base import BaseManager
 from sentry.models.files.file import File
 from sentry.models.files.utils import clear_cached_files
-from sentry.utils import json
+from sentry.utils import json, metrics
 from sentry.utils.zip import safe_extract_zip
 
 if TYPE_CHECKING:

@@ -315,6 +315,13 @@ def create_dif_from_id(
     file.headers["Content-Type"] = DIF_MIMETYPES[meta.file_format]
     file.save()
 
+    metrics.distribution(
+        "storage.put.size",
+        file.size,
+        tags={"usecase": "debug-files", "compression": "none"},
+        unit="byte",
+    )
+
     dif = ProjectDebugFile.objects.create(
         file=file,
         checksum=file.checksum,

src/sentry/models/eventattachment.py

Lines changed: 18 additions & 2 deletions

@@ -17,6 +17,7 @@
 from sentry.db.models.fields.bounded import BoundedIntegerField
 from sentry.db.models.manager.base_query_set import BaseQuerySet
 from sentry.models.files.utils import get_size_and_checksum, get_storage
+from sentry.utils import metrics
 
 # Attachment file types that are considered a crash report (PII relevant)
 CRASH_REPORT_TYPES = ("event.minidump", "event.applecrashreport")

@@ -147,14 +148,29 @@ def putfile(cls, project_id: int, attachment: CachedAttachment) -> PutfileResult
         blob = BytesIO(data)
         size, checksum = get_size_and_checksum(blob)
 
+        metrics.distribution(
+            "storage.put.size",
+            size,
+            tags={"usecase": "attachments", "compression": "none"},
+            unit="byte",
+        )
+
         if can_store_inline(data):
             blob_path = ":" + data.decode()
         else:
             blob_path = "eventattachments/v1/" + FileBlob.generate_unique_path()
 
             storage = get_storage()
-            compressed_blob = BytesIO(zstandard.compress(data))
-            storage.save(blob_path, compressed_blob)
+            compressed_blob = zstandard.compress(data)
+
+            metrics.distribution(
+                "storage.put.size",
+                len(compressed_blob),
+                tags={"usecase": "attachments", "compression": "zstd"},
+                unit="byte",
+            )
+            with metrics.timer("storage.put.latency", tags={"usecase": "attachments"}):
+                storage.save(blob_path, BytesIO(compressed_blob))
 
         return PutfileResult(
             content_type=content_type, size=size, sha1=checksum, blob_path=blob_path
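Worth noting in the hunk above: `zstandard.compress(data)` now stays a plain `bytes` value so its length can be recorded before it is wrapped for the save call (a `BytesIO` does not support `len()` directly). A minimal standalone sketch of that ordering, with illustrative data:

import zstandard
from io import BytesIO

data = b"example attachment payload"
compressed = zstandard.compress(data)  # keep plain bytes so len() works
print(len(compressed))                 # the value the size metric records
stream = BytesIO(compressed)           # wrap only at the storage.save() boundary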

src/sentry/profiles/task.py

Lines changed: 15 additions & 1 deletion

@@ -1406,13 +1406,27 @@ def _process_vroomrs_chunk_profile(profile: Profile) -> bool:
         len(json_profile),
         tags={"type": "chunk", "platform": profile["platform"]},
     )
+    metrics.distribution(
+        "storage.put.size",
+        len(json_profile),
+        tags={"usecase": "profiling", "compression": "none"},
+        unit="byte",
+    )
     with sentry_sdk.start_span(op="json.unmarshal"):
         chunk = vroomrs.profile_chunk_from_json_str(json_profile, profile["platform"])
         chunk.normalize()
     with sentry_sdk.start_span(op="gcs.write", name="compress and write"):
         storage = get_profiles_storage()
         compressed_chunk = chunk.compress()
-        storage.save(chunk.storage_path(), io.BytesIO(compressed_chunk))
+
+        metrics.distribution(
+            "storage.put.size",
+            len(compressed_chunk),
+            tags={"usecase": "profiling", "compression": "lz4"},
+            unit="byte",
+        )
+        with metrics.timer("storage.put.latency", tags={"usecase": "profiling"}):
+            storage.save(chunk.storage_path(), io.BytesIO(compressed_chunk))
     with sentry_sdk.start_span(op="processing", name="send chunk to kafka"):
         payload = build_chunk_kafka_message(chunk)
         topic = ArroyoTopic(get_topic_definition(Topic.PROFILE_CHUNKS)["real_topic_name"])

src/sentry/replays/lib/storage.py

Lines changed: 8 additions & 1 deletion

@@ -206,7 +206,14 @@ def get(self, key: str) -> bytes | None:
     def set(self, key: str, value: bytes) -> None:
         storage = get_storage(self._make_storage_options())
         try:
-            storage.save(key, BytesIO(value))
+            metrics.distribution(
+                "storage.put.size",
+                len(value),
+                tags={"usecase": "replays", "compression": "gzip"},
+                unit="byte",
+            )
+            with metrics.timer("storage.put.latency", tags={"usecase": "replays"}):
+                storage.save(key, BytesIO(value))
         except TooManyRequests:
             # if we 429 because of a dupe segment problem, ignore it
             metrics.incr("replays.lib.storage.TooManyRequests")

src/sentry/tasks/assemble.py

Lines changed: 10 additions & 1 deletion

@@ -515,10 +515,19 @@ def _create_or_update_artifact_bundle(
 
         # In case there is not ArtifactBundle with a specific bundle_id, we just create it and return.
         if existing_artifact_bundle is None:
+            file = self.assemble_result.bundle
+
+            metrics.distribution(
+                "storage.put.size",
+                file.size,
+                tags={"usecase": "artifact-bundles", "compression": "none"},
+                unit="byte",
+            )
+
             artifact_bundle = ArtifactBundle.objects.create(
                 organization_id=self.organization.id,
                 bundle_id=bundle_id,
-                file=self.assemble_result.bundle,
+                file=file,
                 artifact_count=self.archive.artifact_count,
                 # By default, a bundle is not indexed.
                 indexing_state=ArtifactBundleIndexingState.NOT_INDEXED.value,

src/sentry/utils/kvstore/bigtable.py

Lines changed: 14 additions & 0 deletions

@@ -15,6 +15,7 @@
 from google.cloud.bigtable.row_set import RowSet
 from google.cloud.bigtable.table import Table
 
+from sentry.utils import metrics
 from sentry.utils.codecs import Codec, ZlibCodec, ZstdCodec
 from sentry.utils.kvstore.abstract import KVStorage
 

@@ -235,11 +236,24 @@ def _set(self, key: str, value: bytes, ttl: timedelta | None = None) -> None:
         # tracking now is whether compression is on or not for the data column.
         flags = self.Flags(0)
 
+        metrics.distribution(
+            "storage.put.size",
+            len(value),
+            tags={"usecase": "nodestore", "compression": "none"},
+            unit="byte",
+        )
         if self.compression:
             compression_flag, strategy = self.compression_strategies[self.compression]
             flags |= compression_flag
             value = strategy.encode(value)
 
+            metrics.distribution(
+                "storage.put.size",
+                len(value),
+                tags={"usecase": "nodestore", "compression": self.compression},
+                unit="byte",
+            )
+
         # Only need to write the column at all if any flags are enabled. And if
         # so, pack it into a single byte.
         if flags:
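The nodestore change is the one place where both sizes flow through the same variable, so the algorithm tag comes from the configured strategy rather than a string literal. A condensed sketch of that double emission; `encode_with_metrics` is an illustrative free function, not part of the commit, and the real logic lives in `BigtableKVStorage._set`:

from sentry.utils import metrics
from sentry.utils.codecs import ZstdCodec


def encode_with_metrics(value: bytes, compression: str | None) -> bytes:
    # Uncompressed size is always emitted.
    metrics.distribution(
        "storage.put.size",
        len(value),
        tags={"usecase": "nodestore", "compression": "none"},
        unit="byte",
    )
    if compression:
        # Assuming the zstd codec here; the real code looks the strategy
        # up by name in self.compression_strategies.
        value = ZstdCodec().encode(value)
        metrics.distribution(
            "storage.put.size",
            len(value),
            tags={"usecase": "nodestore", "compression": compression},
            unit="byte",
        )
    return value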
