Skip to content

Commit 5151349

Browse files
authored
feat(taskworker): Send uncompressed profile tasks to worker (#95783)
The process_profiles worker code now supports uncompressed parameter payloads (#95692). This PR is responsible for removing double compression by changing the process_profile call site to send uncompressed tasks to taskworker as taskworker already handles zstd compression in its platform.
1 parent fb753cc commit 5151349

File tree

3 files changed

+5
-123
lines changed

3 files changed

+5
-123
lines changed

src/sentry/profiles/consumers/process/factory.py

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import time
2-
import zlib
31
from base64 import b64encode
42
from collections.abc import Iterable, Mapping
53

@@ -12,28 +10,14 @@
1210
from sentry import options
1311
from sentry.processing.backpressure.arroyo import HealthChecker, create_backpressure_step
1412
from sentry.profiles.task import process_profile_task
15-
from sentry.utils import metrics
1613

1714

1815
def process_message(message: Message[KafkaPayload]) -> None:
1916
sampled = is_sampled(message.payload.headers)
2017

2118
if sampled or options.get("profiling.profile_metrics.unsampled_profiles.enabled"):
22-
start_time = time.perf_counter()
23-
b64encoded_compressed = b64encode(
24-
zlib.compress(
25-
message.payload.value,
26-
level=options.get("taskworker.try_compress.profile_metrics.level"),
27-
)
28-
).decode("utf-8")
29-
end_time = time.perf_counter()
30-
metrics.distribution(
31-
"profiling.profile_metrics.compression_time",
32-
end_time - start_time,
33-
)
34-
process_profile_task.delay(
35-
payload=b64encoded_compressed, sampled=sampled, compressed_profile=True
36-
)
19+
b64encoded = b64encode(message.payload.value).decode("utf-8")
20+
process_profile_task.delay(payload=b64encoded, sampled=sampled, compressed_profile=False)
3721

3822

3923
class ProcessProfileStrategyFactory(ProcessingStrategyFactory[KafkaPayload]):

tests/sentry/profiles/consumers/test_process.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
import zlib
43
from base64 import b64encode
54
from datetime import datetime
65
from typing import Any
@@ -55,9 +54,9 @@ def test_basic_profile_to_celery(self, process_profile_task):
5554
processing_strategy.terminate()
5655

5756
process_profile_task.assert_called_with(
58-
payload=b64encode(zlib.compress(payload)).decode("utf-8"),
57+
payload=b64encode(payload).decode("utf-8"),
5958
sampled=True,
60-
compressed_profile=True,
59+
compressed_profile=False,
6160
)
6261

6362

tests/sentry/profiles/test_task.py

Lines changed: 1 addition & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from unittest import mock
88
from unittest.mock import patch
99

10-
import msgpack
1110
import pytest
1211
from django.core.files.uploadedfile import SimpleUploadedFile
1312
from django.urls import reverse
@@ -33,7 +32,7 @@
3332
)
3433
from sentry.profiles.utils import Profile
3534
from sentry.signals import first_profile_received
36-
from sentry.testutils.cases import TestCase, TransactionTestCase
35+
from sentry.testutils.cases import TransactionTestCase
3736
from sentry.testutils.factories import Factories, get_fixture_path
3837
from sentry.testutils.helpers import Feature, override_options
3938
from sentry.testutils.pytest.fixtures import django_db_all
@@ -1172,103 +1171,3 @@ def test_process_profile_task_should_flip_project_flag(
11721171
)
11731172
project.refresh_from_db()
11741173
assert project.flags.has_profiles
1175-
1176-
1177-
class TestProcessProfileTaskDoubleCompression(TestCase):
1178-
"""
1179-
TODO(taskworker): Remove this test once we have deleted zlib compression.
1180-
Test class for validating the double compression flow:
1181-
1. Consumer does zlib compression and calls process_profile_task.delay()
1182-
2. Taskworker does zstd compression on the task parameters
1183-
3. Task worker decompresses zstd and task decompresses zlib
1184-
"""
1185-
1186-
@patch("sentry.profiles.task._track_outcome")
1187-
@patch("sentry.profiles.task._track_duration_outcome")
1188-
@patch("sentry.profiles.task._symbolicate_profile")
1189-
@patch("sentry.profiles.task._deobfuscate_profile")
1190-
@patch("sentry.profiles.task._push_profile_to_vroom")
1191-
def test_consumer_to_task_double_compression_flow(
1192-
self,
1193-
_push_profile_to_vroom,
1194-
_deobfuscate_profile,
1195-
_symbolicate_profile,
1196-
_track_duration_outcome,
1197-
_track_outcome,
1198-
):
1199-
"""
1200-
Test that the full consumer -> task flow works with double compression.
1201-
1202-
This test validates:
1203-
1. process_message in factory.py does zlib compression
1204-
2. taskworker layer does zstd compression
1205-
3. Both decompressions work correctly in the task execution
1206-
"""
1207-
from datetime import datetime
1208-
1209-
from arroyo.backends.kafka import KafkaPayload
1210-
from arroyo.types import BrokerValue, Message, Partition, Topic
1211-
from django.utils import timezone
1212-
1213-
from sentry.profiles.consumers.process.factory import ProcessProfileStrategyFactory
1214-
1215-
# Mock the task functions
1216-
_push_profile_to_vroom.return_value = True
1217-
_deobfuscate_profile.return_value = True
1218-
_symbolicate_profile.return_value = True
1219-
1220-
# Get the profile fixture data
1221-
profile = generate_sample_v2_profile()
1222-
1223-
# Create a message dict like the consumer would receive from Kafka
1224-
message_dict = {
1225-
"organization_id": self.organization.id,
1226-
"project_id": self.project.id,
1227-
"key_id": 1,
1228-
"received": int(timezone.now().timestamp()),
1229-
"payload": json.dumps(profile),
1230-
}
1231-
1232-
# Pack the message with msgpack (like the consumer receives from Kafka)
1233-
payload = msgpack.packb(message_dict)
1234-
1235-
# Create the processing strategy (this will call process_message)
1236-
processing_strategy = ProcessProfileStrategyFactory().create_with_partitions(
1237-
commit=mock.Mock(), partitions={}
1238-
)
1239-
1240-
# Use self.tasks() to run the actual task with both compression layers
1241-
with self.tasks():
1242-
# Submit the message to the processing strategy
1243-
# This calls process_message which does:
1244-
# 1. zlib compression of the msgpack data
1245-
# 2. process_profile_task.delay() which adds zstd compression
1246-
processing_strategy.submit(
1247-
Message(
1248-
BrokerValue(
1249-
KafkaPayload(
1250-
b"key",
1251-
payload,
1252-
[],
1253-
),
1254-
Partition(Topic("profiles"), 1),
1255-
1,
1256-
datetime.now(),
1257-
)
1258-
)
1259-
)
1260-
processing_strategy.poll()
1261-
processing_strategy.join(1)
1262-
processing_strategy.terminate()
1263-
1264-
# Verify the task was executed successfully
1265-
assert _push_profile_to_vroom.call_count == 1
1266-
assert _deobfuscate_profile.call_count == 1
1267-
assert _symbolicate_profile.call_count == 1
1268-
assert _track_duration_outcome.call_count == 1
1269-
1270-
# Verify the profile was processed with correct data
1271-
processed_profile = _push_profile_to_vroom.call_args[0][0]
1272-
assert processed_profile["organization_id"] == self.organization.id
1273-
assert processed_profile["project_id"] == self.project.id
1274-
assert processed_profile["platform"] == profile["platform"]

0 commit comments

Comments (0)