feat(upsampling) - Support upsampled error count in events-stats API (#94376)

yuvmen · web-flow · commit 9c80bbc00a47 · 2025-06-26T14:35:38.000-07:00
Part of the Error Upsampling project: https://www.notion.so/sentry/Tech-Spec-Error-Up-Sampling-1e58b10e4b5d80af855cf3b992f75894?source=copy_link The events-stats API now checks whether all projects in the query are allowlisted for upsampling and, if so, converts the count query to a sum over `sample_weight` in Snuba. This is done by defining a new SnQL function, `upsampled_count()`. I noticed that the eps() and epm() functions are also in use in this endpoint. I considered (and even worked on) swapping eps() and epm() as well, since for correctness they should probably also use `sample_weight` rather than count naively. However, this caused some complications, and since they are only used by specific dashboard widgets and are not available in Discover, I decided to defer changing them until we find it is needed.
diff --git a/pyproject.toml b/pyproject.toml
@@ -173,6 +173,7 @@ module = [
     "sentry.api.event_search",
     "sentry.api.helpers.deprecation",
     "sentry.api.helpers.environments",
+    "sentry.api.helpers.error_upsampling",
     "sentry.api.helpers.group_index.delete",
     "sentry.api.helpers.group_index.update",
     "sentry.api.helpers.source_map_helper",
@@ -460,6 +461,7 @@ module = [
     "tests.sentry.api.endpoints.issues.test_organization_derive_code_mappings",
     "tests.sentry.api.endpoints.test_browser_reporting_collector",
     "tests.sentry.api.endpoints.test_project_repo_path_parsing",
+    "tests.sentry.api.helpers.test_error_upsampling",
     "tests.sentry.audit_log.services.*",
     "tests.sentry.deletions.test_group",
     "tests.sentry.event_manager.test_event_manager",
diff --git a/src/sentry/api/endpoints/organization_events_stats.py b/src/sentry/api/endpoints/organization_events_stats.py
@@ -11,6 +11,10 @@
 from sentry.api.api_publish_status import ApiPublishStatus
 from sentry.api.base import region_silo_endpoint
 from sentry.api.bases import OrganizationEventsV2EndpointBase
+from sentry.api.helpers.error_upsampling import (
+    is_errors_query_for_error_upsampled_projects,
+    transform_query_columns_for_error_upsampling,
+)
 from sentry.constants import MAX_TOP_EVENTS
 from sentry.models.dashboard_widget import DashboardWidget, DashboardWidgetTypes
 from sentry.models.organization import Organization
@@ -117,7 +121,7 @@ def get(self, request: Request, organization: Organization) -> Response:
                         status=400,
                     )
                 elif top_events <= 0:
-                    return Response({"detail": "If topEvents needs to be at least 1"}, status=400)
+                    return Response({"detail": "topEvents needs to be at least 1"}, status=400)
 
             comparison_delta = None
             if "comparisonDelta" in request.GET:
@@ -211,12 +215,19 @@ def _get_event_stats(
             zerofill_results: bool,
             comparison_delta: timedelta | None,
         ) -> SnubaTSResult | dict[str, SnubaTSResult]:
+            should_upsample = is_errors_query_for_error_upsampled_projects(
+                snuba_params, organization, dataset, request
+            )
+            final_columns = query_columns
+            if should_upsample:
+                final_columns = transform_query_columns_for_error_upsampling(query_columns)
+
             if top_events > 0:
                 if use_rpc:
                     return scoped_dataset.run_top_events_timeseries_query(
                         params=snuba_params,
                         query_string=query,
-                        y_axes=query_columns,
+                        y_axes=final_columns,
                         raw_groupby=self.get_field_list(organization, request),
                         orderby=self.get_orderby(request),
                         limit=top_events,
@@ -231,7 +242,7 @@ def _get_event_stats(
                         equations=self.get_equation_list(organization, request),
                     )
                 return scoped_dataset.top_events_timeseries(
-                    timeseries_columns=query_columns,
+                    timeseries_columns=final_columns,
                     selected_columns=self.get_field_list(organization, request),
                     equations=self.get_equation_list(organization, request),
                     user_query=query,
@@ -255,7 +266,7 @@ def _get_event_stats(
                 return scoped_dataset.run_timeseries_query(
                     params=snuba_params,
                     query_string=query,
-                    y_axes=query_columns,
+                    y_axes=final_columns,
                     referrer=referrer,
                     config=SearchResolverConfig(
                         auto_fields=False,
@@ -268,7 +279,7 @@ def _get_event_stats(
                 )
 
             return scoped_dataset.timeseries_query(
-                selected_columns=query_columns,
+                selected_columns=final_columns,
                 query=query,
                 snuba_params=snuba_params,
                 rollup=rollup,
diff --git a/src/sentry/api/helpers/error_upsampling.py b/src/sentry/api/helpers/error_upsampling.py
@@ -0,0 +1,105 @@
+from collections.abc import Sequence
+from types import ModuleType
+from typing import Any
+
+from rest_framework.request import Request
+
+from sentry import options
+from sentry.models.organization import Organization
+from sentry.search.events.types import SnubaParams
+
+
+def is_errors_query_for_error_upsampled_projects(
+    snuba_params: SnubaParams,
+    organization: Organization,
+    dataset: ModuleType,
+    request: Request,
+) -> bool:
+    """
+    Decide whether the error upsampling transformations apply to this query.
+    True only if every queried project is allowlisted and the query targets errors.
+    """
+    all_allowlisted = _are_all_projects_error_upsampled(
+        snuba_params.project_ids, organization
+    )
+    return all_allowlisted and _should_apply_sample_weight_transform(dataset, request)
+
+
+def _are_all_projects_error_upsampled(
+    project_ids: Sequence[int], organization: Organization
+) -> bool:
+    """
+    Check if ALL projects in the query are allowlisted for error upsampling.
+    Returns False when no project ids are given or the allowlist option is empty.
+    `organization` is currently unused; the decision depends only on the option.
+    """
+    if not project_ids:
+        return False
+
+    allowlist = options.get("issues.client_error_sampling.project_allowlist", [])
+    if not allowlist:
+        return False
+
+    # Hash-based subset check instead of scanning the allowlist per project id.
+    return set(project_ids).issubset(allowlist)
+
+
+def transform_query_columns_for_error_upsampling(
+    query_columns: Sequence[str],
+) -> list[str]:
+    """
+    Rewrite count() aggregates so they are computed from sample weights.
+
+    Each column that equals "count()" after lowercasing and trimming the
+    surrounding whitespace is replaced by "upsampled_count() as count"; every
+    other column is passed through unchanged and the input order is preserved.
+    Callers only invoke this when all projects are allowlisted.
+    """
+    upsampled_count_column = "upsampled_count() as count"
+
+    def _rewrite(column: str) -> str:
+        # Match case-insensitively and ignore padding around the function call.
+        is_plain_count = column.lower().strip() == "count()"
+        return upsampled_count_column if is_plain_count else column
+
+    return [_rewrite(column) for column in query_columns]
+
+
+def _should_apply_sample_weight_transform(dataset: Any, request: Request) -> bool:
+    """
+    Decide whether sample_weight transformations apply for this dataset and request.
+
+    Applies unconditionally to the errors dataset and never to the transactions
+    dataset, since sample_weight doesn't exist for transactions. For the discover
+    dataset it applies only when the request's query is error focused. Any other
+    dataset (spans, metrics, etc.) is left untouched.
+    """
+    from sentry.snuba import discover, errors, transactions
+
+    # The errors dataset always qualifies.
+    if dataset == errors:
+        return True
+
+    # The transactions dataset never qualifies.
+    if dataset == transactions:
+        return False
+
+    if dataset == discover:
+        # Only when the query targets errors specifically.
+        return _is_error_focused_query(request)
+
+    # Every remaining dataset is excluded.
+    return False
+
+
+def _is_error_focused_query(request: Request) -> bool:
+    """
+    Check if a query is focused on error events.
+    Reduced to only check for event.type:error to err on the side of caution;
+    a negated filter (!event.type:error) excludes errors, so it does not count.
+    """
+    query = request.GET.get("query", "").lower()
+
+    # A plain substring test would also match inside "!event.type:error", so
+    # strip negated occurrences before looking for a positive filter.
+    return "event.type:error" in query.replace("!event.type:error", "")
diff --git a/src/sentry/search/events/datasets/discover.py b/src/sentry/search/events/datasets/discover.py
@@ -1038,6 +1038,16 @@ def function_converter(self) -> Mapping[str, SnQLFunction]:
                     default_result_type="integer",
                     private=True,
                 ),
+                SnQLFunction(
+                    "upsampled_count",
+                    required_args=[],
+                    snql_aggregate=lambda args, alias: Function(
+                        "toInt64",
+                        [Function("sum", [Function("ifNull", [Column("sample_weight"), 1])])],
+                        alias,
+                    ),
+                    default_result_type="number",
+                ),
             ]
         }
 
diff --git a/src/sentry/testutils/factories.py b/src/sentry/testutils/factories.py
@@ -8,7 +8,7 @@
 import zipfile
 from base64 import b64encode
 from binascii import hexlify
-from collections.abc import Mapping, Sequence
+from collections.abc import Mapping, MutableMapping, Sequence
 from datetime import UTC, datetime
 from enum import Enum
 from hashlib import sha1
@@ -341,6 +341,22 @@ def _patch_artifact_manifest(path, org=None, release=None, project=None, extra_f
     return orjson.dumps(manifest).decode()
 
 
+def _set_sample_rate_from_error_sampling(normalized_data: MutableMapping[str, Any]) -> None:
+    """Set 'sample_rate' on normalized_data if contexts.error_sampling.client_sample_rate is present and valid."""
+    try:
+        error_sampling = normalized_data.get("contexts", {}).get("error_sampling", {})
+        client_sample_rate = error_sampling.get("client_sample_rate")
+    except AttributeError:
+        # "contexts" or "error_sampling" is present but not a mapping (e.g. None).
+        return
+    if not client_sample_rate:
+        return
+    try:
+        normalized_data["sample_rate"] = float(client_sample_rate)
+    except (TypeError, ValueError, OverflowError):
+        pass  # best-effort: a non-numeric rate leaves normalized_data untouched
+
+
 # TODO(dcramer): consider moving to something more scalable like factoryboy
 class Factories:
     @staticmethod
@@ -1029,6 +1045,9 @@ def store_event(
             assert not errors, errors
 
         normalized_data = manager.get_data()
+
+        _set_sample_rate_from_error_sampling(normalized_data)
+
         event = None
 
         # When fingerprint is present on transaction, inject performance problems
diff --git a/tests/sentry/api/helpers/test_error_upsampling.py b/tests/sentry/api/helpers/test_error_upsampling.py
@@ -0,0 +1,101 @@
+from unittest.mock import Mock, patch
+
+from django.http import QueryDict
+from django.test import RequestFactory
+from rest_framework.request import Request
+
+from sentry.api.helpers.error_upsampling import (
+    _are_all_projects_error_upsampled,
+    _is_error_focused_query,
+    _should_apply_sample_weight_transform,
+    transform_query_columns_for_error_upsampling,
+)
+from sentry.models.organization import Organization
+from sentry.search.events.types import SnubaParams
+from sentry.snuba import discover, errors, transactions
+from sentry.testutils.cases import TestCase
+
+
+class ErrorUpsamplingTest(TestCase):
+    def setUp(self) -> None:
+        self.organization = Organization.objects.create(name="test-org")
+        self.projects = [
+            self.create_project(organization=self.organization, name="Project 1"),
+            self.create_project(organization=self.organization, name="Project 2"),
+            self.create_project(organization=self.organization, name="Project 3"),
+        ]
+        self.project_ids = [p.id for p in self.projects]
+        self.snuba_params = SnubaParams(
+            start=None,
+            end=None,
+            projects=self.projects,
+        )
+        factory = RequestFactory()
+        self.request = Request(factory.get("/"))
+        self.request.GET = QueryDict("")  # every test starts from an empty query string
+
+    @patch("sentry.api.helpers.error_upsampling.options")
+    def test_are_all_projects_error_upsampled(self, mock_options: Mock) -> None:
+        # All three project ids are in the allowlist -> True
+        mock_options.get.return_value = self.project_ids
+        assert _are_all_projects_error_upsampled(self.project_ids, self.organization) is True
+
+        # The last project is missing from the allowlist -> False
+        mock_options.get.return_value = self.project_ids[:-1]
+        assert _are_all_projects_error_upsampled(self.project_ids, self.organization) is False
+
+        # Empty allowlist -> False
+        mock_options.get.return_value = []
+        assert _are_all_projects_error_upsampled(self.project_ids, self.organization) is False
+
+        # No project ids provided -> False (the allowlist is still empty from above)
+        assert _are_all_projects_error_upsampled([], self.organization) is False
+
+    def test_transform_query_columns_for_error_upsampling(self) -> None:
+        # count() is rewritten; unrelated columns pass through unchanged
+        columns = ["count()", "other_column"]
+        expected = [
+            "upsampled_count() as count",
+            "other_column",
+        ]
+        assert transform_query_columns_for_error_upsampling(columns) == expected
+
+        # Matching is case-insensitive
+        columns = ["COUNT()"]
+        expected = [
+            "upsampled_count() as count",
+        ]
+        assert transform_query_columns_for_error_upsampling(columns) == expected
+
+        # Surrounding whitespace is ignored when matching
+        columns = [" count() "]
+        expected = [
+            "upsampled_count() as count",
+        ]
+        assert transform_query_columns_for_error_upsampling(columns) == expected
+
+    def test_is_error_focused_query(self) -> None:
+        # An explicit event.type:error filter counts as error focused
+        self.request.GET = QueryDict("query=event.type:error")
+        assert _is_error_focused_query(self.request) is True
+
+        # An explicit transaction filter does not
+        self.request.GET = QueryDict("query=event.type:transaction")
+        assert _is_error_focused_query(self.request) is False
+
+        # An empty query does not either
+        self.request.GET = QueryDict("")
+        assert _is_error_focused_query(self.request) is False
+
+    def test_should_apply_sample_weight_transform(self) -> None:
+        # Errors dataset applies even with the empty query set in setUp
+        assert _should_apply_sample_weight_transform(errors, self.request) is True
+
+        # Transactions dataset never applies
+        assert _should_apply_sample_weight_transform(transactions, self.request) is False
+        # Discover dataset applies when the query filters on event.type:error
+        self.request.GET = QueryDict("query=event.type:error")
+        assert _should_apply_sample_weight_transform(discover, self.request) is True
+        # Discover dataset with a transaction filter does not apply
+        self.request.GET = QueryDict("query=event.type:transaction")
+        assert _should_apply_sample_weight_transform(discover, self.request) is False
diff --git a/tests/snuba/api/endpoints/test_organization_events_stats.py b/tests/snuba/api/endpoints/test_organization_events_stats.py