Skip to content

Commit 7f656e6

Browse files
authored
feat(explore): Add equation support to stats queries (#93252)
- Follow-up to #92354: add equation support to the timeseries endpoints
1 parent a09846b commit 7f656e6

File tree

7 files changed

+500
-17
lines changed

7 files changed

+500
-17
lines changed

src/sentry/api/bases/organization_events.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,12 @@ def get_query_columns(columns, rollup):
6363

6464

6565
def resolve_axis_column(
66-
column: str, index: int = 0, transform_alias_to_input_format: bool = False
66+
column: str,
67+
index: int = 0,
68+
transform_alias_to_input_format: bool = False,
69+
use_rpc: bool = False,
6770
) -> str:
68-
if is_equation(column):
71+
if is_equation(column) and not use_rpc:
6972
return f"equation[{index}]"
7073

7174
# Function columns on input have names like `"p95(duration)"`. By default, we convert them to their aliases like `"p95_duration"`. Here, we want to preserve the original name, so we return the column as-is
@@ -87,10 +90,14 @@ def has_feature(self, organization: Organization, request: Request) -> bool:
8790
)
8891
)
8992

90-
def get_equation_list(self, organization: Organization, request: Request) -> list[str]:
93+
def get_equation_list(
94+
self, organization: Organization, request: Request, param_name: str = "field"
95+
) -> list[str]:
9196
"""equations have a prefix so that they can be easily included alongside our existing fields"""
9297
return [
93-
strip_equation(field) for field in request.GET.getlist("field")[:] if is_equation(field)
98+
strip_equation(field)
99+
for field in request.GET.getlist(param_name)[:]
100+
if is_equation(field)
94101
]
95102

96103
def get_field_list(
@@ -546,14 +553,15 @@ def get_event_stats_data(
546553
zerofill_results=zerofill_results,
547554
dataset=dataset,
548555
transform_alias_to_input_format=transform_alias_to_input_format,
556+
use_rpc=use_rpc,
549557
)
550558
if request.query_params.get("useOnDemandMetrics") == "true":
551559
results[key]["isMetricsExtractedData"] = self._query_if_extracted_data(
552560
results, key, query_columns
553561
)
554562
else:
555563
column = resolve_axis_column(
556-
query_columns[0], 0, transform_alias_to_input_format
564+
query_columns[0], 0, transform_alias_to_input_format, use_rpc
557565
)
558566
results[key] = serializer.serialize(
559567
event_result,
@@ -586,14 +594,17 @@ def get_event_stats_data(
586594
zerofill_results=zerofill_results,
587595
dataset=dataset,
588596
transform_alias_to_input_format=transform_alias_to_input_format,
597+
use_rpc=use_rpc,
589598
)
590599
if top_events > 0 and isinstance(result, SnubaTSResult):
591600
serialized_result = {"": serialized_result}
592601
else:
593602
extra_columns = None
594603
if comparison_delta:
595604
extra_columns = ["comparisonCount"]
596-
column = resolve_axis_column(query_columns[0], 0, transform_alias_to_input_format)
605+
column = resolve_axis_column(
606+
query_columns[0], 0, transform_alias_to_input_format, use_rpc
607+
)
597608
serialized_result = serializer.serialize(
598609
result,
599610
column=column,
@@ -643,6 +654,7 @@ def serialize_multiple_axis(
643654
zerofill_results: bool = True,
644655
dataset: Any | None = None,
645656
transform_alias_to_input_format: bool = False,
657+
use_rpc: bool = False,
646658
) -> dict[str, Any]:
647659
# Return with requested yAxis as the key
648660
result = {}
@@ -658,7 +670,9 @@ def serialize_multiple_axis(
658670
for index, query_column in enumerate(query_columns):
659671
result[columns[index]] = serializer.serialize(
660672
event_result,
661-
resolve_axis_column(query_column, equations, transform_alias_to_input_format),
673+
resolve_axis_column(
674+
query_column, equations, transform_alias_to_input_format, use_rpc
675+
),
662676
order=index,
663677
allow_partial_buckets=allow_partial_buckets,
664678
zerofill_results=zerofill_results,

src/sentry/api/endpoints/organization_events_stats.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ def _get_event_stats(
226226
use_aggregate_conditions=True,
227227
),
228228
sampling_mode=snuba_params.sampling_mode,
229+
equations=self.get_equation_list(organization, request),
229230
)
230231
return scoped_dataset.top_events_timeseries(
231232
timeseries_columns=query_columns,

src/sentry/api/endpoints/organization_events_timeseries.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,7 @@ def get_event_stats(
248248
use_aggregate_conditions=True,
249249
),
250250
sampling_mode=snuba_params.sampling_mode,
251+
equations=self.get_equation_list(organization, request, param_name="groupBy"),
251252
)
252253
return dataset.top_events_timeseries(
253254
timeseries_columns=query_columns,

src/sentry/snuba/rpc_dataset_common.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
)
2929

3030
from sentry.api.event_search import SearchFilter, SearchKey, SearchValue
31+
from sentry.discover import arithmetic
3132
from sentry.exceptions import InvalidSearchQuery
3233
from sentry.search.eap.columns import (
3334
ResolvedAggregate,
@@ -104,20 +105,22 @@ def categorize_column(
104105

105106

106107
def categorize_aggregate(
107-
column: ResolvedAggregate | ResolvedConditionalAggregate | ResolvedFormula,
108+
column: ResolvedAggregate | ResolvedConditionalAggregate | ResolvedFormula | ResolvedEquation,
108109
) -> Expression:
109-
if isinstance(column, ResolvedFormula):
110+
if isinstance(column, (ResolvedFormula, ResolvedEquation)):
110111
# TODO: Remove when https://github.com/getsentry/eap-planning/issues/206 is merged, since we can use formulas in both APIs at that point
111112
return Expression(
112113
formula=transform_binary_formula_to_expression(column.proto_definition),
113114
label=column.public_alias,
114115
)
115-
if isinstance(column, ResolvedAggregate):
116+
elif isinstance(column, ResolvedAggregate):
116117
return Expression(aggregation=column.proto_definition, label=column.public_alias)
117-
if isinstance(column, ResolvedConditionalAggregate):
118+
elif isinstance(column, ResolvedConditionalAggregate):
118119
return Expression(
119120
conditional_aggregation=column.proto_definition, label=column.public_alias
120121
)
122+
else:
123+
raise Exception(f"Unknown column type {type(column)}")
121124

122125

123126
def update_timestamps(
@@ -189,13 +192,15 @@ def get_timeseries_query(
189192
extra_conditions: TraceItemFilter | None = None,
190193
) -> tuple[
191194
TimeSeriesRequest,
192-
list[ResolvedFormula | ResolvedAggregate | ResolvedConditionalAggregate],
195+
list[ResolvedFormula | ResolvedAggregate | ResolvedConditionalAggregate | ResolvedEquation],
193196
list[ResolvedAttribute],
194197
]:
195198
timeseries_filter, params = update_timestamps(params, search_resolver)
196199
meta = search_resolver.resolve_meta(referrer=referrer, sampling_mode=sampling_mode)
197200
query, _, query_contexts = search_resolver.resolve_query(query_string)
198-
(functions, _) = search_resolver.resolve_functions(y_axes)
201+
selected_equations, selected_axes = arithmetic.categorize_columns(y_axes)
202+
(functions, _) = search_resolver.resolve_functions(selected_axes)
203+
equations, _ = search_resolver.resolve_equations(selected_equations)
199204
groupbys, groupby_contexts = search_resolver.resolve_attributes(groupby)
200205

201206
# Virtual context columns (VCCs) are currently only supported in TraceItemTable.
@@ -223,15 +228,17 @@ def get_timeseries_query(
223228
TimeSeriesRequest(
224229
meta=meta,
225230
filter=query,
226-
expressions=[categorize_aggregate(fn) for fn in functions if fn.is_aggregate],
231+
expressions=[
232+
categorize_aggregate(fn) for fn in (functions + equations) if fn.is_aggregate
233+
],
227234
group_by=[
228235
groupby.proto_definition
229236
for groupby in groupbys
230237
if isinstance(groupby.proto_definition, AttributeKey)
231238
],
232239
granularity_secs=params.timeseries_granularity_secs,
233240
),
234-
functions,
241+
(functions + equations),
235242
groupbys,
236243
)
237244

@@ -489,6 +496,7 @@ def run_top_events_timeseries_query(
489496
referrer: str,
490497
config: SearchResolverConfig,
491498
sampling_mode: SAMPLING_MODES | None,
499+
equations: list[str] | None = None,
492500
) -> Any:
493501
"""We intentionally duplicate run_timeseries_query code here to reduce the complexity of needing multiple helper
494502
functions that both would call
@@ -508,17 +516,18 @@ def run_top_events_timeseries_query(
508516
table_search_resolver = get_resolver(table_query_params, config)
509517

510518
# Make a table query first to get what we need to filter by
519+
_, non_equation_axes = arithmetic.categorize_columns(y_axes)
511520
top_events = run_table_query(
512521
TableQuery(
513522
query_string=query_string,
514-
selected_columns=raw_groupby + y_axes,
523+
selected_columns=raw_groupby + non_equation_axes,
515524
orderby=orderby,
516525
offset=0,
517526
limit=limit,
518527
referrer=referrer,
519528
sampling_mode=sampling_mode,
520529
resolver=table_search_resolver,
521-
equations=[],
530+
equations=equations,
522531
)
523532
)
524533
if len(top_events["data"]) == 0:

src/sentry/snuba/spans_rpc.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ def run_top_events_timeseries_query(
159159
referrer: str,
160160
config: SearchResolverConfig,
161161
sampling_mode: SAMPLING_MODES | None,
162+
equations: list[str] | None = None,
162163
) -> Any:
163164
return rpc_dataset_common.run_top_events_timeseries_query(
164165
get_resolver=get_resolver,
@@ -171,6 +172,7 @@ def run_top_events_timeseries_query(
171172
referrer=referrer,
172173
config=config,
173174
sampling_mode=sampling_mode,
175+
equations=equations,
174176
)
175177

176178

0 commit comments

Comments
 (0)