Skip to content

Commit b2458bf

Browse files
Authored by thetruecpaul (Charles Paul) and co-author(s)
fix(teamStats): Fix timeouts on Unresolved Issues graph (#95715)
The Unresolved Issues graph is currently hanging, then eventually erroring out. Investigating, it seems like it's the DB query timing out. Signs point to that coming from the large number of subqueries involved in the `GroupHistory` query. Those subqueries exist because `GroupHistory` may contain multiple rows with the same `status` for any given `group_id` (or with different `status` in our same "state" category), so we need to deduplicate to only rows that change between an open or closed state. The current solution (subqueries) is inefficient on large # of group_ids, which can lead to timeouts. Another option would be to use `DISTINCT ON` queries to ensure we only get one row per each `(group_id, open/closed state)` pair. Unfortunately Django does not currently support `annotate()` and `distinct(fields)` together. Instead, what we do here is pull additional data and do the deduplication logic in Python. Test plan: loads without error in my sandbox. --------- Co-authored-by: Charles Paul <charlespaul@R4KCFH7MHY.local>
1 parent e984842 commit b2458bf

File tree

2 files changed

+54
-28
lines changed

2 files changed

+54
-28
lines changed

src/sentry/api/endpoints/team_all_unresolved_issues.py

Lines changed: 54 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
import copy
22
import datetime
3+
from collections import defaultdict
34
from datetime import timedelta
45
from itertools import chain
56

6-
from django.db.models import Count, OuterRef, Q, QuerySet, Subquery
7-
from django.db.models.functions import Coalesce, TruncDay
7+
from django.db.models import Case, Count, Q, QuerySet, Value, When
8+
from django.db.models.functions import TruncDay
89
from rest_framework.request import Request
910
from rest_framework.response import Response
1011

@@ -53,22 +54,6 @@ def calculate_unresolved_counts(
5354
group_history_environment_filter = (
5455
Q(group__groupenvironment__environment_id=environment_id) if environment_id else Q()
5556
)
56-
prev_status_sub_qs = Coalesce(
57-
Subquery(
58-
GroupHistory.objects.filter(
59-
group_id=OuterRef("group_id"),
60-
date_added__lt=OuterRef("date_added"),
61-
status__in=OPEN_STATUSES + CLOSED_STATUSES,
62-
)
63-
.order_by("-id")
64-
.values("status")[:1]
65-
),
66-
-1,
67-
)
68-
dedupe_status_filter = Q(
69-
(~Q(prev_status__in=OPEN_STATUSES) & Q(status__in=OPEN_STATUSES))
70-
| (~Q(prev_status__in=CLOSED_STATUSES) & Q(status__in=CLOSED_STATUSES))
71-
)
7257

7358
# Grab the historical data bucketed by day
7459
new_issues = (
@@ -80,19 +65,62 @@ def calculate_unresolved_counts(
8065
.annotate(open=Count("id"))
8166
)
8267

68+
# Pull extra data to do deduplication in Python. (Inefficient to do in SQL via subqueries
69+
# (see ISWF-549); cannot do via DISTINCT ON state because of Django limitations.)
8370
bucketed_issues = (
8471
GroupHistory.objects.filter_to_team(team)
85-
.filter(group_history_environment_filter, date_added__gte=start, date_added__lte=end)
86-
.annotate(bucket=TruncDay("date_added"), prev_status=prev_status_sub_qs)
87-
.filter(dedupe_status_filter)
88-
.order_by("bucket")
89-
.values("project", "bucket")
72+
.filter(
73+
group_history_environment_filter,
74+
date_added__gte=start,
75+
date_added__lte=end,
76+
status__in=OPEN_STATUSES + CLOSED_STATUSES,
77+
)
9078
.annotate(
91-
open=Count("id", filter=Q(status__in=OPEN_STATUSES)),
92-
closed=Count("id", filter=Q(status__in=CLOSED_STATUSES)),
79+
bucket=TruncDay("date_added"),
80+
state=Case(
81+
When(status__in=OPEN_STATUSES, then=Value("open")),
82+
When(status__in=CLOSED_STATUSES, then=Value("closed")),
83+
default=Value("other"),
84+
),
85+
)
86+
.order_by(
87+
"group_id",
88+
"bucket",
89+
"id",
9390
)
91+
.values("project", "group_id", "bucket", "state")[:200_000]
9492
)
9593

94+
most_recent_group_state: defaultdict[str, str] = defaultdict(lambda: "other")
95+
# Project => Bucket => State => Count
96+
deduping_map: defaultdict[str, defaultdict[str, defaultdict[str, int]]] = defaultdict(
97+
lambda: defaultdict(lambda: defaultdict(int))
98+
)
99+
for r in bucketed_issues:
100+
# Don't process the row if it doesn't set the group to open or closed.
101+
if r["state"] == "other":
102+
continue
103+
104+
# Don't process the row if it doesn't change the state.
105+
if r["state"] == most_recent_group_state[r["group_id"]]:
106+
continue
107+
108+
deduping_map[r["project"]][r["bucket"]][r["state"]] += 1
109+
most_recent_group_state[r["group_id"]] = r["state"]
110+
111+
deduped_bucketed_issues = []
112+
for p in deduping_map.keys():
113+
bucket_counts = deduping_map[p]
114+
for b in bucket_counts.keys():
115+
deduped_bucketed_issues.append(
116+
{
117+
"project": p,
118+
"bucket": b,
119+
"open": bucket_counts[b]["open"],
120+
"closed": bucket_counts[b]["closed"],
121+
}
122+
)
123+
96124
current_day, date_series_dict = start, {}
97125
while current_day < end:
98126
date_series_dict[current_day.isoformat()] = {"open": 0, "closed": 0}
@@ -101,7 +129,7 @@ def calculate_unresolved_counts(
101129
agg_project_precounts = {
102130
project.id: copy.deepcopy(date_series_dict) for project in project_list
103131
}
104-
for r in chain(bucketed_issues, new_issues):
132+
for r in chain(deduped_bucketed_issues, new_issues):
105133
bucket = agg_project_precounts[r["project"]][r["bucket"].isoformat()]
106134
bucket["open"] += r.get("open", 0)
107135
bucket["closed"] += r.get("closed", 0)

tests/sentry/api/endpoints/test_team_all_unresolved_issues.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from datetime import datetime, timedelta, timezone
22

3-
import pytest
43
from django.utils.timezone import now
54

65
from sentry.models.group import GroupStatus
@@ -15,7 +14,6 @@
1514
class TeamIssueBreakdownTest(APITestCase):
1615
endpoint = "sentry-api-0-team-all-unresolved-issues"
1716

18-
@pytest.mark.xfail(reason="flakey")
1917
def test_status_format(self):
2018
project1 = self.create_project(teams=[self.team])
2119
group1_1 = self.create_group(project=project1, first_seen=before_now(days=40))

0 commit comments

Comments (0)