Skip to content

Commit 9c3536a

Browse files
Optimize queryset annotations & prefetches to cut DB time for test / finding / product views (issue #12575) (#12603)
* Optimize the DB queries for engagement, finding, product and product_type views. Mostly counters fixes * Apply suggestions from code review Fix typo in the "build_count_subquery" docstring Co-authored-by: valentijnscholten <valentijnscholten@gmail.com> * Add subquery for test_count for engagements getting --------- Co-authored-by: valentijnscholten <valentijnscholten@gmail.com>
1 parent e2d71aa commit 9c3536a

File tree

6 files changed

+195
-183
lines changed

6 files changed

+195
-183
lines changed

dojo/engagement/views.py

Lines changed: 46 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import re
66
import time
77
from datetime import datetime
8-
from functools import reduce
8+
from functools import partial, reduce
99
from pathlib import Path
1010
from tempfile import NamedTemporaryFile
1111
from time import strftime
@@ -16,7 +16,8 @@
1616
from django.contrib.auth.models import User
1717
from django.core.exceptions import PermissionDenied, ValidationError
1818
from django.db import DEFAULT_DB_ALIAS
19-
from django.db.models import Count, Q
19+
from django.db.models import Count, OuterRef, Q, Value
20+
from django.db.models.functions import Coalesce
2021
from django.db.models.query import Prefetch, QuerySet
2122
from django.http import HttpRequest, HttpResponse, HttpResponseRedirect, QueryDict, StreamingHttpResponse
2223
from django.shortcuts import get_object_or_404, render
@@ -104,6 +105,7 @@
104105
add_error_message_to_response,
105106
add_success_message_to_response,
106107
async_delete,
108+
build_count_subquery,
107109
calculate_grade,
108110
generate_file_response_from_file_path,
109111
get_cal_event,
@@ -151,7 +153,6 @@ def engagement_calendar(request):
151153

152154

153155
def get_filtered_engagements(request, view):
154-
155156
if view not in {"all", "active"}:
156157
msg = f"View {view} is not allowed"
157158
raise ValidationError(msg)
@@ -161,37 +162,29 @@ def get_filtered_engagements(request, view):
161162
if view == "active":
162163
engagements = engagements.filter(active=True)
163164

164-
engagements = engagements.select_related("product", "product__prod_type") \
165+
engagements = (
166+
engagements
167+
.select_related("product", "product__prod_type")
165168
.prefetch_related("lead", "tags", "product__tags")
169+
)
166170

167171
if System_Settings.objects.get().enable_jira:
168172
engagements = engagements.prefetch_related(
169173
"jira_project__jira_instance",
170174
"product__jira_project_set__jira_instance",
171175
)
172176

177+
test_count_subquery = build_count_subquery(
178+
Test.objects.filter(engagement=OuterRef("pk")), group_field="engagement_id",
179+
)
180+
engagements = engagements.annotate(test_count=Coalesce(test_count_subquery, Value(0)))
181+
173182
filter_string_matching = get_system_setting("filter_string_matching", False)
174183
filter_class = EngagementDirectFilterWithoutObjectLookups if filter_string_matching else EngagementDirectFilter
175184
return filter_class(request.GET, queryset=engagements)
176185

177186

178-
def get_test_counts(engagements):
179-
# Get the test counts per engagement. As a separate query, this is much
180-
# faster than annotating the above `engagements` query.
181-
return {
182-
test["engagement"]: test["test_count"]
183-
for test in Test.objects.filter(
184-
engagement__in=engagements,
185-
).values(
186-
"engagement",
187-
).annotate(
188-
test_count=Count("engagement"),
189-
)
190-
}
191-
192-
193187
def engagements(request, view):
194-
195188
if not view:
196189
view = "active"
197190

@@ -209,7 +202,6 @@ def engagements(request, view):
209202
return render(
210203
request, "dojo/engagement.html", {
211204
"engagements": engs,
212-
"engagement_test_counts": get_test_counts(filtered_engagements.qs),
213205
"filter_form": filtered_engagements.form,
214206
"product_name_words": product_name_words,
215207
"engagement_name_words": engagement_name_words,
@@ -592,23 +584,27 @@ def post(self, request, eid, *args, **kwargs):
592584

593585

594586
def prefetch_for_view_tests(tests):
595-
prefetched = tests
596-
if isinstance(tests,
597-
QuerySet): # old code can arrive here with prods being a list because the query was already executed
598-
599-
prefetched = prefetched.select_related("lead")
600-
prefetched = prefetched.prefetch_related("tags", "test_type", "notes")
601-
prefetched = prefetched.annotate(count_findings_test_all=Count("finding__id", distinct=True))
602-
prefetched = prefetched.annotate(count_findings_test_active=Count("finding__id", filter=Q(finding__active=True), distinct=True))
603-
prefetched = prefetched.annotate(count_findings_test_active_verified=Count("finding__id", filter=Q(finding__active=True) & Q(finding__verified=True), distinct=True))
604-
prefetched = prefetched.annotate(count_findings_test_mitigated=Count("finding__id", filter=Q(finding__is_mitigated=True), distinct=True))
605-
prefetched = prefetched.annotate(count_findings_test_dups=Count("finding__id", filter=Q(finding__duplicate=True), distinct=True))
606-
prefetched = prefetched.annotate(total_reimport_count=Count("test_import__id", filter=Q(test_import__type=Test_Import.REIMPORT_TYPE), distinct=True))
607-
608-
else:
587+
# old code can arrive here with prods being a list because the query was already executed
588+
if not isinstance(tests, QuerySet):
609589
logger.warning("unable to prefetch because query was already executed")
610-
611-
return prefetched
590+
return tests
591+
592+
prefetched = tests.select_related("lead", "test_type").prefetch_related("tags", "notes")
593+
base_findings = Finding.objects.filter(test_id=OuterRef("pk"))
594+
count_subquery = partial(build_count_subquery, group_field="test_id")
595+
return prefetched.annotate(
596+
count_findings_test_all=Coalesce(count_subquery(base_findings), Value(0)),
597+
count_findings_test_active=Coalesce(count_subquery(base_findings.filter(active=True)), Value(0)),
598+
count_findings_test_active_verified=Coalesce(
599+
count_subquery(base_findings.filter(active=True, verified=True)), Value(0),
600+
),
601+
count_findings_test_mitigated=Coalesce(count_subquery(base_findings.filter(is_mitigated=True)), Value(0)),
602+
count_findings_test_dups=Coalesce(count_subquery(base_findings.filter(duplicate=True)), Value(0)),
603+
total_reimport_count=Coalesce(
604+
count_subquery(Test_Import.objects.filter(test_id=OuterRef("pk"), type=Test_Import.REIMPORT_TYPE)),
605+
Value(0),
606+
),
607+
)
612608

613609

614610
@user_is_authorized(Engagement, Permissions.Test_Add, "eid")
@@ -1583,14 +1579,18 @@ def get_engagements(request):
15831579
query = get_list_index(path_items, 1)
15841580

15851581
request.GET = QueryDict(query)
1586-
engagements = get_filtered_engagements(request, view).qs
1587-
test_counts = get_test_counts(engagements)
1588-
1589-
return engagements, test_counts
1582+
return get_filtered_engagements(request, view).qs
15901583

15911584

15921585
def get_excludes():
1593-
return ["is_ci_cd", "jira_issue", "jira_project", "objects", "unaccepted_open_findings"]
1586+
return [
1587+
"is_ci_cd",
1588+
"jira_issue",
1589+
"jira_project",
1590+
"objects",
1591+
"unaccepted_open_findings",
1592+
"test_count", # already exported separately as “tests”
1593+
]
15941594

15951595

15961596
def get_foreign_keys():
@@ -1600,7 +1600,7 @@ def get_foreign_keys():
16001600

16011601
def csv_export(request):
16021602
logger.debug("starting csv export")
1603-
engagements, test_counts = get_engagements(request)
1603+
engagements = get_engagements(request)
16041604

16051605
response = HttpResponse(content_type="text/csv")
16061606
response["Content-Disposition"] = "attachment; filename=engagements.csv"
@@ -1627,7 +1627,7 @@ def csv_export(request):
16271627
if value and isinstance(value, str):
16281628
value = value.replace("\n", " NEWLINE ").replace("\r", "")
16291629
fields.append(value)
1630-
fields.append(test_counts.get(engagement.id, 0))
1630+
fields.append(getattr(engagement, "test_count", 0))
16311631

16321632
writer.writerow(fields)
16331633
logger.debug("done with csv export")
@@ -1636,7 +1636,7 @@ def csv_export(request):
16361636

16371637
def excel_export(request):
16381638
logger.debug("starting excel export")
1639-
engagements, test_counts = get_engagements(request)
1639+
engagements = get_engagements(request)
16401640

16411641
workbook = Workbook()
16421642
workbook.iso_dates = True
@@ -1668,7 +1668,7 @@ def excel_export(request):
16681668
value = value.replace(tzinfo=None)
16691669
worksheet.cell(row=row_num, column=col_num, value=value)
16701670
col_num += 1
1671-
worksheet.cell(row=row_num, column=col_num, value=test_counts.get(engagement.id, 0))
1671+
worksheet.cell(row=row_num, column=col_num, value=getattr(engagement, "test_count", 0))
16721672
row_num += 1
16731673

16741674
with NamedTemporaryFile() as tmp:

dojo/finding/views.py

Lines changed: 48 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import logging
77
import mimetypes
88
from collections import OrderedDict, defaultdict
9+
from functools import partial
910
from itertools import chain
1011
from pathlib import Path
1112

@@ -14,8 +15,8 @@
1415
from django.core import serializers
1516
from django.core.exceptions import PermissionDenied, ValidationError
1617
from django.db import models
17-
from django.db.models import Count, Q, QuerySet
18-
from django.db.models.functions import Length
18+
from django.db.models import OuterRef, QuerySet, Value
19+
from django.db.models.functions import Coalesce, Length
1920
from django.db.models.query import Prefetch
2021
from django.http import Http404, HttpRequest, HttpResponse, HttpResponseRedirect, JsonResponse, StreamingHttpResponse
2122
from django.shortcuts import get_object_or_404, render
@@ -110,6 +111,7 @@
110111
add_field_errors_to_response,
111112
add_success_message_to_response,
112113
apply_cwe_to_template,
114+
build_count_subquery,
113115
calculate_grade,
114116
close_external_issue,
115117
do_false_positive_history,
@@ -132,87 +134,58 @@
132134

133135

134136
def prefetch_for_findings(findings, prefetch_type="all", *, exclude_untouched=True):
135-
prefetched_findings = findings
136-
if isinstance(
137-
findings, QuerySet,
138-
): # old code can arrive here with prods being a list because the query was already executed
139-
prefetched_findings = prefetched_findings.prefetch_related(
140-
"reviewers",
141-
)
142-
prefetched_findings = prefetched_findings.prefetch_related("reporter")
143-
prefetched_findings = prefetched_findings.prefetch_related(
144-
"jira_issue__jira_project__jira_instance",
145-
)
146-
prefetched_findings = prefetched_findings.prefetch_related("test__test_type")
147-
prefetched_findings = prefetched_findings.prefetch_related(
148-
"test__engagement__jira_project__jira_instance",
149-
)
150-
prefetched_findings = prefetched_findings.prefetch_related(
151-
"test__engagement__product__jira_project_set__jira_instance",
152-
)
153-
prefetched_findings = prefetched_findings.prefetch_related("found_by")
137+
# old code can arrive here with prods being a list because the query was already executed
138+
if not isinstance(findings, QuerySet):
139+
logger.debug("unable to prefetch because query was already executed")
140+
return findings
154141

155-
# for open/active findings the following 4 prefetches are not needed
156-
if prefetch_type != "open":
157-
prefetched_findings = prefetched_findings.prefetch_related(
158-
"risk_acceptance_set",
159-
)
160-
prefetched_findings = prefetched_findings.prefetch_related(
161-
"risk_acceptance_set__accepted_findings",
162-
)
163-
prefetched_findings = prefetched_findings.prefetch_related(
164-
"original_finding",
165-
)
166-
prefetched_findings = prefetched_findings.prefetch_related(
167-
"duplicate_finding",
168-
)
142+
prefetched_findings = findings.prefetch_related(
143+
"reviewers",
144+
"reporter",
145+
"jira_issue__jira_project__jira_instance",
146+
"test__test_type",
147+
"test__engagement__jira_project__jira_instance",
148+
"test__engagement__product__jira_project_set__jira_instance",
149+
"found_by",
150+
)
169151

170-
if exclude_untouched:
171-
# filter out noop reimport actions from finding status history
172-
prefetched_findings = prefetched_findings.prefetch_related(
173-
Prefetch(
174-
"test_import_finding_action_set",
175-
queryset=Test_Import_Finding_Action.objects.exclude(
176-
action=IMPORT_UNTOUCHED_FINDING,
177-
),
178-
),
179-
)
180-
else:
181-
prefetched_findings = prefetched_findings.prefetch_related(
182-
"test_import_finding_action_set",
183-
)
184-
"""
185-
we could try to prefetch only the latest note with SubQuery and OuterRef,
186-
but I'm getting that MySql doesn't support limits in subqueries.
187-
"""
188-
prefetched_findings = prefetched_findings.prefetch_related("notes")
189-
prefetched_findings = prefetched_findings.prefetch_related("tags")
190-
prefetched_findings = prefetched_findings.prefetch_related("endpoints")
191-
prefetched_findings = prefetched_findings.prefetch_related("status_finding")
192-
prefetched_findings = prefetched_findings.annotate(
193-
active_endpoint_count=Count(
194-
"status_finding__id", filter=Q(status_finding__mitigated=False),
195-
),
196-
)
197-
prefetched_findings = prefetched_findings.annotate(
198-
mitigated_endpoint_count=Count(
199-
"status_finding__id", filter=Q(status_finding__mitigated=True),
200-
),
201-
)
202-
prefetched_findings = prefetched_findings.prefetch_related("finding_group_set")
152+
# for open/active findings, the following 4 prefetches are not needed
153+
if prefetch_type != "open":
203154
prefetched_findings = prefetched_findings.prefetch_related(
204-
"test__engagement__product__members",
205-
)
206-
prefetched_findings = prefetched_findings.prefetch_related(
207-
"test__engagement__product__prod_type__members",
155+
"risk_acceptance_set",
156+
"risk_acceptance_set__accepted_findings",
157+
"original_finding",
158+
"duplicate_finding",
208159
)
160+
161+
if exclude_untouched:
162+
# filter out noop reimport actions from finding status history
209163
prefetched_findings = prefetched_findings.prefetch_related(
210-
"vulnerability_id_set",
164+
Prefetch(
165+
"test_import_finding_action_set",
166+
queryset=Test_Import_Finding_Action.objects.exclude(action=IMPORT_UNTOUCHED_FINDING),
167+
),
211168
)
212169
else:
213-
logger.debug("unable to prefetch because query was already executed")
170+
prefetched_findings = prefetched_findings.prefetch_related("test_import_finding_action_set")
171+
172+
prefetched_findings = prefetched_findings.prefetch_related(
173+
"notes",
174+
"tags",
175+
"endpoints",
176+
"status_finding",
177+
"finding_group_set",
178+
"test__engagement__product__members",
179+
"test__engagement__product__prod_type__members",
180+
"vulnerability_id_set",
181+
)
214182

215-
return prefetched_findings
183+
base_status = Endpoint_Status.objects.filter(finding_id=OuterRef("pk"))
184+
count_subquery = partial(build_count_subquery, group_field="finding_id")
185+
return prefetched_findings.annotate(
186+
active_endpoint_count=Coalesce(count_subquery(base_status.filter(mitigated=False)), Value(0)),
187+
mitigated_endpoint_count=Coalesce(count_subquery(base_status.filter(mitigated=True)), Value(0)),
188+
)
216189

217190

218191
def prefetch_for_similar_findings(findings):

0 commit comments

Comments
 (0)