Skip to content

Commit d417cd1

Browse files
dogboat and DefectDojo release bot authored
Finding hash/dedupe changes (#10386)
* dedupe-help Move logic to set Finding hash code to its own method * dedupe-help rework set_hash_code method to accept dedupe_option * Update versions in application files * Revert "Update versions in application files" This reverts commit 7ee4bfa. * dedupe-help reorder method to make linter happy * dedupe-help Rework finding hash set/dedupe to attempt to load methods based on settings and fall back to existing implementations as defaults * dedupe-help add helper method to load custom methods and use it * dedupe-help bug in load custom helper method * dedupe-help Linter fix (import ordering) * dedupe-help Update default_importer to handle .values() call on findings set within close old findings method * dedupe-help extract get_(re)importer methods from engagement/test (re)import views into a separate method * dedupe-help extract reimport dedupe alg determination into its own method * dedupe-help refactor where custom methods for hashing/dedupe are called to minimize changes to existing calls * dedupe-help linter fixes --------- Co-authored-by: DefectDojo release bot <dojo-release-bot@users.noreply.github.com>
1 parent cccf8a7 commit d417cd1

File tree

6 files changed

+70
-13
lines changed

6 files changed

+70
-13
lines changed

dojo/engagement/views.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
TypedNoteForm,
6868
UploadThreatForm,
6969
)
70+
from dojo.importers.base_importer import BaseImporter
7071
from dojo.importers.default_importer import DefaultImporter
7172
from dojo.models import (
7273
Check_List,
@@ -921,6 +922,15 @@ def create_engagement(
921922
# Return the engagement
922923
return engagement
923924

925+
def get_importer(
926+
self,
927+
context: dict,
928+
) -> BaseImporter:
929+
"""
930+
Gets the importer to use
931+
"""
932+
return DefaultImporter(**context)
933+
924934
def import_findings(
925935
self,
926936
context: dict,
@@ -929,7 +939,7 @@ def import_findings(
929939
Attempt to import with all the supplied information
930940
"""
931941
try:
932-
importer_client = DefaultImporter(**context)
942+
importer_client = self.get_importer(context)
933943
context["test"], _, finding_count, closed_finding_count, _, _, _ = importer_client.process_scan(
934944
context.pop("scan", None),
935945
)

dojo/importers/default_importer.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def process_scan(
108108
new_findings = self.determine_process_method(self.parsed_findings, **kwargs)
109109
# Close any old findings in the processed list if the the user specified for that
110110
# to occur in the form that is then passed to the kwargs
111-
closed_findings = self.close_old_findings(self.test.finding_set.values(), **kwargs)
111+
closed_findings = self.close_old_findings(self.test.finding_set.all(), **kwargs)
112112
# Update the timestamps of the test object by looking at the findings imported
113113
self.update_timestamps()
114114
# Update the test meta
@@ -247,11 +247,12 @@ def close_old_findings(
247247
logger.debug("REIMPORT_SCAN: Closing findings no longer present in scan report")
248248
# Close old active findings that are not reported by this scan.
249249
# Refactoring this to only call test.finding_set.values() once.
250+
findings = findings.values()
250251
mitigated_hash_codes = []
251252
new_hash_codes = []
252253
for finding in findings:
253254
new_hash_codes.append(finding["hash_code"])
254-
if getattr(finding, "is_mitigated", None):
255+
if finding.get("is_mitigated", None):
255256
mitigated_hash_codes.append(finding["hash_code"])
256257
for hash_code in new_hash_codes:
257258
if hash_code == finding["hash_code"]:

dojo/importers/default_reimporter.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,13 @@ def process_scan(
147147
test_import_history,
148148
)
149149

150+
def determine_deduplication_algorithm(self) -> str:
151+
"""
152+
Determines what dedupe algorithm to use for the Test being processed.
153+
:return: A string representing the dedupe algorithm to use.
154+
"""
155+
return self.test.deduplication_algorithm
156+
150157
def process_findings(
151158
self,
152159
parsed_findings: List[Finding],
@@ -160,7 +167,7 @@ def process_findings(
160167
at import time
161168
"""
162169

163-
self.deduplication_algorithm = self.test.deduplication_algorithm
170+
self.deduplication_algorithm = self.determine_deduplication_algorithm()
164171
self.original_items = list(self.test.finding_set.all())
165172
self.new_items = []
166173
self.reactivated_items = []

dojo/models.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2640,14 +2640,7 @@ def save(self, dedupe_option=True, rules_option=True, product_grading_option=Tru
26402640
except Exception as ex:
26412641
logger.error("Can't compute cvssv3 score for finding id %i. Invalid cvssv3 vector found: '%s'. Exception: %s", self.id, self.cvssv3, ex)
26422642

2643-
# Finding.save is called once from serializers.py with dedupe_option=False because the finding is not ready yet, for example the endpoints are not built
2644-
# It is then called a second time with dedupe_option defaulted to true; now we can compute the hash_code and run the deduplication
2645-
if dedupe_option:
2646-
if (self.hash_code is not None):
2647-
deduplicationLogger.debug("Hash_code already computed for finding")
2648-
else:
2649-
self.hash_code = self.compute_hash_code()
2650-
deduplicationLogger.debug("Hash_code computed for finding: %s", self.hash_code)
2643+
self.set_hash_code(dedupe_option)
26512644

26522645
if self.pk is None:
26532646
# We enter here during the first call from serializers.py
@@ -3346,6 +3339,20 @@ def inherit_tags(self, potentially_existing_tags):
33463339
def violates_sla(self):
33473340
return (self.sla_expiration_date and self.sla_expiration_date < timezone.now().date())
33483341

3342+
def set_hash_code(self, dedupe_option):
3343+
from dojo.utils import get_custom_method
3344+
if hash_method := get_custom_method("FINDING_HASH_METHOD"):
3345+
hash_method(self, dedupe_option)
3346+
else:
3347+
# Finding.save is called once from serializers.py with dedupe_option=False because the finding is not ready yet, for example the endpoints are not built
3348+
# It is then called a second time with dedupe_option defaulted to true; now we can compute the hash_code and run the deduplication
3349+
if dedupe_option:
3350+
if self.hash_code is not None:
3351+
deduplicationLogger.debug("Hash_code already computed for finding")
3352+
else:
3353+
self.hash_code = self.compute_hash_code()
3354+
deduplicationLogger.debug("Hash_code computed for finding: %s", self.hash_code)
3355+
33493356

33503357
class FindingAdmin(admin.ModelAdmin):
33513358
# For efficiency with large databases, display many-to-many fields with raw

dojo/test/views.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
TestForm,
4242
TypedNoteForm,
4343
)
44+
from dojo.importers.base_importer import BaseImporter
4445
from dojo.importers.default_reimporter import DefaultReImporter
4546
from dojo.models import (
4647
IMPORT_UNTOUCHED_FINDING,
@@ -979,6 +980,15 @@ def process_jira_form(
979980
context["push_to_jira"] = push_all_jira_issues or (form and form.cleaned_data.get("push_to_jira"))
980981
return None
981982

983+
def get_reimporter(
984+
self,
985+
context: dict,
986+
) -> BaseImporter:
987+
"""
988+
Gets the reimporter to use
989+
"""
990+
return DefaultReImporter(**context)
991+
982992
def reimport_findings(
983993
self,
984994
context: dict,
@@ -987,7 +997,7 @@ def reimport_findings(
987997
Attempt to import with all the supplied information
988998
"""
989999
try:
990-
importer_client = DefaultReImporter(**context)
1000+
importer_client = self.get_reimporter(context)
9911001
(
9921002
context["test"],
9931003
finding_count,

dojo/utils.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
import binascii
22
import calendar as tcalendar
33
import hashlib
4+
import importlib
45
import logging
56
import mimetypes
67
import os
78
import re
89
from calendar import monthrange
910
from datetime import date, datetime, timedelta
1011
from math import pi, sqrt
12+
from typing import Callable, Optional
1113

1214
import bleach
1315
import crum
@@ -295,6 +297,9 @@ def do_dedupe_finding_task(new_finding, *args, **kwargs):
295297

296298

297299
def do_dedupe_finding(new_finding, *args, **kwargs):
300+
if dedupe_method := get_custom_method("FINDING_DEDUPE_METHOD"):
301+
return dedupe_method(new_finding, *args, **kwargs)
302+
298303
try:
299304
enabled = System_Settings.objects.get(no_cache=True).enable_deduplication
300305
except System_Settings.DoesNotExist:
@@ -2594,6 +2599,23 @@ def get_open_findings_burndown(product):
25942599
return past_90_days
25952600

25962601

2602+
def get_custom_method(setting_name: str) -> Optional[Callable]:
2603+
"""
2604+
Attempts to load and return the method specified by fully-qualified name at the given setting.
2605+
2606+
:param setting_name: The name of the setting that holds the fqname of the Python method we want to load
2607+
:return: The callable if it was able to be loaded, else None
2608+
"""
2609+
if fq_name := getattr(settings, setting_name, None):
2610+
try:
2611+
mn, _, fn = fq_name.rpartition(".")
2612+
m = importlib.import_module(mn)
2613+
return getattr(m, fn)
2614+
except ModuleNotFoundError:
2615+
pass
2616+
return None
2617+
2618+
25972619
def generate_file_response(file_object: FileUpload) -> FileResponse:
25982620
"""Serve an uploaded file in a uniformed way.
25992621

0 commit comments

Comments (0)