
Commit 4e3c6f4

ms defender: do not cache parsed findings (#12493)
* ms defender: do not cache parsed findings
* update other parsers class variables
1 parent 05dc721 commit 4e3c6f4
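The heart of the fix: DefectDojo re-uses a single parser instance for every import of a given scan type, so a parser that accumulates results in `self.findings` returns stale findings on the second import. A minimal sketch of the anti-pattern and the fix (illustrative names, not the exact DefectDojo code):

```python
import io


def parse(file):
    """Stand-in for real report parsing; one 'finding' per non-empty line."""
    return [line.strip() for line in file if line.strip()]


class BuggyParser:
    def __init__(self):
        self.findings = []  # instance is re-used, so this list survives imports

    def get_findings(self, file, test):
        for item in parse(file):
            self.findings.append(item)
        return self.findings  # second import returns old + new findings


class FixedParser:
    def get_findings(self, file, test):
        findings = []  # fresh list per call; nothing cached on the instance
        for item in parse(file):
            findings.append(item)
        return findings


buggy, fixed = BuggyParser(), FixedParser()
print(len(buggy.get_findings(io.StringIO("a\nb\n"), None)))  # 2
print(len(buggy.get_findings(io.StringIO("a\nb\n"), None)))  # 4 <- the bug
print(len(fixed.get_findings(io.StringIO("a\nb\n"), None)))  # 2
print(len(fixed.get_findings(io.StringIO("a\nb\n"), None)))  # 2
```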

File tree: 5 files changed, +61 / -35 lines changed


docs/content/en/open_source/contributing/how-to-write-a-parser.md

Lines changed: 4 additions & 4 deletions
@@ -37,8 +37,8 @@ $ docker compose build --build-arg uid=1000
 |`unittests/scans/<parser_dir>/{many_vulns,no_vuln,one_vuln}.json` | Sample files containing meaningful data for unit tests. The minimal set.
 |`unittests/tools/test_<parser_name>_parser.py` | Unit tests of the parser.
 |`dojo/settings/settings.dist.py` | If you want to use a modern hashcode based deduplication algorithm
-|`docs/content/en/connecting_your_tools/parsers/<file/api>/<parser_file>.md` | Documentation, what kind of file format is required and how it should be obtained
-
+|`docs/content/en/connecting_your_tools/parsers/<file/api>/<parser_file>.md` | Documentation, what kind of file format is required and how it should be obtained
+
 
 ## Factory contract

@@ -57,6 +57,7 @@ Parsers are loaded dynamicaly with a factory pattern. To have your parser loaded
 3. `def get_description_for_scan_types(self, scan_type):` This function return a string used to provide some text in the UI (long description)
 4. `def get_findings(self, file, test)` This function return a list of findings
 6. If your parser have more than 1 scan_type (for detailled mode) you **MUST** implement `def set_mode(self, mode)` method
+7. The parser instance is re-used over all imports performed for this scan_type, so do not store any data at class level
 
 Example:

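For orientation, a skeleton that satisfies the contract above, including the new rule 7. Hedged: `ExampleTool` and its report shape are invented for illustration; only the method names come from the documented contract.

```python
import json

from dojo.models import Finding


class ExampleToolParser:

    """Hypothetical parser obeying the factory contract above."""

    def get_scan_types(self):
        return ["ExampleTool Scan"]

    def get_label_for_scan_types(self, scan_type):
        return "ExampleTool Scan"

    def get_description_for_scan_types(self, scan_type):
        return "Import findings from an ExampleTool JSON report."

    def get_findings(self, file, test):
        # Rule 7: keep all state local; this instance is re-used across imports.
        findings = []
        for item in json.load(file):
            findings.append(Finding(
                test=test,
                title=item["title"],
                severity=item.get("severity", "Info"),
            ))
        return findings
```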
@@ -145,7 +146,7 @@ Very bad example:
 Various file formats are handled through libraries. In order to keep DefectDojo slim and also don't extend the attack surface, keep the number of libraries used minimal and take other parsers as an example.
 
 #### defusedXML in favour of lxml
-As xml is by default an unsecure format, the information parsed from various xml output has to be parsed in a secure way. Within an evaluation, we determined that defusedXML is the library which we will use in the future to parse xml files in parsers as this library is rated more secure. Thus, we will only accept PRs with the defusedxml library.
+As xml is by default an unsecure format, the information parsed from various xml output has to be parsed in a secure way. Within an evaluation, we determined that defusedXML is the library which we will use in the future to parse xml files in parsers as this library is rated more secure. Thus, we will only accept PRs with the defusedxml library.
 
 ### Not all attributes are mandatory
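A minimal example of the mandated pattern. The report layout (`issue`/`name` tags) is hypothetical; only the `defusedxml` requirement comes from the paragraph above.

```python
from defusedxml import ElementTree  # hardened drop-in for xml.etree


def load_issue_names(file):
    # defusedxml rejects entity-expansion and external-entity attacks
    root = ElementTree.parse(file).getroot()
    return [issue.findtext("name", "") for issue in root.findall("issue")]
```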

@@ -366,4 +367,3 @@ Please add a new .md file in [`docs/content/en/connecting_your_tools/parsers`] w
 * A link to the scanner itself - (e.g. GitHub or vendor link)
 
 Here is an example of a completed Parser documentation page: [https://github.com/DefectDojo/django-DefectDojo/blob/master/docs/content/en/connecting_your_tools/parsers/file/acunetix.md](https://github.com/DefectDojo/django-DefectDojo/blob/master/docs/content/en/connecting_your_tools/parsers/file/acunetix.md)
-

dojo/tools/fortify/fpr_parser.py

Lines changed: 30 additions & 21 deletions
@@ -11,7 +11,7 @@
 logger = logging.getLogger(__name__)
 
 
-class FortifyFPRParser:
+class FortifyRelatedData:
     def __init__(self):
         self.descriptions: dict[str, DescriptionData] = {}
         self.snippets: dict[str, SnippetData] = {}
@@ -20,6 +20,11 @@ def __init__(self):
         self.suppressed: dict[str, bool] = {}
         self.threaded_comments: dict[str, list[str]] = {}
 
+
+class FortifyFPRParser:
+    def __init__(self):
+        pass
+
     def parse_fpr(self, filename, test):
         if str(filename.__class__) == "<class '_io.TextIOWrapper'>":
             input_zip = zipfile.ZipFile(filename.name, "r")
@@ -60,41 +65,44 @@ def identify_namespace(self, root: Element) -> dict[str, str]:
 
     def parse_related_data(self, root: Element, test: Test) -> None:
         """Parse the XML and generate a list of findings."""
+        related_data = FortifyRelatedData()
         for description in root.findall("Description", self.namespaces):
             class_id = description.attrib.get("classID")
             logger.debug(f"Description: {class_id}")
             if class_id:
-                self.descriptions[class_id] = self.parse_description_information(description)
+                related_data.descriptions[class_id] = self.parse_description_information(description)
 
         for snippet in root.find("Snippets", self.namespaces):
             snippet_id = snippet.attrib.get("id")
             logger.debug(f"Snippet: {snippet_id}")
             if snippet_id:
-                self.snippets[snippet_id] = self.parse_snippet_information(snippet)
+                related_data.snippets[snippet_id] = self.parse_snippet_information(snippet)
 
         for rule in root.find("EngineData", self.namespaces).find("RuleInfo", self.namespaces):
             rule_id = rule.attrib.get("id")
             logger.debug(f"Rule: {rule_id}")
             if rule_id:
-                self.rules[rule_id] = self.parse_rule_information(rule.find("MetaInfo", self.namespaces))
+                related_data.rules[rule_id] = self.parse_rule_information(rule.find("MetaInfo", self.namespaces))
+        return related_data
 
-    def parse_audit_log(self, audit_log: Element) -> None:
+    def add_audit_log(self, related_data, audit_log: Element) -> None:
         logger.debug("Parse audit log")
         if audit_log is None:
-            return
+            return related_data
 
         for issue in audit_log.find("IssueList", self.namespaces_audit_log).findall("Issue", self.namespaces_audit_log):
             instance_id = issue.attrib.get("instanceId")
             if instance_id:
                 suppressed_string = issue.attrib.get("suppressed")
                 suppressed = suppressed_string.lower() == "true" if suppressed_string else False
                 logger.debug(f"Issue: {instance_id} - Suppressed: {suppressed}")
-                self.suppressed[instance_id] = suppressed
+                related_data.suppressed[instance_id] = suppressed
 
                 threaded_comments = issue.find("ThreadedComments", self.namespaces_audit_log)
                 logger.debug(f"ThreadedComments: {threaded_comments}")
                 if threaded_comments is not None:
-                    self.threaded_comments[instance_id] = [self.get_comment_text(comment) for comment in threaded_comments.findall("Comment", self.namespaces_audit_log)]
+                    related_data.threaded_comments[instance_id] = [self.get_comment_text(comment) for comment in threaded_comments.findall("Comment", self.namespaces_audit_log)]
+        return related_data
 
     def get_comment_text(self, comment: Element) -> str:
         content = comment.findtext("Content", "", self.namespaces_audit_log)
@@ -107,8 +115,9 @@ def convert_vulnerabilities_to_findings(self, root: Element, audit_log: Element,
         """Convert the list of vulnerabilities to a list of findings."""
         """Try to mimic the logic from the xml parser"""
         """Future Improvement: share code between xml and fpr parser (it was split up earlier)"""
-        self.parse_related_data(root, test)
-        self.parse_audit_log(audit_log)
+        related_data = self.parse_related_data(root, test)
+        # add audit log information to related data
+        related_data = self.add_audit_log(related_data, audit_log)
 
         findings = []
         for vuln in root.find("Vulnerabilities", self.namespaces):
@@ -117,18 +126,18 @@ def convert_vulnerabilities_to_findings(self, root: Element, audit_log: Element,
             self.parse_class_information(vuln, vuln_data)
             self.parse_analysis_information(vuln, vuln_data)
 
-            snippet = self.snippets.get(vuln_data.snippet_id)
-            description = self.descriptions.get(vuln_data.class_id)
-            rule = self.rules.get(vuln_data.class_id)
+            snippet = related_data.snippets.get(vuln_data.snippet_id)
+            description = related_data.descriptions.get(vuln_data.class_id)
+            rule = related_data.rules.get(vuln_data.class_id)
 
             finding = Finding(test=test, static_finding=True)
 
-            finding.active, finding.false_p = self.compute_status(vuln_data)
+            finding.active, finding.false_p = self.compute_status(related_data, vuln_data)
             finding.title = self.format_title(vuln_data, snippet, description, rule)
             finding.description = self.format_description(vuln_data, snippet, description, rule)
             finding.mitigation = self.format_mitigation(vuln_data, snippet, description, rule)
             finding.severity = self.compute_severity(vuln_data, snippet, description, rule)
-            finding.impact = self.format_impact(vuln_data)
+            finding.impact = self.format_impact(related_data, vuln_data)
 
             finding.file_path = vuln_data.source_location_path
             finding.line = int(self.compute_line(vuln_data, snippet, description, rule))
@@ -302,22 +311,22 @@ def compute_severity(self, vulnerability, snippet, description, rule) -> str:
 
         return "Informational"
 
-    def format_impact(self, vuln_data) -> str:
+    def format_impact(self, related_data, vuln_data) -> str:
         """Format the impact of the vulnerability based on the threaded comments."""
-        logger.debug(f"Threaded comments: {self.threaded_comments}")
-        threaded_comments = self.threaded_comments.get(vuln_data.instance_id)
+        logger.debug(f"Threaded comments: {related_data.threaded_comments}")
+        threaded_comments = related_data.threaded_comments.get(vuln_data.instance_id)
         if not threaded_comments:
             return ""
 
         impact = "Threaded Comments:\n"
-        for comment in self.threaded_comments[vuln_data.instance_id]:
+        for comment in related_data.threaded_comments[vuln_data.instance_id]:
             impact += f"{comment}\n"
 
         return impact
 
-    def compute_status(self, vulnerability) -> tuple[bool, bool]:
+    def compute_status(self, related_data, vulnerability) -> tuple[bool, bool]:
         """Compute the status of the vulnerability based on the instance ID. Return active, false_p"""
-        if vulnerability.instance_id in self.suppressed:
+        if vulnerability.instance_id in related_data.suppressed:
             return False, True
         return True, False
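The shape of this refactor, reduced to its essentials: per-parse lookup tables move off the re-used parser instance into a data object that the entry point creates and threads through the helpers. A simplified, runnable sketch under those assumptions (class and method names are trimmed stand-ins for the diff above):

```python
class RelatedData:
    """Per-parse state; a fresh instance is built on every import."""

    def __init__(self):
        self.suppressed: dict[str, bool] = {}


class StatelessParser:
    def convert(self, issues):
        related_data = self.collect_related_data(issues)  # fresh state per call
        # helpers read from related_data instead of self
        return [i for i, flag in related_data.suppressed.items() if not flag]

    def collect_related_data(self, issues):
        related_data = RelatedData()
        for issue_id, suppressed in issues:
            related_data.suppressed[issue_id] = suppressed
        return related_data


parser = StatelessParser()
print(parser.convert([("A", False), ("B", True)]))  # ['A']
print(parser.convert([("C", False)]))               # ['C'] <- no leak from run 1
```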

dojo/tools/ms_defender/parser.py

Lines changed: 10 additions & 10 deletions
@@ -11,9 +11,6 @@ class MSDefenderParser:
 
     """Import from MSDefender findings"""
 
-    def __init__(self):
-        self.findings = []
-
     def get_scan_types(self):
         return ["MSDefender Parser"]
 

@@ -24,11 +21,12 @@ def get_description_for_scan_types(self, scan_type):
         return ("MSDefender findings can be retrieved using the REST API")
 
     def get_findings(self, file, test):
+        findings = []
         if str(file.name).endswith(".json"):
             vulnerabilityfile = json.load(file)
             vulnerabilitydata = vulnerabilityfile["value"]
             for vulnerability in vulnerabilitydata:
-                self.process_json(vulnerability)
+                findings.append(self.process_json(vulnerability))
         elif str(file.name).endswith(".zip"):
             if str(file.__class__) == "<class '_io.TextIOWrapper'>":
                 input_zip = zipfile.ZipFile(file.name, "r")
@@ -51,27 +49,29 @@ def get_findings(self, file, test):
             vulnerabilities = []
             machines = {}
             for vulnerabilityfile in vulnerabilityfiles:
+                logger.debug("Loading vulnerability file: %s", vulnerabilityfile)
                 output = json.loads(zipdata[vulnerabilityfile].decode("ascii"))["value"]
                 for data in output:
                     vulnerabilities.append(data)
             for machinefile in machinefiles:
+                logger.debug("Loading machine file: %s", machinefile)
                 output = json.loads(zipdata[machinefile].decode("ascii"))["value"]
                 for data in output:
                     machines[data.get("id")] = data
             for vulnerability in vulnerabilities:
                 try:
                     machine = machines.get(vulnerability["machineId"], None)
                     if machine is not None:
-                        self.process_zip(vulnerability, machine)
+                        findings.append(self.process_json_with_machine_info(vulnerability, machine))
                     else:
                         logger.debug("fallback to process without machine: no machine id")
-                        self.process_json(vulnerability)
+                        findings.append(self.process_json(vulnerability))
                 except (IndexError, KeyError):
                     logger.exception("fallback to process without machine: exception")
                     self.process_json(vulnerability)
         else:
             return []
-        return self.findings
+        return findings
 
     def process_json(self, vulnerability):
         description = ""
@@ -95,10 +95,10 @@ def process_json(self, vulnerability):
         if vulnerability["cveId"] is not None:
             finding.unsaved_vulnerability_ids = []
             finding.unsaved_vulnerability_ids.append(vulnerability["cveId"])
-        self.findings.append(finding)
         finding.unsaved_endpoints = []
+        return finding
 
-    def process_zip(self, vulnerability, machine):
+    def process_json_with_machine_info(self, vulnerability, machine):
         description = ""
         description += "cveId: " + str(vulnerability.get("cveId", "")) + "\n"
         description += "machineId: " + str(vulnerability.get("machineId", "")) + "\n"
@@ -142,14 +142,14 @@ def process_zip(self, vulnerability, machine):
         if "cveId" in vulnerability:
             finding.unsaved_vulnerability_ids = []
             finding.unsaved_vulnerability_ids.append(vulnerability["cveId"])
-        self.findings.append(finding)
         finding.unsaved_endpoints = []
         if "computerDnsName" in machine and machine["computerDnsName"] is not None:
             finding.unsaved_endpoints.append(Endpoint(host=str(machine["computerDnsName"]).replace(" ", "").replace("(", "_").replace(")", "_")))
         if "lastIpAddress" in machine and machine["lastIpAddress"] is not None:
             finding.unsaved_endpoints.append(Endpoint(host=str(machine["lastIpAddress"])))
         if "lastExternalIpAddress" in machine and machine["lastExternalIpAddress"] is not None:
             finding.unsaved_endpoints.append(Endpoint(host=str(machine["lastExternalIpAddress"])))
+        return finding
 
     def severity_check(self, severity_input):
         if severity_input in {"Informational", "Low", "Medium", "High", "Critical"}:

dojo/tools/ptart/retest_parser.py

Lines changed: 1 addition & 0 deletions
@@ -20,6 +20,7 @@ def __init__(self):
         self.cvss_type = None
 
     def get_test_data(self, tree):
+        self.cvss_type = None
         if "retests" in tree:
             self.cvss_type = tree.get("cvss_type", None)
         retests = tree["retests"]
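This one-line change takes the other route to the same rule: the instance attribute stays, but it is re-initialised at the top of every call so nothing leaks between imports. The pattern in isolation (a sketch; the `tree` shape and return value are assumptions mirroring the diff):

```python
class RetestParser:
    def __init__(self):
        self.cvss_type = None

    def get_test_data(self, tree):
        self.cvss_type = None  # reset per call; the instance is re-used
        if "retests" in tree:
            self.cvss_type = tree.get("cvss_type", None)
        return tree.get("retests", [])
```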

unittests/tools/test_ms_defender_parser.py

Lines changed: 16 additions & 0 deletions
@@ -46,6 +46,22 @@ def test_parser_defender_zip(self):
             endpoint.clean()
         self.assertEqual("1.1.1.1", finding.unsaved_endpoints[0].host)
 
+    def test_parser_defender_zip_repeated(self):
+        """
+        It was found that the defender parser was caching findings across different runs of the parser.
+        This test might be a good default test for any parser to make sure nothing is cached.
+        """
+        testfile = (get_unit_tests_scans_path("ms_defender") / "defender.zip").open(encoding="utf-8")
+        parser = MSDefenderParser()
+        findings = parser.get_findings(testfile, Test())
+        testfile.close()
+        self.assertEqual(4, len(findings))
+
+        testfile_repeated = (get_unit_tests_scans_path("ms_defender") / "defender.zip").open(encoding="utf-8")
+        findings_repeated = parser.get_findings(testfile_repeated, Test())
+        testfile_repeated.close()
+        self.assertEqual(4, len(findings_repeated))
+
     def test_parser_defender_wrong_machines_zip(self):
         testfile = (get_unit_tests_scans_path("ms_defender") / "defender_wrong_machines.zip").open(encoding="utf-8")
         parser = MSDefenderParser()
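As the docstring suggests, the same check generalises to any parser. A hedged template (the helper name is invented; `Test` and `get_unit_tests_scans_path` come from the existing test module):

```python
def assert_no_cached_findings(self, parser, sample_path, expected_count):
    """Run one parser instance twice; both runs must yield the same count."""
    for _ in range(2):
        with sample_path.open(encoding="utf-8") as testfile:
            findings = parser.get_findings(testfile, Test())
        self.assertEqual(expected_count, len(findings))
```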

0 commit comments
