diff --git a/dojo/tools/openscap/parser.py b/dojo/tools/openscap/parser.py index fa17d26f5dc..0d27a015608 100644 --- a/dojo/tools/openscap/parser.py +++ b/dojo/tools/openscap/parser.py @@ -1,7 +1,8 @@ import hashlib import re +import html2text +import defusedxml.ElementTree as ElementTree -from defusedxml.ElementTree import parse from django.core.exceptions import ValidationError from django.core.validators import validate_ipv46_address @@ -19,7 +20,7 @@ def get_description_for_scan_types(self, scan_type): return "Import Openscap Vulnerability Scan in XML formats." def get_findings(self, file, test): - tree = parse(file) + tree = ElementTree.parse(file) # get root of tree. root = tree.getroot() namespace = self.get_namespace(root) @@ -35,8 +36,22 @@ def get_findings(self, file, test): # read rules rules = {} for rule in root.findall(f".//{namespace}Rule"): + # get description and rationale (contains html codes) + desc_elem = rule.find(f"./{namespace}description") + rationale_elem = rule.find(f"./{namespace}rationale") + description_html = ElementTree.tostring(desc_elem, encoding="unicode", method="xml") if desc_elem is not None else "none" + rationale_html = ElementTree.tostring(rationale_elem, encoding="unicode", method="xml") if rationale_elem is not None else "none" + # remove xml-html namespace + description_html = re.sub(r"