DefectDojo
diff --git a/‎docs/content/en/integrations/parsers/file/appcheck_web_application_scanner.md
Lines changed: 8 additions & 0 deletions b/‎docs/content/en/integrations/parsers/file/appcheck_web_application_scanner.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎dojo/settings/.settings.dist.py.sha256sum
Lines changed: 1 addition & 1 deletion b/‎dojo/settings/.settings.dist.py.sha256sum
Lines changed: 1 addition & 1 deletion
diff --git a/‎dojo/settings/settings.dist.py
Lines changed: 2 additions & 0 deletions b/‎dojo/settings/settings.dist.py
Lines changed: 2 additions & 0 deletions
diff --git a/‎dojo/tools/appcheck_web_application_scanner/__init__.py b/‎dojo/tools/appcheck_web_application_scanner/__init__.py
diff --git a/‎dojo/tools/appcheck_web_application_scanner/engines/__init__.py b/‎dojo/tools/appcheck_web_application_scanner/engines/__init__.py
diff --git a/‎dojo/tools/appcheck_web_application_scanner/engines/appcheck.py
Lines changed: 29 additions & 0 deletions b/‎dojo/tools/appcheck_web_application_scanner/engines/appcheck.py
Lines changed: 29 additions & 0 deletions
diff --git a/‎dojo/tools/appcheck_web_application_scanner/engines/base.py
Lines changed: 292 additions & 0 deletions b/‎dojo/tools/appcheck_web_application_scanner/engines/base.py
Lines changed: 292 additions & 0 deletions
diff --git a/‎dojo/tools/appcheck_web_application_scanner/engines/nmap.py
Lines changed: 31 additions & 0 deletions b/‎dojo/tools/appcheck_web_application_scanner/engines/nmap.py
Lines changed: 31 additions & 0 deletions
diff --git a/‎dojo/tools/appcheck_web_application_scanner/engines/openvas.py
Lines changed: 10 additions & 0 deletions b/‎dojo/tools/appcheck_web_application_scanner/engines/openvas.py
Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,8 @@
+---
+title: "AppCheck Web Application Scanner"
+toc_hide: true
+---
+Accepts AppCheck Web Application Scanner output in .json format.
+
+### Sample Scan Data
+Sample AppCheck Web Application Scanner scans can be found [here](https://github.com/DefectDojo/django-DefectDojo/tree/master/unittests/scans/appcheck_web_application_scanner).
@@ -1 +1 @@
-4483a2efbe3cf1bf1c96c333f9bfc65ffe82a2fb20ed86eb4c4f6726cf41fa94
+66ee64ade0a61b090efd059a63e39f11683bd53e33bd25b8d41009cbbde06073
@@ -1272,6 +1272,7 @@ def saml2_attrib_map_format(dict):
     "Kubescape JSON Importer": ["title", "component_name"],
     "Kiuwan SCA Scan": ["description", "severity", "component_name", "component_version", "cwe"],
     "Rapplex Scan": ["title", "endpoints", "severity"],
+    "AppCheck Web Application Scanner": ["title", "severity"],
 }
 
 # Override the hardcoded settings here via the env var
@@ -1493,6 +1494,7 @@ def saml2_attrib_map_format(dict):
     "Kubescape JSON Importer": DEDUPE_ALGO_HASH_CODE,
     "Kiuwan SCA Scan": DEDUPE_ALGO_HASH_CODE,
     "Rapplex Scan": DEDUPE_ALGO_HASH_CODE,
+    "AppCheck Web Application Scanner": DEDUPE_ALGO_HASH_CODE,
 }
 
 # Override the hardcoded settings here via the env var
 
@@ -0,0 +1,29 @@
+import re
+from typing import Union
+
+from dojo.models import Finding
+from dojo.tools.appcheck_web_application_scanner.engines.base import BaseEngineParser
+
+
+class AppCheckScanningEngineParser(BaseEngineParser):
+    """
+    Parser for data from the (proprietary?) AppCheck scanning engine.
+
+    Results from this engine may include request/response data nested in the 'details' entry. This extracts those values
+    and stores them in the Finding unsaved_request/unsaved_response attributes.
+    """
+    SCANNING_ENGINE = "NewAppCheckScannerMultiple"
+
+    # Matches a 'Messages' blob shaped like "--->\n\n<request>\n\n<---\n\n<response>"; DOTALL lets both captured
+    # groups span multiple lines
+    REQUEST_RESPONSE_PATTERN = re.compile(r"^--->\n\n(.+)\n\n<---\n\n(.+)$", re.DOTALL)
+
+    def extract_request_response(self, finding: Finding, value: dict[str, list[str]]) -> None:
+        """
+        Pulls a request/response pair out of the 'Messages' entry of value, if there is one.
+
+        On a match, the 'Messages' entry is removed from value (note: value is modified in place) and the stripped
+        request and response are stored on finding.unsaved_request / finding.unsaved_response. When value is not a
+        dict, or 'Messages' is missing, not a string, or does not match the expected layout, nothing is changed.
+        """
+        # The base parser accepts a non-dict 'details' value (it stringifies it), so don't assume .get() exists here
+        if not isinstance(value, dict):
+            return
+        messages = value.get("Messages")
+        # Only a string can be matched against the pattern; anything else is left for the description
+        if not isinstance(messages, str):
+            return
+        if rr_details := self.REQUEST_RESPONSE_PATTERN.findall(messages):
+            # Remove the 'Messages' entry since we've parsed it as a request/response pair; don't need to add it to the
+            # Finding description
+            value.pop("Messages")
+            finding.unsaved_request, finding.unsaved_response = (d.strip() for d in rr_details[0])
+
+    def parse_details(self, finding: Finding, value: dict[str, Union[str, dict[str, list[str]]]]) -> None:
+        """
+        Extracts any request/response pair from value, then defers to the base class for the remaining entries.
+        """
+        self.extract_request_response(finding, value)
+        # super's version adds everything else to the description field
+        return super().parse_details(finding, value)
@@ -0,0 +1,292 @@
+import re
+from typing import Any, Optional, Tuple, Union
+
+import cvss.parser
+import dateutil.parser
+from cpe import CPE
+from django.core.exceptions import ImproperlyConfigured
+
+from dojo.models import Endpoint, Finding
+
+#######
+# Helpers/Utils
+#######
+
+# Pattern for stripping markup from entry values -- removes "[[markup]]" and "[[" and "]]"
+MARKUP_STRIPPING_PATTERN = re.compile(r"\[\[markup\]\]|\[\[|\]\]")
+
+
+def strip_markup(value: Any) -> Any:
+    """
+    Strips out "markup" from value
+
+    For a string, every "[[markup]]", "[[" and "]]" token is removed and surrounding whitespace is trimmed. Anything
+    that is not a string (None, numbers, nested lists/dicts) is returned unchanged instead of being handed to the
+    regex, which would raise a TypeError.
+    """
+    if isinstance(value, str):
+        return MARKUP_STRIPPING_PATTERN.sub("", value).strip()
+    return value
+
+
+#######
+# Field parsing helper classes
+#######
+class FieldType:
+    """
+    Common base for the per-field handlers used by the engine parsers.
+
+    An instance is callable; calling it forwards to .handle(), which subclasses are expected to implement.
+
+    Targets are named by plain strings, so nothing is type-checked up front. To compensate, subclasses should implement
+    check() so that it raises an ImproperlyConfigured exception when the configured target does not make sense (as far
+    as that can be determined at the time check() is called).
+    """
+    def __init__(self, target_name):
+        # Name of whatever this handler acts on; its interpretation is left to the subclass
+        self.target_name = target_name
+
+    def __call__(self, engine_class, finding, value):
+        self.handle(engine_class, finding, value)
+
+    def handle(self, engine_class, finding, value):
+        """Does nothing in the base class; subclasses apply value to finding here."""
+
+    def check(self, engine_parser):
+        """Does nothing in the base class; subclasses validate their configuration here."""
+
+
+class Attribute(FieldType):
+    """
+    Class for a field that maps directly from one in the input data to a Finding attribute. Initialized with a Finding
+    attribute name, when called sets the value of that attribute to the passed-in value.
+    """
+    def handle(self, engine_class, finding, value):
+        """Stores value, unchanged, on finding under the configured attribute name."""
+        setattr(finding, self.target_name, value)
+
+    def check(self, engine_parser):
+        """
+        Raises ImproperlyConfigured if the Finding class has no attribute named target_name.
+        """
+        if not hasattr(Finding, self.target_name):
+            # The sentence's period belongs outside the quotes so the quoted text is exactly the attribute name
+            msg = f"Finding does not have attribute '{self.target_name}'."
+            raise ImproperlyConfigured(msg)
+
+
+class DeMarkupedAttribute(Attribute):
+    """
+    An Attribute whose incoming value is passed through strip_markup() before being set on the Finding.
+    """
+    def handle(self, engine_class, finding, value):
+        cleaned = strip_markup(value)
+        super().handle(engine_class, finding, cleaned)
+
+
+class Method(FieldType):
+    """
+    Class for a field that requires a method to process it. Initialized with a method name, when called it invokes the
+    method on the passed-in engine parser, passing in a Finding and value. It's expected that the method will update
+    the Finding as it sees fit (i.e., this class does not modify the Finding)
+    """
+    def handle(self, engine_parser, finding, value):
+        """Looks up the configured method on engine_parser and calls it with (finding, value)."""
+        getattr(engine_parser, self.target_name)(finding, value)
+
+    def check(self, engine_parser):
+        """
+        Raises ImproperlyConfigured if engine_parser has no callable attribute named target_name.
+        """
+        if not callable(getattr(engine_parser, self.target_name, None)):
+            # The sentence's period belongs outside the quotes so the quoted text is exactly the method name
+            msg = f"{type(engine_parser).__name__} does not have method '{self.target_name}()'."
+            raise ImproperlyConfigured(msg)
+
+
+class BaseEngineParser:
+    """
+    Parser for data shared by all engines used by AppCheck, as well as data from an unknown/unspecified engine.
+
+    Directly mapped attributes, from JSON object -> Finding attribute:
+        * _id -> unique_id_from_tool
+        * cvss_v3_vector -> cvssv3
+        * epss_base_score -> epss_score
+
+    Directly mapped attributes but value is stripped of "markup" first, JSON Object -> Finding attribute:
+        * title -> title
+        * description -> description
+        * solution -> mitigation
+
+    Data mapped with a bit of tinkering, JSON object -> Finding attribute:
+        * first_detected_at -> date (parse date)
+        * status -> active/false_p/risk_accepted (depending on value)
+        * cves -> unsaved_vulnerability_ids (vulnerability_ids)
+        * cpe -> component name/version
+        * cvss_vector -> severity (determined using CVSS package)
+        * notes -> appended to Finding description
+        * details -> appended to Finding description
+
+    Child classes can override the _ENGINE_FIELDS_MAP dictionary to support extended/different functionality as so
+    desired, without having to change/copy the common field parsing described above.
+    """
+    SCANNING_ENGINE = "Unknown"
+
+    # Field handling common to all findings returned by AppCheck
+    _COMMON_FIELDS_MAP: dict[str, FieldType] = {
+        "_id": Attribute("unique_id_from_tool"),
+        "cvss_v3_vector": Attribute("cvssv3"),
+        "epss_base_score": Attribute("epss_score"),
+        "title": DeMarkupedAttribute("title"),
+        "description": DeMarkupedAttribute("description"),
+        "solution": DeMarkupedAttribute("mitigation"),
+        "first_detected_at": Method("parse_initial_date"),
+        "status": Method("parse_status"),
+        "cves": Method("parse_cves"),
+        "cpe": Method("parse_components"),
+        "cvss_vector": Method("parse_severity"),
+        # These should be listed after the 'description' entry; they append to it
+        "notes": Method("parse_notes"),
+        "details": Method("parse_details")}
+
+    # Field handling specific to a given scanning_engine AppCheck uses
+    _ENGINE_FIELDS_MAP: dict[str, FieldType] = {}
+
+    def __init__(self):
+        """
+        Runs each configured field handler's check() against this parser, so a bad field map surfaces at construction.
+        """
+        # Do a basic check that the fields we'll process over are valid
+        for handler in self.get_engine_fields().values():
+            handler.check(self)
+
+    #####
+    # For parsing the initial finding datetime to a date format pleasing to Finding
+    #####
+    def get_date(self, value: str) -> Optional[str]:
+        """
+        Parses a date/datetime string and returns just its date part as a string, or None if it cannot be parsed.
+        """
+        try:
+            return str(dateutil.parser.parse(value).date())
+        # Besides ParserError, dateutil raises OverflowError for out-of-range dates and TypeError for non-string input;
+        # none of these should abort processing of the finding
+        except (dateutil.parser.ParserError, OverflowError, TypeError):
+            return None
+
+    def parse_initial_date(self, finding: Finding, value: str) -> None:
+        """
+        Sets finding.date from value when value parses as a date; otherwise leaves finding.date untouched.
+        """
+        # Don't clobber whatever date the Finding already carries with None when the input is unparseable
+        if (parsed := self.get_date(value)) is not None:
+            finding.date = parsed
+
+    #####
+    # For parsing CVEs
+    #####
+    CVE_PATTERN = re.compile("CVE-[0-9]+-[0-9]+", re.IGNORECASE)
+
+    def is_cve(self, c: str) -> bool:
+        """
+        True only when c is a string that matches CVE_PATTERN in its entirety.
+        """
+        if not isinstance(c, str):
+            return False
+        return self.CVE_PATTERN.fullmatch(c) is not None
+
+    def parse_cves(self, finding: Finding, value: list[str]) -> None:
+        """
+        Stores the upper-cased entries of value that look like CVE IDs in finding.unsaved_vulnerability_ids.
+
+        Entries that fail is_cve() are dropped.
+        """
+        # A lone string would otherwise be iterated character by character and silently yield no IDs
+        candidates = [value] if isinstance(value, str) else value
+        finding.unsaved_vulnerability_ids = [c.upper() for c in candidates if self.is_cve(c)]
+
+    #####
+    # Handles setting various status flags on the Finding
+    #####
+    def parse_status(self, finding: Finding, value: str) -> None:
+        """
+        Translates a status string (compared case-insensitively) into the matching flag on finding.
+
+        Statuses that are not listed below leave finding unchanged.
+        """
+        # Possible values (best guess): unfixed (the initial value), fixed, false_positive, and acceptable_risk
+        flag_for_status = {
+            "fixed": ("active", False),
+            "false_positive": ("false_p", True),
+            "acceptable_risk": ("risk_accepted", True),
+        }
+        flag = flag_for_status.get(value.lower())
+        if flag is not None:
+            attribute, flag_value = flag
+            setattr(finding, attribute, flag_value)
+
+    #####
+    # For severity (extracted from cvss vector)
+    #####
+    def get_severity(self, value: str) -> Optional[str]:
+        """
+        Derives a severity name from the CVSS vector(s) found in value.
+
+        Uses the first vector returned by cvss.parser.parse_cvss_from_text() and the first entry of its severities(),
+        title-cased. Returns None when no vector is found or the name is not one of Finding.SEVERITIES.
+        """
+        parsed_vectors = cvss.parser.parse_cvss_from_text(value)
+        if not parsed_vectors:
+            return None
+        candidate = parsed_vectors[0].severities()[0].title()
+        return candidate if candidate in Finding.SEVERITIES else None
+
+    def parse_severity(self, finding: Finding, value: str) -> None:
+        """
+        Sets finding.severity from the CVSS vector in value; does nothing when no severity can be determined.
+        """
+        severity = self.get_severity(value)
+        if severity:
+            finding.severity = severity
+
+    #####
+    # For parsing component data
+    #####
+    def parse_cpe(self, cpe_str: str) -> Tuple[Optional[str], Optional[str]]:
+        """
+        Extracts (product, version) from a CPE string.
+
+        Each element is the first non-empty value the CPE object reports, or None. (None, None) is returned for an
+        empty cpe_str and for one the cpe package cannot parse.
+        """
+        if not cpe_str:
+            return None, None
+        try:
+            cpe_obj = CPE(cpe_str)
+        # NOTE(review): the cpe package is understood to raise NotImplementedError for an unrecognised CPE version and
+        # ValueError for a malformed name; a bad component string shouldn't abort the whole finding
+        except (NotImplementedError, ValueError):
+            return None, None
+        products = cpe_obj.get_product()
+        versions = cpe_obj.get_version()
+        return (
+            (products[0] if products else None) or None,
+            (versions[0] if versions else None) or None,
+        )
+
+    def parse_components(self, finding: Finding, value: list[str]) -> None:
+        """
+        Sets finding.component_name / finding.component_version from the first CPE entry in value.
+        """
+        # Only use the first entry; a bare string is treated as that single entry rather than indexed by character
+        first_cpe = value if isinstance(value, str) else value[0]
+        finding.component_name, finding.component_version = self.parse_cpe(first_cpe)
+
+    #####
+    # For parsing additional description-related entries (description, notes, and details)
+    #####
+    def format_additional_description(self, section: str, value: str) -> str:
+        """
+        Renders one extra description section as "**<section>**: <value with markup stripped>".
+        """
+        # Entries taken from 'details' may be nested dicts/lists; stringify them so markup stripping can't raise
+        text = value if isinstance(value, str) else str(value)
+        return f"**{section}**: {strip_markup(text)}"
+
+    def append_description(self, finding: Finding, addendum: dict[str, str]) -> None:
+        """
+        Appends one formatted section per addendum entry to finding.description.
+
+        Sections are separated from each other, and from any existing description text, by a blank line. An empty
+        addendum leaves the description untouched.
+        """
+        if not addendum:
+            return
+        sections = "\n\n".join(self.format_additional_description(k, v) for k, v in addendum.items())
+        separator = "\n\n" if finding.description else ""
+        finding.description = finding.description + separator + sections
+
+    def parse_notes(self, finding: Finding, value: str) -> None:
+        """
+        Appends value to the Finding description as a "Notes" section.
+        """
+        notes_section = {"Notes": value}
+        self.append_description(finding, notes_section)
+
+    def extract_details(self, value: Union[str, dict[str, Union[str, dict[str, list[str]]]]]) -> dict[str, str]:
+        """
+        Normalizes a 'details' value into a dict of description sections.
+
+        A dict is shallow-copied without its "_meta" entry (the input is not modified); anything else is stringified
+        and returned under the single key "Details".
+        """
+        if not isinstance(value, dict):
+            return {"Details": str(value)}
+        details = dict(value)
+        details.pop("_meta", None)
+        return details
+
+    def parse_details(self, finding: Finding, value: dict[str, Union[str, dict[str, list[str]]]]) -> None:
+        """
+        Appends the entries of a 'details' value (as normalized by extract_details) to the Finding description.
+        """
+        extra_sections = self.extract_details(value)
+        self.append_description(finding, extra_sections)
+
+    #####
+    # For parsing endpoints
+    #####
+    def get_host(self, item: dict[str, Any]) -> Optional[str]:
+        """
+        Returns the first non-empty value among item's "url", "host" and "ipv4_address" entries, or None.
+        """
+        for key in ("url", "host", "ipv4_address"):
+            if candidate := item.get(key):
+                return candidate
+        return None
+
+    def parse_port(self, item: Any) -> Optional[int]:
+        """
+        Converts item to a port number.
+
+        Returns the integer value when it lies in 1..65535; returns None when item cannot be converted to an int or
+        falls outside that range.
+        """
+        try:
+            int_val = int(item)
+        # OverflowError covers float infinities, which int() rejects differently from other bad input
+        except (ValueError, TypeError, OverflowError):
+            return None
+        return int_val if 0 < int_val <= 65535 else None
+
+    def get_port(self, item: dict[str, Any]) -> Optional[int]:
+        """
+        Returns item's "port" entry as a valid port number, or None if it is absent or invalid.
+        """
+        raw_port = item.get("port")
+        return self.parse_port(raw_port)
+
+    def construct_endpoint(self, host: str, port: Optional[int]) -> Endpoint:
+        """
+        Builds an Endpoint for host, preferring Endpoint.from_uri().
+
+        If from_uri() yields an Endpoint with a host, that Endpoint is returned, with its port overridden when a port
+        is supplied. Otherwise a plain Endpoint is created from host and port as given.
+        """
+        endpoint = Endpoint.from_uri(host)
+        if not endpoint.host:
+            return Endpoint(host=host, port=port)
+        if port:
+            endpoint.port = port
+        return endpoint
+
+    def parse_endpoints(self, item: dict[str, Any]) -> list[Endpoint]:
+        """
+        Returns a single-element list with the Endpoint for item, or an empty list when item has no host.
+        """
+        # Endpoint requires a host
+        host = self.get_host(item)
+        if not host:
+            return []
+        return [self.construct_endpoint(host, self.get_port(item))]
+
+    def set_endpoints(self, finding: Finding, item: Any) -> None:
+        """
+        Adds the endpoints parsed from item to finding.unsaved_endpoints.
+        """
+        finding.unsaved_endpoints.extend(self.parse_endpoints(item))
+
+    # Returns the complete field processing map: common fields plus any engine-specific
+    def get_engine_fields(self) -> dict[str, FieldType]:
+        """
+        Builds a new dict from the common field map, overlaid with this engine's _ENGINE_FIELDS_MAP.
+
+        Engine-specific entries replace common entries that share the same key.
+        """
+        fields = dict(BaseEngineParser._COMMON_FIELDS_MAP)
+        fields.update(self._ENGINE_FIELDS_MAP)
+        return fields
+
+    def get_finding_key(self, finding: Finding) -> Tuple:
+        """
+        Returns a tuple identifying finding: severity, title, its sorted (host, port) endpoint pairs, and the scanning
+        engine name.
+        """
+        endpoint_pairs = sorted((e.host, e.port) for e in finding.unsaved_endpoints)
+        return finding.severity, finding.title, tuple(endpoint_pairs), self.SCANNING_ENGINE
+
+    def parse_finding(self, item: dict[str, Any]) -> Tuple[Finding, Tuple]:
+        """
+        Builds a Finding from one result entry and returns it together with its finding key.
+
+        Every field in the engine's field map that has a truthy value in item is passed to its handler, in map order.
+        Endpoints are then attached, and the scanning engine name is appended to the description.
+        """
+        finding = Finding()
+        for field, field_handler in self.get_engine_fields().items():
+            value = item.get(field)
+            # Fields that are missing from this entry (or empty) are skipped
+            if not value:
+                continue
+            field_handler(self, finding, value)
+        self.set_endpoints(finding, item)
+        # Make a note of what scanning engine was used for this Finding
+        self.append_description(finding, {"Scanning Engine": self.SCANNING_ENGINE})
+        key = self.get_finding_key(finding)
+        return finding, key
@@ -0,0 +1,31 @@
+from typing import Any, Union
+
+from dojo.models import Endpoint
+from dojo.tools.appcheck_web_application_scanner.engines.base import BaseEngineParser
+
+
+class NmapScanningEngineParser(BaseEngineParser):
+    """
+    Parser for data from the Nmap scanning engine.
+
+    Nmap engine results include a list of ports in a 'port_table' data entry that we use to generate several endpoints
+    under the same Finding.
+    """
+    SCANNING_ENGINE = "NMapScanner"
+
+    def is_port_table_entry(self, entry) -> Union[int, bool, None]:
+        """
+        Returns the port number found in the first element of entry, or a falsy value (False/None) when entry is
+        empty or its first element is not a valid port.
+
+        Despite the name, callers rely on the returned port number, not just on its truthiness.
+        """
+        return len(entry) > 0 and self.parse_port(entry[0])
+
+    def get_ports(self, item) -> Union[list[int], list[None]]:
+        """
+        Returns the valid ports listed in item["meta"]["port_table"], or [None] when there are none.
+        """
+        meta = item.get("meta")
+        if not isinstance(meta, dict):
+            meta = {}
+        port_table = meta.get("port_table") or []
+        ports = [port for port_entry in port_table if (port := self.is_port_table_entry(port_entry))]
+        # Want at least one endpoint reported since we have a host -- no (valid) ports provided. This shouldn't happen,
+        # but...
+        return ports or [None]
+
+    def parse_endpoints(self, item: dict[str, Any]) -> list[Endpoint]:
+        """
+        Returns one Endpoint per port for item's host, or an empty list when item has no host.
+        """
+        # Endpoint requires a host, same as in the base class
+        host = self.get_host(item)
+        if not host:
+            return []
+        return [self.construct_endpoint(host, port) for port in self.get_ports(item)]
@@ -0,0 +1,10 @@
+from dojo.tools.appcheck_web_application_scanner.engines.base import BaseEngineParser
+
+
+class OpenVASScannerEngineParser(BaseEngineParser):
+    """
+    Parser for data from the OpenVAS scanning engine.
+
+    Shares all functionality with BaseEngineParser, but registered under an explicit name.
+    """
+    # BaseEngineParser appends this name to each Finding's description and includes it in the finding key.
+    # NOTE(review): presumably this matches the engine identifier found in AppCheck output -- confirm against the
+    # code that selects an engine parser.
+    SCANNING_ENGINE = "OpenVASScanner"
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-4483a2efbe3cf1bf1c96c333f9bfc65ffe82a2fb20ed86eb4c4f6726cf41fa94`
	`1`	`+66ee64ade0a61b090efd059a63e39f11683bd53e33bd25b8d41009cbbde06073`
Original file line number	Diff line number	Diff line change
`@@ -1272,6 +1272,7 @@ def saml2_attrib_map_format(dict):`
`1272`	`1272`	`"Kubescape JSON Importer": ["title", "component_name"],`
`1273`	`1273`	`"Kiuwan SCA Scan": ["description", "severity", "component_name", "component_version", "cwe"],`
`1274`	`1274`	`"Rapplex Scan": ["title", "endpoints", "severity"],`
	`1275`	`+ "AppCheck Web Application Scanner": ["title", "severity"],`
`1275`	`1276`	`}`
`1276`	`1277`
`1277`	`1278`	`# Override the hardcoded settings here via the env var`
`@@ -1493,6 +1494,7 @@ def saml2_attrib_map_format(dict):`
`1493`	`1494`	`"Kubescape JSON Importer": DEDUPE_ALGO_HASH_CODE,`
`1494`	`1495`	`"Kiuwan SCA Scan": DEDUPE_ALGO_HASH_CODE,`
`1495`	`1496`	`"Rapplex Scan": DEDUPE_ALGO_HASH_CODE,`
	`1497`	`+ "AppCheck Web Application Scanner": DEDUPE_ALGO_HASH_CODE,`
`1496`	`1498`	`}`
`1497`	`1499`
`1498`	`1500`	`# Override the hardcoded settings here via the env var`