Commit 9225090

♻️ Remove async import (#12042)
* ♻️ Deprecate async import
* simplify
* update
* fix unittest
* add docs
* update
1 parent fcf9878 commit 9225090

File tree

7 files changed, +12 -236 lines changed


docs/content/en/open_source/installation/running-in-production.md

Lines changed: 2 additions & 18 deletions
@@ -88,21 +88,5 @@ You can execute the following command to see the configuration:
 `docker compose exec celerybeat bash -c "celery -A dojo inspect stats"`
 and see what is in effect.
 
-### Asynchronous Import
-
-<span style="background-color:rgba(242, 86, 29, 0.3)">This experimental feature has been deprecated as of DefectDojo 2.44.0 (March release). Please exercise caution if using this feature with an older version of DefectDojo, as results may be inconsistent.</span>
-
-Import and Re-Import can also be configured to handle uploads asynchronously to aid in
-processing especially large scans. It works by batching Findings and Endpoints by a
-configurable amount. Each batch will be processed in separate celery tasks.
-
-The following variables impact async imports.
-
-- `DD_ASYNC_FINDING_IMPORT` defaults to False
-- `DD_ASYNC_FINDING_IMPORT_CHUNK_SIZE` defaults to 100
-
-When using asynchronous imports with dynamic scanners, Endpoints will continue to "trickle" in
-even after the import has returned a successful response. This is because processing continues
-to occur after the Findings have already been imported.
-
-To determine if an import has been fully completed, please see the progress bar in the appropriate test.
+### Asynchronous Import: Deprecated
+This feature has been removed in 2.47.0
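
The removed documentation above describes the batching behaviour only in prose. For a concrete picture, here is a minimal, illustrative sketch of that pattern, not DefectDojo's actual implementation: parsed findings are split into fixed-size batches (the old default was 100, from `DD_ASYNC_FINDING_IMPORT_CHUNK_SIZE`) and each batch is handed to its own Celery task, which is why endpoints could keep "trickling" in after the import request returned. `process_batch` and `import_async` are hypothetical names; eager mode is enabled only so the sketch runs without a broker.

```python
# Illustrative sketch of the removed batching pattern; process_batch and
# import_async are hypothetical names, not DefectDojo APIs.
from celery import Celery

app = Celery("sketch")
app.conf.task_always_eager = True  # run tasks inline so the sketch needs no broker


@app.task
def process_batch(batch: list) -> int:
    # A real worker would persist each parsed finding/endpoint here.
    return len(batch)


def import_async(parsed_findings: list, batch_size: int = 100) -> list:
    # Split parsed findings into fixed-size batches (old default: 100,
    # from DD_ASYNC_FINDING_IMPORT_CHUNK_SIZE).
    batches = [parsed_findings[i:i + batch_size]
               for i in range(0, len(parsed_findings), batch_size)]
    # One Celery task per batch; callers keep the result handles and poll
    # later, which is why results could keep "trickling in" after the
    # HTTP response was returned.
    return [process_batch.delay(batch) for batch in batches]


results = import_async([f"finding-{n}" for n in range(250)])
print([r.get() for r in results])  # [100, 100, 50]
```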

docs/content/en/open_source/upgrading/2.47.md

Lines changed: 4 additions & 0 deletions
@@ -9,3 +9,7 @@ description: Drop support for PostgreSQL-HA in HELM
 This release removes support for the PostgreSQL-HA (High Availability) Helm chart as a dependency in the DefectDojo Helm chart. Users relying on the PostgreSQL-HA Helm chart will need to transition to using the standard PostgreSQL configuration or an external PostgreSQL database.
 
 There are no special instructions for upgrading to 2.47.x. Check the [Release Notes](https://github.com/DefectDojo/django-DefectDojo/releases/tag/2.47.0) for the contents of the release.
+
+## Removal of Asynchronous Import
+
+Please note that asynchronous import has been removed, as announced in 2.46. If you haven't migrated away from this feature yet, we recommend doing so before upgrading to 2.47.0.
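
Since the settings are gone in 2.47.0, one quick way to confirm that nothing in a deployment still injects them is to check the environment before upgrading. This is an illustrative check, not an official migration script; the variable names are the ones removed from `settings.dist.py` in this commit.

```python
# Illustrative pre-upgrade check: warn if the legacy async-import variables
# are still present in the environment that runs DefectDojo.
import os

legacy_vars = ("DD_ASYNC_FINDING_IMPORT", "DD_ASYNC_FINDING_IMPORT_CHUNK_SIZE")
still_set = [name for name in legacy_vars if name in os.environ]
if still_set:
    print(f"Remove these legacy variables before upgrading to 2.47.0: {still_set}")
else:
    print("No legacy async-import variables found.")
```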

dojo/importers/base_importer.py

Lines changed: 0 additions & 41 deletions
@@ -1,6 +1,5 @@
 import base64
 import logging
-from warnings import warn
 
 from django.conf import settings
 from django.core.exceptions import ValidationError
@@ -255,33 +254,11 @@ def sync_process_findings(
         """
         return self.process_findings(parsed_findings, sync=True, **kwargs)
 
-    def async_process_findings(
-        self,
-        parsed_findings: list[Finding],
-        **kwargs: dict,
-    ) -> list[Finding]:
-        """
-        Processes findings in chunks within N number of processes. The
-        ASYNC_FINDING_IMPORT_CHUNK_SIZE setting will determine how many
-        findings will be processed in a given worker/process/thread
-        """
-        warn("This experimental feature has been deprecated as of DefectDojo 2.44.0 (March release). Please exercise caution if using this feature with an older version of DefectDojo, as results may be inconsistent.", stacklevel=2)
-        return self.process_findings(parsed_findings, sync=False, **kwargs)
-
     def determine_process_method(
         self,
         parsed_findings: list[Finding],
         **kwargs: dict,
     ) -> list[Finding]:
-        """
-        Determines whether to process the scan iteratively, or in chunks,
-        based upon the ASYNC_FINDING_IMPORT setting
-        """
-        if settings.ASYNC_FINDING_IMPORT:
-            return self.async_process_findings(
-                parsed_findings,
-                **kwargs,
-            )
         return self.sync_process_findings(
             parsed_findings,
             **kwargs,
@@ -513,24 +490,6 @@ def construct_imported_message(
 
         return message
 
-    def chunk_findings(
-        self,
-        finding_list: list[Finding],
-        chunk_size: int = settings.ASYNC_FINDING_IMPORT_CHUNK_SIZE,
-    ) -> list[list[Finding]]:
-        """
-        Split a single large list into a list of lists of size `chunk_size`.
-        For Example
-        ```
-        >>> chunk_findings([A, B, C, D, E], 2)
-        >>> [[A, B], [B, C], [E]]
-        ```
-        """
-        # Break the list of parsed findings into "chunk_size" lists
-        chunk_list = [finding_list[i:i + chunk_size] for i in range(0, len(finding_list), chunk_size)]
-        logger.debug(f"Split endpoints/findings into {len(chunk_list)} chunks of {chunk_size}")
-        return chunk_list
-
     def update_test_progress(
         self,
         percentage_value: int = 100,
dojo/importers/default_importer.py

Lines changed: 1 addition & 37 deletions
@@ -1,8 +1,7 @@
 import logging
-from warnings import warn
 
 from django.core.files.uploadedfile import TemporaryUploadedFile
-from django.core.serializers import deserialize, serialize
+from django.core.serializers import serialize
 from django.db.models.query_utils import Q
 from django.urls import reverse
 
@@ -350,38 +349,3 @@ def parse_findings_dynamic_test_type(
         """
         logger.debug("IMPORT_SCAN parser v2: Create Test and parse findings")
         return super().parse_findings_dynamic_test_type(scan, parser)
-
-    def async_process_findings(
-        self,
-        parsed_findings: list[Finding],
-        **kwargs: dict,
-    ) -> list[Finding]:
-        """
-        Processes findings in chunks within N number of processes. The
-        ASYNC_FINDING_IMPORT_CHUNK_SIZE setting will determine how many
-        findings will be processed in a given worker/process/thread
-        """
-        warn("This experimental feature has been deprecated as of DefectDojo 2.44.0 (March release). Please exercise caution if using this feature with an older version of DefectDojo, as results may be inconsistent.", stacklevel=2)
-        chunk_list = self.chunk_findings(parsed_findings)
-        results_list = []
-        new_findings = []
-        # First kick off all the workers
-        for findings_list in chunk_list:
-            result = self.process_findings(
-                findings_list,
-                sync=False,
-                **kwargs,
-            )
-            # Since I dont want to wait until the task is done right now, save the id
-            # So I can check on the task later
-            results_list += [result]
-        # After all tasks have been started, time to pull the results
-        logger.info("IMPORT_SCAN: Collecting Findings")
-        for results in results_list:
-            serial_new_findings = results
-            new_findings += [next(deserialize("json", finding)).object for finding in serial_new_findings]
-        logger.info("IMPORT_SCAN: All Findings Collected")
-        # Indicate that the test is not complete yet as endpoints will still be rolling in.
-        self.test.percent_complete = 50
-        self.test.save()
-        return new_findings
dojo/importers/default_reimporter.py

Lines changed: 1 addition & 59 deletions
@@ -1,8 +1,7 @@
 import logging
-from warnings import warn
 
 from django.core.files.uploadedfile import TemporaryUploadedFile
-from django.core.serializers import deserialize, serialize
+from django.core.serializers import serialize
 from django.db.models.query_utils import Q
 
 import dojo.finding.helper as finding_helper
@@ -314,63 +313,6 @@ def parse_findings_dynamic_test_type(
         logger.debug("REIMPORT_SCAN parser v2: Create parse findings")
         return super().parse_findings_dynamic_test_type(scan, parser)
 
-    def async_process_findings(
-        self,
-        parsed_findings: list[Finding],
-        **kwargs: dict,
-    ) -> tuple[list[Finding], list[Finding], list[Finding], list[Finding]]:
-        """
-        Processes findings in chunks within N number of processes. The
-        ASYNC_FINDING_IMPORT_CHUNK_SIZE setting will determine how many
-        findings will be processed in a given worker/process/thread
-        """
-        warn("This experimental feature has been deprecated as of DefectDojo 2.44.0 (March release). Please exercise caution if using this feature with an older version of DefectDojo, as results may be inconsistent.", stacklevel=2)
-        # Indicate that the test is not complete yet as endpoints will still be rolling in.
-        self.update_test_progress(percentage_value=50)
-        chunk_list = self.chunk_findings(parsed_findings)
-        results_list = []
-        new_findings = []
-        reactivated_findings = []
-        findings_to_mitigate = []
-        untouched_findings = []
-        # First kick off all the workers
-        for findings_list in chunk_list:
-            result = self.process_findings(
-                findings_list,
-                sync=False,
-                **kwargs,
-            )
-            # Since I dont want to wait until the task is done right now, save the id
-            # So I can check on the task later
-            results_list += [result]
-        # After all tasks have been started, time to pull the results
-        logger.debug("REIMPORT_SCAN: Collecting Findings")
-        for results in results_list:
-            (
-                serial_new_findings,
-                serial_reactivated_findings,
-                serial_findings_to_mitigate,
-                serial_untouched_findings,
-            ) = results
-            new_findings += [
-                next(deserialize("json", finding)).object
-                for finding in serial_new_findings
-            ]
-            reactivated_findings += [
-                next(deserialize("json", finding)).object
-                for finding in serial_reactivated_findings
-            ]
-            findings_to_mitigate += [
-                next(deserialize("json", finding)).object
-                for finding in serial_findings_to_mitigate
-            ]
-            untouched_findings += [
-                next(deserialize("json", finding)).object
-                for finding in serial_untouched_findings
-            ]
-        logger.debug("REIMPORT_SCAN: All Findings Collected")
-        return new_findings, reactivated_findings, findings_to_mitigate, untouched_findings
-
     def match_new_finding_to_existing_finding(
         self,
         unsaved_finding: Finding,
dojo/importers/endpoint_manager.py

Lines changed: 4 additions & 71 deletions
@@ -1,6 +1,5 @@
 import logging
 
-from django.conf import settings
 from django.core.exceptions import MultipleObjectsReturned, ValidationError
 from django.urls import reverse
 from django.utils import timezone
@@ -95,48 +94,14 @@ def reactivate_endpoint_status(
         endpoint_status.save()
         return
 
-    def chunk_endpoints(
-        self,
-        endpoint_list: list[Endpoint],
-        chunk_size: int = settings.ASYNC_FINDING_IMPORT_CHUNK_SIZE,
-    ) -> list[list[Endpoint]]:
-        """
-        Split a single large list into a list of lists of size `chunk_size`.
-        For Example
-        ```
-        >>> chunk_endpoints([A, B, C, D, E], 2)
-        >>> [[A, B], [B, C], [E]]
-        ```
-        """
-        # Break the list of parsed findings into "chunk_size" lists
-        chunk_list = [endpoint_list[i:i + chunk_size] for i in range(0, len(endpoint_list), chunk_size)]
-        logger.debug(f"Split endpoints into {len(chunk_list)} chunks of {chunk_size}")
-        return chunk_list
-
     def chunk_endpoints_and_disperse(
         self,
         finding: Finding,
         endpoints: list[Endpoint],
         **kwargs: dict,
     ) -> None:
-        """
-        Determines whether to asynchronously process endpoints on a finding or not. if so,
-        chunk up the findings to be dispersed into individual celery workers. Otherwise,
-        only use one worker
-        """
-        if settings.ASYNC_FINDING_IMPORT:
-            chunked_list = self.chunk_endpoints(endpoints)
-            # If there is only one chunk, then do not bother with async
-            if len(chunked_list) < 2:
-                self.add_endpoints_to_unsaved_finding(finding, endpoints, sync=True)
-                return []
-            # First kick off all the workers
-            for endpoints_list in chunked_list:
-                self.add_endpoints_to_unsaved_finding(finding, endpoints_list, sync=False)
-        else:
-            # Do not run this asynchronously or chunk the endpoints
-            self.add_endpoints_to_unsaved_finding(finding, endpoints, sync=True)
-        return None
+        self.add_endpoints_to_unsaved_finding(finding, endpoints, sync=True)
+        return
 
     def clean_unsaved_endpoints(
         self,
@@ -158,23 +123,7 @@ def chunk_endpoints_and_reactivate(
         endpoint_status_list: list[Endpoint_Status],
         **kwargs: dict,
     ) -> None:
-        """
-        Reactivates all endpoint status objects. Whether this function will asynchronous or not is dependent
-        on the ASYNC_FINDING_IMPORT setting. If it is set to true, endpoint statuses will be chunked,
-        and dispersed over celery workers.
-        """
-        # Determine if this can be run async
-        if settings.ASYNC_FINDING_IMPORT:
-            chunked_list = self.chunk_endpoints(endpoint_status_list)
-            # If there is only one chunk, then do not bother with async
-            if len(chunked_list) < 2:
-                self.reactivate_endpoint_status(endpoint_status_list, sync=True)
-            logger.debug(f"Split endpoints into {len(chunked_list)} chunks of {len(chunked_list[0])}")
-            # First kick off all the workers
-            for endpoint_status_list in chunked_list:
-                self.reactivate_endpoint_status(endpoint_status_list, sync=False)
-        else:
-            self.reactivate_endpoint_status(endpoint_status_list, sync=True)
+        self.reactivate_endpoint_status(endpoint_status_list, sync=True)
         return
 
     def chunk_endpoints_and_mitigate(
@@ -183,23 +132,7 @@ def chunk_endpoints_and_mitigate(
         user: Dojo_User,
         **kwargs: dict,
     ) -> None:
-        """
-        Mitigates all endpoint status objects. Whether this function will asynchronous or not is dependent
-        on the ASYNC_FINDING_IMPORT setting. If it is set to true, endpoint statuses will be chunked,
-        and dispersed over celery workers.
-        """
-        # Determine if this can be run async
-        if settings.ASYNC_FINDING_IMPORT:
-            chunked_list = self.chunk_endpoints(endpoint_status_list)
-            # If there is only one chunk, then do not bother with async
-            if len(chunked_list) < 2:
-                self.mitigate_endpoint_status(endpoint_status_list, user, sync=True)
-            logger.debug(f"Split endpoints into {len(chunked_list)} chunks of {len(chunked_list[0])}")
-            # First kick off all the workers
-            for endpoint_status_list in chunked_list:
-                self.mitigate_endpoint_status(endpoint_status_list, user, sync=False)
-        else:
-            self.mitigate_endpoint_status(endpoint_status_list, user, sync=True)
+        self.mitigate_endpoint_status(endpoint_status_list, user, sync=True)
        return
 
     def update_endpoint_status(
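Before this commit, each of the three methods above made the same decision: chunk the endpoint (status) list, stay synchronous when there was only a single chunk, otherwise fan out one asynchronous call per chunk. A small illustrative sketch of that decision follows; `disperse` and `run` are hypothetical stand-ins for the real `add_endpoints_to_unsaved_finding` / `reactivate_endpoint_status` / `mitigate_endpoint_status` helpers. After the commit the equivalent body is simply one synchronous call.

```python
# Illustrative sketch of the removed dispatch decision; `run` is a stand-in
# for the real endpoint helpers, not a DefectDojo API.
def disperse(items: list, chunk_size: int, run) -> None:
    chunks = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
    if len(chunks) < 2:
        # Not worth spinning up separate tasks for a single chunk.
        run(items, sync=True)
        return
    for chunk in chunks:
        run(chunk, sync=False)  # previously: one Celery task per chunk


# After this commit the equivalent behaviour is always: run(items, sync=True)
disperse(list(range(5)), chunk_size=2,
         run=lambda batch, sync: print(f"{len(batch)} items, sync={sync}"))
```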

dojo/settings/settings.dist.py

Lines changed: 0 additions & 10 deletions
@@ -274,12 +274,6 @@
     DD_RATE_LIMITER_ACCOUNT_LOCKOUT=(bool, False),
     # when enabled SonarQube API parser will download the security hotspots
     DD_SONARQUBE_API_PARSER_HOTSPOTS=(bool, True),
-    # when enabled, finding importing will occur asynchronously, default False
-    # This experimental feature has been deprecated as of DefectDojo 2.44.0 (March release). Please exercise caution if using this feature with an older version of DefectDojo, as results may be inconsistent.
-    DD_ASYNC_FINDING_IMPORT=(bool, False),
-    # The number of findings to be processed per celeryworker
-    # This experimental feature has been deprecated as of DefectDojo 2.44.0 (March release). Please exercise caution if using this feature with an older version of DefectDojo, as results may be inconsistent.
-    DD_ASYNC_FINDING_IMPORT_CHUNK_SIZE=(int, 100),
     # When enabled, deleting objects will be occur from the bottom up. In the example of deleting an engagement
     # The objects will be deleted as follows Endpoints -> Findings -> Tests -> Engagement
     DD_ASYNC_OBJECT_DELETE=(bool, False),
@@ -1795,10 +1789,6 @@ def saml2_attrib_map_format(din):
 # Deside if SonarQube API parser should download the security hotspots
 SONARQUBE_API_PARSER_HOTSPOTS = env("DD_SONARQUBE_API_PARSER_HOTSPOTS")
 
-# when enabled, finding importing will occur asynchronously, default False
-ASYNC_FINDING_IMPORT = env("DD_ASYNC_FINDING_IMPORT")
-# The number of findings to be processed per celeryworker
-ASYNC_FINDING_IMPORT_CHUNK_SIZE = env("DD_ASYNC_FINDING_IMPORT_CHUNK_SIZE")
 # When enabled, deleting objects will be occur from the bottom up. In the example of deleting an engagement
 # The objects will be deleted as follows Endpoints -> Findings -> Tests -> Engagement
 ASYNC_OBJECT_DELETE = env("DD_ASYNC_OBJECT_DELETE")
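
For context on the two deleted scheme entries: `settings.dist.py` uses django-environ, where each entry declares a cast type and a default, and the value is later read with `env("NAME")`. A minimal sketch of that pattern with the removed variable names, assuming django-environ is installed; this is illustration only, since the settings no longer exist in 2.47.0.

```python
# Minimal django-environ sketch of the pattern used in settings.dist.py.
# The two DD_ASYNC_* entries shown here are the ones removed by this commit.
import os

import environ

env = environ.Env(
    DD_ASYNC_FINDING_IMPORT=(bool, False),          # removed in 2.47.0
    DD_ASYNC_FINDING_IMPORT_CHUNK_SIZE=(int, 100),  # removed in 2.47.0
)

os.environ.setdefault("DD_ASYNC_FINDING_IMPORT", "True")
print(env("DD_ASYNC_FINDING_IMPORT"))               # True  (cast to bool)
print(env("DD_ASYNC_FINDING_IMPORT_CHUNK_SIZE"))    # 100   (falls back to default)
```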
