Commit 9225090

♻️ Remove async import (#12042)
* ♻️ Deprecate async import
* simplify
* update
* fix unittest
* add docs
* update
1 parent fcf9878 commit 9225090

File tree

7 files changed, +12 -236 lines changed


docs/content/en/open_source/installation/running-in-production.md

Lines changed: 2 additions & 18 deletions
@@ -88,21 +88,5 @@ You can execute the following command to see the configuration:
 `docker compose exec celerybeat bash -c "celery -A dojo inspect stats"`
 and see what is in effect.
 
-### Asynchronous Import
-
-<span style="background-color:rgba(242, 86, 29, 0.3)">This experimental feature has been deprecated as of DefectDojo 2.44.0 (March release). Please exercise caution if using this feature with an older version of DefectDojo, as results may be inconsistent.</span>
-
-Import and Re-Import can also be configured to handle uploads asynchronously to aid in
-processing especially large scans. It works by batching Findings and Endpoints by a
-configurable amount. Each batch will be processed in separate celery tasks.
-
-The following variables impact async imports.
-
-- `DD_ASYNC_FINDING_IMPORT` defaults to False
-- `DD_ASYNC_FINDING_IMPORT_CHUNK_SIZE` defaults to 100
-
-When using asynchronous imports with dynamic scanners, Endpoints will continue to "trickle" in
-even after the import has returned a successful response. This is because processing continues
-to occur after the Findings have already been imported.
-
-To determine if an import has been fully completed, please see the progress bar in the appropriate test.
+### Asynchronous Import: Deprecated
+This feature has been removed in 2.47.0
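
The removed documentation above describes the batching behaviour only in prose. For a concrete picture, here is a minimal, illustrative sketch of that pattern, not DefectDojo's actual implementation: parsed findings are split into fixed-size batches (the old default was 100, from `DD_ASYNC_FINDING_IMPORT_CHUNK_SIZE`) and each batch is handed to its own Celery task, which is why endpoints could keep "trickling" in after the import request returned. `process_batch` and `import_async` are hypothetical names; eager mode is enabled only so the sketch runs without a broker.

```python
# Illustrative sketch of the removed batching pattern; process_batch and
# import_async are hypothetical names, not DefectDojo APIs.
from celery import Celery

app = Celery("sketch")
app.conf.task_always_eager = True  # run tasks inline so the sketch needs no broker


@app.task
def process_batch(batch: list) -> int:
    # A real worker would persist each parsed finding/endpoint here.
    return len(batch)


def import_async(parsed_findings: list, batch_size: int = 100) -> list:
    # Split parsed findings into fixed-size batches (old default: 100,
    # from DD_ASYNC_FINDING_IMPORT_CHUNK_SIZE).
    batches = [parsed_findings[i:i + batch_size]
               for i in range(0, len(parsed_findings), batch_size)]
    # One Celery task per batch; callers keep the result handles and poll
    # later, which is why results could keep "trickling in" after the
    # HTTP response was returned.
    return [process_batch.delay(batch) for batch in batches]


results = import_async([f"finding-{n}" for n in range(250)])
print([r.get() for r in results])  # [100, 100, 50]
```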

docs/content/en/open_source/upgrading/2.47.md

Lines changed: 4 additions & 0 deletions
@@ -9,3 +9,7 @@ description: Drop support for PostgreSQL-HA in HELM
 This release removes support for the PostgreSQL-HA (High Availability) Helm chart as a dependency in the DefectDojo Helm chart. Users relying on the PostgreSQL-HA Helm chart will need to transition to using the standard PostgreSQL configuration or an external PostgreSQL database.
 
 There are no special instructions for upgrading to 2.47.x. Check the [Release Notes](https://github.com/DefectDojo/django-DefectDojo/releases/tag/2.47.0) for the contents of the release.
+
+## Removal of Asynchronous Import
+
+Please note that asynchronous import has been removed, as announced in 2.46. If you haven't migrated away from this feature yet, we recommend doing so before upgrading to 2.47.0.
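
Since the settings are gone in 2.47.0, one quick way to confirm that nothing in a deployment still injects them is to check the environment before upgrading. This is an illustrative check, not an official migration script; the variable names are the ones removed from `settings.dist.py` in this commit.

```python
# Illustrative pre-upgrade check: warn if the legacy async-import variables
# are still present in the environment that runs DefectDojo.
import os

legacy_vars = ("DD_ASYNC_FINDING_IMPORT", "DD_ASYNC_FINDING_IMPORT_CHUNK_SIZE")
still_set = [name for name in legacy_vars if name in os.environ]
if still_set:
    print(f"Remove these legacy variables before upgrading to 2.47.0: {still_set}")
else:
    print("No legacy async-import variables found.")
```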

dojo/importers/base_importer.py

Lines changed: 0 additions & 41 deletions
@@ -1,6 +1,5 @@
 import base64
 import logging
-from warnings import warn
 
 from django.conf import settings
 from django.core.exceptions import ValidationError
@@ -255,33 +254,11 @@ def sync_process_findings(
         """
         return self.process_findings(parsed_findings, sync=True, **kwargs)
 
-    def async_process_findings(
-        self,
-        parsed_findings: list[Finding],
-        **kwargs: dict,
-    ) -> list[Finding]:
-        """
-        Processes findings in chunks within N number of processes. The
-        ASYNC_FINDING_IMPORT_CHUNK_SIZE setting will determine how many
-        findings will be processed in a given worker/process/thread
-        """
-        warn("This experimental feature has been deprecated as of DefectDojo 2.44.0 (March release). Please exercise caution if using this feature with an older version of DefectDojo, as results may be inconsistent.", stacklevel=2)
-        return self.process_findings(parsed_findings, sync=False, **kwargs)
-
     def determine_process_method(
         self,
         parsed_findings: list[Finding],
         **kwargs: dict,
     ) -> list[Finding]:
-        """
-        Determines whether to process the scan iteratively, or in chunks,
-        based upon the ASYNC_FINDING_IMPORT setting
-        """
-        if settings.ASYNC_FINDING_IMPORT:
-            return self.async_process_findings(
-                parsed_findings,
-                **kwargs,
-            )
         return self.sync_process_findings(
             parsed_findings,
             **kwargs,
@@ -513,24 +490,6 @@ def construct_imported_message(
 
         return message
 
-    def chunk_findings(
-        self,
-        finding_list: list[Finding],
-        chunk_size: int = settings.ASYNC_FINDING_IMPORT_CHUNK_SIZE,
-    ) -> list[list[Finding]]:
-        """
-        Split a single large list into a list of lists of size `chunk_size`.
-        For Example
-        ```
-        >>> chunk_findings([A, B, C, D, E], 2)
-        >>> [[A, B], [B, C], [E]]
-        ```
-        """
-        # Break the list of parsed findings into "chunk_size" lists
-        chunk_list = [finding_list[i:i + chunk_size] for i in range(0, len(finding_list), chunk_size)]
-        logger.debug(f"Split endpoints/findings into {len(chunk_list)} chunks of {chunk_size}")
-        return chunk_list
-
     def update_test_progress(
         self,
         percentage_value: int = 100,
dojo/importers/default_importer.py

Lines changed: 1 addition & 37 deletions
@@ -1,8 +1,7 @@
 import logging
-from warnings import warn
 
 from django.core.files.uploadedfile import TemporaryUploadedFile
-from django.core.serializers import deserialize, serialize
+from django.core.serializers import serialize
 from django.db.models.query_utils import Q
 from django.urls import reverse
 
@@ -350,38 +349,3 @@ def parse_findings_dynamic_test_type(
         """
         logger.debug("IMPORT_SCAN parser v2: Create Test and parse findings")
         return super().parse_findings_dynamic_test_type(scan, parser)
-
-    def async_process_findings(
-        self,
-        parsed_findings: list[Finding],
-        **kwargs: dict,
-    ) -> list[Finding]:
-        """
-        Processes findings in chunks within N number of processes. The
-        ASYNC_FINDING_IMPORT_CHUNK_SIZE setting will determine how many
-        findings will be processed in a given worker/process/thread
-        """
-        warn("This experimental feature has been deprecated as of DefectDojo 2.44.0 (March release). Please exercise caution if using this feature with an older version of DefectDojo, as results may be inconsistent.", stacklevel=2)
-        chunk_list = self.chunk_findings(parsed_findings)
-        results_list = []
-        new_findings = []
-        # First kick off all the workers
-        for findings_list in chunk_list:
-            result = self.process_findings(
-                findings_list,
-                sync=False,
-                **kwargs,
-            )
-            # Since I dont want to wait until the task is done right now, save the id
-            # So I can check on the task later
-            results_list += [result]
-        # After all tasks have been started, time to pull the results
-        logger.info("IMPORT_SCAN: Collecting Findings")
-        for results in results_list:
-            serial_new_findings = results
-            new_findings += [next(deserialize("json", finding)).object for finding in serial_new_findings]
-        logger.info("IMPORT_SCAN: All Findings Collected")
-        # Indicate that the test is not complete yet as endpoints will still be rolling in.
-        self.test.percent_complete = 50
-        self.test.save()
-        return new_findings
dojo/importers/default_reimporter.py

Lines changed: 1 addition & 59 deletions
@@ -1,8 +1,7 @@
 import logging
-from warnings import warn
 
 from django.core.files.uploadedfile import TemporaryUploadedFile
-from django.core.serializers import deserialize, serialize
+from django.core.serializers import serialize
 from django.db.models.query_utils import Q
 
 import dojo.finding.helper as finding_helper
@@ -314,63 +313,6 @@ def parse_findings_dynamic_test_type(
         logger.debug("REIMPORT_SCAN parser v2: Create parse findings")
         return super().parse_findings_dynamic_test_type(scan, parser)
 
-    def async_process_findings(
-        self,
-        parsed_findings: list[Finding],
-        **kwargs: dict,
-    ) -> tuple[list[Finding], list[Finding], list[Finding], list[Finding]]:
-        """
-        Processes findings in chunks within N number of processes. The
-        ASYNC_FINDING_IMPORT_CHUNK_SIZE setting will determine how many
-        findings will be processed in a given worker/process/thread
-        """
-        warn("This experimental feature has been deprecated as of DefectDojo 2.44.0 (March release). Please exercise caution if using this feature with an older version of DefectDojo, as results may be inconsistent.", stacklevel=2)
-        # Indicate that the test is not complete yet as endpoints will still be rolling in.
-        self.update_test_progress(percentage_value=50)
-        chunk_list = self.chunk_findings(parsed_findings)
-        results_list = []
-        new_findings = []
-        reactivated_findings = []
-        findings_to_mitigate = []
-        untouched_findings = []
-        # First kick off all the workers
-        for findings_list in chunk_list:
-            result = self.process_findings(
-                findings_list,
-                sync=False,
-                **kwargs,
-            )
-            # Since I dont want to wait until the task is done right now, save the id
-            # So I can check on the task later
-            results_list += [result]
-        # After all tasks have been started, time to pull the results
-        logger.debug("REIMPORT_SCAN: Collecting Findings")
-        for results in results_list:
-            (
-                serial_new_findings,
-                serial_reactivated_findings,
-                serial_findings_to_mitigate,
-                serial_untouched_findings,
-            ) = results
-            new_findings += [
-                next(deserialize("json", finding)).object
-                for finding in serial_new_findings
-            ]
-            reactivated_findings += [
-                next(deserialize("json", finding)).object
-                for finding in serial_reactivated_findings
-            ]
-            findings_to_mitigate += [
-                next(deserialize("json", finding)).object
-                for finding in serial_findings_to_mitigate
-            ]
-            untouched_findings += [
-                next(deserialize("json", finding)).object
-                for finding in serial_untouched_findings
-            ]
-        logger.debug("REIMPORT_SCAN: All Findings Collected")
-        return new_findings, reactivated_findings, findings_to_mitigate, untouched_findings
-
     def match_new_finding_to_existing_finding(
         self,
         unsaved_finding: Finding,
dojo/importers/endpoint_manager.py

Lines changed: 4 additions & 71 deletions
@@ -1,6 +1,5 @@
 import logging
 
-from django.conf import settings
 from django.core.exceptions import MultipleObjectsReturned, ValidationError
 from django.urls import reverse
 from django.utils import timezone
@@ -95,48 +94,14 @@ def reactivate_endpoint_status(
         endpoint_status.save()
         return
 
-    def chunk_endpoints(
-        self,
-        endpoint_list: list[Endpoint],
-        chunk_size: int = settings.ASYNC_FINDING_IMPORT_CHUNK_SIZE,
-    ) -> list[list[Endpoint]]:
-        """
-        Split a single large list into a list of lists of size `chunk_size`.
-        For Example
-        ```
-        >>> chunk_endpoints([A, B, C, D, E], 2)
-        >>> [[A, B], [B, C], [E]]
-        ```
-        """
-        # Break the list of parsed findings into "chunk_size" lists
-        chunk_list = [endpoint_list[i:i + chunk_size] for i in range(0, len(endpoint_list), chunk_size)]
-        logger.debug(f"Split endpoints into {len(chunk_list)} chunks of {chunk_size}")
-        return chunk_list
-
     def chunk_endpoints_and_disperse(
         self,
         finding: Finding,
         endpoints: list[Endpoint],
         **kwargs: dict,
     ) -> None:
-        """
-        Determines whether to asynchronously process endpoints on a finding or not. if so,
-        chunk up the findings to be dispersed into individual celery workers. Otherwise,
-        only use one worker
-        """
-        if settings.ASYNC_FINDING_IMPORT:
-            chunked_list = self.chunk_endpoints(endpoints)
-            # If there is only one chunk, then do not bother with async
-            if len(chunked_list) < 2:
-                self.add_endpoints_to_unsaved_finding(finding, endpoints, sync=True)
-                return []
-            # First kick off all the workers
-            for endpoints_list in chunked_list:
-                self.add_endpoints_to_unsaved_finding(finding, endpoints_list, sync=False)
-        else:
-            # Do not run this asynchronously or chunk the endpoints
-            self.add_endpoints_to_unsaved_finding(finding, endpoints, sync=True)
-        return None
+        self.add_endpoints_to_unsaved_finding(finding, endpoints, sync=True)
+        return
 
     def clean_unsaved_endpoints(
         self,
@@ -158,23 +123,7 @@ def chunk_endpoints_and_reactivate(
         endpoint_status_list: list[Endpoint_Status],
         **kwargs: dict,
     ) -> None:
-        """
-        Reactivates all endpoint status objects. Whether this function will asynchronous or not is dependent
-        on the ASYNC_FINDING_IMPORT setting. If it is set to true, endpoint statuses will be chunked,
-        and dispersed over celery workers.
-        """
-        # Determine if this can be run async
-        if settings.ASYNC_FINDING_IMPORT:
-            chunked_list = self.chunk_endpoints(endpoint_status_list)
-            # If there is only one chunk, then do not bother with async
-            if len(chunked_list) < 2:
-                self.reactivate_endpoint_status(endpoint_status_list, sync=True)
-            logger.debug(f"Split endpoints into {len(chunked_list)} chunks of {len(chunked_list[0])}")
-            # First kick off all the workers
-            for endpoint_status_list in chunked_list:
-                self.reactivate_endpoint_status(endpoint_status_list, sync=False)
-        else:
-            self.reactivate_endpoint_status(endpoint_status_list, sync=True)
+        self.reactivate_endpoint_status(endpoint_status_list, sync=True)
         return
 
     def chunk_endpoints_and_mitigate(
@@ -183,23 +132,7 @@ def chunk_endpoints_and_mitigate(
         user: Dojo_User,
         **kwargs: dict,
     ) -> None:
-        """
-        Mitigates all endpoint status objects. Whether this function will asynchronous or not is dependent
-        on the ASYNC_FINDING_IMPORT setting. If it is set to true, endpoint statuses will be chunked,
-        and dispersed over celery workers.
-        """
-        # Determine if this can be run async
-        if settings.ASYNC_FINDING_IMPORT:
-            chunked_list = self.chunk_endpoints(endpoint_status_list)
-            # If there is only one chunk, then do not bother with async
-            if len(chunked_list) < 2:
-                self.mitigate_endpoint_status(endpoint_status_list, user, sync=True)
-            logger.debug(f"Split endpoints into {len(chunked_list)} chunks of {len(chunked_list[0])}")
-            # First kick off all the workers
-            for endpoint_status_list in chunked_list:
-                self.mitigate_endpoint_status(endpoint_status_list, user, sync=False)
-        else:
-            self.mitigate_endpoint_status(endpoint_status_list, user, sync=True)
+        self.mitigate_endpoint_status(endpoint_status_list, user, sync=True)
        return
 
     def update_endpoint_status(
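Before this commit, each of the three methods above made the same decision: chunk the endpoint (status) list, stay synchronous when there was only a single chunk, otherwise fan out one asynchronous call per chunk. A small illustrative sketch of that decision follows; `disperse` and `run` are hypothetical stand-ins for the real `add_endpoints_to_unsaved_finding` / `reactivate_endpoint_status` / `mitigate_endpoint_status` helpers. After the commit the equivalent body is simply one synchronous call.

```python
# Illustrative sketch of the removed dispatch decision; `run` is a stand-in
# for the real endpoint helpers, not a DefectDojo API.
def disperse(items: list, chunk_size: int, run) -> None:
    chunks = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
    if len(chunks) < 2:
        # Not worth spinning up separate tasks for a single chunk.
        run(items, sync=True)
        return
    for chunk in chunks:
        run(chunk, sync=False)  # previously: one Celery task per chunk


# After this commit the equivalent behaviour is always: run(items, sync=True)
disperse(list(range(5)), chunk_size=2,
         run=lambda batch, sync: print(f"{len(batch)} items, sync={sync}"))
```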

dojo/settings/settings.dist.py

Lines changed: 0 additions & 10 deletions
@@ -274,12 +274,6 @@
     DD_RATE_LIMITER_ACCOUNT_LOCKOUT=(bool, False),
     # when enabled SonarQube API parser will download the security hotspots
     DD_SONARQUBE_API_PARSER_HOTSPOTS=(bool, True),
-    # when enabled, finding importing will occur asynchronously, default False
-    # This experimental feature has been deprecated as of DefectDojo 2.44.0 (March release). Please exercise caution if using this feature with an older version of DefectDojo, as results may be inconsistent.
-    DD_ASYNC_FINDING_IMPORT=(bool, False),
-    # The number of findings to be processed per celeryworker
-    # This experimental feature has been deprecated as of DefectDojo 2.44.0 (March release). Please exercise caution if using this feature with an older version of DefectDojo, as results may be inconsistent.
-    DD_ASYNC_FINDING_IMPORT_CHUNK_SIZE=(int, 100),
     # When enabled, deleting objects will be occur from the bottom up. In the example of deleting an engagement
     # The objects will be deleted as follows Endpoints -> Findings -> Tests -> Engagement
     DD_ASYNC_OBJECT_DELETE=(bool, False),
@@ -1795,10 +1789,6 @@ def saml2_attrib_map_format(din):
 # Deside if SonarQube API parser should download the security hotspots
 SONARQUBE_API_PARSER_HOTSPOTS = env("DD_SONARQUBE_API_PARSER_HOTSPOTS")
 
-# when enabled, finding importing will occur asynchronously, default False
-ASYNC_FINDING_IMPORT = env("DD_ASYNC_FINDING_IMPORT")
-# The number of findings to be processed per celeryworker
-ASYNC_FINDING_IMPORT_CHUNK_SIZE = env("DD_ASYNC_FINDING_IMPORT_CHUNK_SIZE")
 # When enabled, deleting objects will be occur from the bottom up. In the example of deleting an engagement
 # The objects will be deleted as follows Endpoints -> Findings -> Tests -> Engagement
 ASYNC_OBJECT_DELETE = env("DD_ASYNC_OBJECT_DELETE")
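
For context on the two deleted scheme entries: `settings.dist.py` uses django-environ, where each entry declares a cast type and a default, and the value is later read with `env("NAME")`. A minimal sketch of that pattern with the removed variable names, assuming django-environ is installed; this is illustration only, since the settings no longer exist in 2.47.0.

```python
# Minimal django-environ sketch of the pattern used in settings.dist.py.
# The two DD_ASYNC_* entries shown here are the ones removed by this commit.
import os

import environ

env = environ.Env(
    DD_ASYNC_FINDING_IMPORT=(bool, False),          # removed in 2.47.0
    DD_ASYNC_FINDING_IMPORT_CHUNK_SIZE=(int, 100),  # removed in 2.47.0
)

os.environ.setdefault("DD_ASYNC_FINDING_IMPORT", "True")
print(env("DD_ASYNC_FINDING_IMPORT"))               # True  (cast to bool)
print(env("DD_ASYNC_FINDING_IMPORT_CHUNK_SIZE"))    # 100   (falls back to default)
```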
