diff --git a/docs/content/en/open_source/contributing/how-to-write-a-parser.md b/docs/content/en/open_source/contributing/how-to-write-a-parser.md index 6c210ac130c..193d8912b52 100644 --- a/docs/content/en/open_source/contributing/how-to-write-a-parser.md +++ b/docs/content/en/open_source/contributing/how-to-write-a-parser.md @@ -37,8 +37,8 @@ $ docker compose build --build-arg uid=1000 |`unittests/scans//{many_vulns,no_vuln,one_vuln}.json` | Sample files containing meaningful data for unit tests. The minimal set. |`unittests/tools/test__parser.py` | Unit tests of the parser. |`dojo/settings/settings.dist.py` | If you want to use a modern hashcode based deduplication algorithm -|`docs/content/en/connecting_your_tools/parsers//.md` | Documentation, what kind of file format is required and how it should be obtained - +|`docs/content/en/connecting_your_tools/parsers//.md` | Documentation, what kind of file format is required and how it should be obtained + ## Factory contract @@ -145,7 +145,7 @@ Very bad example: Various file formats are handled through libraries. In order to keep DefectDojo slim and also don't extend the attack surface, keep the number of libraries used minimal and take other parsers as an example. #### defusedXML in favour of lxml -As xml is by default an unsecure format, the information parsed from various xml output has to be parsed in a secure way. Within an evaluation, we determined that defusedXML is the library which we will use in the future to parse xml files in parsers as this library is rated more secure. Thus, we will only accept PRs with the defusedxml library. +As xml is by default an unsecure format, the information parsed from various xml output has to be parsed in a secure way. Within an evaluation, we determined that defusedXML is the library which we will use in the future to parse xml files in parsers as this library is rated more secure. Thus, we will only accept PRs with the defusedxml library. 
### Not all attributes are mandatory @@ -232,7 +232,8 @@ Bad example (DIY): By default a new parser uses the 'legacy' deduplication algorithm documented at https://documentation.defectdojo.com/usage/features/#deduplication-algorithms -Please use a pre-defined deduplication algorithm where applicable. +Please use a pre-defined deduplication algorithm where applicable. When using the `unique_id_from_tool` or `vuln_id_from_tool` fields in the hash code configuration, it's important that these are unique for the finding and constant over time across subsequent scans. If this is not the case, the values can still be useful to set on the finding model without using them for deduplication. +The values must come directly from the report and must not be calculated internally by the parser. ## Unit tests @@ -366,4 +367,3 @@ Please add a new .md file in [`docs/content/en/connecting_your_tools/parsers`] w * A link to the scanner itself - (e.g. GitHub or vendor link) Here is an example of a completed Parser documentation page: [https://github.com/DefectDojo/django-DefectDojo/blob/master/docs/content/en/connecting_your_tools/parsers/file/acunetix.md](https://github.com/DefectDojo/django-DefectDojo/blob/master/docs/content/en/connecting_your_tools/parsers/file/acunetix.md) - diff --git a/dojo/db_migrations/0229_alter_finding_unique_id_from_tool.py b/dojo/db_migrations/0229_alter_finding_unique_id_from_tool.py new file mode 100644 index 00000000000..b7111668370 --- /dev/null +++ b/dojo/db_migrations/0229_alter_finding_unique_id_from_tool.py @@ -0,0 +1,18 @@ +# Generated by Django 5.1.8 on 2025-05-19 16:14 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('dojo', '0228_alter_jira_username_password'), + ] + + operations = [ + migrations.AlterField( + model_name='finding', + name='unique_id_from_tool', + field=models.CharField(blank=True, help_text='Vulnerability technical id from the source tool. 
Allows to track unique vulnerabilities over time across subsequent scans.', max_length=500, null=True, verbose_name='Unique ID from tool'), + ), + ] diff --git a/dojo/models.py b/dojo/models.py index b418168d213..f76468cdcc9 100644 --- a/dojo/models.py +++ b/dojo/models.py @@ -2562,7 +2562,7 @@ class Finding(models.Model): blank=True, max_length=500, verbose_name=_("Unique ID from tool"), - help_text=_("Vulnerability technical id from the source tool. Allows to track unique vulnerabilities.")) + help_text=_("Vulnerability technical id from the source tool. Allows to track unique vulnerabilities over time across subsequent scans.")) vuln_id_from_tool = models.CharField(null=True, blank=True, max_length=500, diff --git a/dojo/settings/settings.dist.py b/dojo/settings/settings.dist.py index 60579577150..6f4af1618ea 100644 --- a/dojo/settings/settings.dist.py +++ b/dojo/settings/settings.dist.py @@ -1433,6 +1433,8 @@ def saml2_attrib_map_format(din): # legacy one with multiple conditions (default mode) DEDUPE_ALGO_LEGACY = "legacy" # based on dojo_finding.unique_id_from_tool only (for checkmarx detailed, or sonarQube detailed for example) +# When using the `unique_id_from_tool` or `vuln_id_from_tool` fields for deduplication, it's important that these are unique for the finding and constant over time across subsequent scans. +# If this is not the case, the values can still be useful to set on the finding model without using them for deduplication. DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL = "unique_id_from_tool" # based on dojo_finding.hash_code only DEDUPE_ALGO_HASH_CODE = "hash_code"