From 924185557b77a5a2f8350d0fd04d1230b7ce2f9d Mon Sep 17 00:00:00 2001
From: Valentijn Scholten <valentijnscholten@gmail.com>
Date: Fri, 16 May 2025 17:56:47 +0200
Subject: [PATCH 1/4] unique_id_from_tool_remark

---
 .../en/open_source/contributing/how-to-write-a-parser.md | 9 ++++-----
 dojo/settings/settings.dist.py                           | 2 ++
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/docs/content/en/open_source/contributing/how-to-write-a-parser.md b/docs/content/en/open_source/contributing/how-to-write-a-parser.md
index 6c210ac130c..092b4001631 100644
--- a/docs/content/en/open_source/contributing/how-to-write-a-parser.md
+++ b/docs/content/en/open_source/contributing/how-to-write-a-parser.md
@@ -37,8 +37,8 @@ $ docker compose build --build-arg uid=1000
 |`unittests/scans/<parser_dir>/{many_vulns,no_vuln,one_vuln}.json` | Sample files containing meaningful data for unit tests. The minimal set.
 |`unittests/tools/test_<parser_name>_parser.py` | Unit tests of the parser.
 |`dojo/settings/settings.dist.py`               | If you want to use a modern hashcode based deduplication algorithm
-|`docs/content/en/connecting_your_tools/parsers/<file/api>/<parser_file>.md` | Documentation, what kind of file format is required and how it should be obtained 
-    
+|`docs/content/en/connecting_your_tools/parsers/<file/api>/<parser_file>.md` | Documentation, what kind of file format is required and how it should be obtained
+
 
 ## Factory contract
 
@@ -145,7 +145,7 @@ Very bad example:
 Various file formats are handled through libraries. In order to keep DefectDojo slim and also don't extend the attack surface, keep the number of libraries used minimal and take other parsers as an example.
 
 #### defusedXML in favour of lxml
-As xml is by default an unsecure format, the information parsed from various xml output has to be parsed in a secure way. Within an evaluation, we determined that defusedXML is the library which we will use in the future to parse xml files in parsers as this library is rated more secure. Thus, we will only accept PRs with the defusedxml library. 
+As xml is by default an unsecure format, the information parsed from various xml output has to be parsed in a secure way. Within an evaluation, we determined that defusedXML is the library which we will use in the future to parse xml files in parsers as this library is rated more secure. Thus, we will only accept PRs with the defusedxml library.
 
 ### Not all attributes are mandatory
 
@@ -232,7 +232,7 @@ Bad example (DIY):
 
 By default a new parser uses the 'legacy' deduplication algorithm documented at https://documentation.defectdojo.com/usage/features/#deduplication-algorithms
 
-Please use a pre-defined deduplication algorithm where applicable.
+Please use a pre-defined deduplication algorithm where applicable. When using the `unique_id_from_tool` or `vuln_id_from_tool` fields in the hash code configuration, it's important that these are unique for the finding and constant over time across subsequent scans. If this is not the case, the values can still be useful to set on the finding model without using them for deduplication.
 
 ## Unit tests
 
@@ -366,4 +366,3 @@ Please add a new .md file in [`docs/content/en/connecting_your_tools/parsers`] w
 * A link to the scanner itself - (e.g. GitHub or vendor link)
 
 Here is an example of a completed Parser documentation page: [https://github.com/DefectDojo/django-DefectDojo/blob/master/docs/content/en/connecting_your_tools/parsers/file/acunetix.md](https://github.com/DefectDojo/django-DefectDojo/blob/master/docs/content/en/connecting_your_tools/parsers/file/acunetix.md)
-
diff --git a/dojo/settings/settings.dist.py b/dojo/settings/settings.dist.py
index 60579577150..6f4af1618ea 100644
--- a/dojo/settings/settings.dist.py
+++ b/dojo/settings/settings.dist.py
@@ -1433,6 +1433,8 @@ def saml2_attrib_map_format(din):
 # legacy one with multiple conditions (default mode)
 DEDUPE_ALGO_LEGACY = "legacy"
 # based on dojo_finding.unique_id_from_tool only (for checkmarx detailed, or sonarQube detailed for example)
+# When using the `unique_id_from_tool` or `vuln_id_from_tool` fields for deduplication, it's important that these are unique for the finding and constant over time across subsequent scans.
+# If this is not the case, the values can still be useful to set on the finding model without using them for deduplication.
 DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL = "unique_id_from_tool"
 # based on dojo_finding.hash_code only
 DEDUPE_ALGO_HASH_CODE = "hash_code"

From 8b2d9ef260ee741380aea25c1083bdf58840d5d5 Mon Sep 17 00:00:00 2001
From: Valentijn Scholten <valentijnscholten@gmail.com>
Date: Fri, 16 May 2025 17:57:46 +0200
Subject: [PATCH 2/4] unique_id_from_tool_remark

---
 .../content/en/open_source/contributing/how-to-write-a-parser.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/content/en/open_source/contributing/how-to-write-a-parser.md b/docs/content/en/open_source/contributing/how-to-write-a-parser.md
index 092b4001631..193d8912b52 100644
--- a/docs/content/en/open_source/contributing/how-to-write-a-parser.md
+++ b/docs/content/en/open_source/contributing/how-to-write-a-parser.md
@@ -233,6 +233,7 @@ Bad example (DIY):
 By default a new parser uses the 'legacy' deduplication algorithm documented at https://documentation.defectdojo.com/usage/features/#deduplication-algorithms
 
 Please use a pre-defined deduplication algorithm where applicable. When using the `unique_id_from_tool` or `vuln_id_from_tool` fields in the hash code configuration, it's important that these are unique for the finding and constant over time across subsequent scans. If this is not the case, the values can still be useful to set on the finding model without using them for deduplication.
+The values must come directly from the report and must not be calculated internally by the parser.
 
 ## Unit tests
 

From e38f6fcad24c0109470b97efd2b71f190ad9e39e Mon Sep 17 00:00:00 2001
From: Valentijn Scholten <valentijnscholten@gmail.com>
Date: Fri, 16 May 2025 18:25:54 +0200
Subject: [PATCH 3/4] unique_id_from_tool_remark

---
 dojo/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dojo/models.py b/dojo/models.py
index b418168d213..f76468cdcc9 100644
--- a/dojo/models.py
+++ b/dojo/models.py
@@ -2562,7 +2562,7 @@ class Finding(models.Model):
                                            blank=True,
                                            max_length=500,
                                            verbose_name=_("Unique ID from tool"),
-                                           help_text=_("Vulnerability technical id from the source tool. Allows to track unique vulnerabilities."))
+                                           help_text=_("Vulnerability technical id from the source tool. Allows to track unique vulnerabilities over time across subsequent scans."))
     vuln_id_from_tool = models.CharField(null=True,
                                          blank=True,
                                          max_length=500,

From 0b35178724d42104e61c53d9e439d4fcc4e658b0 Mon Sep 17 00:00:00 2001
From: Valentijn Scholten <valentijnscholten@gmail.com>
Date: Mon, 19 May 2025 18:14:27 +0200
Subject: [PATCH 4/4] add migration for textual changes

---
 .../0229_alter_finding_unique_id_from_tool.py  | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 dojo/db_migrations/0229_alter_finding_unique_id_from_tool.py

diff --git a/dojo/db_migrations/0229_alter_finding_unique_id_from_tool.py b/dojo/db_migrations/0229_alter_finding_unique_id_from_tool.py
new file mode 100644
index 00000000000..b7111668370
--- /dev/null
+++ b/dojo/db_migrations/0229_alter_finding_unique_id_from_tool.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.1.8 on 2025-05-19 16:14
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('dojo', '0228_alter_jira_username_password'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='finding',
+            name='unique_id_from_tool',
+            field=models.CharField(blank=True, help_text='Vulnerability technical id from the source tool. Allows to track unique vulnerabilities over time across subsequent scans.', max_length=500, null=True, verbose_name='Unique ID from tool'),
+        ),
+    ]