Skip to content

Commit 224a00d

Browse files
authored
Generate a package_uid in create_from_data when not provided #1256 (#1258)
Signed-off-by: tdruez <tdruez@nexb.com>
1 parent 7fabcb2 commit 224a00d

File tree

7 files changed

+75
-30
lines changed

7 files changed

+75
-30
lines changed

CHANGELOG.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ v34.6.0 (unreleased)
2828
- Add a new ``run`` entry point for executing pipeline as a single command.
2929
https://github.com/nexB/scancode.io/pull/1256
3030

31+
- Generate a DiscoveredPackage.package_uid in create_from_data when not provided.
32+
https://github.com/nexB/scancode.io/issues/1256
33+
3134
v34.5.0 (2024-05-22)
3235
--------------------
3336

scanpipe/models.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
from licensedcode.cache import build_spdx_license_expression
7474
from licensedcode.cache import get_licensing
7575
from matchcode_toolkit.fingerprinting import IGNORED_DIRECTORY_FINGERPRINTS
76+
from packagedcode.models import build_package_uid
7677
from packageurl import PackageURL
7778
from packageurl import normalize_qualifiers
7879
from packageurl.contrib.django.models import PackageURLMixin
@@ -3143,6 +3144,17 @@ def create_from_data(cls, project, package_data):
31433144
}
31443145

31453146
discovered_package = cls(project=project, **cleaned_data)
3147+
3148+
# The ``package_uid`` field is not defined as required on the model,
3149+
# but it is essential for retrieving the Package object from the database
3150+
# in various places, such as in the ``update_or_create_resource`` function.
3151+
# If ``package_uid`` is not provided in the ``package_data``, a value is
3152+
# generated using the ``build_package_uid`` function from the ``packagedcode``
3153+
# module.
3154+
if not package_data.get("package_uid"):
3155+
package_uid = build_package_uid(discovered_package.package_url)
3156+
discovered_package.package_uid = package_uid
3157+
31463158
# Using save_error=False to not capture potential errors at this level but
31473159
# rather in the CodebaseResource.create_and_add_package method so resource data
31483160
# can be injected in the ProjectMessage record.

scanpipe/tests/data/d2d/about_files/expected.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@
7171
"notice_text": "",
7272
"source_packages": [],
7373
"extra_data": {},
74-
"package_uid": "",
74+
"package_uid": "pkg:local-files/analysis-90cb6382/90cb6382-431c-4187-be76-d4f1a2199a2f?uuid=fixed-uid-done-for-testing-5642512d1758",
7575
"datasource_ids": [],
7676
"datafile_paths": [],
7777
"file_references": [],
@@ -122,7 +122,7 @@
122122
"*flume-ng-node-*.jar-extract/org/apache/flume/node/ConfigurationProvider.class"
123123
]
124124
},
125-
"package_uid": "",
125+
"package_uid": "pkg:maven/log4j/log4j@1.2.13?uuid=fixed-uid-done-for-testing-5642512d1758",
126126
"datasource_ids": [],
127127
"datafile_paths": [],
128128
"file_references": [],
@@ -555,7 +555,7 @@
555555
"authors": [],
556556
"package_data": [],
557557
"for_packages": [
558-
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
558+
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=90cb6382-431c-4187-be76-d4f1a2199a2f"
559559
],
560560
"emails": [],
561561
"urls": [

scanpipe/tests/data/flume-ng-node-d2d.json

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@
7171
"notice_text": "",
7272
"source_packages": [],
7373
"extra_data": {},
74-
"package_uid": "",
74+
"package_uid": "pkg:local-files/analysis-b74fe5df/b74fe5df-e965-415e-ba65-f38421a0695d?uuid=fixed-uid-done-for-testing-5642512d1758",
7575
"datasource_ids": [],
7676
"datafile_paths": [],
7777
"file_references": [],
@@ -411,7 +411,7 @@
411411
"authors": [],
412412
"package_data": [],
413413
"for_packages": [
414-
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
414+
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
415415
],
416416
"emails": [],
417417
"urls": [
@@ -602,7 +602,7 @@
602602
"authors": [],
603603
"package_data": [],
604604
"for_packages": [
605-
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
605+
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
606606
],
607607
"emails": [],
608608
"urls": [
@@ -666,7 +666,7 @@
666666
"authors": [],
667667
"package_data": [],
668668
"for_packages": [
669-
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
669+
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
670670
],
671671
"emails": [],
672672
"urls": [
@@ -730,7 +730,7 @@
730730
"authors": [],
731731
"package_data": [],
732732
"for_packages": [
733-
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
733+
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
734734
],
735735
"emails": [],
736736
"urls": [
@@ -794,7 +794,7 @@
794794
"authors": [],
795795
"package_data": [],
796796
"for_packages": [
797-
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
797+
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
798798
],
799799
"emails": [],
800800
"urls": [
@@ -858,7 +858,7 @@
858858
"authors": [],
859859
"package_data": [],
860860
"for_packages": [
861-
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
861+
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
862862
],
863863
"emails": [],
864864
"urls": [
@@ -922,7 +922,7 @@
922922
"authors": [],
923923
"package_data": [],
924924
"for_packages": [
925-
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
925+
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
926926
],
927927
"emails": [],
928928
"urls": [
@@ -986,7 +986,7 @@
986986
"authors": [],
987987
"package_data": [],
988988
"for_packages": [
989-
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
989+
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
990990
],
991991
"emails": [],
992992
"urls": [
@@ -1050,7 +1050,7 @@
10501050
"authors": [],
10511051
"package_data": [],
10521052
"for_packages": [
1053-
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
1053+
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
10541054
],
10551055
"emails": [],
10561056
"urls": [
@@ -1114,7 +1114,7 @@
11141114
"authors": [],
11151115
"package_data": [],
11161116
"for_packages": [
1117-
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
1117+
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
11181118
],
11191119
"emails": [],
11201120
"urls": [
@@ -1178,7 +1178,7 @@
11781178
"authors": [],
11791179
"package_data": [],
11801180
"for_packages": [
1181-
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
1181+
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
11821182
],
11831183
"emails": [],
11841184
"urls": [
@@ -1242,7 +1242,7 @@
12421242
"authors": [],
12431243
"package_data": [],
12441244
"for_packages": [
1245-
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758"
1245+
"pkg:local-files/fixed-namespace-for-testing-5642512d1758/fixed-name-for-testing-5642512d1758?uuid=b74fe5df-e965-415e-ba65-f38421a0695d"
12461246
],
12471247
"emails": [],
12481248
"urls": [

scanpipe/tests/pipes/test_pipes.py

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ def test_scanpipe_pipes_update_or_create_package(self):
111111

112112
# Make sure we can assign a package to multiple Resources calling
113113
# update_or_create_package() several times.
114+
package_data2["package_uid"] = package2.package_uid
114115
resource2 = make_resource_file(project=p1, path="filename2.ext")
115116
package2 = pipes.update_or_create_package(p1, package_data2, [resource2])
116117
self.assertIn(package2, resource1.discovered_packages.all())
@@ -157,25 +158,42 @@ def test_scanpipe_pipes_create_local_files_package(self, mock_uuid4):
157158
self.assertEqual(expected_purl, local_package.purl)
158159
self.assertEqual("mit", local_package.declared_license_expression)
159160
self.assertEqual("Copyright", local_package.copyright)
160-
self.assertEqual([expected_purl], resource1.for_packages)
161+
self.assertEqual(
162+
[f"{expected_purl}?uuid={forced_uuid}"], resource1.for_packages
163+
)
161164

162165
def test_scanpipe_pipes_update_or_create_package_package_uid(self):
163166
p1 = Project.objects.create(name="Analysis")
164167
package_data = dict(package_data1)
165168

166169
package_data["package_uid"] = None
167-
pipes.update_or_create_package(p1, package_data)
168-
pipes.update_or_create_package(p1, package_data)
170+
package1 = pipes.update_or_create_package(p1, package_data)
171+
self.assertTrue(package1.package_uid)
169172

170173
package_data["package_uid"] = ""
171-
pipes.update_or_create_package(p1, package_data)
174+
package2 = pipes.update_or_create_package(p1, package_data)
175+
self.assertTrue(package2.package_uid)
172176

173177
del package_data["package_uid"]
174-
pipes.update_or_create_package(p1, package_data)
178+
package3 = pipes.update_or_create_package(p1, package_data)
179+
self.assertTrue(package3.package_uid)
180+
181+
self.assertNotEqual(package1.package_uid, package2.package_uid)
182+
self.assertNotEqual(package2.package_uid, package3.package_uid)
175183

176-
# Make sure only 1 package was created, then properly found in the db regardless
177-
# of the empty/none package_uid.
178-
self.assertEqual(1, DiscoveredPackage.objects.count())
184+
# A `package_uid` value is generated when not provided, making each
185+
# package instance unique.
186+
self.assertEqual(3, DiscoveredPackage.objects.count())
187+
188+
# When the provided `package_uid` matches an existing package in the db,
# that object is updated instead of a new one being created.
189+
package_data["package_uid"] = package1.package_uid
190+
package_data["sha1"] = "sha1"
191+
# Use a field that is currently empty (`sha1`) for this check, since
# override=False in update_from_data.
192+
self.assertEqual("", package1.sha1)
193+
pipes.update_or_create_package(p1, package_data)
194+
package1.refresh_from_db()
195+
self.assertEqual("sha1", package1.sha1)
196+
self.assertEqual(3, DiscoveredPackage.objects.count())
179197

180198
def test_scanpipe_pipes_update_or_create_dependency(self):
181199
p1 = Project.objects.create(name="Analysis")

scanpipe/tests/test_models.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2569,9 +2569,11 @@ def test_scanpipe_discovered_package_model_unique_package_uid_in_project(self):
25692569
package_data_no_uid = package_data1.copy()
25702570
package_data_no_uid.pop("package_uid")
25712571
package2 = DiscoveredPackage.create_from_data(project1, package_data_no_uid)
2572-
self.assertFalse(package2.package_uid)
2572+
self.assertTrue(package2.package_uid)
2573+
self.assertNotEqual(package.package_uid, package2.package_uid)
25732574
package3 = DiscoveredPackage.create_from_data(project1, package_data_no_uid)
2574-
self.assertFalse(package3.package_uid)
2575+
self.assertTrue(package3.package_uid)
2576+
self.assertNotEqual(package.package_uid, package3.package_uid)
25752577

25762578
@skipIf(connection.vendor == "sqlite", "No max_length constraints on SQLite.")
25772579
def test_scanpipe_codebase_resource_create_and_add_package_warnings(self):

scanpipe/tests/test_pipelines.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1107,10 +1107,15 @@ def test_scanpipe_load_sbom_pipeline_cyclonedx_integration(self):
11071107
self.assertEqual(expected["filename"], package.filename)
11081108

11091109
@mock.patch("scanpipe.pipes.purldb.request_post")
1110-
def test_scanpipe_deploy_to_develop_pipeline_integration(self, mock_request):
1110+
@mock.patch("uuid.uuid4")
1111+
def test_scanpipe_deploy_to_develop_pipeline_integration(
1112+
self, mock_uuid4, mock_request
1113+
):
1114+
forced_uuid = "b74fe5df-e965-415e-ba65-f38421a0695d"
1115+
mock_uuid4.return_value = forced_uuid
11111116
mock_request.return_value = None
11121117
pipeline_name = "map_deploy_to_develop"
1113-
project1 = Project.objects.create(name="Analysis")
1118+
project1 = Project.objects.create(name="Analysis", uuid=forced_uuid)
11141119

11151120
jar_location = self.data_location / "d2d" / "jars"
11161121
project1.copy_input_from(jar_location / "from-flume-ng-node-1.9.0.zip")
@@ -1132,10 +1137,15 @@ def test_scanpipe_deploy_to_develop_pipeline_integration(self, mock_request):
11321137
self.assertPipelineResultEqual(expected_file, result_file)
11331138

11341139
@mock.patch("scanpipe.pipes.purldb.request_post")
1135-
def test_scanpipe_deploy_to_develop_pipeline_with_about_file(self, mock_request):
1140+
@mock.patch("uuid.uuid4")
1141+
def test_scanpipe_deploy_to_develop_pipeline_with_about_file(
1142+
self, mock_uuid4, mock_request
1143+
):
1144+
forced_uuid = "90cb6382-431c-4187-be76-d4f1a2199a2f"
1145+
mock_uuid4.return_value = forced_uuid
11361146
mock_request.return_value = None
11371147
pipeline_name = "map_deploy_to_develop"
1138-
project1 = Project.objects.create(name="Analysis")
1148+
project1 = Project.objects.create(name="Analysis", uuid=forced_uuid)
11391149

11401150
data_dir = self.data_location / "d2d" / "about_files"
11411151
project1.copy_input_from(data_dir / "from-with-about-file.zip")

0 commit comments

Comments
 (0)