Skip to content

Commit 56de573

Browse files
Fix package detection info in DiscoveredPackages #1099 (#1100)
* Fix package detection info in DiscoveredPackages #1099 Reference: #1099 Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> * Add package detection info in the UI * Adds package detection info in the package details view * Adds data migration for datasource_id Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> * Add changelog entry and address feedback Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> --------- Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent fb6476d commit 56de573

24 files changed

+981
-153
lines changed

CHANGELOG.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,14 @@ v34.1.0 (unreleased)
3131
- Rename the ``match_to_purldb`` pipeline to ``match_to_matchcode``, and add
3232
MatchCode.io API settings to ScanCode.io settings.
3333

34+
- In the DiscoveredPackage model, rename the "datasource_id" attribute to
35+
"datasource_ids" and add a new attribute "datafile_paths". This is aligned
36+
with the scancode-toolkit Package model, and package detection information
37+
is now stored correctly. Also update the UI for discovered packages to
38+
show the corresponding package datafiles and their datasource IDs.
39+
A data migration is included to facilitate the migration of existing data.
40+
https://github.com/nexB/scancode.io/issues/1099
41+
3442
v34.0.0 (2024-03-04)
3543
--------------------
3644

scanpipe/api/serializers.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,8 @@ class Meta:
380380
"source_packages",
381381
"extra_data",
382382
"package_uid",
383-
"datasource_id",
383+
"datasource_ids",
384+
"datafile_paths",
384385
"file_references",
385386
"missing_resources",
386387
"modified_resources",
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# Generated by Django 5.0.2 on 2024-03-01 16:09
2+
3+
from django.db import migrations, models
4+
from django.db.models import Q
5+
6+
7+
def update_package_datasource_ids(apps, schema_editor):
    """
    Backfill the DiscoveredPackage `datasource_ids` list from the legacy
    single-value `datasource_id` field.

    Only packages whose `datasource_id` is not the empty string are selected;
    each one gets `datasource_ids` set to a one-item list holding that value.
    `schema_editor` is part of the RunPython callable signature and is not
    used here.
    """
    DiscoveredPackage = apps.get_model("scanpipe", "DiscoveredPackage")
    packages = DiscoveredPackage.objects.filter(~Q(datasource_id=""))

    total = packages.count()
    print(f"\nCompute datasource_ids for {total:,} packages.")

    chunk_size = 2000
    pending = []
    rows = packages.iterator(chunk_size=chunk_size)
    for position, package in enumerate(rows, start=1):
        package.datasource_ids = [package.datasource_id]
        pending.append(package)

        # Emit a progress line once per full chunk read. `pending` can never
        # be empty at this point since the current package was just appended.
        if position % chunk_size == 0:
            print(f" {position:,} / {total:,} computed")

    # NOTE(review): every selected package stays referenced in `pending` until
    # this single write; the rows are only split into batches of 1000 by
    # bulk_update itself.
    print("Updating DB objects...")
    DiscoveredPackage.objects.bulk_update(
        objs=pending,
        fields=["datasource_ids"],
        batch_size=1000,
    )
36+
37+
38+
class Migration(migrations.Migration):
    """
    Replace DiscoveredPackage `datasource_id` with the list-valued
    `datasource_ids` and add the new `datafile_paths` list field.
    """

    dependencies = [("scanpipe", "0054_rename_pipeline")]

    operations = [
        # Step 1: create the new list field while the legacy column still
        # exists, so the data migration below can read from it.
        migrations.AddField(
            model_name="discoveredpackage",
            name="datasource_ids",
            field=models.JSONField(
                default=list,
                blank=True,
                help_text=(
                    "The identifiers for the datafile handlers used to "
                    "obtain this package."
                ),
            ),
        ),
        # Step 2: copy existing values across. Reversing is a no-op, so the
        # copied values are not written back when migrating backwards.
        migrations.RunPython(
            update_package_datasource_ids,
            reverse_code=migrations.RunPython.noop,
        ),
        # Step 3: drop the legacy single-value column once its data is copied.
        migrations.RemoveField(
            model_name="discoveredpackage",
            name="datasource_id",
        ),
        # Step 4: add the list of datafile paths; rows start with the
        # `list` default.
        # NOTE(review): "pacakage" below is a typo reproduced verbatim;
        # correcting it presumably needs a matching change on the model
        # field's help_text to avoid migration drift - confirm before fixing.
        migrations.AddField(
            model_name="discoveredpackage",
            name="datafile_paths",
            field=models.JSONField(
                default=list,
                blank=True,
                help_text=(
                    "A list of Resource paths for package datafiles which "
                    "were used to assemble this pacakage."
                ),
            ),
        ),
    ]

scanpipe/models.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2890,11 +2890,19 @@ class AbstractPackage(models.Model):
28902890
blank=True,
28912891
help_text=_("A notice text for this package."),
28922892
)
2893-
datasource_id = models.CharField(
2894-
max_length=64,
2893+
datasource_ids = models.JSONField(
2894+
default=list,
2895+
blank=True,
2896+
help_text=_(
2897+
"The identifiers for the datafile handlers used to obtain this package."
2898+
),
2899+
)
2900+
datafile_paths = models.JSONField(
2901+
default=list,
28952902
blank=True,
28962903
help_text=_(
2897-
"The identifier for the datafile handler used to obtain this package."
2904+
"A list of Resource paths for package datafiles which were "
2905+
"used to assemble this pacakage."
28982906
),
28992907
)
29002908
file_references = models.JSONField(

scanpipe/pipes/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,14 @@ def _clean_package_data(package_data):
155155
if release_date.endswith("Z"):
156156
release_date = release_date[:-1]
157157
package_data["release_date"] = datetime.fromisoformat(release_date).date()
158+
159+
# Strip leading "codebase/" to make path compatible with
160+
# paths stored in resource database
161+
cleaned_datafile_paths = [
162+
path.removeprefix("codebase/")
163+
for path in package_data.get("datafile_paths", [])
164+
]
165+
package_data["datafile_paths"] = cleaned_datafile_paths
158166
return package_data
159167

160168

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
<div class="content">
2+
<table class="table is-bordered is-striped is-narrow is-hoverable is-fullwidth">
3+
<thead>
4+
<tr>
5+
<th>Datafile Paths</th>
6+
</tr>
7+
</thead>
8+
<tbody>
9+
{% for path in tab_data.fields.datafile_paths.value %}
10+
<tr>
11+
<td class="break-all">
12+
<a href="{% url 'resource_detail' project.slug path %}">{{ path }}</a>
13+
</td>
14+
</tr>
15+
{% endfor %}
16+
</tbody>
17+
</table>
18+
<table class="table is-bordered is-striped is-narrow is-hoverable is-fullwidth">
19+
<thead>
20+
<tr>
21+
<th>Datasource IDs</th>
22+
</tr>
23+
</thead>
24+
<tbody>
25+
{% for id in tab_data.fields.datasource_ids.value %}
26+
<tr>
27+
<td class="break-all">
28+
{{ id }}
29+
</td>
30+
</tr>
31+
{% endfor %}
32+
</tbody>
33+
</table>
34+
</div>

scanpipe/tests/data/alpine_3_15_4_scan_codebase.json

Lines changed: 84 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,12 @@
207207
]
208208
},
209209
"package_uid": "pkg:alpine/alpine-baselayout@3.2.0-r18?arch=x86_64&uuid=fixed-uid-done-for-testing-5642512d1758",
210-
"datasource_id": "",
210+
"datasource_ids": [
211+
"alpine_installed_db"
212+
],
213+
"datafile_paths": [
214+
"40e48c8ef2450e6a9e8d50b846a58ede43f1b01dd351d2bdd7dca14c5c033f20/lib/apk/db/installed"
215+
],
211216
"file_references": [],
212217
"missing_resources": [],
213218
"modified_resources": [],
@@ -439,7 +444,12 @@
439444
]
440445
},
441446
"package_uid": "pkg:alpine/alpine-keys@2.4-r1?arch=x86_64&uuid=fixed-uid-done-for-testing-5642512d1758",
442-
"datasource_id": "",
447+
"datasource_ids": [
448+
"alpine_installed_db"
449+
],
450+
"datafile_paths": [
451+
"40e48c8ef2450e6a9e8d50b846a58ede43f1b01dd351d2bdd7dca14c5c033f20/lib/apk/db/installed"
452+
],
443453
"file_references": [],
444454
"missing_resources": [],
445455
"modified_resources": [],
@@ -516,7 +526,12 @@
516526
],
517527
"extra_data": {},
518528
"package_uid": "pkg:alpine/apk-tools@2.12.7-r3?arch=x86_64&uuid=fixed-uid-done-for-testing-5642512d1758",
519-
"datasource_id": "",
529+
"datasource_ids": [
530+
"alpine_installed_db"
531+
],
532+
"datafile_paths": [
533+
"40e48c8ef2450e6a9e8d50b846a58ede43f1b01dd351d2bdd7dca14c5c033f20/lib/apk/db/installed"
534+
],
520535
"file_references": [],
521536
"missing_resources": [],
522537
"modified_resources": [],
@@ -604,7 +619,12 @@
604619
]
605620
},
606621
"package_uid": "pkg:alpine/busybox@1.34.1-r5?arch=x86_64&uuid=fixed-uid-done-for-testing-5642512d1758",
607-
"datasource_id": "",
622+
"datasource_ids": [
623+
"alpine_installed_db"
624+
],
625+
"datafile_paths": [
626+
"40e48c8ef2450e6a9e8d50b846a58ede43f1b01dd351d2bdd7dca14c5c033f20/lib/apk/db/installed"
627+
],
608628
"file_references": [],
609629
"missing_resources": [],
610630
"modified_resources": [],
@@ -692,7 +712,12 @@
692712
]
693713
},
694714
"package_uid": "pkg:alpine/ca-certificates-bundle@20211220-r0?arch=x86_64&uuid=fixed-uid-done-for-testing-5642512d1758",
695-
"datasource_id": "",
715+
"datasource_ids": [
716+
"alpine_installed_db"
717+
],
718+
"datafile_paths": [
719+
"40e48c8ef2450e6a9e8d50b846a58ede43f1b01dd351d2bdd7dca14c5c033f20/lib/apk/db/installed"
720+
],
696721
"file_references": [],
697722
"missing_resources": [],
698723
"modified_resources": [],
@@ -804,7 +829,12 @@
804829
]
805830
},
806831
"package_uid": "pkg:alpine/libcrypto1.1@1.1.1n-r0?arch=x86_64&uuid=fixed-uid-done-for-testing-5642512d1758",
807-
"datasource_id": "",
832+
"datasource_ids": [
833+
"alpine_installed_db"
834+
],
835+
"datafile_paths": [
836+
"40e48c8ef2450e6a9e8d50b846a58ede43f1b01dd351d2bdd7dca14c5c033f20/lib/apk/db/installed"
837+
],
808838
"file_references": [],
809839
"missing_resources": [],
810840
"modified_resources": [],
@@ -881,7 +911,12 @@
881911
],
882912
"extra_data": {},
883913
"package_uid": "pkg:alpine/libc-utils@0.7.2-r3?arch=x86_64&uuid=fixed-uid-done-for-testing-5642512d1758",
884-
"datasource_id": "",
914+
"datasource_ids": [
915+
"alpine_installed_db"
916+
],
917+
"datafile_paths": [
918+
"40e48c8ef2450e6a9e8d50b846a58ede43f1b01dd351d2bdd7dca14c5c033f20/lib/apk/db/installed"
919+
],
885920
"file_references": [],
886921
"missing_resources": [],
887922
"modified_resources": [],
@@ -969,7 +1004,12 @@
9691004
]
9701005
},
9711006
"package_uid": "pkg:alpine/libretls@3.3.4-r3?arch=x86_64&uuid=fixed-uid-done-for-testing-5642512d1758",
972-
"datasource_id": "",
1007+
"datasource_ids": [
1008+
"alpine_installed_db"
1009+
],
1010+
"datafile_paths": [
1011+
"40e48c8ef2450e6a9e8d50b846a58ede43f1b01dd351d2bdd7dca14c5c033f20/lib/apk/db/installed"
1012+
],
9731013
"file_references": [],
9741014
"missing_resources": [],
9751015
"modified_resources": [],
@@ -1057,7 +1097,12 @@
10571097
]
10581098
},
10591099
"package_uid": "pkg:alpine/libssl1.1@1.1.1n-r0?arch=x86_64&uuid=fixed-uid-done-for-testing-5642512d1758",
1060-
"datasource_id": "",
1100+
"datasource_ids": [
1101+
"alpine_installed_db"
1102+
],
1103+
"datafile_paths": [
1104+
"40e48c8ef2450e6a9e8d50b846a58ede43f1b01dd351d2bdd7dca14c5c033f20/lib/apk/db/installed"
1105+
],
10611106
"file_references": [],
10621107
"missing_resources": [],
10631108
"modified_resources": [],
@@ -1145,7 +1190,12 @@
11451190
]
11461191
},
11471192
"package_uid": "pkg:alpine/musl@1.2.2-r7?arch=x86_64&uuid=fixed-uid-done-for-testing-5642512d1758",
1148-
"datasource_id": "",
1193+
"datasource_ids": [
1194+
"alpine_installed_db"
1195+
],
1196+
"datafile_paths": [
1197+
"40e48c8ef2450e6a9e8d50b846a58ede43f1b01dd351d2bdd7dca14c5c033f20/lib/apk/db/installed"
1198+
],
11491199
"file_references": [],
11501200
"missing_resources": [],
11511201
"modified_resources": [],
@@ -1222,7 +1272,12 @@
12221272
],
12231273
"extra_data": {},
12241274
"package_uid": "pkg:alpine/musl-utils@1.2.2-r7?arch=x86_64&uuid=fixed-uid-done-for-testing-5642512d1758",
1225-
"datasource_id": "",
1275+
"datasource_ids": [
1276+
"alpine_installed_db"
1277+
],
1278+
"datafile_paths": [
1279+
"40e48c8ef2450e6a9e8d50b846a58ede43f1b01dd351d2bdd7dca14c5c033f20/lib/apk/db/installed"
1280+
],
12261281
"file_references": [],
12271282
"missing_resources": [],
12281283
"modified_resources": [],
@@ -1299,7 +1354,12 @@
12991354
],
13001355
"extra_data": {},
13011356
"package_uid": "pkg:alpine/scanelf@1.3.3-r0?arch=x86_64&uuid=fixed-uid-done-for-testing-5642512d1758",
1302-
"datasource_id": "",
1357+
"datasource_ids": [
1358+
"alpine_installed_db"
1359+
],
1360+
"datafile_paths": [
1361+
"40e48c8ef2450e6a9e8d50b846a58ede43f1b01dd351d2bdd7dca14c5c033f20/lib/apk/db/installed"
1362+
],
13031363
"file_references": [],
13041364
"missing_resources": [],
13051365
"modified_resources": [],
@@ -1376,7 +1436,12 @@
13761436
],
13771437
"extra_data": {},
13781438
"package_uid": "pkg:alpine/ssl_client@1.34.1-r5?arch=x86_64&uuid=fixed-uid-done-for-testing-5642512d1758",
1379-
"datasource_id": "",
1439+
"datasource_ids": [
1440+
"alpine_installed_db"
1441+
],
1442+
"datafile_paths": [
1443+
"40e48c8ef2450e6a9e8d50b846a58ede43f1b01dd351d2bdd7dca14c5c033f20/lib/apk/db/installed"
1444+
],
13801445
"file_references": [],
13811446
"missing_resources": [],
13821447
"modified_resources": [],
@@ -1464,7 +1529,12 @@
14641529
]
14651530
},
14661531
"package_uid": "pkg:alpine/zlib@1.2.12-r0?arch=x86_64&uuid=fixed-uid-done-for-testing-5642512d1758",
1467-
"datasource_id": "",
1532+
"datasource_ids": [
1533+
"alpine_installed_db"
1534+
],
1535+
"datafile_paths": [
1536+
"40e48c8ef2450e6a9e8d50b846a58ede43f1b01dd351d2bdd7dca14c5c033f20/lib/apk/db/installed"
1537+
],
14681538
"file_references": [],
14691539
"missing_resources": [],
14701540
"modified_resources": [],

0 commit comments

Comments
 (0)