Skip to content

Commit 6aa0afc

Browse files
Populate package and dependency attributes in inspect_packages (#1180)
* Populate package and dependency attributes in inspect_packages Properly populate dependency attributes like for_package, datasource_id and datafile_resource for DiscoveredDependencies and codebase_resources in DiscoveredPackages found in inspect_packages pipeline. Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> * Address review comments Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> --------- Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent 58a45cc commit 6aa0afc

File tree

5 files changed

+52
-7
lines changed

5 files changed

+52
-7
lines changed

CHANGELOG.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@ v34.5.0 (unreleased)
2323
symbol and string using tree-sitter.
2424
https://github.com/nexB/scancode.io/pull/1181
2525

26+
- Fix `inspect_packages` pipeline to properly link discovered packages and dependencies to
27+
codebase resources of package manifests where they were found. Also correctly assign
28+
the datasource_ids attribute for packages and dependencies.
29+
https://github.com/nexB/scancode.io/pull/1180/
30+
2631
v34.4.0 (2024-04-22)
2732
--------------------
2833

scanpipe/models.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,7 @@ class UpdateMixin:
435435

436436
def update(self, **kwargs):
437437
"""
438-
Update this resource with the provided ``kwargs`` values.
438+
Update this instance with the provided ``kwargs`` values.
439439
The full ``save()`` process will be triggered, including signals, and the
440440
``update_fields`` is automatically set.
441441
"""
@@ -3404,6 +3404,7 @@ def create_from_data(
34043404
dependency_data,
34053405
for_package=None,
34063406
datafile_resource=None,
3407+
datasource_id=None,
34073408
strip_datafile_path_root=False,
34083409
):
34093410
"""
@@ -3449,6 +3450,9 @@ def create_from_data(
34493450
datafile_path = "/".join(segments[1:])
34503451
datafile_resource = project.codebaseresources.get(path=datafile_path)
34513452

3453+
if datasource_id:
3454+
dependency_data["datasource_id"] = datasource_id
3455+
34523456
# Set purl fields from `purl`
34533457
purl = dependency_data.get("purl")
34543458
purl_mapping = PackageURL.from_string(purl).to_dict()

scanpipe/pipes/__init__.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ def update_or_create_package(project, package_data, codebase_resources=None):
180180
package_data = _clean_package_data(package_data)
181181
# No values for package_uid requires to be empty string for proper queryset lookup
182182
package_uid = package_data.get("package_uid") or ""
183+
datasource_id = package_data.get("datasource_id") or ""
183184

184185
package = DiscoveredPackage.objects.get_or_none(
185186
project=project,
@@ -192,8 +193,14 @@ def update_or_create_package(project, package_data, codebase_resources=None):
192193
else:
193194
package = DiscoveredPackage.create_from_data(project, package_data)
194195

195-
if package and codebase_resources:
196-
package.add_resources(codebase_resources)
196+
if package:
197+
if datasource_id and datasource_id not in package.datasource_ids:
198+
datasource_ids = package.datasource_ids.copy()
199+
datasource_ids.append(datasource_id)
200+
package.update(datasource_ids=datasource_ids)
201+
202+
if codebase_resources:
203+
package.add_resources(codebase_resources)
197204

198205
return package
199206

@@ -210,7 +217,12 @@ def create_local_files_package(project, defaults, codebase_resources=None):
210217

211218

212219
def update_or_create_dependency(
213-
project, dependency_data, for_package=None, strip_datafile_path_root=False
220+
project,
221+
dependency_data,
222+
for_package=None,
223+
datafile_resource=None,
224+
datasource_id=None,
225+
strip_datafile_path_root=False,
214226
):
215227
"""
216228
Get, update or create a DiscoveredDependency then returns it.
@@ -241,6 +253,8 @@ def update_or_create_dependency(
241253
project,
242254
dependency_data,
243255
for_package=for_package,
256+
datafile_resource=datafile_resource,
257+
datasource_id=datasource_id,
244258
strip_datafile_path_root=strip_datafile_path_root,
245259
)
246260

scanpipe/pipes/scancode.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -478,11 +478,23 @@ def process_package_data(project):
478478

479479
package_data = pd.to_dict()
480480
dependencies = package_data.pop("dependencies")
481-
for dep in dependencies:
482-
pipes.update_or_create_dependency(project, dep)
483481

482+
package = None
484483
if pd.purl:
485-
pipes.update_or_create_package(project, package_data)
484+
package = pipes.update_or_create_package(
485+
project=project,
486+
package_data=package_data,
487+
codebase_resources=[resource],
488+
)
489+
490+
for dep in dependencies:
491+
pipes.update_or_create_dependency(
492+
project=project,
493+
dependency_data=dep,
494+
for_package=package,
495+
datafile_resource=resource,
496+
datasource_id=pd.datasource_id,
497+
)
486498

487499

488500
def get_packages_with_purl_from_resources(project):

scanpipe/tests/test_pipelines.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,16 @@ def test_scanpipe_inspect_packages_creates_packages_npm(self):
613613
self.assertEqual(1, project1.discoveredpackages.count())
614614
self.assertEqual(1, project1.discovereddependencies.count())
615615

616+
package = project1.discoveredpackages.get()
617+
dependency = project1.discovereddependencies.get()
618+
619+
self.assertEqual(1, package.codebase_resources.count())
620+
self.assertEqual("pkg:npm/is-npm@1.0.0", dependency.for_package.purl)
621+
self.assertEqual(package.datasource_ids, [dependency.datasource_id])
622+
self.assertEqual(
623+
package.codebase_resources.get().path, dependency.datafile_resource.path
624+
)
625+
616626
def test_scanpipe_inspect_packages_creates_packages_pypi(self):
617627
pipeline_name = "inspect_packages"
618628
project1 = Project.objects.create(name="Analysis")

0 commit comments

Comments
 (0)