Skip to content

Commit 61bc390

Browse files
authored
Add discovered dependencies model and queryset methods (#1723)
Signed-off-by: tdruez <tdruez@nexb.com>
1 parent cbf1348 commit 61bc390

File tree

4 files changed

+114
-57
lines changed

4 files changed

+114
-57
lines changed

scanpipe/models.py

Lines changed: 80 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -3772,6 +3772,18 @@ class DiscoveredDependencyQuerySet(
37723772
VulnerabilityQuerySetMixin,
37733773
ProjectRelatedQuerySet,
37743774
):
3775+
def project_dependencies(self):
3776+
return self.filter(for_package__isnull=True)
3777+
3778+
def package_dependencies(self):
3779+
return self.filter(for_package__isnull=False)
3780+
3781+
def resolved(self):
3782+
return self.filter(resolved_to_package__isnull=False)
3783+
3784+
def unresolved(self):
3785+
return self.filter(resolved_to_package__isnull=True)
3786+
37753787
def prefetch_for_serializer(self):
37763788
"""
37773789
Optimized prefetching for a QuerySet to be consumed by the
@@ -3816,6 +3828,26 @@ class DiscoveredDependency(
38163828
system and application packages discovered in the code under analysis.
38173829
Dependencies are usually collected from parsed package data such as a package
38183830
manifest or lockfile.
3831+
3832+
This class manages dependencies with the following considerations:
3833+
3834+
1. A dependency can be associated with a Package via the ``for_package`` field.
3835+
In this case, it is termed a "Package's dependency".
3836+
If there is no such association, the dependency is considered a
3837+
"Project's dependency".
3838+
3839+
2. A dependency can also be linked to a Package through the ``resolved_to_package``
3840+
field. When this link exists, the dependency is considered "resolved".
3841+
3842+
3. Dependencies can be either direct or transitive:
3843+
- A **direct dependency** is explicitly declared in a package manifest or
3844+
lockfile.
3845+
- A **transitive dependency** is not declared directly, but is required by one
3846+
of the project's direct dependencies.
3847+
3848+
Understanding the distinction between direct and transitive dependencies is
3849+
important for analyzing dependency trees, resolving version conflicts, and
3850+
assessing potential security risks.
38193851
"""
38203852

38213853
# Overrides the `project` field to set the proper `related_name`.
@@ -3966,6 +3998,24 @@ def datafile_path(self):
39663998
if self.datafile_resource:
39673999
return self.datafile_resource.path
39684000

4001+
@property
4002+
def is_project_dependency(self):
4003+
"""
4004+
Return True if the dependency is directly associated with the project
4005+
(not tied to a specific package).
4006+
"""
4007+
return not bool(self.for_package_id)
4008+
4009+
@property
4010+
def is_package_dependency(self):
4011+
"""Return True if the dependency is explicitly associated with a package."""
4012+
return bool(self.for_package_id)
4013+
4014+
@property
4015+
def is_resolved_to_package(self):
4016+
"""Return True if the dependency is resolved to a package."""
4017+
return bool(self.resolved_to_package_id)
4018+
39694019
@classmethod
39704020
def create_from_data(
39714021
cls,
@@ -3981,6 +4031,14 @@ def create_from_data(
39814031
Create and return a DiscoveredDependency for a `project` from the
39824032
`dependency_data`.
39834033
4034+
The `for_package` and `resolved_to_package` FKs can be provided as args,
4035+
or in the `dependency_data` using the `for_package_uid` and
4036+
`resolve_to_package_uid` keys.
4037+
4038+
Note that a dependency:
4039+
- without a `for_package` FK is a "Project's dependency"
4040+
- without a `resolved_to_package` FK is "unresolved".
4041+
39844042
If `strip_datafile_path_root` is True, then `create_from_data()` will
39854043
strip the root path segment from the `datafile_path` of
39864044
`dependency_data` before looking up the corresponding CodebaseResource
@@ -3989,51 +4047,36 @@ def create_from_data(
39894047
not stripped for `datafile_path`.
39904048
"""
39914049
dependency_data = dependency_data.copy()
3992-
required_fields = ["purl", "dependency_uid"]
3993-
missing_values = [
3994-
field_name
3995-
for field_name in required_fields
3996-
if not dependency_data.get(field_name)
3997-
]
4050+
project_packages_qs = project.discoveredpackages
39984051

3999-
if missing_values:
4000-
message = (
4001-
f"No values for the following required fields: "
4002-
f"{', '.join(missing_values)}"
4003-
)
4052+
if not dependency_data.get("dependency_uid"):
4053+
dependency_data["dependency_uid"] = str(uuid.uuid4())
40044054

4005-
project.add_warning(description=message, model=cls, details=dependency_data)
4006-
return
4007-
4008-
if not for_package:
4009-
for_package_uid = dependency_data.get("for_package_uid")
4010-
if for_package_uid:
4011-
for_package = project.discoveredpackages.get(
4012-
package_uid=for_package_uid
4013-
)
4055+
for_package_uid = dependency_data.get("for_package_uid")
4056+
if not for_package and for_package_uid:
4057+
for_package = project_packages_qs.get_or_none(package_uid=for_package_uid)
40144058

4015-
if not resolved_to_package:
4016-
resolved_to_uid = dependency_data.get("resolved_to_uid")
4017-
if resolved_to_uid:
4018-
resolved_to_package = project.discoveredpackages.get(
4019-
package_uid=resolved_to_uid
4020-
)
4059+
resolve_to_package_uid = dependency_data.get("resolve_to_package_uid")
4060+
if not resolved_to_package and resolve_to_package_uid:
4061+
resolved_to_package = project_packages_qs.get_or_none(
4062+
package_uid=resolve_to_package_uid
4063+
)
40214064

4022-
if not datafile_resource:
4023-
datafile_path = dependency_data.get("datafile_path")
4024-
if datafile_path:
4025-
if strip_datafile_path_root:
4026-
segments = datafile_path.split("/")
4027-
datafile_path = "/".join(segments[1:])
4028-
datafile_resource = project.codebaseresources.get(path=datafile_path)
4065+
datafile_path = dependency_data.get("datafile_path")
4066+
if not datafile_resource and datafile_path:
4067+
if strip_datafile_path_root:
4068+
segments = datafile_path.split("/")
4069+
datafile_path = "/".join(segments[1:])
4070+
datafile_resource = project.codebaseresources.get(path=datafile_path)
40294071

40304072
if datasource_id:
40314073
dependency_data["datasource_id"] = datasource_id
40324074

4033-
# Set purl fields from `purl`
4075+
# Set package_url fields from the ``purl`` string.
40344076
purl = dependency_data.get("purl")
4035-
purl_mapping = PackageURL.from_string(purl).to_dict()
4036-
dependency_data.update(**purl_mapping)
4077+
if purl:
4078+
purl_data_dict = PackageURL.from_string(purl).to_dict()
4079+
dependency_data.update(**purl_data_dict)
40374080

40384081
cleaned_data = {
40394082
field_name: value
@@ -4072,7 +4115,7 @@ def spdx_id(self):
40724115
# "SPDXID is a unique string containing letters, numbers, ., and/or -"
40734116
return f"SPDXRef-scancodeio-{self._meta.model_name}-{self.uuid}"
40744117

4075-
def as_spdx(self):
4118+
def as_spdx_package(self):
40764119
"""Return this Dependency as an SPDX Package entry."""
40774120
from scanpipe.pipes import spdx
40784121

scanpipe/pipes/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,6 @@ def get_dependencies(project, dependency_data):
325325
Given a `dependency_data` mapping, get a list of DiscoveredDependency objects
326326
for that `project` with similar dependency data.
327327
"""
328-
dependency = None
329328
dependency_uid = dependency_data.get("dependency_uid")
330329
extracted_requirement = dependency_data.get("extracted_requirement") or ""
331330

scanpipe/pipes/output.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -692,7 +692,7 @@ def to_spdx(project, include_files=False):
692692
license_expressions.append(license_expression)
693693

694694
for dependency in discovereddependency_qs:
695-
packages_as_spdx.append(dependency.as_spdx())
695+
packages_as_spdx.append(dependency.as_spdx_package())
696696
if dependency.for_package:
697697
relationships.append(
698698
spdx.Relationship(

scanpipe/tests/test_models.py

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2041,15 +2041,35 @@ def test_scanpipe_discovered_package_queryset_dependency_methods(self):
20412041
z = make_package(project, "pkg:type/z")
20422042
# Project -> A -> B -> C
20432043
# Project -> Z
2044-
make_dependency(project, for_package=a, resolved_to_package=b)
2045-
make_dependency(project, for_package=b, resolved_to_package=c)
2044+
a_to_b = make_dependency(
2045+
project, for_package=a, resolved_to_package=b, dependency_uid="a_to_b"
2046+
)
2047+
b_to_c = make_dependency(
2048+
project, for_package=b, resolved_to_package=c, dependency_uid="b_to_c"
2049+
)
2050+
unresolved_dependency = make_dependency(project, dependency_uid="unresolved")
2051+
2052+
self.assertFalse(a_to_b.is_project_dependency)
2053+
self.assertTrue(a_to_b.is_package_dependency)
2054+
self.assertTrue(a_to_b.is_resolved_to_package)
2055+
self.assertTrue(unresolved_dependency.is_project_dependency)
2056+
self.assertFalse(unresolved_dependency.is_package_dependency)
2057+
self.assertFalse(unresolved_dependency.is_resolved_to_package)
20462058

20472059
project_packages_qs = project.discoveredpackages.order_by("name")
20482060
root_packages = project_packages_qs.root_packages()
20492061
self.assertEqual([a, z], list(root_packages))
20502062
non_root_packages = project_packages_qs.non_root_packages()
20512063
self.assertEqual([b, c], list(non_root_packages))
20522064

2065+
dependency_qs = project.discovereddependencies
2066+
self.assertEqual(
2067+
[unresolved_dependency], list(dependency_qs.project_dependencies())
2068+
)
2069+
self.assertEqual([a_to_b, b_to_c], list(dependency_qs.package_dependencies()))
2070+
self.assertEqual([a_to_b, b_to_c], list(dependency_qs.resolved()))
2071+
self.assertEqual([unresolved_dependency], list(dependency_qs.unresolved()))
2072+
20532073
@skipIf(sys.platform != "linux", "Ordering differs on macOS.")
20542074
def test_scanpipe_codebase_resource_model_walk_method(self):
20552075
fixtures = self.data / "asgiref" / "asgiref-3.3.0_walk_test_fixtures.json"
@@ -2955,10 +2975,11 @@ def test_scanpipe_discovered_package_model_create_from_data_missing_type(self):
29552975
def test_scanpipe_discovered_dependency_model_create_from_data(self):
29562976
project1 = make_project("Analysis")
29572977

2958-
DiscoveredPackage.create_from_data(project1, package_data1)
2978+
package1 = DiscoveredPackage.create_from_data(project1, package_data1)
29592979
CodebaseResource.objects.create(
29602980
project=project1, path="daglib-0.3.2.tar.gz-extract/daglib-0.3.2/PKG-INFO"
29612981
)
2982+
# Unresolved package dependency, for_package looked up from the dependency data
29622983
dependency = DiscoveredDependency.create_from_data(
29632984
project1, dependency_data1, strip_datafile_path_root=False
29642985
)
@@ -2982,23 +3003,17 @@ def test_scanpipe_discovered_dependency_model_create_from_data(self):
29823003
dependency.datafile_path,
29833004
)
29843005
self.assertEqual("pypi_sdist_pkginfo", dependency.datasource_id)
3006+
self.assertFalse(dependency.is_project_dependency)
3007+
self.assertTrue(dependency.is_package_dependency)
3008+
self.assertFalse(dependency.is_resolved_to_package)
29853009

2986-
# Test field validation when using create_from_data
2987-
dependency_count = DiscoveredDependency.objects.count()
2988-
incomplete_data = dict(dependency_data1)
2989-
incomplete_data["dependency_uid"] = ""
2990-
self.assertIsNone(
2991-
DiscoveredDependency.create_from_data(project1, incomplete_data)
3010+
# Resolved project dependency, resolved_to_package provided as arg
3011+
dependency2 = DiscoveredDependency.create_from_data(
3012+
project1, dependency_data={}, resolved_to_package=package1
29923013
)
2993-
self.assertEqual(dependency_count, DiscoveredDependency.objects.count())
2994-
message = project1.projectmessages.latest("created_date")
2995-
self.assertEqual("DiscoveredDependency", message.model)
2996-
self.assertEqual(ProjectMessage.Severity.WARNING, message.severity)
2997-
expected_message = "No values for the following required fields: dependency_uid"
2998-
self.assertEqual(expected_message, message.description)
2999-
self.assertEqual(dependency_data1["purl"], message.details["purl"])
3000-
self.assertEqual("", message.details["dependency_uid"])
3001-
self.assertEqual("", message.traceback)
3014+
self.assertTrue(dependency2.is_project_dependency)
3015+
self.assertFalse(dependency2.is_package_dependency)
3016+
self.assertTrue(dependency2.is_resolved_to_package)
30023017

30033018
def test_scanpipe_discovered_package_model_unique_package_uid_in_project(self):
30043019
project1 = make_project("Analysis")

0 commit comments

Comments
 (0)