Skip to content

Commit 08c54b1

Browse files
Resolve dependencies from lockfiles (#1244)
* Resolve dependencies from lockfiles #1237 Reference: #1237 Reference: #1066 Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> * Address feedback and add improvements Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> * Improve dependency resolving from lockfiles #1237 Resolves dependency for cases where multiple requirements are resolved by one package and all the version requirements are joined for that package. Reference: #1237 Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> * Update scancode-toolkit and fix tests Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> * Bump scancode-toolkit to v32.2.0 Reference: https://github.com/nexB/scancode-toolkit/releases/tag/v32.2.0 Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> * Regenerate test fixtures and expectations Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> * Improve dependency resolver for lockfiles Handle various lockfile cases where: * Same package/dependencies are present in different lockfiles * Independent lockfiles without a manifest and root package * Ecosystems which have only a single version of package in their environment * Dependency graphs where a resolved package can have many parent packages. Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> * Address feedback and refactor code Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> * FIx bugs for resolving python packages Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> * Add unit tests and refactor code Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> * Address comments and add CHANGELOG entries Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> --------- Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent 53ce3b0 commit 08c54b1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+2913
-661
lines changed

CHANGELOG.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,24 @@ v34.6.4 (unreleased)
1515
related work directories created more than a specified number of days ago.
1616
https://github.com/nexB/scancode.io/issues/1289
1717

18+
- Update the ``inspect_packages`` pipeline to have an optional ``Static Resolver``
19+
group to create resolved packages and dependency relationships from lockfiles
20+
and manifests having pre-resolved dependencies. Also update this pipeline to
21+
perform package assembly from multiple manifests and files to create
22+
discovered packages. Also update the ``resolve_dependencies`` pipeline to have
23+
the same ``Static Resolver`` group and move the dynamic resolution part to a new
24+
optional ``Dynamic Resolver`` group.
25+
See https://github.com/nexB/scancode.io/pull/1244
26+
27+
- Add a new attribute ``is_direct`` to the DiscoveredDependency model and two new
28+
attributes ``is_private`` and ``is_virtual`` to the DiscoveredPackage model.
29+
Also update the UIs to show these attributes and show the ``package_data`` field
30+
contents for CodebaseResources in the ``extra_data`` tab.
31+
See https://github.com/nexB/scancode.io/pull/1244
32+
33+
- Update scancode-toolkit to version ``32.2.0``. For the complete list of updates
34+
and improvements see https://github.com/nexB/scancode-toolkit/releases/tag/v32.2.0
35+
1836
v34.6.3 (2024-06-21)
1937
--------------------
2038

scanpipe/api/serializers.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,8 @@ class Meta:
384384
"source_packages",
385385
"extra_data",
386386
"package_uid",
387+
"is_private",
388+
"is_virtual",
387389
"datasource_ids",
388390
"datafile_paths",
389391
"file_references",
@@ -409,6 +411,7 @@ class Meta:
409411
"is_runtime",
410412
"is_optional",
411413
"is_resolved",
414+
"is_direct",
412415
"dependency_uid",
413416
"for_package_uid",
414417
"resolved_to_package_uid",

scanpipe/filters.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -688,6 +688,8 @@ class PackageFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
688688
declared_license_expression = django_filters.filters.CharFilter(
689689
widget=HasValueDropdownWidget
690690
)
691+
is_private = StrictBooleanFilter()
692+
is_virtual = StrictBooleanFilter()
691693

692694
class Meta:
693695
model = DiscoveredPackage
@@ -721,6 +723,8 @@ class Meta:
721723
"is_vulnerable",
722724
"compliance_alert",
723725
"tag",
726+
"is_private",
727+
"is_virtual",
724728
]
725729

726730

@@ -731,6 +735,7 @@ class DependencyFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
731735
"is_runtime",
732736
"is_optional",
733737
"is_resolved",
738+
"is_direct",
734739
"datasource_id",
735740
"is_vulnerable",
736741
]
@@ -751,6 +756,7 @@ class DependencyFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
751756
"is_runtime",
752757
"is_optional",
753758
"is_resolved",
759+
"is_direct",
754760
"for_package",
755761
"resolved_to_package",
756762
"datafile_resource",
@@ -765,6 +771,7 @@ class DependencyFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
765771
is_runtime = StrictBooleanFilter()
766772
is_optional = StrictBooleanFilter()
767773
is_resolved = StrictBooleanFilter()
774+
is_direct = StrictBooleanFilter()
768775
is_vulnerable = IsVulnerable(field_name="affected_by_vulnerabilities")
769776

770777
class Meta:
@@ -783,6 +790,7 @@ class Meta:
783790
"is_runtime",
784791
"is_optional",
785792
"is_resolved",
793+
"is_direct",
786794
"datasource_id",
787795
"is_vulnerable",
788796
]
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# Generated by Django 5.0.6 on 2024-06-04 20:48
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """
    Add the ``is_direct`` flag to ``discovereddependency`` and the
    ``is_private`` and ``is_virtual`` flags to ``discoveredpackage``,
    set help texts on the existing dependency boolean flags, and index
    the three new fields.
    """

    dependencies = [
        ("scanpipe", "0061_codebaseresource_is_legal_and_more"),
    ]

    operations = [
        # New boolean flags; each one defaults to False.
        migrations.AddField(
            model_name="discovereddependency",
            name="is_direct",
            field=models.BooleanField(
                default=False,
                help_text="True if this is a direct, first-level dependency relationship for a package.",
            ),
        ),
        migrations.AddField(
            model_name="discoveredpackage",
            name="is_private",
            field=models.BooleanField(
                default=False,
                help_text="True if this is a private package, either not meant to be published on a repository, and/or a local package without a name and version used primarily to track dependencies and other information.",
            ),
        ),
        migrations.AddField(
            model_name="discoveredpackage",
            name="is_virtual",
            field=models.BooleanField(
                default=False,
                help_text="True if this package is created only from a manifest or lockfile, and not from its actual packaged code. The files of this package are not present in the codebase.",
            ),
        ),
        # Existing dependency flags, re-declared with a help text.
        migrations.AlterField(
            model_name="discovereddependency",
            name="is_optional",
            field=models.BooleanField(
                default=False,
                help_text="True if this dependency is an optional dependency",
            ),
        ),
        migrations.AlterField(
            model_name="discovereddependency",
            name="is_resolved",
            field=models.BooleanField(
                default=False,
                help_text="True if this dependency version requirement has been pinned and this dependency points to an exact version.",
            ),
        ),
        migrations.AlterField(
            model_name="discovereddependency",
            name="is_runtime",
            field=models.BooleanField(
                default=False,
                help_text="True if this dependency is a runtime dependency.",
            ),
        ),
        # One single-column index per new flag.
        migrations.AddIndex(
            model_name="discovereddependency",
            index=models.Index(
                fields=["is_direct"], name="scanpipe_di_is_dire_6dc594_idx"
            ),
        ),
        migrations.AddIndex(
            model_name="discoveredpackage",
            index=models.Index(
                fields=["is_private"], name="scanpipe_di_is_priv_9ffd1a_idx"
            ),
        ),
        migrations.AddIndex(
            model_name="discoveredpackage",
            index=models.Index(
                fields=["is_virtual"], name="scanpipe_di_is_virt_c5c176_idx"
            ),
        ),
    ]

scanpipe/models.py

Lines changed: 103 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
from licensedcode.cache import get_licensing
7575
from matchcode_toolkit.fingerprinting import IGNORED_DIRECTORY_FINGERPRINTS
7676
from packagedcode.models import build_package_uid
77+
from packagedcode.utils import get_base_purl
7778
from packageurl import PackageURL
7879
from packageurl import normalize_qualifiers
7980
from packageurl.contrib.django.models import PackageURLMixin
@@ -1031,6 +1032,19 @@ def walk_codebase_path(self):
10311032
"""Return files and directories path of the codebase/ directory recursively."""
10321033
return self.codebase_path.rglob("*")
10331034

1035+
def get_resource(self, path):
    """
    Return the codebase resource stored for the given ``path``, or None
    if no resource with that path exists.

    The path is relative to the scan location.
    This is the same as the Codebase.get_resource() function.
    """
    # A missing resource must not raise an exception, as this function is
    # also called from the SCTK side: get_or_none() yields None in that case.
    return self.codebaseresources.get_or_none(path=path)
1047+
10341048
@cached_property
10351049
def can_change_inputs(self):
10361050
"""
@@ -3061,6 +3075,23 @@ class AbstractPackage(models.Model):
30613075
blank=True,
30623076
help_text=_("A notice text for this package."),
30633077
)
3078+
is_private = models.BooleanField(
3079+
default=False,
3080+
help_text=_(
3081+
"True if this is a private package, either not meant to be "
3082+
"published on a repository, and/or a local package without a "
3083+
"name and version used primarily to track dependencies and "
3084+
"other information."
3085+
),
3086+
)
3087+
is_virtual = models.BooleanField(
3088+
default=False,
3089+
help_text=_(
3090+
"True if this package is created only from a manifest or lockfile, "
3091+
"and not from its actual packaged code. The files of this package "
3092+
"are not present in the codebase."
3093+
),
3094+
)
30643095
datasource_ids = models.JSONField(
30653096
default=list,
30663097
blank=True,
@@ -3163,6 +3194,8 @@ class Meta:
31633194
models.Index(fields=["sha512"]),
31643195
models.Index(fields=["compliance_alert"]),
31653196
models.Index(fields=["tag"]),
3197+
models.Index(fields=["is_private"]),
3198+
models.Index(fields=["is_virtual"]),
31663199
]
31673200
constraints = [
31683201
models.UniqueConstraint(
@@ -3190,15 +3223,7 @@ def purl(self):
31903223

31913224
@classmethod
31923225
def extract_purl_data(cls, package_data):
3193-
purl_data = {}
3194-
3195-
for field_name in PURL_FIELDS:
3196-
value = package_data.get(field_name)
3197-
if field_name == "qualifiers":
3198-
value = normalize_qualifiers(value, encode=True)
3199-
purl_data[field_name] = value or ""
3200-
3201-
return purl_data
3226+
return normalize_package_url_data(package_data)
32023227

32033228
@classmethod
32043229
def create_from_data(cls, project, package_data):
@@ -3530,9 +3555,28 @@ class DiscoveredDependency(
35303555
"The identifier for the datafile handler used to obtain this dependency."
35313556
),
35323557
)
3533-
is_runtime = models.BooleanField(default=False)
3534-
is_optional = models.BooleanField(default=False)
3535-
is_resolved = models.BooleanField(default=False)
3558+
is_runtime = models.BooleanField(
3559+
default=False,
3560+
help_text=_("True if this dependency is a runtime dependency."),
3561+
)
3562+
is_optional = models.BooleanField(
3563+
default=False,
3564+
help_text=_("True if this dependency is an optional dependency"),
3565+
)
3566+
is_resolved = models.BooleanField(
3567+
default=False,
3568+
help_text=_(
3569+
"True if this dependency version requirement has been pinned "
3570+
"and this dependency points to an exact version."
3571+
),
3572+
)
3573+
is_direct = models.BooleanField(
3574+
default=False,
3575+
help_text=_(
3576+
"True if this is a direct, first-level dependency relationship "
3577+
"for a package."
3578+
),
3579+
)
35363580

35373581
objects = DiscoveredDependencyQuerySet.as_manager()
35383582

@@ -3553,6 +3597,7 @@ class Meta:
35533597
models.Index(fields=["is_runtime"]),
35543598
models.Index(fields=["is_optional"]),
35553599
models.Index(fields=["is_resolved"]),
3600+
models.Index(fields=["is_direct"]),
35563601
]
35573602
constraints = [
35583603
models.UniqueConstraint(
@@ -3574,6 +3619,10 @@ def get_absolute_url(self):
35743619
def purl(self):
35753620
return self.package_url
35763621

3622+
@property
def base_purl(self):
    """Return the value of get_base_purl() applied to this object's ``package_url``."""
    # NOTE(review): get_base_purl comes from packagedcode.utils; presumably it
    # reduces the purl to its core fields -- confirm against that helper.
    package_url = self.package_url
    return get_base_purl(package_url)
3625+
35773626
@property
35783627
def package_type(self):
35793628
return self.type
@@ -3599,6 +3648,7 @@ def create_from_data(
35993648
project,
36003649
dependency_data,
36013650
for_package=None,
3651+
resolved_to_package=None,
36023652
datafile_resource=None,
36033653
datasource_id=None,
36043654
strip_datafile_path_root=False,
@@ -3638,6 +3688,13 @@ def create_from_data(
36383688
package_uid=for_package_uid
36393689
)
36403690

3691+
if not resolved_to_package:
3692+
resolved_to_uid = dependency_data.get("resolved_to_uid")
3693+
if resolved_to_uid:
3694+
resolved_to_package = project.discoveredpackages.get(
3695+
package_uid=resolved_to_uid
3696+
)
3697+
36413698
if not datafile_resource:
36423699
datafile_path = dependency_data.get("datafile_path")
36433700
if datafile_path:
@@ -3663,10 +3720,25 @@ def create_from_data(
36633720
return cls.objects.create(
36643721
project=project,
36653722
for_package=for_package,
3723+
resolved_to_package=resolved_to_package,
36663724
datafile_resource=datafile_resource,
36673725
**cleaned_data,
36683726
)
36693727

3728+
@classmethod
def extract_purl_data(cls, dependency_data, ignore_nulls=False):
    """
    Return the normalized purl fields for the ``purl`` entry of the
    ``dependency_data`` mapping.

    The purl string is parsed with PackageURL.from_string(), converted to a
    mapping, and handed to normalize_package_url_data() together with
    ``ignore_nulls``.
    """
    purl_string = dependency_data.get("purl")
    parsed_purl = PackageURL.from_string(purl=purl_string)
    return normalize_package_url_data(parsed_purl.to_dict(), ignore_nulls)
3735+
3736+
@classmethod
def populate_dependency_uuid(cls, dependency_data):
    """
    Set ``dependency_data["dependency_uid"]`` in place to the dependency purl
    extended with a ``uuid`` qualifier holding a newly generated UUID4.

    The purl is read from the ``purl`` entry of ``dependency_data``.
    """
    unique_id = str(uuid.uuid4())
    package_url = PackageURL.from_string(purl=dependency_data.get("purl"))
    # The qualifiers mapping of the parsed purl is updated directly before
    # the purl is serialized back to a string.
    package_url.qualifiers["uuid"] = unique_id
    dependency_data["dependency_uid"] = package_url.to_string()
3741+
36703742
@property
36713743
def spdx_id(self):
36723744
return f"SPDXRef-scancodeio-{self._meta.model_name}-{self.dependency_uid}"
@@ -3694,6 +3766,25 @@ def as_spdx(self):
36943766
)
36953767

36963768

3769+
def normalize_package_url_data(purl_mapping, ignore_nulls=False):
    """
    Return a new mapping of the PURL_FIELDS found in ``purl_mapping``,
    normalized so that database queries with purl data can be executed.

    The ``qualifiers`` value goes through normalize_qualifiers() with
    ``encode=True``. Empty values are stored as an empty string, unless
    ``ignore_nulls`` is True, in which case the field is left out entirely.
    """
    normalized = {}

    for name in PURL_FIELDS:
        field_value = purl_mapping.get(name)
        if name == "qualifiers":
            field_value = normalize_qualifiers(field_value, encode=True)

        if field_value:
            normalized[name] = field_value
        elif not ignore_nulls:
            normalized[name] = ""

    return normalized
3786+
3787+
36973788
class WebhookSubscription(UUIDPKModel, ProjectRelatedModel):
36983789
target_url = models.URLField(_("Target URL"), max_length=1024)
36993790
created_date = models.DateTimeField(auto_now_add=True, editable=False)

0 commit comments

Comments
 (0)