Skip to content

Commit 9994223

Browse files
Merge pull request #3734 from nexB/add-mariner-support
Add RPM mariner package detection support
2 parents 04e24e0 + 283bec9 commit 9994223

File tree

20 files changed

+8712
-226
lines changed

20 files changed

+8712
-226
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,6 @@ TAGS
108108
Procfile
109109
local.cfg
110110
geckodriver.log
111-
var
112111
.metaflow
113112
selenium
114113
/dist/

src/packagedcode/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,10 @@
216216

217217
debian.DebianInstalledFilelistHandler,
218218
debian.DebianInstalledMd5sumFilelistHandler,
219-
debian.DebianInstalledStatusDatabaseHandler
219+
debian.DebianInstalledStatusDatabaseHandler,
220+
221+
rpm.RpmLicenseFilesHandler,
222+
rpm.RpmMarinerContainerManifestHandler
220223
]
221224

222225
if on_linux:

src/packagedcode/licensing.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,13 @@ def logger_debug(*args):
6060
return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args))
6161

6262

63+
# Mapping of a resource-level license attribute name (key) to the name of the
# package-level attribute (value) that receives the same data.
RESOURCE_TO_PACKAGE_LICENSE_FIELDS = dict(
    detected_license_expression='declared_license_expression',
    detected_license_expression_spdx='declared_license_expression_spdx',
    license_detections='license_detections',
)
68+
69+
6370
def add_referenced_license_matches_for_package(resource, codebase):
6471
"""
6572
Return an updated ``resource`` saving it in place, after adding new license

src/packagedcode/models.py

Lines changed: 51 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@
2828
from commoncode.datautils import String
2929
from commoncode.fileutils import as_posixpath
3030
from commoncode.resource import Resource
31+
from license_expression import combine_expressions
32+
from license_expression import Licensing
33+
3134
try:
3235
from typecode import contenttype
3336
except ImportError:
@@ -118,11 +121,8 @@
118121
- IdentifiablePackageData: a base class for a Package-like class with a Package URL.
119122
"""
120123

121-
SCANCODE_DEBUG_PACKAGE = os.environ.get('SCANCODE_DEBUG_PACKAGE', False)
122-
SCANCODE_DEBUG_PACKAGE_ASSEMBLY = os.environ.get('SCANCODE_DEBUG_PACKAGE_ASSEMBLY', False)
123-
124-
TRACE = SCANCODE_DEBUG_PACKAGE
125-
TRACE_UPDATE = SCANCODE_DEBUG_PACKAGE_ASSEMBLY
124+
TRACE = os.environ.get('SCANCODE_DEBUG_PACKAGE', False)
125+
TRACE_UPDATE = os.environ.get('SCANCODE_DEBUG_PACKAGE_ASSEMBLY', False)
126126

127127

128128
def logger_debug(*args):
@@ -1618,6 +1618,8 @@ def update(
16181618
include_qualifiers=False,
16191619
include_subpath=False,
16201620
ignore_name_check=False,
1621+
default_relation='AND',
1622+
licensing=Licensing(),
16211623
):
16221624
"""
16231625
Update this Package with data from the ``package_data`` PackageData.
@@ -1670,16 +1672,16 @@ def update(
16701672
'file_references',
16711673
])
16721674

1675+
license_modified = False
16731676
for name, value in existing.items():
16741677
new_value = new_package_data.get(name)
1678+
if not new_value:
1679+
if TRACE_UPDATE: logger_debug(f' No new value: {name!r}: skipping')
1680+
continue
16751681

16761682
if TRACE_UPDATE:
16771683
logger_debug(f'update: {name!r}={value!r} with new_value: {new_value!r}')
16781684

1679-
if not new_value:
1680-
if TRACE_UPDATE: logger_debug(' No new value: skipping')
1681-
continue
1682-
16831685
if not value:
16841686
if TRACE_UPDATE: logger_debug(' set existing value to new')
16851687
setattr(self, name, new_value)
@@ -1694,6 +1696,18 @@ def update(
16941696
if name == 'extra_data':
16951697
value.update(new_value)
16961698

1699+
if 'license_detections' in name:
1700+
license_modified = True
1701+
license_keys = licensing.license_keys(
1702+
expression=new_package_data.get("declared_license_expression"),
1703+
unique=True,
1704+
)
1705+
if name == 'license_detections' and len(license_keys) > 1:
1706+
setattr(self, 'other_license_detections', new_value)
1707+
else:
1708+
merged = value + new_value
1709+
setattr(self, name, merged)
1710+
16971711
if name in list_fields:
16981712
if TRACE_UPDATE: logger_debug(' merge lists of values')
16991713
merged = merge_sequences(list1=value, list2=new_value)
@@ -1702,8 +1716,36 @@ def update(
17021716
elif TRACE_UPDATE and value != new_value:
17031717
if TRACE_UPDATE: logger_debug(' skipping update: no replace')
17041718

1719+
if license_modified:
1720+
self.refresh_license_expressions(default_relation=default_relation)
1721+
17051722
return True
17061723

1724+
def refresh_license_expressions(self, default_relation='AND'):
    """
    Recompute the declared and other license expressions of this package,
    and their SPDX forms, from its current license detections.

    The ``license_expression`` of each detection is combined using the
    ``default_relation`` relation. A group of fields is left untouched when
    its list of detections is empty.
    """
    # (detections attribute, expression attribute, SPDX expression attribute)
    field_groups = (
        (
            'license_detections',
            'declared_license_expression',
            'declared_license_expression_spdx',
        ),
        (
            'other_license_detections',
            'other_license_expression',
            'other_license_expression_spdx',
        ),
    )

    for detections_attr, expression_attr, spdx_attr in field_groups:
        detections = getattr(self, detections_attr)
        if not detections:
            continue

        combined = combine_expressions(
            expressions=[item["license_expression"] for item in detections],
            relation=default_relation,
        )
        setattr(self, expression_attr, str(combined))

        # The SPDX form is derived from the expression stored just above.
        spdx_expression = get_declared_license_expression_spdx(
            declared_license_expression=getattr(self, expression_attr),
        )
        setattr(self, spdx_attr, spdx_expression)
1748+
17071749
def get_packages_files(self, codebase):
17081750
"""
17091751
Yield all the Resource of this package found in codebase.

src/packagedcode/rpm.py

Lines changed: 166 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,25 @@
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
99

10+
import io
11+
import os
12+
import fnmatch
1013
import logging
1114
import sys
1215
from collections import namedtuple
1316
from pathlib import Path
1417

1518
from packagedcode import models
1619
from packagedcode import nevra
20+
from packagedcode.licensing import RESOURCE_TO_PACKAGE_LICENSE_FIELDS
1721
from packagedcode.pyrpm import RPM
1822
from packagedcode.rpm_installed import collect_installed_rpmdb_xmlish_from_rpmdb_loc
1923
from packagedcode.rpm_installed import parse_rpm_xmlish
2024
from packagedcode.utils import build_description
2125
from packagedcode.utils import get_ancestor
26+
from scancode.api import get_licenses
2227

23-
TRACE = False
28+
TRACE = os.environ.get('SCANCODE_DEBUG_PACKAGE_API', False)
2429

2530

2631
def logger_debug(*args):
@@ -374,6 +379,166 @@ def parse(cls, location, package_only=False):
374379
yield models.PackageData.from_data(package_data, package_only)
375380

376381

382+
class RpmMarinerContainerManifestHandler(models.DatafileHandler):
    """
    Handle the RPM package manifest of a Mariner distroless container found at
    ``var/lib/rpmmanifest/container-manifest-2``. This is a text file with one
    line per package and tab-separated columns.
    """
    datasource_id = 'rpm_mariner_manifest'
    # container-manifest-1 is more minimal and has the same data
    path_patterns = ('*var/lib/rpmmanifest/container-manifest-2',)
    default_package_type = 'rpm'
    default_package_namespace = 'mariner'
    description = 'RPM mariner distroless package manifest'
    documentation_url = 'https://github.com/microsoft/marinara/'

    # Names given to the tab-separated columns of a manifest line, in order.
    # The "n1" to "n4" and "checksum_algo" columns are discarded by
    # clean_mariner_manifest_data().
    manifest_attributes = [
        "name",
        "version",
        "n1",
        "n2",
        "party",
        "n3",
        "n4",
        "arch",
        "checksum_algo",
        "filename"
    ]

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield one PackageData for each non-blank line of the manifest file at
        ``location``. ``package_only`` is forwarded to
        ``PackageData.from_data``.
        """
        with io.open(location, encoding='utf-8') as data:
            lines = data.readlines()

        for line in lines:
            line = line.rstrip("\n")
            # A blank line describes no package: skip it rather than fail or
            # report a nameless package.
            if not line.strip():
                continue

            metadata = line.split("\t")

            package_data = {
                "type": cls.default_package_type,
                "namespace": cls.default_package_namespace,
                "datasource_id": cls.datasource_id,
            }
            # zip() stops at the shortest sequence: a line with fewer columns
            # than expected only sets the leading attributes.
            package_data.update(zip(cls.manifest_attributes, metadata))

            package_data = cls.clean_mariner_manifest_data(package_data)
            yield models.PackageData.from_data(package_data, package_only)

    @classmethod
    def assemble(cls, package_data, resource, codebase, package_adder):
        """
        Yield a Package built from ``package_data``, then yield each resource
        under ``usr/share/licenses/<package name>/`` whose location matches a
        COPYING* or LICENSE* pattern. The package is updated with the package
        data of each of these resources, and each one is assigned to the
        package with ``package_adder``.
        """
        # Number of path segments of the manifest path: this is how many
        # levels to go up from the manifest resource to reach the root.
        levels_up = len('var/lib/rpmmanifest/container-manifest-2'.split('/'))
        root_resource = get_ancestor(
            levels_up=levels_up,
            resource=resource,
            codebase=codebase,
        )
        package_name = package_data.name

        package = models.Package.from_package_data(
            package_data=package_data,
            datafile_path=resource.path,
        )
        package_uid = package.package_uid

        assemblable_paths = (
            f'*usr/share/licenses/{package_name}/COPYING*',
            f'*usr/share/licenses/{package_name}/LICENSE*',
        )

        resources = []
        for res in root_resource.walk(codebase):
            if TRACE:
                logger_debug(f'   rpm: mariner assemble: root_walk: res: {res}')
            if not any(
                fnmatch.fnmatch(name=res.location, pat=pattern)
                for pattern in assemblable_paths
            ):
                continue

            if TRACE:
                logger_debug(f'   rpm: mariner assemble: pattern matched for: res: {res}')

            for pkgdt in res.package_data:
                # Use a dedicated name: do not rebind the ``package_data``
                # argument of this method.
                license_package_data = models.PackageData.from_dict(pkgdt)
                if TRACE:
                    logger_debug(f'     rpm: mariner assemble: package_data: {license_package_data.declared_license_expression}')

                package.update(
                    package_data=license_package_data,
                    datafile_path=res.path,
                    check_compatible=False,
                    replace=False,
                    include_version=False,
                    include_qualifiers=False,
                    include_subpath=False,
                )

            package_adder(package_uid, res, codebase)
            resources.append(res)

        yield package
        yield from resources

    @staticmethod
    def clean_mariner_manifest_data(package_data):
        """
        Return the ``package_data`` mapping of manifest columns, updated in
        place to use package data attribute names:

        - the "n1", "n2", "n3", "n4" and "checksum_algo" columns are removed
        - "arch" becomes an "arch" entry in "qualifiers"
        - "filename" becomes a "filename" entry in "extra_data"
        - "party" becomes an organization party with the "owner" role

        A missing or empty column is ignored.
        """
        ignore_attributes = ["n1", "n2", "n3", "n4", "checksum_algo"]
        for attribute in ignore_attributes:
            # Use a default: a short manifest line does not have all columns.
            package_data.pop(attribute, None)

        if arch := package_data.pop("arch", None):
            package_data["qualifiers"] = {"arch": arch}

        if filename := package_data.pop("filename", None):
            package_data["extra_data"] = {"filename": filename}

        if party := package_data.pop("party", None):
            party_obj = models.Party(
                type=models.party_org,
                role="owner",
                name=party,
            )
            package_data["parties"] = [party_obj.to_dict()]

        return package_data
501+
502+
503+
class RpmLicenseFilesHandler(models.NonAssemblableDatafileHandler):
    """
    Handle the COPYING* and LICENSE* files found under
    ``usr/share/licenses/<package name>/`` and report each one as package
    data named after its directory, with the licenses detected in the file.
    """
    datasource_id = 'rpm_package_licenses'
    path_patterns = (
        '*usr/share/licenses/*/COPYING*',
        '*usr/share/licenses/*/LICENSE*',
    )
    default_package_type = 'rpm'
    default_package_namespace = 'mariner'
    description = 'RPM mariner distroless package license files'
    documentation_url = 'https://github.com/microsoft/marinara/'

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield a single PackageData for the license file at ``location``.

        The package name is the directory that follows
        ``usr/share/licenses/`` in ``location``. If ``package_only`` is True,
        the PackageData is yielded with only its identifying fields and no
        license detection is run.
        """
        # The license files are in a directory which is the package name,
        # for example: "/usr/share/licenses/openssl/LICENSE"
        # Split without a leading slash, as in path_patterns, such that a
        # relative location is handled too.
        name = location.split('usr/share/licenses/').pop().split('/')[0]
        package_data = models.PackageData(
            type=cls.default_package_type,
            namespace=cls.default_package_namespace,
            name=name,
            datasource_id=cls.datasource_id,
        )

        if package_only:
            yield package_data
            # Stop here: without this, license detection would still run and
            # the same object would be yielded a second time.
            return

        resource_license_attributes = get_licenses(
            location=location,
            include_text=True,
            license_diagnostics=True,
            license_text_diagnostics=True,
        )
        # Copy each resource-level license attribute to its package-level
        # counterpart.
        for key, key_pkg in RESOURCE_TO_PACKAGE_LICENSE_FIELDS.items():
            setattr(package_data, key_pkg, resource_license_attributes.get(key))

        yield package_data
540+
541+
377542
ALGO_BY_ID = {
378543
None: 'md5',
379544
0: 'md5',

tests/packagedcode/data/chef/package.scan.expected.json

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,28 @@
5656
}
5757
],
5858
"identifier": "mit-a822f434-d61f-f2b1-c792-8b8cb9e7b9bf"
59+
},
60+
{
61+
"license_expression": "mit",
62+
"license_expression_spdx": "MIT",
63+
"matches": [
64+
{
65+
"license_expression": "mit",
66+
"spdx_license_expression": "MIT",
67+
"from_file": "package/metadata.json",
68+
"start_line": 1,
69+
"end_line": 1,
70+
"matcher": "1-spdx-id",
71+
"score": 100.0,
72+
"matched_length": 1,
73+
"match_coverage": 100.0,
74+
"rule_relevance": 100,
75+
"rule_identifier": "spdx-license-identifier-mit-5da48780aba670b0860c46d899ed42a0f243ff06",
76+
"rule_url": null,
77+
"matched_text": "MIT"
78+
}
79+
],
80+
"identifier": "mit-a822f434-d61f-f2b1-c792-8b8cb9e7b9bf"
5981
}
6082
],
6183
"other_license_expression": null,

tests/packagedcode/data/plugin/chef-package-expected.json

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,28 @@
5656
}
5757
],
5858
"identifier": "mit-a822f434-d61f-f2b1-c792-8b8cb9e7b9bf"
59+
},
60+
{
61+
"license_expression": "mit",
62+
"license_expression_spdx": "MIT",
63+
"matches": [
64+
{
65+
"license_expression": "mit",
66+
"spdx_license_expression": "MIT",
67+
"from_file": "package/metadata.json",
68+
"start_line": 1,
69+
"end_line": 1,
70+
"matcher": "1-spdx-id",
71+
"score": 100.0,
72+
"matched_length": 1,
73+
"match_coverage": 100.0,
74+
"rule_relevance": 100,
75+
"rule_identifier": "spdx-license-identifier-mit-5da48780aba670b0860c46d899ed42a0f243ff06",
76+
"rule_url": null,
77+
"matched_text": "MIT"
78+
}
79+
],
80+
"identifier": "mit-a822f434-d61f-f2b1-c792-8b8cb9e7b9bf"
5981
}
6082
],
6183
"other_license_expression": null,

0 commit comments

Comments
 (0)