Skip to content

Commit 3b53f43

Browse files
Merge branch 'develop' into purls-only-v2
Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
2 parents 86e4e90 + 0c9fd4b commit 3b53f43

File tree

88 files changed

+3340
-13964
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

88 files changed

+3340
-13964
lines changed

src/cluecode/copyrights.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2517,7 +2517,8 @@ def build_detection_from_node(
25172517
COMPANY: {<COMPANY> <MAINT>} #19603
25182518
25192519
2520-
#######################################
2520+
################################# #COPYRIGHT: {<COPY> <COPY> <MIT>} #1802
2521+
######
25212522
# VARIOUS FORMS OF COPYRIGHT
25222523
#######################################
25232524
@@ -2572,6 +2573,8 @@ def build_detection_from_node(
25722573
25732574
COPYRIGHT: {<COPY> <COPY> <COMP>+} #1690
25742575
2576+
COPYRIGHT: {<COPY> <COPY> <MIT>} #1802
2577+
25752578
COPYRIGHT: {<COPY> <COPY> <NN>+ <COMPANY|NAME|NAME-EMAIL>+} #1710
25762579
25772580
COPYRIGHT: {<COPY> <COPY> <NN> <NNP> <NN> <COMPANY>} #1711
@@ -4125,8 +4128,10 @@ def prepare_text_line(line, dedeb=True, to_ascii=True):
41254128

41264129
# normalize (possibly repeated) quotes to unique single quote '
41274130
# backticks ` and "
4128-
.replace('`', u"'")
4129-
.replace('"', u"'")
4131+
.replace('`', "'")
4132+
.replace('"', "'")
4133+
# see https://github.com/nexB/scancode-toolkit/issues/3667
4134+
.replace('§', " ")
41304135
)
41314136

41324137
if TRACE_TOK:

src/packagedcode/__init__.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
from packagedcode import godeps
2929
from packagedcode import golang
3030
from packagedcode import haxe
31-
from packagedcode import jar_manifest
3231
from packagedcode import maven
3332
from packagedcode import misc
3433
from packagedcode import npm
@@ -84,6 +83,7 @@
8483
cran.CranDescriptionFileHandler,
8584

8685
debian_copyright.DebianCopyrightFileInPackageHandler,
86+
debian_copyright.StandaloneDebianCopyrightFileHandler,
8787
debian.DebianDscFileHandler,
8888

8989
debian.DebianControlFileInExtractedDebHandler,
@@ -216,13 +216,7 @@
216216

217217
debian.DebianInstalledFilelistHandler,
218218
debian.DebianInstalledMd5sumFilelistHandler,
219-
debian.DebianInstalledStatusDatabaseHandler,
220-
debian.DebianControlFileInSourceHandler,
221-
debian.DebianDscFileHandler,
222-
debian.DebianSourcePackageTarballHandler,
223-
debian.DebianSourcePackageMetadataTarballHandler,
224-
debian.DebianDebPackageHandler,
225-
debian_copyright.StandaloneDebianCopyrightFileHandler
219+
debian.DebianInstalledStatusDatabaseHandler
226220
]
227221

228222
if on_linux:

src/packagedcode/build.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def logger_debug(*args):
4747
)
4848

4949

50-
class AutotoolsConfigureHandler(models.DatafileHandler):
50+
class AutotoolsConfigureHandler(models.NonAssemblableDatafileHandler):
5151
datasource_id = 'autotools_configure'
5252
path_patterns = ('*/configure', '*/configure.ac',)
5353
default_package_type = 'autotools'
@@ -75,14 +75,6 @@ def parse(cls, location, package_only=False):
7575
)
7676
yield models.PackageData.from_data(package_data, package_only)
7777

78-
@classmethod
79-
def assign_package_to_resources(cls, package, resource, codebase, package_adder):
80-
models.DatafileHandler.assign_package_to_parent_tree(
81-
package=package,
82-
resource=resource,
83-
codebase=codebase,
84-
package_adder=package_adder,
85-
)
8678

8779

8880
def check_rule_name_ending(rule_name, starlark_rule_types=('binary', 'library')):

src/packagedcode/cargo.py

Lines changed: 99 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
99

10+
import os
1011
import re
1112

1213
import saneyaml
@@ -20,7 +21,81 @@
2021
"""
2122

2223

23-
class CargoTomlHandler(models.DatafileHandler):
24+
class CargoBaseHandler(models.DatafileHandler):
    """
    Shared base for the Cargo.toml and Cargo.lock handlers: provides the
    common assembly step, including propagation of cargo workspace
    package data to workspace members.
    """

    @classmethod
    def assemble(cls, package_data, resource, codebase, package_adder):
        """
        Assemble Cargo.toml and possible Cargo.lock datafiles. Also
        support cargo workspaces where we have multiple packages from
        a repository and some shared information present at top-level.

        ``package_data`` is the package data of the datafile being
        assembled and ``resource`` is the codebase resource of that datafile.
        Yield whatever ``assemble_from_many_datafiles`` yields, either once
        per resolvable workspace member directory or once for the parent
        directory of ``resource`` when there is no usable workspace data.
        """
        datafile_name_patterns = ('Cargo.toml', 'cargo.toml', 'Cargo.lock', 'cargo.lock')

        workspace = package_data.extra_data.get("workspace", {})
        workspace_members = workspace.get("members", [])
        workspace_package_data = workspace.get("package", {})
        attributes_to_copy = [
            "license_detections",
            "declared_license_expression",
            "declared_license_expression_spdx"
        ]
        # Carry the already-computed license fields of the workspace root over
        # to the shared workspace package data so that members do not need
        # a new license detection. NOTE: this updates the mapping stored in
        # package_data.extra_data in place.
        if "license" in workspace_package_data:
            for attribute in attributes_to_copy:
                workspace_package_data[attribute] = getattr(package_data, attribute)

        workspace_root = resource.parent(codebase)
        if workspace_package_data and workspace_members:
            for workspace_member_path in workspace_members:
                workspace_directory_path = os.path.join(workspace_root.path, workspace_member_path)
                workspace_directory = codebase.get_resource(path=workspace_directory_path)
                if not workspace_directory:
                    # Member path that does not resolve to a resource in this
                    # codebase: nothing to update or assemble.
                    continue

                # Update the package data for all members with the
                # workspace package data. Use a distinct loop name so that
                # the ``resource`` argument is not rebound.
                for member_resource in workspace_directory.children(codebase):
                    if not cls.is_datafile(location=member_resource.location):
                        continue
                    if not member_resource.package_data:
                        continue

                    updated_package_data = cls.update_resource_package_data(
                        package_data=workspace_package_data,
                        old_package_data=member_resource.package_data.pop(),
                        mapping=CARGO_ATTRIBUTE_MAPPING,
                    )
                    member_resource.package_data.append(updated_package_data)
                    member_resource.save(codebase)

                yield from cls.assemble_from_many_datafiles(
                    datafile_name_patterns=datafile_name_patterns,
                    directory=workspace_directory,
                    codebase=codebase,
                    package_adder=package_adder,
                )
        else:
            yield from cls.assemble_from_many_datafiles(
                datafile_name_patterns=datafile_name_patterns,
                directory=workspace_root,
                codebase=codebase,
                package_adder=package_adder,
            )

    @classmethod
    def update_resource_package_data(cls, package_data, old_package_data, mapping=None):
        """
        Return the ``old_package_data`` mapping updated in place from the
        workspace-level ``package_data`` mapping.

        ``mapping`` is a mapping of {package data field name: cargo field
        name}. Each field of ``old_package_data`` present in ``mapping`` is
        replaced by the corresponding cargo field value of ``package_data``
        (or None when that field is absent). The "parties" field is rebuilt
        from the "authors" of ``package_data``. A ``mapping`` of None is
        treated as an empty mapping.
        """
        # The declared default used to crash on the ``in`` test below.
        mapping = mapping or {}

        # Only values of existing keys are reassigned, so iterating the
        # mapping while updating it is safe.
        for attribute in old_package_data:
            if attribute in mapping:
                replace_by_attribute = mapping.get(attribute)
                old_package_data[attribute] = package_data.get(replace_by_attribute)
            elif attribute == "parties":
                old_package_data[attribute] = list(get_parties(
                    person_names=package_data.get("authors"),
                    party_role='author',
                ))

        return old_package_data
95+
96+
97+
98+
class CargoTomlHandler(CargoBaseHandler):
2499
datasource_id = 'cargo_toml'
25100
path_patterns = ('*/Cargo.toml', '*/cargo.toml',)
26101
default_package_type = 'cargo'
@@ -31,11 +106,16 @@ class CargoTomlHandler(models.DatafileHandler):
31106
@classmethod
32107
def parse(cls, location, package_only=False):
33108
package_data = toml.load(location, _dict=dict)
34-
35109
core_package_data = package_data.get('package', {})
110+
workspace = package_data.get('workspace', {})
111+
extra_data = {}
36112

37113
name = core_package_data.get('name')
38114
version = core_package_data.get('version')
115+
if isinstance(version, dict) and "workspace" in version:
116+
version = None
117+
extra_data["version"] = "workspace"
118+
39119
description = core_package_data.get('description') or ''
40120
description = description.strip()
41121

@@ -66,6 +146,8 @@ def parse(cls, location, package_only=False):
66146
repository_homepage_url = name and f'https://crates.io/crates/{name}'
67147
repository_download_url = name and version and f'https://crates.io/api/v1/crates/{name}/{version}/download'
68148
api_data_url = name and f'https://crates.io/api/v1/crates/{name}'
149+
if workspace:
150+
extra_data["workspace"] = workspace
69151

70152
package_data = dict(
71153
datasource_id=cls.datasource_id,
@@ -82,23 +164,25 @@ def parse(cls, location, package_only=False):
82164
repository_download_url=repository_download_url,
83165
api_data_url=api_data_url,
84166
dependencies=dependencies,
167+
extra_data=extra_data,
85168
)
86169
yield models.PackageData.from_data(package_data, package_only)
87170

88-
@classmethod
89-
def assemble(cls, package_data, resource, codebase, package_adder):
90-
"""
91-
Assemble Cargo.toml and possible Cargo.lock datafiles
92-
"""
93-
yield from cls.assemble_from_many_datafiles(
94-
datafile_name_patterns=('Cargo.toml', 'cargo.toml', 'Cargo.lock', 'cargo.lock'),
95-
directory=resource.parent(codebase),
96-
codebase=codebase,
97-
package_adder=package_adder,
98-
)
171+
172+
CARGO_ATTRIBUTE_MAPPING = {
173+
# Fields in PackageData model: Fields in cargo
174+
"homepage_url": "homepage",
175+
"vcs_url": "repository",
176+
"keywords": "categories",
177+
"extracted_license_statement": "license",
178+
# These are fields carried over to avoid re-detection of licenses
179+
"license_detections": "license_detections",
180+
"declared_license_expression": "declared_license_expression",
181+
"declared_license_expression_spdx": "declared_license_expression_spdx",
182+
}
99183

100184

101-
class CargoLockHandler(models.DatafileHandler):
185+
class CargoLockHandler(CargoBaseHandler):
102186
datasource_id = 'cargo_lock'
103187
path_patterns = ('*/Cargo.lock', '*/cargo.lock',)
104188
default_package_type = 'cargo'
@@ -146,18 +230,6 @@ def parse(cls, location, package_only=False):
146230
)
147231
yield models.PackageData.from_data(package_data, package_only)
148232

149-
@classmethod
150-
def assemble(cls, package_data, resource, codebase, package_adder):
151-
"""
152-
Assemble Cargo.toml and possible Cargo.lock datafiles
153-
"""
154-
yield from cls.assemble_from_many_datafiles(
155-
datafile_name_patterns=('Cargo.toml', 'Cargo.lock',),
156-
directory=resource.parent(codebase),
157-
codebase=codebase,
158-
package_adder=package_adder,
159-
)
160-
161233

162234
def dependency_mapper(dependencies, scope='dependencies'):
163235
"""
@@ -199,7 +271,7 @@ def get_parties(person_names, party_role):
199271
name=name,
200272
role=party_role,
201273
email=email,
202-
)
274+
).to_dict()
203275

204276

205277
person_parser = re.compile(

src/packagedcode/debian.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -588,7 +588,10 @@ def parse_debian_files_list(location, datasource_id, package_type):
588588
name, _, arch = filename.partition(':')
589589
qualifiers['arch'] = arch
590590
else:
591-
name = filename
591+
name = None
592+
# For DebianMd5sumFilelistInPackageHandler we cannot infer name
593+
if not name == "md5sums":
594+
name = filename
592595

593596
file_references = []
594597
with open(location) as info_file:
@@ -650,12 +653,14 @@ def build_package_data(debian_data, datasource_id, package_type='deb', distro=No
650653

651654
maintainer = debian_data.get('maintainer')
652655
if maintainer:
653-
party = models.Party(role='maintainer', name=maintainer)
656+
maintainer_name, maintainer_email = parse_debian_maintainers(maintainer)
657+
party = models.Party(role='maintainer', name=maintainer_name, email=maintainer_email)
654658
parties.append(party)
655659

656660
orig_maintainer = debian_data.get('original_maintainer')
657661
if orig_maintainer:
658-
party = models.Party(role='original_maintainer', name=orig_maintainer)
662+
maintainer_name, maintainer_email = parse_debian_maintainers(orig_maintainer)
663+
party = models.Party(role='maintainer', name=maintainer_name, email=maintainer_email)
659664
parties.append(party)
660665

661666
keywords = []
@@ -716,6 +721,26 @@ def build_package_data(debian_data, datasource_id, package_type='deb', distro=No
716721
return models.PackageData.from_data(package_data, package_only)
717722

718723

724+
def parse_debian_maintainers(maintainer):
    """
    Get name and email values from a debian maintainer string.

    Return a (name, email) tuple. The email is None, and the name is the
    unchanged ``maintainer`` string, unless the string contains an "@" and
    both "<" and ">" email wrappers.

    Example string:
    Debian systemd Maintainers <pkg-systemd-maintainers@lists.alioth.debian.org>

    For example:
    >>> parse_debian_maintainers("Some Team <team@example.org>")
    ('Some Team', 'team@example.org')
    >>> parse_debian_maintainers("Some Team")
    ('Some Team', None)
    """
    email_wrappers = ("<", ">")
    # Require every wrapper to be present: a bare "user@host" string has no
    # delimited email to split off and is returned as-is as the name.
    has_email = "@" in maintainer and all(
        char in maintainer for char in email_wrappers
    )
    if not has_email:
        return maintainer, None

    # Split on the last "<" so that a "<" in the name part is preserved.
    name, _, email = maintainer.rpartition("<")
    # Keep only what is inside the wrappers, dropping ">" and any trailer.
    email, _, _ = email.partition(">")
    return name.strip(), email.strip()
742+
743+
719744
def populate_debian_namespace(packages):
720745
"""
721746
For an iterable of debian `packages`, populate the

src/packagedcode/gemfile_lock.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -340,8 +340,8 @@ def get_option(s):
340340
'%(NAME_VERSION)s'
341341
'$' % locals()).match
342342

343-
PLATS = re.compile('^ (?P<platform>.*)$').match
344-
BUNDLED_WITH = re.compile('^\s+(?P<version>(?:\d+.)+\d+)\s*$').match
343+
PLATS = re.compile(r'^ (?P<platform>.*)$').match
344+
# Match the indented, dotted numeric version that follows a "BUNDLED WITH"
# section header, such as "   2.1.4". The dot is escaped so that only a
# literal "." is accepted as a separator between version segments.
BUNDLED_WITH = re.compile(r'^\s+(?P<version>(?:\d+\.)+\d+)\s*$').match
345345

346346

347347
class GemfileLockParser:

src/packagedcode/licensing.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -709,6 +709,12 @@ def get_normalized_license_detections(
709709
if detections:
710710
license_detections.extend(detections)
711711

712+
if not license_detections:
713+
unknown_dict_object = repr(dict(extracted_license.items()))
714+
unknown_detection = get_unknown_license_detection(query_string=unknown_dict_object)
715+
license_detections.append(unknown_detection)
716+
if TRACE:
717+
logger_debug(f'get_normalized_license_detections: dict: unknown_dict_object: {unknown_dict_object}, unknown_detection: {saneyaml.dump(unknown_detection.to_dict())}')
712718
else:
713719
extracted_license_statement = saneyaml.dump(extracted_license)
714720
license_detections = get_license_detections_for_extracted_license_statement(
@@ -753,7 +759,6 @@ def get_normalized_license_detections(
753759

754760
else:
755761
extracted_license_statement = saneyaml.dump(extracted_license_item)
756-
757762
detections = get_license_detections_for_extracted_license_statement(
758763
extracted_license_statement=extracted_license_statement,
759764
try_as_expression=try_as_expression,
@@ -819,6 +824,7 @@ def get_license_detections_and_expression(
819824
if not license_detections:
820825
if not isinstance(extracted_license_statement, str):
821826
extracted_license_statement = saneyaml.dump(extracted_license_statement)
827+
822828
license_detection = get_unknown_license_detection(query_string=extracted_license_statement)
823829
license_detections = [license_detection]
824830

src/packagedcode/models.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -759,9 +759,14 @@ def normalize_extracted_license_statement(self):
759759
self.extracted_license_statement and
760760
not isinstance(self.extracted_license_statement, str)
761761
):
762-
self.extracted_license_statement = saneyaml.dump(
763-
self.extracted_license_statement
764-
)
762+
if isinstance(self.extracted_license_statement, dict):
763+
self.extracted_license_statement = saneyaml.dump(
764+
dict(self.extracted_license_statement.items())
765+
)
766+
else:
767+
self.extracted_license_statement = saneyaml.dump(
768+
self.extracted_license_statement
769+
)
765770

766771
def populate_holder_field(self):
767772
if not self.copyright:

src/packagedcode/plugin_package.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,8 @@ def get_package_and_deps(codebase, package_adder=add_to_package, strip_root=Fals
420420
for dfp in item.datafile_paths
421421
]
422422
packages.append(item)
423+
if TRACE:
424+
logger_debug(' get_package_and_deps: Package:', item.purl)
423425

424426
elif isinstance(item, Dependency):
425427
if strip_root and not has_single_resource:

0 commit comments

Comments
 (0)