Skip to content

Commit 0c9fd4b

Browse files
Merge pull request #3602 from nexB/support-cargo-workspaces
Support cargo workspaces
2 parents c80e502 + d626332 commit 0c9fd4b

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

43 files changed

+3006
-195
lines changed

src/packagedcode/cargo.py

Lines changed: 99 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
99

10+
import os
1011
import re
1112

1213
import saneyaml
@@ -20,7 +21,81 @@
2021
"""
2122

2223

23-
class CargoTomlHandler(models.DatafileHandler):
24+
class CargoBaseHandler(models.DatafileHandler):
    """
    Base handler holding the assembly logic shared by the Cargo.toml and
    Cargo.lock handlers, including support for cargo workspaces.
    """

    @classmethod
    def assemble(cls, package_data, resource, codebase, package_adder):
        """
        Assemble Cargo.toml and possible Cargo.lock datafiles. Also
        support cargo workspaces where we have multiple packages from
        a repository and some shared information present at top-level.

        When ``package_data.extra_data`` carries a ``workspace`` table with
        both ``members`` and ``package`` entries, each member directory that
        exists in the codebase has the package data of its datafiles updated
        with the shared workspace values and is then assembled on its own.
        Otherwise the datafiles found next to ``resource`` are assembled.

        Yield whatever ``assemble_from_many_datafiles`` yields.
        """
        datafile_name_patterns = ('Cargo.toml', 'cargo.toml', 'Cargo.lock', 'cargo.lock')

        workspace = package_data.extra_data.get("workspace") or {}
        workspace_members = workspace.get("members") or []
        # Work on a copy so that the license fields added below do not leak
        # into ``package_data.extra_data``.
        workspace_package_data = dict(workspace.get("package") or {})

        # These fields are carried over to the members to avoid re-detecting
        # the shared workspace license for each of them.
        attributes_to_copy = (
            "license_detections",
            "declared_license_expression",
            "declared_license_expression_spdx",
        )
        if "license" in workspace_package_data:
            for attribute in attributes_to_copy:
                workspace_package_data[attribute] = getattr(package_data, attribute)

        root_directory = resource.parent(codebase)

        if not (workspace_package_data and workspace_members):
            yield from cls.assemble_from_many_datafiles(
                datafile_name_patterns=datafile_name_patterns,
                directory=root_directory,
                codebase=codebase,
                package_adder=package_adder,
            )
            return

        # NOTE(review): only the members are assembled in this branch; a
        # package declared in the workspace root manifest itself is not
        # assembled here. Confirm this is intended.
        workspace_root_path = root_directory.path
        for workspace_member_path in workspace_members:
            # NOTE(review): member entries are joined verbatim, so glob
            # members such as "crates/*" will presumably not resolve to a
            # resource and are skipped. Verify against the codebase API.
            workspace_directory_path = os.path.join(workspace_root_path, workspace_member_path)
            workspace_directory = codebase.get_resource(path=workspace_directory_path)
            if not workspace_directory:
                continue

            # Update the package data for all members with the
            # workspace package data
            for member_resource in workspace_directory.children(codebase):
                if not cls.is_datafile(location=member_resource.location):
                    continue
                if not member_resource.package_data:
                    continue

                updated_package_data = cls.update_resource_package_data(
                    package_data=workspace_package_data,
                    old_package_data=member_resource.package_data.pop(),
                    mapping=CARGO_ATTRIBUTE_MAPPING,
                )
                member_resource.package_data.append(updated_package_data)
                member_resource.save(codebase)

            yield from cls.assemble_from_many_datafiles(
                datafile_name_patterns=datafile_name_patterns,
                directory=workspace_directory,
                codebase=codebase,
                package_adder=package_adder,
            )

    @classmethod
    def update_resource_package_data(cls, package_data, old_package_data, mapping=None):
        """
        Return the ``old_package_data`` mapping updated in place with the
        shared values from the workspace ``package_data`` mapping.

        ``mapping`` maps a field name of ``old_package_data`` to the name of
        the field holding its value in ``package_data``. A missing or empty
        ``mapping`` means that no field is copied.

        Only fields that ``package_data`` actually defines are copied: a
        field absent from the workspace leaves the member value untouched
        rather than resetting it to None. Likewise, "parties" is rebuilt from
        the workspace "authors" only when the workspace declares authors.

        NOTE(review): a workspace value replaces the member value even when
        the member defines its own; the data available here does not say
        which member fields were declared as inherited from the workspace.
        """
        mapping = mapping or {}

        for attribute, replace_by_attribute in mapping.items():
            if attribute not in old_package_data:
                continue
            if replace_by_attribute not in package_data:
                continue
            old_package_data[attribute] = package_data.get(replace_by_attribute)

        if (
            "parties" in old_package_data
            and "parties" not in mapping
            and "authors" in package_data
        ):
            old_package_data["parties"] = list(get_parties(
                person_names=package_data.get("authors") or [],
                party_role='author',
            ))

        return old_package_data
95+
96+
97+
98+
class CargoTomlHandler(CargoBaseHandler):
2499
datasource_id = 'cargo_toml'
25100
path_patterns = ('*/Cargo.toml', '*/cargo.toml',)
26101
default_package_type = 'cargo'
@@ -31,11 +106,16 @@ class CargoTomlHandler(models.DatafileHandler):
31106
@classmethod
32107
def parse(cls, location):
33108
package_data = toml.load(location, _dict=dict)
34-
35109
core_package_data = package_data.get('package', {})
110+
workspace = package_data.get('workspace', {})
111+
extra_data = {}
36112

37113
name = core_package_data.get('name')
38114
version = core_package_data.get('version')
115+
if isinstance(version, dict) and "workspace" in version:
116+
version = None
117+
extra_data["version"] = "workspace"
118+
39119
description = core_package_data.get('description') or ''
40120
description = description.strip()
41121

@@ -66,6 +146,8 @@ def parse(cls, location):
66146
repository_homepage_url = name and f'https://crates.io/crates/{name}'
67147
repository_download_url = name and version and f'https://crates.io/api/v1/crates/{name}/{version}/download'
68148
api_data_url = name and f'https://crates.io/api/v1/crates/{name}'
149+
if workspace:
150+
extra_data["workspace"] = workspace
69151

70152
yield models.PackageData(
71153
datasource_id=cls.datasource_id,
@@ -82,22 +164,24 @@ def parse(cls, location):
82164
repository_download_url=repository_download_url,
83165
api_data_url=api_data_url,
84166
dependencies=dependencies,
167+
extra_data=extra_data,
85168
)
86169

87-
@classmethod
88-
def assemble(cls, package_data, resource, codebase, package_adder):
89-
"""
90-
Assemble Cargo.toml and possible Cargo.lock datafiles
91-
"""
92-
yield from cls.assemble_from_many_datafiles(
93-
datafile_name_patterns=('Cargo.toml', 'cargo.toml', 'Cargo.lock', 'cargo.lock'),
94-
directory=resource.parent(codebase),
95-
codebase=codebase,
96-
package_adder=package_adder,
97-
)
170+
171+
# Maps a PackageData field name (key) to the Cargo manifest field name
# (value) that provides its value.
CARGO_ATTRIBUTE_MAPPING = {
    # Package metadata fields.
    "homepage_url": "homepage",
    "vcs_url": "repository",
    "keywords": "categories",
    "extracted_license_statement": "license",
    # License fields carried over under the same name to avoid
    # re-detection of licenses.
    "license_detections": "license_detections",
    "declared_license_expression": "declared_license_expression",
    "declared_license_expression_spdx": "declared_license_expression_spdx",
}
98182

99183

100-
class CargoLockHandler(models.DatafileHandler):
184+
class CargoLockHandler(CargoBaseHandler):
101185
datasource_id = 'cargo_lock'
102186
path_patterns = ('*/Cargo.lock', '*/cargo.lock',)
103187
default_package_type = 'cargo'
@@ -144,18 +228,6 @@ def parse(cls, location):
144228
dependencies=dependencies,
145229
)
146230

147-
@classmethod
148-
def assemble(cls, package_data, resource, codebase, package_adder):
149-
"""
150-
Assemble Cargo.toml and possible Cargo.lock datafiles
151-
"""
152-
yield from cls.assemble_from_many_datafiles(
153-
datafile_name_patterns=('Cargo.toml', 'Cargo.lock',),
154-
directory=resource.parent(codebase),
155-
codebase=codebase,
156-
package_adder=package_adder,
157-
)
158-
159231

160232
def dependency_mapper(dependencies, scope='dependencies'):
161233
"""
@@ -197,7 +269,7 @@ def get_parties(person_names, party_role):
197269
name=name,
198270
role=party_role,
199271
email=email,
200-
)
272+
).to_dict()
201273

202274

203275
person_parser = re.compile(

src/packagedcode/licensing.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -709,6 +709,12 @@ def get_normalized_license_detections(
709709
if detections:
710710
license_detections.extend(detections)
711711

712+
if not license_detections:
713+
unknown_dict_object = repr(dict(extracted_license.items()))
714+
unknown_detection = get_unknown_license_detection(query_string=unknown_dict_object)
715+
license_detections.append(unknown_detection)
716+
if TRACE:
717+
logger_debug(f'get_normalized_license_detections: dict: unknown_dict_object: {unknown_dict_object}, unknown_detection: {saneyaml.dump(unknown_detection.to_dict())}')
712718
else:
713719
extracted_license_statement = saneyaml.dump(extracted_license)
714720
license_detections = get_license_detections_for_extracted_license_statement(
@@ -753,7 +759,6 @@ def get_normalized_license_detections(
753759

754760
else:
755761
extracted_license_statement = saneyaml.dump(extracted_license_item)
756-
757762
detections = get_license_detections_for_extracted_license_statement(
758763
extracted_license_statement=extracted_license_statement,
759764
try_as_expression=try_as_expression,
@@ -819,6 +824,7 @@ def get_license_detections_and_expression(
819824
if not license_detections:
820825
if not isinstance(extracted_license_statement, str):
821826
extracted_license_statement = saneyaml.dump(extracted_license_statement)
827+
822828
license_detection = get_unknown_license_detection(query_string=extracted_license_statement)
823829
license_detections = [license_detection]
824830

src/packagedcode/models.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -782,7 +782,10 @@ def populate_license_fields(self):
782782
)
783783

784784
if self.extracted_license_statement and not isinstance(self.extracted_license_statement, str):
785-
self.extracted_license_statement = saneyaml.dump(self.extracted_license_statement)
785+
if isinstance(self.extracted_license_statement, dict):
786+
self.extracted_license_statement = saneyaml.dump(dict(self.extracted_license_statement.items()))
787+
else:
788+
self.extracted_license_statement = saneyaml.dump(self.extracted_license_statement)
786789

787790
def update_purl_fields(self, package_data, replace=False):
788791

src/packagedcode/plugin_package.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,8 @@ def get_package_and_deps(codebase, package_adder=add_to_package, strip_root=Fals
403403
for dfp in item.datafile_paths
404404
]
405405
packages.append(item)
406+
if TRACE:
407+
logger_debug(' get_package_and_deps: Package:', item.purl)
406408

407409
elif isinstance(item, Dependency):
408410
if strip_root and not has_single_resource:

0 commit comments

Comments
 (0)