Commit 8f5daf7

Merge pull request #3783 from nexB/fix-cargo-workspaces
Improve cargo package detection support
2 parents 8e9dc46 + 60c580e commit 8f5daf7

40 files changed: +8145 / -2265 lines

CHANGELOG.rst (14 additions, 5 deletions)

@@ -34,14 +34,23 @@ v33.0.0 (next next, roadmap)
   of these in other summary plugins.
   See https://github.com/nexB/scancode-toolkit/issues/1745

+- Improve cargo package detection support with various improvements
+  and bugfixes:
+  - Fix for parser crashing on cargo workspaces
+  - Fix a bug in dependency parsing (we were not returning any dependencies)
+  - Also support getting dependency versions from workspace
+  - Support more attributes from cargo
+  - Better handle workspace data through extra_data attribute
+  See https://github.com/nexB/scancode-toolkit/pull/3783
+
 - We now support parsing the Swift manifest JSON dump and the ``Package.resolved`` file https://github.com/nexB/scancode-toolkit/issues/2657.
-  Run the commands below on your local Swift project before running the scan.
-  ::
+  Run the commands below on your local Swift project before running the scan.
+  ::

-    swift package dump-package > Package.swift.json
-  ::
+    swift package dump-package > Package.swift.json
+  ::

-    swift package resolve
+    swift package resolve

 v32.1.0 (next, roadmap)
 ----------------------------
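For context on the cargo bullet points above, here is a minimal sketch (hypothetical crate and dependency names) of a workspace member Cargo.toml, parsed with the same toml library that cargo.py uses. The {'workspace': True} dicts it produces for inherited fields are the inputs these fixes deal with:

    # Sketch only: hypothetical crate and dependency names, parsed with the
    # same `toml` library that cargo.py imports.
    import toml

    member_manifest = toml.loads("""
    [package]
    name = "member-crate"
    version = { workspace = true }
    edition = { workspace = true }

    [dependencies]
    serde = { workspace = true }
    anyhow = "1.0"
    """)

    print(member_manifest["package"]["version"])      # {'workspace': True}
    print(member_manifest["dependencies"]["serde"])   # {'workspace': True}
    print(member_manifest["dependencies"]["anyhow"])  # 1.0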

src/packagedcode/cargo.py (119 additions, 27 deletions)

@@ -7,10 +7,11 @@
 # See https://aboutcode.org for more information about nexB OSS projects.
 #

+import logging
 import os
 import re
+import sys

-import saneyaml
 import toml
 from packageurl import PackageURL

@@ -20,6 +21,22 @@
 Handle Rust cargo crates
 """

+TRACE = os.environ.get('SCANCODE_DEBUG_PACKAGE_CARGO', False)
+
+
+def logger_debug(*args):
+    pass
+
+
+logger = logging.getLogger(__name__)
+
+if TRACE:
+    logging.basicConfig(stream=sys.stdout)
+    logger.setLevel(logging.DEBUG)
+
+    def logger_debug(*args):
+        return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args))
+

 class CargoBaseHandler(models.DatafileHandler):
     @classmethod
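A note on the tracing scaffold added above: TRACE is evaluated when the module is imported, so the environment variable has to be set before the import. A minimal sketch, assuming a scancode-toolkit install that contains this change:

    # Sketch: the variable must be set before packagedcode.cargo is imported,
    # because TRACE is read once at import time.
    import os

    os.environ["SCANCODE_DEBUG_PACKAGE_CARGO"] = "1"

    from packagedcode import cargo

    # With TRACE truthy, logger_debug() writes DEBUG messages to stdout.
    cargo.logger_debug("cargo tracing is on")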
@@ -29,7 +46,7 @@ def assemble(cls, package_data, resource, codebase, package_adder):
         support cargo workspaces where we have multiple packages from
         a repository and some shared information present at top-level.
         """
-        workspace = package_data.extra_data.get("workspace", {})
+        workspace = package_data.extra_data.get('workspace', {})
         workspace_members = workspace.get("members", [])
         workspace_package_data = workspace.get("package", {})
         attributes_to_copy = [
@@ -39,10 +56,13 @@ def assemble(cls, package_data, resource, codebase, package_adder):
         ]
         if "license" in workspace_package_data:
             for attribute in attributes_to_copy:
+                package_data.extra_data[attribute] = 'workspace'
                 workspace_package_data[attribute] = getattr(package_data, attribute)

         workspace_root_path = resource.parent(codebase).path
         if workspace_package_data and workspace_members:
+
+            # TODO: support glob patterns found in cargo workspaces
             for workspace_member_path in workspace_members:
                 workspace_directory_path = os.path.join(workspace_root_path, workspace_member_path)
                 workspace_directory = codebase.get_resource(path=workspace_directory_path)
@@ -56,9 +76,13 @@ def assemble(cls, package_data, resource, codebase, package_adder):
                     if not resource.package_data:
                         continue

+                    if TRACE:
+                        logger_debug(f"Resource manifest to update: {resource.path}")
+
                     updated_package_data = cls.update_resource_package_data(
-                        package_data=workspace_package_data,
-                        old_package_data=resource.package_data.pop(),
+                        workspace=workspace,
+                        workspace_package_data=workspace_package_data,
+                        resource_package_data=resource.package_data.pop(),
                         mapping=CARGO_ATTRIBUTE_MAPPING,
                     )
                     resource.package_data.append(updated_package_data)
@@ -79,20 +103,61 @@ def assemble(cls, package_data, resource, codebase, package_adder):
            )

     @classmethod
-    def update_resource_package_data(cls, package_data, old_package_data, mapping=None):
+    def update_resource_package_data(cls, workspace, workspace_package_data, resource_package_data, mapping=None):

-        for attribute in old_package_data.keys():
+        extra_data = resource_package_data["extra_data"]
+        for attribute in resource_package_data.keys():
             if attribute in mapping:
                 replace_by_attribute = mapping.get(attribute)
-                old_package_data[attribute] = package_data.get(replace_by_attribute)
+                if not replace_by_attribute in extra_data:
+                    continue
+
+                extra_data.pop(replace_by_attribute)
+                replace_by_value = workspace_package_data.get(replace_by_attribute)
+                if replace_by_value:
+                    resource_package_data[attribute] = replace_by_value
             elif attribute == "parties":
-                old_package_data[attribute] = list(get_parties(
-                    person_names=package_data.get("authors"),
+                resource_package_data[attribute] = list(get_parties(
+                    person_names=workspace_package_data.get("authors", []),
                     party_role='author',
                 ))
-
-        return old_package_data
-
+        if "authors" in extra_data:
+            extra_data.pop("authors")
+
+        extra_data_copy = extra_data.copy()
+        for key, value in extra_data_copy.items():
+            if value == 'workspace':
+                extra_data.pop(key)
+
+                if key in workspace_package_data:
+                    workspace_value = workspace_package_data.get(key)
+                    if workspace_value and key in mapping:
+                        replace_by_attribute = mapping.get(key)
+                        extra_data[replace_by_attribute] = workspace_value
+
+        # refresh purl if version updated from workspace
+        if "version" in workspace_package_data:
+            resource_package_data["purl"] = PackageURL(
+                type=cls.default_package_type,
+                name=resource_package_data["name"],
+                namespace=resource_package_data["namespace"],
+                version=resource_package_data["version"],
+            ).to_string()
+
+        workspace_dependencies = dependency_mapper(dependencies=workspace.get('dependencies', {}))
+        deps_by_purl = {}
+        for dependency in workspace_dependencies:
+            deps_by_purl[dependency.purl] = dependency
+
+        for dep_mapping in resource_package_data['dependencies']:
+            workspace_dependency = deps_by_purl.get(dep_mapping['purl'], None)
+            if workspace_dependency and workspace_dependency.extracted_requirement:
+                dep_mapping['extracted_requirement'] = workspace_dependency.extracted_requirement
+
+            if 'workspace' in dep_mapping["extra_data"]:
+                dep_mapping['extra_data'].pop('workspace')
+
+        return resource_package_data


 class CargoTomlHandler(CargoBaseHandler):
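To make the intent of update_resource_package_data() above easier to see, here is a simplified standalone sketch of the same merge idea using plain dicts and hypothetical values (the real method works on PackageData mappings and also reconciles dependencies):

    # Simplified sketch, not the toolkit API: fields a member marked as
    # "workspace" are filled in from the workspace-level [workspace.package]
    # values, then the purl is refreshed once the version is known.
    from packageurl import PackageURL

    workspace_package = {"version": "2.3.1", "edition": "2021"}

    member = {
        "type": "cargo",
        "name": "member-crate",
        "namespace": None,
        "version": None,
        "purl": None,
        # attributes that were `{workspace = true}` in the member Cargo.toml
        "extra_data": {"version": "workspace", "edition": "workspace"},
    }

    for key, marker in list(member["extra_data"].items()):
        if marker == "workspace" and key in workspace_package and key in member:
            member[key] = workspace_package[key]
            member["extra_data"].pop(key)

    # refresh the purl now that the version is resolved from the workspace
    member["purl"] = PackageURL(
        type=member["type"],
        name=member["name"],
        namespace=member["namespace"],
        version=member["version"],
    ).to_string()

    print(member["purl"])  # pkg:cargo/member-crate@2.3.1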
@@ -105,16 +170,21 @@ class CargoTomlHandler(CargoBaseHandler):

     @classmethod
     def parse(cls, location, package_only=False):
-        package_data = toml.load(location, _dict=dict)
-        core_package_data = package_data.get('package', {})
-        workspace = package_data.get('workspace', {})
+        package_data_toml = toml.load(location, _dict=dict)
+        workspace = package_data_toml.get('workspace', {})
+        core_package_data = package_data_toml.get('package', {})
         extra_data = {}
+        if workspace:
+            extra_data['workspace'] = workspace
+
+        package_data = core_package_data.copy()
+        for key, value in package_data.items():
+            if isinstance(value, dict) and 'workspace' in value:
+                core_package_data.pop(key)
+                extra_data[key] = 'workspace'

         name = core_package_data.get('name')
         version = core_package_data.get('version')
-        if isinstance(version, dict) and "workspace" in version:
-            version = None
-            extra_data["version"] = "workspace"

         description = core_package_data.get('description') or ''
         description = description.strip()
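The reworked parse() above stashes the whole [workspace] table in extra_data['workspace'] and replaces any field whose value is {workspace = true} with the marker string 'workspace'. For reference, this is roughly what the workspace table of a root Cargo.toml looks like once toml-parsed (hypothetical names and versions):

    # Sketch: the parsed [workspace] table that parse() keeps in
    # extra_data['workspace'] for assemble() to use later.
    import toml

    root_manifest = toml.loads("""
    [workspace]
    members = ["crates/member-crate"]

    [workspace.package]
    version = "2.3.1"
    edition = "2021"

    [workspace.dependencies]
    serde = { version = "1.0", features = ["derive"] }
    """)

    workspace = root_manifest["workspace"]
    print(workspace["members"])                # ['crates/member-crate']
    print(workspace["package"]["version"])     # 2.3.1
    print(workspace["dependencies"]["serde"])  # {'version': '1.0', 'features': ['derive']}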
@@ -132,22 +202,28 @@ def parse(cls, location, package_only=False):

         # cargo dependencies are complex and can be overriden at multiple levels
         dependencies = []
-        for key, value in core_package_data.items():
+        for key, value in package_data_toml.items():
             if key.endswith('dependencies'):
                 dependencies.extend(dependency_mapper(dependencies=value, scope=key))

         # TODO: add file refs:
         # - readme, include and exclude
-        # TODO: other URLs
-        # - documentation

         vcs_url = core_package_data.get('repository')
         homepage_url = core_package_data.get('homepage')
         repository_homepage_url = name and f'https://crates.io/crates/{name}'
         repository_download_url = name and version and f'https://crates.io/api/v1/crates/{name}/{version}/download'
         api_data_url = name and f'https://crates.io/api/v1/crates/{name}'
-        if workspace:
-            extra_data["workspace"] = workspace
+
+        extra_data_mappings = {
+            "documentation": "documentation_url",
+            "rust-version": "rust_version",
+            "edition": "rust_edition",
+        }
+        for cargo_attribute, extra_attribute in extra_data_mappings.items():
+            value = core_package_data.get(cargo_attribute)
+            if value:
+                extra_data[extra_attribute] = value

         package_data = dict(
             datasource_id=cls.datasource_id,
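The extra_data_mappings loop above is what backs the "Support more attributes from cargo" changelog entry; a small sketch of its effect, with a hypothetical manifest:

    # Sketch of the mapping applied above: three optional [package] attributes
    # end up in extra_data under more explicit names.
    import toml

    core_package_data = toml.loads("""
    [package]
    name = "member-crate"
    documentation = "https://docs.rs/member-crate"
    rust-version = "1.70"
    edition = "2021"
    """)["package"]

    extra_data_mappings = {
        "documentation": "documentation_url",
        "rust-version": "rust_version",
        "edition": "rust_edition",
    }
    extra_data = {
        extra_name: core_package_data[cargo_name]
        for cargo_name, extra_name in extra_data_mappings.items()
        if core_package_data.get(cargo_name)
    }
    print(extra_data)
    # {'documentation_url': 'https://docs.rs/member-crate',
    #  'rust_version': '1.70', 'rust_edition': '2021'}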
@@ -156,6 +232,7 @@ def parse(cls, location, package_only=False):
             version=version,
             primary_language=cls.default_primary_language,
             description=description,
+            keywords=keywords,
             parties=parties,
             extracted_license_statement=extracted_license_statement,
             vcs_url=vcs_url,
@@ -171,6 +248,7 @@ def parse(cls, location, package_only=False):

 CARGO_ATTRIBUTE_MAPPING = {
     # Fields in PackageData model: Fields in cargo
+    "version": "version",
     "homepage_url": "homepage",
     "vcs_url": "repository",
     "keywords": "categories",
@@ -179,6 +257,9 @@ def parse(cls, location, package_only=False):
     "license_detections": "license_detections",
     "declared_license_expression": "declared_license_expression",
     "declared_license_expression_spdx": "declared_license_expression_spdx",
+    # extra data fields (reverse mapping)
+    "edition": "rust_edition",
+    "rust-version": "rust_version",
 }

@@ -237,25 +318,36 @@ def dependency_mapper(dependencies, scope='dependencies'):
     """
     is_runtime = not scope.endswith(('dev-dependencies', 'build-dependencies'))
     for name, requirement in dependencies.items():
+        extra_data = {}
+        extracted_requirement = None
         if isinstance(requirement, str):
             # plain version requirement
             is_optional = False
+            extracted_requirement = requirement
+
         elif isinstance(requirement, dict):
-            # complex requirement, with more than version are harder to handle
-            # so we just dump
+            # complex requirement, we extract version if available
+            # everything else is just dumped in extra data
+            # here {workspace = true} means dependency version
+            # should be inherited
             is_optional = requirement.pop('optional', False)
-            requirement = saneyaml.dump(requirement)
+            if 'version' in requirement:
+                extracted_requirement = requirement.get('version')
+
+            if requirement:
+                extra_data = requirement

         yield models.DependentPackage(
             purl=PackageURL(
                 type='cargo',
                 name=name,
             ).to_string(),
-            extracted_requirement=requirement,
+            extracted_requirement=extracted_requirement,
             scope=scope,
             is_runtime=is_runtime,
             is_optional=is_optional,
             is_resolved=False,
+            extra_data=extra_data,
         )

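Finally, the reworked dependency_mapper() above is the piece behind the "we were not returning any dependencies" fix: it now yields a concrete extracted_requirement plus any remaining attributes as extra_data. A quick sketch of the three requirement shapes it handles (hypothetical dependency names; assumes an install that includes this change):

    # Sketch: calling the updated dependency_mapper() on the three common
    # requirement shapes found in a Cargo.toml [dependencies] table.
    from packagedcode.cargo import dependency_mapper

    dependencies = {
        "anyhow": "1.0",                                      # plain version string
        "serde": {"version": "1.0", "features": ["derive"]},  # complex requirement
        "member-dep": {"workspace": True},                    # inherited from the workspace
    }

    for dep in dependency_mapper(dependencies=dependencies):
        print(dep.purl, dep.extracted_requirement, dep.extra_data)

    # Expected, roughly:
    #   pkg:cargo/anyhow 1.0 {}
    #   pkg:cargo/serde 1.0 {'version': '1.0', 'features': ['derive']}
    #   pkg:cargo/member-dep None {'workspace': True}

The None requirement on the workspace-inherited dependency is what update_resource_package_data() later fills in from the workspace-level dependencies.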
