Skip to content

Commit 217d979

Browse files
authored
Merge pull request #3135 from nexB/packagecode-updates
Separate Package parsing functions Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
2 parents ddc9d2a + 76465e7 commit 217d979

File tree

12 files changed

+109
-61
lines changed

12 files changed

+109
-61
lines changed

CHANGELOG.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,18 @@ License detection:
6262
removed. This new command supports simpler reindexing using custom
6363
license texts and license rules contributed by plugins or stored in an
6464
additional directory.
65+
66+
Package detection:
67+
~~~~~~~~~~~~~~~~~~~~~
68+
69+
- Code for parsing a Maven POM, npm package.json, freebsd manifest and haxelib
70+
JSON has been separated into two functions: one that creates a PackageData
71+
object from the parsed Resource, and another that calls the previous function
72+
and yields the PackageData. This was done such that we can use the package
73+
manifest data parsing code outside of the scancode-toolkit context in other
74+
libraries.
75+
76+
6577
v31.2.1 - 2022-10-05
6678
----------------------------------
6779

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[metadata]
22
name = scancode-toolkit
3-
version = 31.2.1
3+
version = 31.2.2
44
license = Apache-2.0 AND CC-BY-4.0 AND LicenseRef-scancode-other-permissive AND LicenseRef-scancode-other-copyleft
55

66
# description must be on ONE line https://github.com/pypa/setuptools/issues/1390

src/packagedcode/chef.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def chef_api_url(name, version):
5555
return name and version and f'https://supermarket.chef.io/api/v1/cookbooks/{name}/versions/{version}'
5656

5757

58-
def get_urls(name, version):
58+
def get_urls(name, version, **kwargs):
5959
"""
6060
Return a mapping of URLs given a name and version.
6161
"""

src/packagedcode/cocoapods.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -434,7 +434,7 @@ def parse(cls, location):
434434
)
435435

436436

437-
def get_urls(name=None, version=None, homepage_url=None, vcs_url=None):
437+
def get_urls(name=None, version=None, homepage_url=None, vcs_url=None, **kwargs):
438438
"""
439439
Return a mapping of podspec URLS.
440440
"""

src/packagedcode/freebsd.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -42,20 +42,13 @@ class CompactManifestHandler(models.DatafileHandler):
4242
documentation_url = 'https://www.freebsd.org/cgi/man.cgi?pkg-create(8)#MANIFEST_FILE_DETAILS'
4343

4444
@classmethod
45-
def parse(cls, location):
46-
"""
47-
Yield one or more Package manifest objects given a file ``location`` pointing to a
48-
package archive, manifest or similar.
49-
"""
50-
with io.open(location, encoding='utf-8') as loc:
51-
freebsd_manifest = saneyaml.load(loc)
52-
45+
def _parse(cls, yaml_data):
5346
package_data = models.PackageData(
5447
datasource_id=cls.datasource_id,
5548
type=cls.default_package_type,
5649
qualifiers=dict(
57-
arch=freebsd_manifest.get('arch'),
58-
origin=freebsd_manifest.get('origin'),
50+
arch=yaml_data.get('arch'),
51+
origin=yaml_data.get('origin'),
5952
)
6053
)
6154

@@ -69,7 +62,7 @@ def parse(cls, location):
6962
]
7063

7164
for source, target in plain_fields:
72-
value = freebsd_manifest.get(source)
65+
value = yaml_data.get(source)
7366
if value:
7467
if isinstance(value, str):
7568
value = value.strip()
@@ -87,17 +80,28 @@ def parse(cls, location):
8780

8881
for source, func in field_mappers:
8982
logger.debug('parse: %(source)r, %(func)r' % locals())
90-
value = freebsd_manifest.get(source) or None
83+
value = yaml_data.get(source) or None
9184
if value:
9285
func(value, package_data)
9386

9487
# license_mapper needs multiple fields
95-
license_mapper(freebsd_manifest, package_data)
88+
license_mapper(yaml_data, package_data)
9689

9790
if package_data.declared_license:
9891
package_data.license_expression = cls.compute_normalized_license(package_data)
9992

100-
yield package_data
93+
return package_data
94+
95+
@classmethod
96+
def parse(cls, location):
97+
"""
98+
Yield one or more Package manifest objects given a file ``location`` pointing to a
99+
package archive, manifest or similar.
100+
"""
101+
with io.open(location, encoding='utf-8') as loc:
102+
yaml_data = saneyaml.load(loc)
103+
104+
yield cls._parse(yaml_data)
101105

102106
@classmethod
103107
def compute_normalized_license(cls, package):

src/packagedcode/haxe.py

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -45,26 +45,7 @@ class HaxelibJsonHandler(models.DatafileHandler):
4545
documentation_url = 'https://lib.haxe.org/documentation/creating-a-haxelib-package/'
4646

4747
@classmethod
48-
def parse(cls, location):
49-
"""
50-
Yield one or more Package manifest objects given a file ``location`` pointing to a
51-
package_data archive, manifest or similar.
52-
53-
{
54-
"name": "haxelib",
55-
"url" : "https://lib.haxe.org/documentation/",
56-
"license": "GPL",
57-
"tags": ["haxelib", "core"],
58-
"description": "The haxelib client",
59-
"classPath": "src",
60-
"version": "3.4.0",
61-
"releasenote": " * Fix password input issue in Windows (#421).\n * ....",
62-
"contributors": ["back2dos", "ncannasse", "jason", "Simn", "nadako", "andyli"]
63-
}
64-
"""
65-
with io.open(location, encoding='utf-8') as loc:
66-
json_data = json.load(loc)
67-
48+
def _parse(cls, json_data):
6849
name = json_data.get('name')
6950
version = json_data.get('version')
7051

@@ -110,4 +91,27 @@ def parse(cls, location):
11091
dep = models.DependentPackage(purl=dep_purl, is_resolved=is_resolved,)
11192
package_data.dependencies.append(dep)
11293

113-
yield package_data
94+
return package_data
95+
96+
@classmethod
97+
def parse(cls, location):
98+
"""
99+
Yield one or more Package manifest objects given a file ``location`` pointing to a
100+
package_data archive, manifest or similar.
101+
102+
{
103+
"name": "haxelib",
104+
"url" : "https://lib.haxe.org/documentation/",
105+
"license": "GPL",
106+
"tags": ["haxelib", "core"],
107+
"description": "The haxelib client",
108+
"classPath": "src",
109+
"version": "3.4.0",
110+
"releasenote": " * Fix password input issue in Windows (#421).\n * ....",
111+
"contributors": ["back2dos", "ncannasse", "jason", "Simn", "nadako", "andyli"]
112+
}
113+
"""
114+
with io.open(location, encoding='utf-8') as loc:
115+
json_data = json.load(loc)
116+
117+
yield cls._parse(json_data)

src/packagedcode/maven.py

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -891,12 +891,12 @@ def has_basic_pom_attributes(pom):
891891
return basics
892892

893893

894-
def get_maven_pom(location=None):
894+
def get_maven_pom(location=None, text=None):
895895
"""
896896
Return a MavenPom object from a POM file at `location` or provided as a
897897
`text` string.
898898
"""
899-
pom = MavenPom(location=location)
899+
pom = MavenPom(location=location, text=text)
900900

901901
extra_properties = {}
902902

@@ -1043,7 +1043,7 @@ def get_parties(pom):
10431043
return parties
10441044

10451045

1046-
def get_urls(namespace, name, version, qualifiers, base_url='https://repo1.maven.org/maven2'):
1046+
def get_urls(namespace, name, version, qualifiers, base_url='https://repo1.maven.org/maven2', **kwargs):
10471047
"""
10481048
Return a mapping of URLs.
10491049
"""
@@ -1105,7 +1105,30 @@ def parse(
11051105
Yield Packagedata objects from parsing a Maven pom file at `location` or
11061106
using the provided `text` (one or the other but not both).
11071107
"""
1108-
pom = get_maven_pom(location=location)
1108+
package = _parse(
1109+
datasource_id=datasource_id,
1110+
package_type=package_type,
1111+
primary_language=primary_language,
1112+
location=location,
1113+
base_url=base_url
1114+
)
1115+
if package:
1116+
yield package
1117+
1118+
1119+
def _parse(
1120+
datasource_id,
1121+
package_type,
1122+
primary_language,
1123+
location=None,
1124+
text=None,
1125+
base_url='https://repo1.maven.org/maven2',
1126+
):
1127+
"""
1128+
Return a PackageData object (or None) from parsing a Maven pom file at `location` or
1129+
using the provided `text` (one or the other but not both).
1130+
"""
1131+
pom = get_maven_pom(location=location, text=text)
11091132

11101133
if not pom:
11111134
return
@@ -1192,7 +1215,8 @@ def parse(
11921215
if not package_data.license_expression and package_data.declared_license:
11931216
package_data.license_expression = models.compute_normalized_license(package_data.declared_license)
11941217

1195-
yield package_data
1218+
return package_data
1219+
11961220

11971221
def build_vcs_and_code_view_urls(scm):
11981222
"""

src/packagedcode/models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ def set_purl(self, package_url):
307307
for key, value in package_url.to_dict().items():
308308
self_val = getattr(self, key)
309309
if not self_val and value:
310-
setattr(self, attr, value)
310+
setattr(self, key, value)
311311

312312
def to_dict(self, **kwargs):
313313
mapping = super().to_dict(**kwargs)

src/packagedcode/npm.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ def walk_npm(cls, resource, codebase, depth=0):
146146
yield subchild
147147

148148

149-
def get_urls(namespace, name, version):
149+
def get_urls(namespace, name, version, **kwargs):
150150
return dict(
151151
repository_homepage_url=npm_homepage_url(namespace, name, registry='https://www.npmjs.com/package'),
152152
repository_download_url=npm_download_url(namespace, name, version, registry='https://registry.npmjs.org'),
@@ -163,13 +163,10 @@ class NpmPackageJsonHandler(BaseNpmHandler):
163163
documentation_url = 'https://docs.npmjs.com/cli/v8/configuring-npm/package-json'
164164

165165
@classmethod
166-
def parse(cls, location):
167-
with io.open(location, encoding='utf-8') as loc:
168-
package_data = json.load(loc)
169-
170-
name = package_data.get('name')
171-
version = package_data.get('version')
172-
homepage_url = package_data.get('homepage', '')
166+
def _parse(cls, json_data):
167+
name = json_data.get('name')
168+
version = json_data.get('version')
169+
homepage_url = json_data.get('homepage', '')
173170

174171
# a package.json without name and version can be a private package
175172

@@ -188,11 +185,11 @@ def parse(cls, location):
188185
namespace=namespace or None,
189186
name=name,
190187
version=version or None,
191-
description=package_data.get('description', '').strip() or None,
188+
description=json_data.get('description', '').strip() or None,
192189
homepage_url=homepage_url,
193190
**urls,
194191
)
195-
vcs_revision = package_data.get('gitHead') or None
192+
vcs_revision = json_data.get('gitHead') or None
196193

197194
# mapping of top level package.json items to a function accepting as
198195
# arguments the package.json element value and returning an iterable of (key,
@@ -214,7 +211,7 @@ def parse(cls, location):
214211
]
215212

216213
for source, func in field_mappers:
217-
value = package_data.get(source) or None
214+
value = json_data.get(source) or None
218215
if value:
219216
if isinstance(value, str):
220217
value = value.strip()
@@ -226,14 +223,21 @@ def parse(cls, location):
226223
package.download_url = npm_download_url(package.namespace, package.name, package.version)
227224

228225
# licenses are a tad special with many different data structures
229-
lic = package_data.get('license')
230-
lics = package_data.get('licenses')
226+
lic = json_data.get('license')
227+
lics = json_data.get('licenses')
231228
package = licenses_mapper(lic, lics, package)
232229

233230
if not package.license_expression and package.declared_license:
234231
package.license_expression = compute_normalized_license(package.declared_license)
235232

236-
yield package
233+
return package
234+
235+
@classmethod
236+
def parse(cls, location):
237+
with io.open(location, encoding='utf-8') as loc:
238+
json_data = json.load(loc)
239+
240+
yield cls._parse(json_data)
237241

238242
@classmethod
239243
def compute_normalized_license(cls, package):

src/packagedcode/nuget.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
# TODO: add dependencies
1919

2020

21-
def get_urls(name, version):
21+
def get_urls(name, version, **kwargs):
2222
return dict(
2323
repository_homepage_url=f'https://www.nuget.org/packages/{name}/{version}',
2424
repository_download_url=f'https://www.nuget.org/api/v2/package/{name}/{version}',

0 commit comments

Comments
 (0)