Skip to content

Commit d9276c4

Browse files
committed
Move package parsing code to their own functions
* We would like to have a way to get PackageData objects from different manifest data without using the generator from the DatafileHandler.parse() method
* Update get_urls for several packages to accept arbitrary keyword arguments

Signed-off-by: Jono Yang <jyang@nexb.com>
1 parent ded56e9 commit d9276c4

File tree

10 files changed

+154
-103
lines changed

10 files changed

+154
-103
lines changed

src/packagedcode/chef.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def chef_api_url(name, version):
5555
return name and version and f'https://supermarket.chef.io/api/v1/cookbooks/{name}/versions/{version}'
5656

5757

58-
def get_urls(name, version):
58+
def get_urls(name, version, **kwargs):
5959
"""
6060
Return a mapping of URLs given a name and version.
6161
"""

src/packagedcode/cocoapods.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -434,7 +434,7 @@ def parse(cls, location):
434434
)
435435

436436

437-
def get_urls(name=None, version=None, homepage_url=None, vcs_url=None):
437+
def get_urls(name=None, version=None, homepage_url=None, vcs_url=None, **kwargs):
438438
"""
439439
Return a mapping of podspec URLS.
440440
"""

src/packagedcode/freebsd.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,7 @@ class CompactManifestHandler(models.DatafileHandler):
4242
documentation_url = 'https://www.freebsd.org/cgi/man.cgi?pkg-create(8)#MANIFEST_FILE_DETAILS'
4343

4444
@classmethod
45-
def parse(cls, location):
46-
"""
47-
Yield one or more Package manifest objects given a file ``location`` pointing to a
48-
package archive, manifest or similar.
49-
"""
45+
def _parse_freebsd_manifest_data(cls, location):
5046
with io.open(location, encoding='utf-8') as loc:
5147
freebsd_manifest = saneyaml.load(loc)
5248

@@ -97,7 +93,19 @@ def parse(cls, location):
9793
if package_data.declared_license:
9894
package_data.license_expression = cls.compute_normalized_license(package_data)
9995

100-
yield package_data
96+
return package_data
97+
98+
@classmethod
99+
def parse(cls, location):
100+
"""
101+
Yield one or more Package manifest objects given a file ``location`` pointing to a
102+
package archive, manifest or similar.
103+
"""
104+
yield cls._parse_freebsd_manifest_data(
105+
location=location,
106+
datasource_id=cls.datasource_id,
107+
default_package_type=cls.default_package_type,
108+
)
101109

102110
@classmethod
103111
def compute_normalized_license(cls, package):

src/packagedcode/haxe.py

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -45,26 +45,7 @@ class HaxelibJsonHandler(models.DatafileHandler):
4545
documentation_url = 'https://lib.haxe.org/documentation/creating-a-haxelib-package/'
4646

4747
@classmethod
48-
def parse(cls, location):
49-
"""
50-
Yield one or more Package manifest objects given a file ``location`` pointing to a
51-
package_data archive, manifest or similar.
52-
53-
{
54-
"name": "haxelib",
55-
"url" : "https://lib.haxe.org/documentation/",
56-
"license": "GPL",
57-
"tags": ["haxelib", "core"],
58-
"description": "The haxelib client",
59-
"classPath": "src",
60-
"version": "3.4.0",
61-
"releasenote": " * Fix password input issue in Windows (#421).\n * ....",
62-
"contributors": ["back2dos", "ncannasse", "jason", "Simn", "nadako", "andyli"]
63-
}
64-
"""
65-
with io.open(location, encoding='utf-8') as loc:
66-
json_data = json.load(loc)
67-
48+
def _parse_haxelib_json_data(cls, json_data):
6849
name = json_data.get('name')
6950
version = json_data.get('version')
7051

@@ -110,4 +91,27 @@ def parse(cls, location):
11091
dep = models.DependentPackage(purl=dep_purl, is_resolved=is_resolved,)
11192
package_data.dependencies.append(dep)
11293

113-
yield package_data
94+
return package_data
95+
96+
@classmethod
97+
def parse(cls, location):
98+
"""
99+
Yield one or more Package manifest objects given a file ``location`` pointing to a
100+
package_data archive, manifest or similar.
101+
102+
{
103+
"name": "haxelib",
104+
"url" : "https://lib.haxe.org/documentation/",
105+
"license": "GPL",
106+
"tags": ["haxelib", "core"],
107+
"description": "The haxelib client",
108+
"classPath": "src",
109+
"version": "3.4.0",
110+
"releasenote": " * Fix password input issue in Windows (#421).\n * ....",
111+
"contributors": ["back2dos", "ncannasse", "jason", "Simn", "nadako", "andyli"]
112+
}
113+
"""
114+
with io.open(location, encoding='utf-8') as loc:
115+
json_data = json.load(loc)
116+
117+
yield cls._parse_haxelib_json_data(json_data)

src/packagedcode/maven.py

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -891,12 +891,12 @@ def has_basic_pom_attributes(pom):
891891
return basics
892892

893893

894-
def get_maven_pom(location=None):
894+
def get_maven_pom(location=None, text=None):
895895
"""
896896
Return a MavenPom object from a POM file at `location` or provided as a
897897
`text` string.
898898
"""
899-
pom = MavenPom(location=location)
899+
pom = MavenPom(location=location, text=text)
900900

901901
extra_properties = {}
902902

@@ -1043,7 +1043,7 @@ def get_parties(pom):
10431043
return parties
10441044

10451045

1046-
def get_urls(namespace, name, version, qualifiers, base_url='https://repo1.maven.org/maven2'):
1046+
def get_urls(namespace, name, version, qualifiers, base_url='https://repo1.maven.org/maven2', **kwargs):
10471047
"""
10481048
Return a mapping of URLs.
10491049
"""
@@ -1095,17 +1095,40 @@ def get_urls(namespace, name, version, qualifiers, base_url='https://repo1.maven
10951095

10961096

10971097
def parse(
1098-
location,
10991098
datasource_id,
11001099
package_type,
11011100
primary_language,
1101+
location=None,
1102+
text=None,
11021103
base_url='https://repo1.maven.org/maven2',
11031104
):
11041105
"""
11051106
Yield Packagedata objects from parsing a Maven pom file at `location` or
11061107
using the provided `text` (one or the other but not both).
11071108
"""
1108-
pom = get_maven_pom(location=location)
1109+
yield parse_pom(
1110+
datasource_id=datasource_id,
1111+
package_type=package_type,
1112+
primary_language=primary_language,
1113+
location=location,
1114+
text=text,
1115+
base_url=base_url
1116+
)
1117+
1118+
1119+
def parse_pom(
1120+
datasource_id,
1121+
package_type,
1122+
primary_language,
1123+
location=None,
1124+
text=None,
1125+
base_url='https://repo1.maven.org/maven2',
1126+
):
1127+
"""
1128+
Yield Packagedata objects from parsing a Maven pom file at `location` or
1129+
using the provided `text` (one or the other but not both).
1130+
"""
1131+
pom = get_maven_pom(location=location, text=text)
11091132

11101133
if not pom:
11111134
return
@@ -1192,7 +1215,8 @@ def parse(
11921215
if not package_data.license_expression and package_data.declared_license:
11931216
package_data.license_expression = models.compute_normalized_license(package_data.declared_license)
11941217

1195-
yield package_data
1218+
return package_data
1219+
11961220

11971221
def build_vcs_and_code_view_urls(scm):
11981222
"""

src/packagedcode/models.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ def set_purl(self, package_url):
307307
for key, value in package_url.to_dict().items():
308308
self_val = getattr(self, key)
309309
if not self_val and value:
310-
setattr(self, attr, value)
310+
setattr(self, key, value)
311311

312312
def to_dict(self, **kwargs):
313313
mapping = super().to_dict(**kwargs)
@@ -413,6 +413,7 @@ class Dependency(DependentPackage):
413413

414414
def __attrs_post_init__(self, *args, **kwargs):
415415
if not self.dependency_uid:
416+
print(self.purl)
416417
self.dependency_uid = build_package_uid(self.purl)
417418

418419
@classmethod

src/packagedcode/npm.py

Lines changed: 78 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ def walk_npm(cls, resource, codebase, depth=0):
146146
yield subchild
147147

148148

149-
def get_urls(namespace, name, version):
149+
def get_urls(namespace, name, version, **kwargs):
150150
return dict(
151151
repository_homepage_url=npm_homepage_url(namespace, name, registry='https://www.npmjs.com/package'),
152152
repository_download_url=npm_download_url(namespace, name, version, registry='https://registry.npmjs.org'),
@@ -167,77 +167,91 @@ def parse(cls, location):
167167
with io.open(location, encoding='utf-8') as loc:
168168
package_data = json.load(loc)
169169

170-
name = package_data.get('name')
171-
version = package_data.get('version')
172-
homepage_url = package_data.get('homepage', '')
173-
174-
# a package.json without name and version can be a private package
170+
yield parse_npm_package_json(
171+
package_data,
172+
datasource_id=cls.datasource_id,
173+
default_package_type=cls.default_package_type,
174+
default_primary_language=cls.default_primary_language
175+
)
175176

176-
if homepage_url and isinstance(homepage_url, list):
177-
# TODO: should we keep other URLs
178-
homepage_url = homepage_url[0]
179-
homepage_url = homepage_url.strip() or None
177+
@classmethod
178+
def compute_normalized_license(cls, package):
179+
return compute_normalized_license(package.declared_license)
180180

181-
namespace, name = split_scoped_package_name(name)
182181

183-
urls = get_urls(namespace, name, version)
184-
package = models.PackageData(
185-
datasource_id=cls.datasource_id,
186-
type=cls.default_package_type,
187-
primary_language=cls.default_primary_language,
188-
namespace=namespace or None,
189-
name=name,
190-
version=version or None,
191-
description=package_data.get('description', '').strip() or None,
192-
homepage_url=homepage_url,
193-
**urls,
194-
)
195-
vcs_revision = package_data.get('gitHead') or None
196-
197-
# mapping of top level package.json items to a function accepting as
198-
# arguments the package.json element value and returning an iterable of (key,
199-
# values) to update on a package
200-
field_mappers = [
201-
('author', partial(party_mapper, party_type='author')),
202-
('contributors', partial(party_mapper, party_type='contributor')),
203-
('maintainers', partial(party_mapper, party_type='maintainer')),
204-
205-
('dependencies', partial(deps_mapper, field_name='dependencies')),
206-
('devDependencies', partial(deps_mapper, field_name='devDependencies')),
207-
('peerDependencies', partial(deps_mapper, field_name='peerDependencies')),
208-
('optionalDependencies', partial(deps_mapper, field_name='optionalDependencies')),
209-
('bundledDependencies', bundle_deps_mapper),
210-
('repository', partial(vcs_repository_mapper, vcs_revision=vcs_revision)),
211-
('keywords', keywords_mapper,),
212-
('bugs', bugs_mapper),
213-
('dist', dist_mapper),
214-
]
215-
216-
for source, func in field_mappers:
217-
value = package_data.get(source) or None
182+
def parse_npm_package_json(
183+
package_data,
184+
datasource_id='npm_package_json',
185+
default_package_type='npm',
186+
default_primary_language='Javascript'
187+
):
188+
name = package_data.get('name')
189+
version = package_data.get('version')
190+
homepage_url = package_data.get('homepage', '')
191+
192+
# a package.json without name and version can be a private package
193+
194+
if homepage_url and isinstance(homepage_url, list):
195+
# TODO: should we keep other URLs
196+
homepage_url = homepage_url[0]
197+
homepage_url = homepage_url.strip() or None
198+
199+
namespace, name = split_scoped_package_name(name)
200+
201+
urls = get_urls(namespace, name, version)
202+
package = models.PackageData(
203+
datasource_id=datasource_id,
204+
type=default_package_type,
205+
primary_language=default_primary_language,
206+
namespace=namespace or None,
207+
name=name,
208+
version=version or None,
209+
description=package_data.get('description', '').strip() or None,
210+
homepage_url=homepage_url,
211+
**urls,
212+
)
213+
vcs_revision = package_data.get('gitHead') or None
214+
215+
# mapping of top level package.json items to a function accepting as
216+
# arguments the package.json element value and returning an iterable of (key,
217+
# values) to update on a package
218+
field_mappers = [
219+
('author', partial(party_mapper, party_type='author')),
220+
('contributors', partial(party_mapper, party_type='contributor')),
221+
('maintainers', partial(party_mapper, party_type='maintainer')),
222+
223+
('dependencies', partial(deps_mapper, field_name='dependencies')),
224+
('devDependencies', partial(deps_mapper, field_name='devDependencies')),
225+
('peerDependencies', partial(deps_mapper, field_name='peerDependencies')),
226+
('optionalDependencies', partial(deps_mapper, field_name='optionalDependencies')),
227+
('bundledDependencies', bundle_deps_mapper),
228+
('repository', partial(vcs_repository_mapper, vcs_revision=vcs_revision)),
229+
('keywords', keywords_mapper,),
230+
('bugs', bugs_mapper),
231+
('dist', dist_mapper),
232+
]
233+
234+
for source, func in field_mappers:
235+
value = package_data.get(source) or None
236+
if value:
237+
if isinstance(value, str):
238+
value = value.strip()
218239
if value:
219-
if isinstance(value, str):
220-
value = value.strip()
221-
if value:
222-
func(value, package)
240+
func(value, package)
223241

224-
if not package.download_url:
225-
# Only add a synthetic download URL if there is none from the dist mapping.
226-
package.download_url = npm_download_url(package.namespace, package.name, package.version)
227-
228-
# licenses are a tad special with many different data structures
229-
lic = package_data.get('license')
230-
lics = package_data.get('licenses')
231-
package = licenses_mapper(lic, lics, package)
242+
if not package.download_url:
243+
# Only add a synthetic download URL if there is none from the dist mapping.
244+
package.download_url = npm_download_url(package.namespace, package.name, package.version)
232245

233-
if not package.license_expression and package.declared_license:
234-
package.license_expression = compute_normalized_license(package.declared_license)
246+
# licenses are a tad special with many different data structures
247+
lic = package_data.get('license')
248+
lics = package_data.get('licenses')
249+
package = licenses_mapper(lic, lics, package)
235250

236-
yield package
251+
if not package.license_expression and package.declared_license:
252+
package.license_expression = compute_normalized_license(package.declared_license)
237253

238-
@classmethod
239-
def compute_normalized_license(cls, package):
240-
return compute_normalized_license(package.declared_license)
254+
return package
241255

242256

243257
class BaseNpmLockHandler(BaseNpmHandler):

src/packagedcode/nuget.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
# TODO: add dependencies
1919

2020

21-
def get_urls(name, version):
21+
def get_urls(name, version, **kwargs):
2222
return dict(
2323
repository_homepage_url=f'https://www.nuget.org/packages/{name}/{version}',
2424
repository_download_url=f'https://www.nuget.org/api/v2/package/{name}/{version}',

src/packagedcode/pypi.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1609,7 +1609,7 @@ def get_setup_py_args(location, include_not_parsable=False):
16091609
return parse_setup_py(location)
16101610

16111611

1612-
def get_pypi_urls(name, version):
1612+
def get_pypi_urls(name, version, **kwargs):
16131613
"""
16141614
Return a mapping of computed Pypi URLs for this package
16151615
"""

src/packagedcode/rubygems.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ def compute_normalized_license(declared_license):
337337
return combine_expressions(detected_licenses)
338338

339339

340-
def get_urls(name, version=None, platform=None):
340+
def get_urls(name, version=None, platform=None, **kwargs):
341341
"""
342342
Return a mapping of standard URLs
343343
"""

0 commit comments

Comments
 (0)