Skip to content

Commit a98327e

Browse files
Improve conda packages and dependencies parsing
Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent c59de73 commit a98327e

19 files changed

+2931
-156
lines changed

src/packagedcode/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,8 @@
7373
cocoapods.PodfileLockHandler,
7474
cocoapods.PodfileHandler,
7575

76-
conda.CondaYamlHandler,
7776
conda.CondaMetaYamlHandler,
77+
conda.CondaYamlHandler,
7878

7979
conan.ConanFileHandler,
8080
conan.ConanDataHandler,

src/packagedcode/conda.py

Lines changed: 162 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
from packagedcode import models
1616
from packagedcode.pypi import BaseDependencyFileHandler
17+
from dparse2.parser import parse_requirement_line
1718

1819
"""
1920
Handle Conda manifests and metadata, see https://docs.conda.io/en/latest/
@@ -23,18 +24,36 @@
2324
"""
2425

2526
# TODO: there are likely other package data files for Conda
26-
# TODO: report platform
27-
2827

2928
class CondaYamlHandler(BaseDependencyFileHandler):
30-
# TODO: there are several other manifests worth adding
3129
datasource_id = 'conda_yaml'
32-
path_patterns = ('*conda.yaml', '*conda.yml',)
33-
default_package_type = 'pypi'
30+
path_patterns = ('*conda*.yaml', '*env*.yaml', '*environment*.yaml')
31+
default_package_type = 'conda'
3432
default_primary_language = 'Python'
3533
description = 'Conda yaml manifest'
3634
documentation_url = 'https://docs.conda.io/'
3735

36+
@classmethod
37+
def parse(cls, location, package_only=False):
38+
with open(location) as fi:
39+
conda_data = saneyaml.load(fi.read())
40+
dependencies = get_conda_yaml_dependencies(conda_data=conda_data)
41+
name = conda_data.get('name')
42+
extra_data = {}
43+
channels = conda_data.get('channels')
44+
if channels:
45+
extra_data['channels'] = channels
46+
if name or dependencies:
47+
package_data = dict(
48+
datasource_id=cls.datasource_id,
49+
type=cls.default_package_type,
50+
name=name,
51+
primary_language=cls.default_primary_language,
52+
dependencies=dependencies,
53+
extra_data=extra_data,
54+
)
55+
yield models.PackageData.from_data(package_data, package_only)
56+
3857

3958
class CondaMetaYamlHandler(models.DatafileHandler):
4059
datasource_id = 'conda_meta_yaml'
@@ -83,9 +102,7 @@ def parse(cls, location, package_only=False):
83102
metayaml = get_meta_yaml_data(location)
84103
package_element = metayaml.get('package') or {}
85104
package_name = package_element.get('name')
86-
if not package_name:
87-
return
88-
version = package_element.get('version')
105+
package_version = package_element.get('version')
89106

90107
# FIXME: source is source, not download
91108
source = metayaml.get('source') or {}
@@ -99,6 +116,7 @@ def parse(cls, location, package_only=False):
99116
vcs_url = about.get('dev_url')
100117

101118
dependencies = []
119+
extra_data = {}
102120
requirements = metayaml.get('requirements') or {}
103121
for scope, reqs in requirements.items():
104122
# requirements format is like:
@@ -107,33 +125,152 @@ def parse(cls, location, package_only=False):
107125
# u'progressbar2', u'python >=3.6'])])
108126
for req in reqs:
109127
name, _, requirement = req.partition(" ")
110-
purl = PackageURL(type=cls.default_package_type, name=name)
128+
version = None
129+
if requirement.startswith("=="):
130+
_, version = requirement.split("==")
131+
132+
# requirements may have namespace, version too
133+
# - conda-forge::numpy=1.15.4
134+
namespace = None
135+
if "::" in name:
136+
namespace, name = name.split("::")
137+
138+
is_pinned = False
139+
if "=" in name:
140+
name, version = name.split("=")
141+
is_pinned = True
142+
requirement = f"={version}"
143+
144+
if name in ('pip', 'python'):
145+
if not scope in extra_data:
146+
extra_data[scope] = [req]
147+
else:
148+
extra_data[scope].append(req)
149+
continue
150+
151+
purl = PackageURL(
152+
type=cls.default_package_type,
153+
name=name,
154+
namespace=namespace,
155+
version=version,
156+
)
157+
if "run" in scope:
158+
is_runtime = True
159+
is_optional = False
160+
else:
161+
is_runtime = False
162+
is_optional = True
163+
111164
dependencies.append(
112165
models.DependentPackage(
113166
purl=purl.to_string(),
114167
extracted_requirement=requirement,
115168
scope=scope,
116-
is_runtime=True,
117-
is_optional=False,
169+
is_runtime=is_runtime,
170+
is_optional=is_optional,
171+
is_pinned=is_pinned,
172+
is_direct=True,
118173
)
119174
)
120175

121176
package_data = dict(
122177
datasource_id=cls.datasource_id,
123178
type=cls.default_package_type,
124179
name=package_name,
125-
version=version,
180+
version=package_version,
126181
download_url=download_url,
127182
homepage_url=homepage_url,
128183
vcs_url=vcs_url,
129184
description=description,
130185
sha256=sha256,
131186
extracted_license_statement=extracted_license_statement,
132187
dependencies=dependencies,
188+
extra_data=extra_data,
133189
)
134190
yield models.PackageData.from_data(package_data, package_only)
135191

136192

193+
def get_conda_yaml_dependencies(conda_data):
194+
"""
195+
Return a list of DependentPackage mappings from conda and pypi
196+
dependencies present in a `conda_data` mapping.
197+
"""
198+
dependencies = conda_data.get('dependencies') or []
199+
deps = []
200+
for dep in dependencies:
201+
if isinstance(dep, str):
202+
namespace = None
203+
specs = None
204+
is_pinned = False
205+
206+
if "::" in dep:
207+
namespace, dep = dep.split("::")
208+
209+
req = parse_requirement_line(dep)
210+
if req:
211+
name = req.name
212+
version = None
213+
214+
specs = str(req.specs)
215+
if '==' in specs:
216+
version = specs.replace('==','')
217+
is_pinned = True
218+
purl = PackageURL(type='pypi', name=name, version=version)
219+
else:
220+
if "=" in dep:
221+
dep, version = dep.split("=")
222+
is_pinned = True
223+
specs = f"={version}"
224+
225+
purl = PackageURL(
226+
type='conda',
227+
namespace=namespace,
228+
name=dep,
229+
version=version,
230+
)
231+
232+
if purl.name in ('pip', 'python'):
233+
continue
234+
235+
deps.append(
236+
models.DependentPackage(
237+
purl=purl.to_string(),
238+
extracted_requirement=specs,
239+
scope='dependencies',
240+
is_runtime=True,
241+
is_optional=False,
242+
is_pinned=is_pinned,
243+
is_direct=True,
244+
).to_dict()
245+
)
246+
247+
elif isinstance(dep, dict):
248+
for line in dep.get('pip', []):
249+
req = parse_requirement_line(line)
250+
if req:
251+
name = req.name
252+
version = None
253+
is_pinned = False
254+
specs = str(req.specs)
255+
if '==' in specs:
256+
version = specs.replace('==','')
257+
is_pinned = True
258+
purl = PackageURL(type='pypi', name=name, version=version)
259+
deps.append(
260+
models.DependentPackage(
261+
purl=purl.to_string(),
262+
extracted_requirement=specs,
263+
scope='dependencies',
264+
is_runtime=True,
265+
is_optional=False,
266+
is_pinned=is_pinned,
267+
is_direct=True,
268+
).to_dict()
269+
)
270+
271+
return deps
272+
273+
137274
def get_meta_yaml_data(location):
138275
"""
139276
Return a mapping of conda metadata loaded from a meta.yaml file. The format
@@ -158,10 +295,21 @@ def get_meta_yaml_data(location):
158295
# Replace the variable with the value
159296
if '{{' in line and '}}' in line:
160297
for variable, value in variables.items():
161-
line = line.replace('{{ ' + variable + ' }}', value)
298+
if "|lower" in line:
299+
line = line.replace('{{ ' + variable + '|lower' + ' }}', value.lower())
300+
else:
301+
line = line.replace('{{ ' + variable + ' }}', value)
162302
yaml_lines.append(line)
163303

164-
return saneyaml.load('\n'.join(yaml_lines))
304+
# Cleanup any remaining complex jinja template lines
305+
# as the yaml load fails otherwise for unresolved jinja
306+
cleaned_yaml_lines = [
307+
line
308+
for line in yaml_lines
309+
if not "{{" in line
310+
]
311+
312+
return saneyaml.load(''.join(cleaned_yaml_lines))
165313

166314

167315
def get_variables(location):

src/packagedcode/models.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1100,7 +1100,16 @@ def is_datafile(cls, location, filetypes=tuple(), _bare_filename=False):
11001100
"""
11011101
if filetype.is_file(location) or _bare_filename:
11021102
loc = as_posixpath(location)
1103-
if any(fnmatchcase(loc, pat) for pat in cls.path_patterns):
1103+
1104+
# Some extension strings are used interchangeably
1105+
extension_aliases = {"yaml": "yml"}
1106+
path_patterns = list(cls.path_patterns)
1107+
for pattern in cls.path_patterns:
1108+
for extension, extension_alias in extension_aliases.items():
1109+
new_pattern = pattern.replace(extension, extension_alias)
1110+
path_patterns.append(new_pattern)
1111+
1112+
if any(fnmatchcase(loc, pat) for pat in path_patterns):
11041113
filetypes = filetypes or cls.filetypes
11051114
if not filetypes:
11061115
return True
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# from https://raw.githubusercontent.com/bayer-science-for-a-better-life/phc-gnn/refs/heads/master/environment_gpu.yml
2+
3+
name: phc-gnn
4+
5+
channels:
6+
- anaconda
7+
- pytorch
8+
- conda-forge
9+
- defaults
10+
11+
dependencies:
12+
- pip=20.2.4
13+
- anaconda::python=3.8.5
14+
- anaconda::python-dateutil=2.8.1
15+
- cudatoolkit=10.1
16+
- magma-cuda101
17+
- cudnn=7.6.5
18+
- pytorch=1.7.1
19+
- torchvision=0.8.2
20+
- torchaudio=0.7.2
21+
- conda-forge::numpy=1.19.2
22+
- anaconda::scipy=1.5.2
23+
- conda-forge::matplotlib=3.3.2
24+
- anaconda::networkx=2.5
25+
- anaconda::scikit-learn=0.23.2
26+
- anaconda::notebook=6.1.4
27+
- anaconda::jupyter_client=6.1.7
28+
- anaconda::jupyter_core=4.6.3
29+
- anaconda::h5py=2.10.0
30+
- conda-forge::tqdm=4.50.0
31+
- conda-forge::tensorboard=2.4.0
32+
- pip:
33+
- ogb==1.2.4
34+
- pytest==6.2.1
35+
- bottleneck==1.3.2
36+
# - torch-cluster==1.5.8
37+
# - torch-scatter==2.0.5
38+
# - torch-sparse==0.6.8
39+
# - torch-spline-conv==1.2.0
40+
# - torch-geometric==1.6.1

0 commit comments

Comments
 (0)