Skip to content

Commit c5dd035

Browse files
Support scanning .dsc and copyright files
Supports scanning .dsc and _copyright files from Debian package and metadata archives for package metadata. Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent 4f3cca0 commit c5dd035

File tree

9 files changed

+13005
-688
lines changed

9 files changed

+13005
-688
lines changed

src/packagedcode/debian.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,11 +197,19 @@ def parse(cls, location):
197197
location=location,
198198
remove_pgp_signature=True,
199199
)
200-
yield build_package_data(
200+
201+
package_data_from_file = build_package_data_from_package_filename(
202+
filename=os.path.basename(location),
203+
datasource_id=cls.datasource_id,
204+
package_type=cls.default_package_type,
205+
)
206+
package_data = build_package_data(
201207
debian_data=debian_data,
202208
datasource_id=cls.datasource_id,
203209
package_type=cls.default_package_type,
204210
)
211+
package_data.update_purl_fields(package_data=package_data_from_file)
212+
yield package_data
205213

206214
@classmethod
207215
def assign_package_to_resources(cls, package, resource, codebase, package_adder):

src/packagedcode/debian_copyright.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
from debian_inspector.copyright import CopyrightLicenseParagraph
2121
from debian_inspector.copyright import CopyrightHeaderParagraph
2222
from debian_inspector.copyright import DebianCopyright
23+
from debian_inspector.package import CodeMetadata
24+
from debian_inspector.version import Version as DebVersion
2325
from license_expression import ExpressionError
2426
from license_expression import LicenseSymbolLike
2527
from license_expression import Licensing
@@ -263,11 +265,65 @@ class StandaloneDebianCopyrightFileHandler(BaseDebianCopyrightFileHandler):
263265
'*_copyright',
264266
)
265267

268+
@classmethod
def is_datafile(cls, location, filetypes=tuple()):
    """
    Return a truthy value when ``location`` is a standalone Debian copyright
    file: the parent ``is_datafile`` check accepts it and neither the
    in-package nor the in-source copyright handler claims it.
    """
    accepted_by_parent = super().is_datafile(location, filetypes=filetypes)
    if not accepted_by_parent:
        # Hand back the parent's own falsy result as-is.
        return accepted_by_parent

    # Files recognized by the more specific handlers are not standalone.
    if DebianCopyrightFileInPackageHandler.is_datafile(location):
        return False

    return not DebianCopyrightFileInSourceHandler.is_datafile(location)
275+
266276
@classmethod
def assemble(cls, package_data, resource, codebase, package_adder):
    """
    Yield whatever the parent handler's ``assemble`` yields, unchanged.
    """
    # No custom assembly here: defer entirely to the inherited default.
    inherited = super().assemble(package_data, resource, codebase, package_adder)
    yield from inherited
270280

281+
@classmethod
def parse(cls, location):
    """
    Yield the PackageData produced by the parent ``parse`` for the copyright
    file at ``location`` (license/copyright information, like other copyright
    files), completed with purl fields derived from the file name when
    ``build_package_data_from_metadata_filename`` can build them, e.g. for a
    copyright file obtained from an upstream metadata archive.

    Nothing is yielded when the parent ``parse`` yields nothing.
    """
    # The filename-derived data does not depend on the parsed content, so it
    # is built only once.
    package_data_from_file = build_package_data_from_metadata_filename(
        filename=os.path.basename(location),
        datasource_id=cls.datasource_id,
        package_type=cls.default_package_type,
    )

    # Iterate rather than ``list(...).pop()``: an empty parent result no
    # longer raises IndexError and no yielded item is silently dropped.
    for package_data in super().parse(location):
        if package_data_from_file:
            package_data.update_purl_fields(package_data=package_data_from_file)

        yield package_data
299+
300+
301+
def build_package_data_from_metadata_filename(filename, datasource_id, package_type):
    """
    Build and return a PackageData from the ``filename`` of a Debian package
    metadata file, using ``datasource_id`` and ``package_type`` as given.

    Return None when ``CodeMetadata.from_filename`` rejects the filename with
    a ValueError.
    """
    try:
        metadata = CodeMetadata.from_filename(filename=filename)
    except ValueError:
        return None

    # Debian version objects are stored in their string form.
    raw_version = metadata.version
    version = str(raw_version) if isinstance(raw_version, DebVersion) else raw_version

    # TODO: the distro cannot be known from the name alone. A PURL without a
    # namespace is invalid, so a default value is used here.
    return models.PackageData(
        datasource_id=datasource_id,
        type=package_type,
        name=metadata.name,
        namespace='debian',
        version=version,
    )
326+
271327

272328
class NotReallyStructuredCopyrightFile(Exception):
273329
"""

src/packagedcode/models.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -784,6 +784,26 @@ def populate_license_fields(self):
784784
if self.extracted_license_statement and not isinstance(self.extracted_license_statement, str):
785785
self.extracted_license_statement = saneyaml.dump(self.extracted_license_statement)
786786

787+
def update_purl_fields(self, package_data, replace=False):
    """
    Update the purl fields (name, namespace, version and qualifiers) of this
    object in place with the values of the ``package_data`` object.

    Do nothing when ``package_data`` is None or when its ``type`` differs
    from the ``type`` of this object.

    With ``replace`` False (the default) only the fields that are empty on
    this object are filled in. With ``replace`` True every purl field is
    overwritten with the value found on ``package_data``.
    """
    # A missing source or a different package type cannot describe this
    # package: leave every field untouched.
    if package_data is None or self.type != package_data.type:
        return

    for purl_field in ("name", "namespace", "version", "qualifiers"):
        # Keep an existing non-empty value unless replacement was requested.
        if getattr(self, purl_field) and not replace:
            continue

        setattr(self, purl_field, getattr(package_data, purl_field))
806+
787807
def to_dict(self, with_details=True, **kwargs):
788808
mapping = super().to_dict(with_details=with_details, **kwargs)
789809
if not with_details:
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
This is the Debian prepackaged version of the GNU diffutils package.
2+
GNU `diff' was written by Mike Haertel, David Hayes, Richard Stallman,
3+
Len Tower, and Paul Eggert. Wayne Davison designed and implemented
4+
the unified output format. GNU `diff3' was written by Randy Smith.
5+
GNU `sdiff' was written by Thomas Lord. GNU `cmp' was written by
6+
Torbjorn Granlund and David MacKenzie.
7+
8+
The source for this release was obtained from
9+
10+
https://ftp.gnu.org/gnu/diffutils/diffutils-3.7.tar.xz
11+
12+
Program copyright and license:
13+
=============================
14+
15+
Copyright (C) 1988-1996, 1998, 2001-2002, 2004, 2006-2007, 2009-2013, 2015-2018
16+
Free Software Foundation, Inc.
17+
18+
This program is free software: you can redistribute it and/or modify
19+
it under the terms of the GNU General Public License as published by
20+
the Free Software Foundation, either version 3 of the License, or
21+
(at your option) any later version.
22+
23+
This program is distributed in the hope that it will be useful,
24+
but WITHOUT ANY WARRANTY; without even the implied warranty of
25+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26+
GNU General Public License for more details.
27+
28+
On Debian systems, the complete text of the GNU General Public License
29+
may be found in `/usr/share/common-licenses/GPL'.
30+
31+
32+
Manual copyright and license:
33+
============================
34+
35+
Copyright (C) 1992-1994, 1998, 2001-2002, 2004, 2006, 2009-2018 Free
36+
Software Foundation, Inc.
37+
38+
Permission is granted to copy, distribute and/or modify this document
39+
under the terms of the GNU Free Documentation License, Version 1.3 or
40+
any later version published by the Free Software Foundation; with no
41+
Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.
42+
43+
On Debian systems, the complete text of the GNU Free Documentation
44+
License may be found in `/usr/share/common-licenses/GFDL'.
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
[
2+
{
3+
"type": "deb",
4+
"namespace": "debian",
5+
"name": "diffutils",
6+
"version": "3.7-5",
7+
"qualifiers": {},
8+
"subpath": null,
9+
"primary_language": null,
10+
"description": null,
11+
"release_date": null,
12+
"parties": [],
13+
"keywords": [],
14+
"homepage_url": null,
15+
"download_url": null,
16+
"size": null,
17+
"sha1": null,
18+
"md5": null,
19+
"sha256": null,
20+
"sha512": null,
21+
"bug_tracking_url": null,
22+
"code_view_url": null,
23+
"vcs_url": null,
24+
"copyright": "Copyright (c) 1988-1996, 1998, 2001-2002, 2004, 2006-2007, 2009-2013, 2015-2018 Free Software Foundation, Inc.\nCopyright (c) 1992-1994, 1998, 2001-2002, 2004, 2006, 2009-2018 Free Software Foundation, Inc.",
25+
"holder": "Free Software Foundation, Inc.\nFree Software Foundation, Inc.",
26+
"declared_license_expression": "gpl-3.0-plus AND gfdl-1.3-plus",
27+
"declared_license_expression_spdx": "GPL-3.0-or-later AND GFDL-1.3-or-later",
28+
"license_detections": [
29+
{
30+
"license_expression": "gpl-3.0-plus",
31+
"license_expression_spdx": "GPL-3.0-or-later",
32+
"matches": [
33+
{
34+
"license_expression": "gpl-3.0-plus",
35+
"spdx_license_expression": "GPL-3.0-or-later",
36+
"from_file": null,
37+
"start_line": 18,
38+
"end_line": 29,
39+
"matcher": "2-aho",
40+
"score": 100.0,
41+
"matched_length": 100,
42+
"match_coverage": 100.0,
43+
"rule_relevance": 100,
44+
"rule_identifier": "gpl-3.0-plus_234.RULE",
45+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0-plus_234.RULE",
46+
"matched_text": "This program is free software: you can redistribute it and/or modify\n it under the terms of the GNU General Public License as published by\n the Free Software Foundation, either version 3 of the License, or\n (at your option) any later version.\n\n This program is distributed in the hope that it will be useful,\n but WITHOUT ANY WARRANTY; without even the implied warranty of\n MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n GNU General Public License for more details.\n\nOn Debian systems, the complete text of the GNU General Public License\nmay be found in `/usr/share/common-licenses/GPL'."
47+
}
48+
],
49+
"identifier": "gpl_3_0_plus-5534b6bc-4eef-713f-94c7-caa583171b85"
50+
},
51+
{
52+
"license_expression": "gfdl-1.3-plus",
53+
"license_expression_spdx": "GFDL-1.3-or-later",
54+
"matches": [
55+
{
56+
"license_expression": "gfdl-1.3-plus",
57+
"spdx_license_expression": "GFDL-1.3-or-later",
58+
"from_file": null,
59+
"start_line": 38,
60+
"end_line": 44,
61+
"matcher": "2-aho",
62+
"score": 100.0,
63+
"matched_length": 67,
64+
"match_coverage": 100.0,
65+
"rule_relevance": 100,
66+
"rule_identifier": "gfdl-1.3-plus_3.RULE",
67+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gfdl-1.3-plus_3.RULE",
68+
"matched_text": "Permission is granted to copy, distribute and/or modify this document\n under the terms of the GNU Free Documentation License, Version 1.3 or\n any later version published by the Free Software Foundation; with no\n Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.\n\nOn Debian systems, the complete text of the GNU Free Documentation\nLicense may be found in `/usr/share/common-licenses/GFDL'."
69+
}
70+
],
71+
"identifier": "gfdl_1_3_plus-42b93f1a-aad4-0749-d2f0-4655bd194e40"
72+
}
73+
],
74+
"other_license_expression": null,
75+
"other_license_expression_spdx": null,
76+
"other_license_detections": [],
77+
"extracted_license_statement": null,
78+
"notice_text": null,
79+
"source_packages": [],
80+
"file_references": [],
81+
"extra_data": {},
82+
"dependencies": [],
83+
"repository_homepage_url": null,
84+
"repository_download_url": null,
85+
"api_data_url": null,
86+
"datasource_id": "debian_copyright_standalone",
87+
"purl": "pkg:deb/debian/diffutils@3.7-5"
88+
}
89+
]

tests/packagedcode/data/debian/dsc_files/adduser_3.118+deb11u1.dsc.expected.json

Lines changed: 65 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,64 @@
11
{
2-
"packages": [],
2+
"packages": [
3+
{
4+
"type": "deb",
5+
"namespace": "debian",
6+
"name": "adduser",
7+
"version": "3.118+deb11u1",
8+
"qualifiers": {
9+
"architecture": "all"
10+
},
11+
"subpath": null,
12+
"primary_language": null,
13+
"description": null,
14+
"release_date": null,
15+
"parties": [
16+
{
17+
"type": null,
18+
"role": "maintainer",
19+
"name": "Debian Adduser Developers <adduser@packages.debian.org>",
20+
"email": null,
21+
"url": null
22+
}
23+
],
24+
"keywords": [],
25+
"homepage_url": null,
26+
"download_url": null,
27+
"size": null,
28+
"sha1": null,
29+
"md5": null,
30+
"sha256": null,
31+
"sha512": null,
32+
"bug_tracking_url": null,
33+
"code_view_url": null,
34+
"vcs_url": null,
35+
"copyright": null,
36+
"holder": null,
37+
"declared_license_expression": null,
38+
"declared_license_expression_spdx": null,
39+
"license_detections": [],
40+
"other_license_expression": null,
41+
"other_license_expression_spdx": null,
42+
"other_license_detections": [],
43+
"extracted_license_statement": null,
44+
"notice_text": null,
45+
"source_packages": [
46+
"pkg:deb/debian/adduser"
47+
],
48+
"extra_data": {},
49+
"repository_homepage_url": null,
50+
"repository_download_url": null,
51+
"api_data_url": null,
52+
"package_uid": "pkg:deb/debian/adduser@3.118%2Bdeb11u1?architecture=all&uuid=fixed-uid-done-for-testing-5642512d1758",
53+
"datafile_paths": [
54+
"adduser_3.118+deb11u1.dsc"
55+
],
56+
"datasource_ids": [
57+
"debian_source_control_dsc"
58+
],
59+
"purl": "pkg:deb/debian/adduser@3.118%2Bdeb11u1?architecture=all"
60+
}
61+
],
362
"dependencies": [],
463
"files": [
564
{
@@ -9,7 +68,7 @@
968
{
1069
"type": "deb",
1170
"namespace": "debian",
12-
"name": null,
71+
"name": "adduser",
1372
"version": "3.118+deb11u1",
1473
"qualifiers": {
1574
"architecture": "all"
@@ -58,10 +117,12 @@
58117
"repository_download_url": null,
59118
"api_data_url": null,
60119
"datasource_id": "debian_source_control_dsc",
61-
"purl": null
120+
"purl": "pkg:deb/debian/adduser@3.118%2Bdeb11u1?architecture=all"
62121
}
63122
],
64-
"for_packages": [],
123+
"for_packages": [
124+
"pkg:deb/debian/adduser@3.118%2Bdeb11u1?architecture=all&uuid=fixed-uid-done-for-testing-5642512d1758"
125+
],
65126
"scan_errors": []
66127
}
67128
]

0 commit comments

Comments
 (0)