Skip to content

Commit 4f85513

Browse files
Address review comments and add misc refactors
Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent 3a97066 commit 4f85513

12 files changed

+466
-78
lines changed

src/licensedcode/cache.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,9 @@ def build_spdx_license_expression(license_expression, licensing=None):
515515
>>> spdx = "MIT OR GPL-2.0-only WITH LicenseRef-scancode-generic-exception"
516516
>>> assert build_spdx_license_expression(exp) == spdx
517517
"""
518+
if not license_expression:
519+
return
520+
518521
if not licensing:
519522
licensing = get_licensing()
520523
validate_spdx_license_keys(license_expression=license_expression, licensing=licensing)

src/licensedcode/detection.py

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1730,9 +1730,8 @@ def get_ambiguous_license_detections_by_type(unique_license_detections):
17301730
):
17311731
ambi_license_detections[DetectionCategory.LICENSE_CLUES.value] = detection
17321732

1733-
elif "unknown" in detection.license_expression:
1734-
if has_unknown_matches(license_matches=detection.matches):
1735-
ambi_license_detections[DetectionCategory.UNKNOWN_MATCH.value] = detection
1733+
elif detection.is_unknown:
1734+
ambi_license_detections[DetectionCategory.UNKNOWN_MATCH.value] = detection
17361735

17371736
elif is_match_coverage_less_than_threshold(
17381737
license_matches=detection.matches,
@@ -1876,14 +1875,15 @@ def get_referenced_filenames(license_matches):
18761875

18771876
def has_resolved_referenced_file(license_matches):
    """
    Return True if a list of ``license_matches`` has matches from both the
    original files and the referenced files. This would mean that the license
    reference is resolved successfully.

    The check is based on the number of distinct ``from_file`` values found
    in the matches: two or more distinct origin files are needed.
    """
    # A set is enough to count the distinct origin files.
    match_origin_files = {
        license_match.from_file
        for license_match in license_matches
    }
    return len(match_origin_files) >= 2
@@ -1894,16 +1894,20 @@ def find_referenced_resource_from_package(referenced_filename, resource, codebas
18941894
Return a Resource matching the ``referenced_filename`` path or filename
18951895
given a ``resource`` in ``codebase``.
18961896
1897+
To find the `referenced_filename` the sibling files are searched beside all the
1898+
package manifest paths, for all the packages which the resource is a part of,
1899+
to resolve references to files in package ecosystem specific locations.
1900+
18971901
Return None if the ``referenced_filename`` cannot be found in the same
18981902
directory as the base ``resource``, or at the codebase ``root``.
18991903
19001904
``referenced_filename`` is the path or filename referenced in a
19011905
LicenseMatch detected at ``resource``,
19021906
"""
1903-
if not resource:
1907+
codebase_packages = codebase.attributes.packages
1908+
if not (resource and codebase_packages):
19041909
return
19051910

1906-
codebase_packages = codebase.attributes.packages
19071911
datafile_paths_by_package_uid = {}
19081912
for package in codebase_packages:
19091913
package_uid = package.get("package_uid")
@@ -1940,6 +1944,9 @@ def find_referenced_resource(referenced_filename, resource, codebase, **kwargs):
19401944
Return a Resource matching the ``referenced_filename`` path or filename
19411945
given a ``resource`` in ``codebase``.
19421946
1947+
To find the `referenced_filename` the sibling files of the `resource`
1948+
and files at the `codebase` root are searched.
1949+
19431950
Return None if the ``referenced_filename`` cannot be found in the same
19441951
directory as the base ``resource``, or at the codebase ``root``.
19451952
@@ -1969,7 +1976,14 @@ def find_referenced_resource(referenced_filename, resource, codebase, **kwargs):
19691976

19701977

19711978
def update_expressions_from_license_detections(resource, codebase):
1979+
"""
1980+
Set the `detected_license_expression` and `detected_license_expression_spdx`
1981+
for a `resource` from the individual license_expressions of its detections.
19721982
1983+
This needs to be executed after license detections in a resource are modified,
1984+
for example when detections are updated with license matches from a resolved
1985+
reference to a file.
1986+
"""
19731987
license_expressions = [
19741988
detection["license_expression"]
19751989
for detection in resource.license_detections
@@ -1997,8 +2011,18 @@ def update_expressions_from_license_detections(resource, codebase):
19972011
return resource
19982012

19992013

2000-
def update_detection_from_referenced_files(referenced_filenames, license_detection_mapping, resource, codebase, analysis, find_referenced_resource_func):
2001-
2014+
def update_detection_from_referenced_files(
2015+
referenced_filenames,
2016+
license_detection_mapping,
2017+
resource,
2018+
codebase,
2019+
analysis,
2020+
find_referenced_resource_func,
2021+
):
2022+
"""
2023+
Return True if the `license_detection_mapping` was updated with resolved
2024+
license references to other `referenced_filenames`, or return False otherwise.
2025+
"""
20022026
license_detection = LicenseDetectionFromResult.from_license_detection_mapping(
20032027
license_detection_mapping=license_detection_mapping,
20042028
file_path=resource.path,

src/packagedcode/licensing.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,11 @@ def add_license_from_sibling_file(resource, codebase):
336336

337337

338338
def add_referenced_license_detection_from_package_manifest_siblings(resource, codebase):
339-
339+
"""
340+
Return True if we have references to licenses in other files and we could find and resolve
341+
the references successfully. Resolving the references means adding license matches from
342+
the referenced file and updating the license expressions accordingly.
343+
"""
340344
if TRACE:
341345
logger_debug(f'packagedcode.licensing: add_referenced_license_detection_from_package_manifest_siblings: resource: {resource.path}')
342346

@@ -846,8 +850,8 @@ def get_license_detections_and_expression(
846850
datasource_id=None,
847851
):
848852
"""
849-
Given a text `extracted_license_statement` return a list of LicenseDetection objects.
850-
`extracted_license_statement` is typically found in package manifests.
853+
Return a list of LicenseDetection objects from an `extracted_license_statement`
854+
text. `extracted_license_statement` is typically found in package manifests.
851855
852856
If `try_as_expression` is True try first to parse this as a license
853857
expression using the ``expression_symbols`` mapping of {lowered key:
@@ -908,7 +912,7 @@ def get_license_detections_for_extracted_license_statement(
908912
expression_symbols=None,
909913
):
910914
"""
911-
Return a list of LicenseDetection detected the ``extracted_license_statement`` string.
915+
Return a list of LicenseDetection detected in the ``extracted_license_statement`` string.
912916
"""
913917
if not extracted_license_statement:
914918
return []

src/packagedcode/win_pe.py

Lines changed: 93 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
from packagedcode import models
1818
from packagedcode.models import Party
1919
from packagedcode.models import party_org
20+
from cluecode.copyrights import detect_copyrights_from_lines
21+
from cluecode.copyrights import prepare_text_line
2022
from typecode import contenttype
2123

2224
TRACE = False
@@ -237,6 +239,31 @@ def concat(mapping, *keys):
237239
return '\n'.join(values)
238240

239241

242+
243+
def has_license_with_copyright(text):
    """
    Return True if the LegalCopyright `text` could have some license
    declarations and should be a part of the extracted_license_statement.

    A non-empty ``text`` is considered to possibly hold license declarations
    unless the first copyright detection found in it has a ``copyright``
    entry. Return False for an empty or missing ``text``.
    """
    # Nothing to inspect: an empty value cannot carry a license declaration.
    if not text:
        return False

    copyrights = detect_copyrights_from_lines(
        numbered_lines=[(1, text)],
        include_copyrights=True,
        include_authors=False,
        include_holders=False,
        include_copyright_years=True,
        include_copyright_allrights=True,
    )
    detections = [detection.to_dict() for detection in copyrights]

    # Only the first detection is checked: when it is a copyright statement,
    # the text is treated as a plain copyright and not as a license statement.
    if detections and "copyright" in detections[0]:
        return False

    return True
264+
265+
266+
240267
class WindowsExecutableHandler(models.NonAssemblableDatafileHandler):
241268
datasource_id = 'windows_executable'
242269
default_package_type = 'winexe'
@@ -278,66 +305,70 @@ def is_datafile(cls, location, filetypes=tuple()):
278305
@classmethod
279306
def parse(cls, location, package_only=False):
280307
infos = pe_info(location)
308+
yield get_package_data_from_pe_info(infos, package_only)
309+
281310

282-
version = get_first(
283-
infos,
284-
'Full Version',
285-
'ProductVersion',
286-
'FileVersion',
287-
'Assembly Version',
288-
)
289-
release_date = get_first(infos, 'BuildDate')
290-
if release_date:
291-
if len(release_date) >= 10:
292-
release_date = release_date[:10]
293-
release_date = release_date.replace('/', '-')
294-
295-
name = get_first(
296-
infos,
297-
'ProductName',
298-
'OriginalFilename',
299-
'InternalName',
300-
)
301-
copyr = get_first(infos, 'LegalCopyright')
302-
303-
LegalCopyright = copyr,
304-
305-
LegalTrademarks = concat(
306-
infos,
307-
'LegalTrademarks',
308-
'LegalTrademarks1',
309-
'LegalTrademarks2',
310-
'LegalTrademarks3')
311-
312-
License = get_first(infos, 'License')
313-
314-
extracted_license_statement = None
315-
if LegalTrademarks or License:
316-
extracted_license_statement = {}
317-
if LegalTrademarks and LegalTrademarks != '':
318-
extracted_license_statement['LegalTrademarks'] = LegalTrademarks
319-
if License:
320-
extracted_license_statement['License'] = License
321-
322-
description = concat(infos, 'FileDescription', 'Comments')
323-
324-
parties = []
325-
cname = get_first(infos, 'CompanyName', 'Company')
326-
327-
if cname:
328-
parties = [Party(type=party_org, role='author', name=cname)]
329-
homepage_url = get_first(infos, 'URL', 'WWW')
330-
331-
package_data = dict(
332-
datasource_id=cls.datasource_id,
333-
type=cls.default_package_type,
334-
name=name,
335-
version=version,
336-
release_date=release_date,
337-
copyright=copyr,
338-
extracted_license_statement=extracted_license_statement,
339-
description=description,
340-
parties=parties,
341-
homepage_url=homepage_url,
342-
)
343-
yield models.PackageData.from_data(package_data, package_only)
311+
def get_package_data_from_pe_info(infos, package_only=False):
    """
    Return a PackageData object built from ``infos``, a mapping of PE version
    information names to values.

    The ``LegalCopyright`` value is always used as the package copyright. It
    is also added to the extracted_license_statement when
    ``has_license_with_copyright`` reports that it could hold license
    declarations. ``package_only`` is passed as-is to
    ``PackageData.from_data``.
    """
    version = get_first(
        infos,
        'Full Version',
        'ProductVersion',
        'FileVersion',
        'Assembly Version',
    )

    release_date = get_first(infos, 'BuildDate')
    if release_date:
        # Keep at most the first 10 characters and use dashes as separators.
        release_date = release_date[:10].replace('/', '-')

    name = get_first(
        infos,
        'ProductName',
        'OriginalFilename',
        'InternalName',
    )

    legal_copyright = get_first(infos, 'LegalCopyright')
    copyright_has_license = bool(
        legal_copyright and has_license_with_copyright(legal_copyright)
    )
    legal_trademarks = concat(
        infos,
        'LegalTrademarks',
        'LegalTrademarks1',
        'LegalTrademarks2',
        'LegalTrademarks3',
    )
    license_text = get_first(infos, 'License')

    # Collect the license-related fields that are present; the statement is
    # None when there is none of them.
    license_fields = {}
    if copyright_has_license:
        license_fields['LegalCopyright'] = legal_copyright
    if legal_trademarks:
        license_fields['LegalTrademarks'] = legal_trademarks
    if license_text:
        license_fields['License'] = license_text
    extracted_license_statement = license_fields or None

    description = concat(infos, 'FileDescription', 'Comments')

    cname = get_first(infos, 'CompanyName', 'Company')
    parties = []
    if cname:
        parties = [Party(type=party_org, role='author', name=cname)]

    homepage_url = get_first(infos, 'URL', 'WWW')

    package_data = dict(
        datasource_id=WindowsExecutableHandler.datasource_id,
        type=WindowsExecutableHandler.default_package_type,
        name=name,
        version=version,
        release_date=release_date,
        copyright=legal_copyright,
        extracted_license_statement=extracted_license_statement,
        description=description,
        parties=parties,
        homepage_url=homepage_url,
    )
    return models.PackageData.from_data(package_data, package_only)

src/summarycode/classify.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,13 @@ def get_relative_path(root_path, path):
9494
'readme',
9595
)
9696

97+
# Community files are usually files used for FOSS project and community
98+
# maintenance purposes. We want to detect these because, in the context of
99+
# licenses, these files don't have interesting license detections, or
100+
# license detection issues are not important to review for these files.
101+
# This is similar to `key` files, which also have a lot of community info,
102+
# but there the license declarations are extremely important as they have
103+
# information on the primary (or even secondary) licenses for the package.
97104
COMMUNITY_FILES = (
98105
'CHANGELOG',
99106
'ROADMAP',
@@ -129,10 +136,16 @@ def check_is_community_file(filename):
129136

130137

131138
def check_is_resource_community_file(resource):
    """
    Return True if the `resource` is a community file, checking its name
    first and then its base name.
    """
    name_result = check_is_community_file(resource.name)
    if name_result:
        return name_result
    return check_is_community_file(resource.base_name)
133143

134144

135145
def check_is_path_community_file(path):
    """
    Return True if the file at `path` is a community file, checking its
    file name first and then its base name.
    """
    posix_name = file_name(path, force_posix=True)
    posix_base_name = file_base_name(path, force_posix=True)

    name_result = check_is_community_file(posix_name)
    if name_result:
        return name_result
    return check_is_community_file(posix_base_name)
@@ -141,7 +154,7 @@ def check_is_path_community_file(path):
141154
def check_resource_name_start_and_end(resource, STARTS_ENDS):
142155
"""
143156
Return True if `resource.name` or `resource.base_name` begins or ends with
144-
an element of `STARTS_ENDS`
157+
an element of `STARTS_ENDS`.
145158
"""
146159
name = resource.name.lower()
147160
base_name = resource.base_name.lower()
@@ -159,7 +172,7 @@ def set_classification_flags(resource,
159172
_README=README_STARTS_ENDS,
160173
):
161174
"""
162-
Set classification flags on the `resource` Resource
175+
Set classification flags on the `resource` Resource.
163176
"""
164177
path = resource.path.lower()
165178

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"Full Version": null,
3+
"ProductVersion": "1.0.7.0",
4+
"FileVersion": "1.0.7.0",
5+
"Assembly Version": null,
6+
"BuildDate": null,
7+
"ProductName": "DockerPull",
8+
"OriginalFilename": "DockerPull.exe",
9+
"InternalName": "DockerPull",
10+
"License": null,
11+
"LegalCopyright": "MIT License | GitHub: https://github.com/topcss/docker-pull-tar",
12+
"LegalTrademarks": null,
13+
"LegalTrademarks1": null,
14+
"LegalTrademarks2": null,
15+
"LegalTrademarks3": null,
16+
"FileDescription": "Docker Image Puller \u65e0\u9700\u5b89\u88c5 Docker \u6216 Python \u73af\u5883,\u76f4\u63a5\u4ece Docker \u4ed3\u5e93\u62c9\u53d6\u955c\u50cf,\u652f\u6301\u56fd\u5185\u955c\u50cf\u6e90\u52a0\u901f\u548c\u591a\u67b6\u6784\u652f\u6301\u3002\u9879\u76ee\u5730\u5740:https://github.com/topcss/docker-pull-tar",
17+
"Comments": null,
18+
"CompanyName": "topcss",
19+
"Company": null,
20+
"URL": null,
21+
"WWW": null,
22+
"extra_data": {}
23+
}

0 commit comments

Comments
 (0)