Skip to content

Commit 127bd49

Browse files
Resolve license references to files beside package manifest
Reference: #3707 Reference: #3970 Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent 36a5bc2 commit 127bd49

File tree

21 files changed

+2295
-311
lines changed

21 files changed

+2295
-311
lines changed

src/licensedcode/detection.py

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
"""
4646

4747
TRACE = os.environ.get('SCANCODE_DEBUG_LICENSE_DETECTION', False)
48+
TRACE_REFERENCE = os.environ.get('SCANCODE_DEBUG_PLUGIN_LICENSE_REFERENCE', False)
4849

4950
TRACE_ANALYSIS = False
5051
TRACE_IS_FUNCTIONS = False
@@ -1829,6 +1830,63 @@ def get_referenced_filenames(license_matches):
18291830
return unique_filenames
18301831

18311832

1833+
def has_resolved_referenced_file(license_matches):
    """
    Return True if the ``license_matches`` originate from exactly two distinct
    files, as told by the ``from_file`` attribute of each match. Return False
    otherwise, including for an empty list of matches.
    """
    # NOTE(review): presumably two origins mean the file where the reference
    # was detected plus one referenced file whose matches were already merged
    # in; confirm whether more than two origins should also count as resolved.
    match_origin_files = {
        license_match.from_file
        for license_match in license_matches
    }
    return len(match_origin_files) == 2
1846+
1847+
1848+
def find_referenced_resource_from_package(referenced_filename, resource, codebase, **kwargs):
    """
    Return a Resource matching the ``referenced_filename`` path or filename,
    looked up in the same directory as the datafiles of the packages that the
    ``resource`` is for in ``codebase``.

    Return None if there is no ``resource``, or if the ``referenced_filename``
    cannot be found beside any datafile of the packages listed in
    ``resource.for_packages``.

    ``referenced_filename`` is the path or filename referenced in a
    LicenseMatch detected at ``resource``,
    """
    if not resource:
        return

    # Index the datafile paths of the codebase-level packages by package_uid,
    # skipping packages that miss either value.
    datafile_paths_by_package_uid = {}
    for package in codebase.attributes.packages:
        package_uid = package.get("package_uid")
        datafile_paths = package.get("datafile_paths")
        if package_uid and datafile_paths:
            datafile_paths_by_package_uid[package_uid] = datafile_paths

    root_path = codebase.root.path

    for package_uid in resource.for_packages:
        # Packages unknown at the codebase level have no datafiles to check.
        for path in datafile_paths_by_package_uid.get(package_uid, ()):
            # Datafile paths are joined to the codebase root path before lookup.
            datafile_path = posixpath.join(root_path, path)
            datafile_resource = codebase.get_resource(path=datafile_path)
            if not datafile_resource:
                continue

            parent_path = datafile_resource.parent_path()
            if not parent_path:
                continue

            # The referenced file is expected beside the package datafile.
            referenced_path = posixpath.join(parent_path, referenced_filename)
            referenced_resource = codebase.get_resource(path=referenced_path)
            if referenced_resource:
                return referenced_resource
1888+
1889+
18321890
def find_referenced_resource(referenced_filename, resource, codebase, **kwargs):
18331891
"""
18341892
Return a Resource matching the ``referenced_filename`` path or filename
@@ -1864,6 +1922,135 @@ def find_referenced_resource(referenced_filename, resource, codebase, **kwargs):
18641922
return resource
18651923

18661924

1925+
def update_expressions_from_license_detections(resource, codebase):
    """
    Update the ``detected_license_expression`` and
    ``detected_license_expression_spdx`` attributes of ``resource`` from the
    license expressions of its ``license_detections``, save the ``resource``
    in ``codebase`` and return it.
    """
    # Look up the licensing once and reuse it for both expressions.
    licensing = get_cache().licensing

    license_expressions = [
        detection["license_expression"]
        for detection in resource.license_detections
    ]
    # Unique detection expressions are combined with an AND relation.
    detected_license_expression = combine_expressions(
        expressions=license_expressions,
        relation='AND',
        unique=True,
        licensing=licensing,
    )
    if detected_license_expression is not None:
        detected_license_expression = str(detected_license_expression)

    resource.detected_license_expression = detected_license_expression

    detected_license_expression_spdx = build_spdx_license_expression(
        license_expression=resource.detected_license_expression,
        licensing=licensing,
    )
    if detected_license_expression_spdx is not None:
        detected_license_expression_spdx = str(detected_license_expression_spdx)

    resource.detected_license_expression_spdx = detected_license_expression_spdx

    codebase.save_resource(resource)
    return resource
1952+
1953+
1954+
def update_detection_from_referenced_files(referenced_filenames, license_detection_mapping, resource, codebase, analysis, find_referenced_resource_func):
    """
    Return True if the ``license_detection_mapping`` detected at ``resource``
    was updated in place with the license detections of the files referenced
    by ``referenced_filenames``, and False otherwise.

    Each referenced filename is resolved to a Resource by calling
    ``find_referenced_resource_func`` with ``referenced_filename``,
    ``resource`` and ``codebase`` keyword arguments. Only resolved resources
    that have license detections are used.

    Return False without modifying the mapping if no referenced detection is
    found, or if ``use_referenced_license_expression`` rejects the combined
    referenced license expression.

    Otherwise the referenced matches are appended to the "matches" of the
    mapping, and its "license_expression", "license_expression_spdx",
    "detection_log" and "identifier" are recomputed, with ``analysis`` passed
    through to ``get_detected_license_expression``.
    """
    license_detection = LicenseDetectionFromResult.from_license_detection_mapping(
        license_detection_mapping=license_detection_mapping,
        file_path=resource.path,
    )
    license_match_mappings = license_detection_mapping["matches"]

    referenced_detections = []
    referenced_resources = []
    for referenced_filename in referenced_filenames:
        referenced_resource = find_referenced_resource_func(
            referenced_filename=referenced_filename,
            resource=resource,
            codebase=codebase,
        )
        if not referenced_resource or not referenced_resource.license_detections:
            continue

        referenced_detections.extend(referenced_resource.license_detections)
        referenced_resources.append(referenced_resource)

        # For LicenseMatches with different resources as origin, add the
        # resource path to these matches as origin info
        for detection in referenced_resource.license_detections:
            populate_matches_with_path(
                matches=detection["matches"],
                path=referenced_resource.path,
            )

    if not referenced_detections:
        return False

    licensing = get_cache().licensing

    referenced_license_expression = str(combine_expressions(
        expressions=[
            detection["license_expression"]
            for detection in referenced_detections
        ],
        relation='AND',
        licensing=licensing,
    ))

    use_reference = bool(use_referenced_license_expression(
        referenced_license_expression=referenced_license_expression,
        license_detection=license_detection,
    ))

    if TRACE_REFERENCE and referenced_resources:
        # Trace every resolved resource: iterate the collected list, not the
        # last lookup result, which is a single Resource or None.
        paths = [
            referenced.path
            for referenced in referenced_resources
        ]
        logger_debug(
            f'use_referenced_license_expression: {use_reference} for '
            f'resources: {paths} and '
            f'license_expression: {referenced_license_expression}',
        )

    if not use_reference:
        return False

    # Extend the matches of the mapping in place with the referenced matches.
    matches_to_extend = get_matches_from_detection_mappings(
        license_detections=referenced_detections,
    )
    license_match_mappings.extend(matches_to_extend)

    detection_log, license_expression = get_detected_license_expression(
        license_match_mappings=license_match_mappings,
        analysis=analysis,
        post_scan=True,
    )

    license_expression_spdx = build_spdx_license_expression(
        license_expression=str(license_expression),
        licensing=licensing,
    )
    if license_expression is not None:
        license_expression = str(license_expression)
    if license_expression_spdx is not None:
        license_expression_spdx = str(license_expression_spdx)

    license_detection_mapping["license_expression"] = license_expression
    license_detection_mapping["license_expression_spdx"] = license_expression_spdx
    license_detection_mapping["detection_log"] = detection_log
    license_detection_mapping["identifier"] = get_new_identifier_from_detections(
        initial_detection=license_detection_mapping,
        detections_added=referenced_detections,
        license_expression=license_expression,
    )
    return True
2052+
2053+
18672054
def process_detections(detections, licensing=Licensing()):
18682055
"""
18692056
Yield LicenseDetection objects given a list of LicenseDetection objects

src/licensedcode/plugin_license.py

Lines changed: 18 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -15,23 +15,19 @@
1515
from commoncode.cliutils import PluggableCommandLineOption
1616
from commoncode.cliutils import SCAN_GROUP
1717
from commoncode.cliutils import SCAN_OPTIONS_GROUP
18-
from license_expression import combine_expressions
1918
from plugincode.scan import ScanPlugin
2019
from plugincode.scan import scan_impl
2120

22-
from licensedcode.cache import build_spdx_license_expression, get_cache
2321
from licensedcode.detection import collect_license_detections
24-
from licensedcode.detection import populate_matches_with_path
25-
from licensedcode.detection import find_referenced_resource
26-
from licensedcode.detection import get_detected_license_expression
27-
from licensedcode.detection import get_matches_from_detection_mappings
28-
from licensedcode.detection import get_new_identifier_from_detections
2922
from licensedcode.detection import get_referenced_filenames
3023
from licensedcode.detection import DetectionCategory
3124
from licensedcode.detection import LicenseDetectionFromResult
3225
from licensedcode.detection import sort_unique_detections
3326
from licensedcode.detection import UniqueDetection
34-
from licensedcode.detection import use_referenced_license_expression
27+
from licensedcode.detection import update_detection_from_referenced_files
28+
from licensedcode.detection import update_expressions_from_license_detections
29+
from licensedcode.detection import find_referenced_resource
30+
from licensedcode.detection import has_resolved_referenced_file
3531
from scancode.api import SCANCODE_LICENSEDB_URL
3632

3733
TRACE = os.environ.get('SCANCODE_DEBUG_PLUGIN_LICENSE', False)
@@ -269,122 +265,35 @@ def add_referenced_filenames_license_matches_for_detections(resource, codebase):
269265
f'add_referenced_license_matches: resource_path: {resource.path}',
270266
)
271267

272-
licensing = get_cache().licensing
273-
274268
for license_detection_mapping in license_detection_mappings:
275269

276270
license_detection = LicenseDetectionFromResult.from_license_detection_mapping(
277271
license_detection_mapping=license_detection_mapping,
278272
file_path=resource.path,
279273
)
280-
license_match_mappings = license_detection_mapping["matches"]
281274
referenced_filenames = get_referenced_filenames(license_detection.matches)
282275

283-
if not referenced_filenames:
284-
if TRACE_REFERENCE:
285-
logger_debug(
286-
f'No references at license detection with expression: {license_detection.license_expression}',
287-
)
288-
continue
289-
290-
referenced_detections = []
291-
for referenced_filename in referenced_filenames:
292-
referenced_resource = find_referenced_resource(
293-
referenced_filename=referenced_filename,
294-
resource=resource,
295-
codebase=codebase,
296-
)
297-
298-
if referenced_resource and referenced_resource.license_detections:
299-
referenced_detections.extend(
300-
referenced_resource.license_detections
301-
)
302-
303-
for detection in referenced_resource.license_detections:
304-
populate_matches_with_path(
305-
matches=detection["matches"],
306-
path=referenced_resource.path
307-
)
308-
309-
referenced_license_expression = combine_expressions(
310-
expressions=[
311-
detection["license_expression"]
312-
for detection in referenced_detections
313-
],
314-
relation='AND',
315-
licensing=licensing,
316-
)
317-
if not use_referenced_license_expression(
318-
referenced_license_expression=referenced_license_expression,
319-
license_detection=license_detection,
320-
):
276+
if not referenced_filenames or has_resolved_referenced_file(license_detection.matches):
321277
if TRACE_REFERENCE:
322278
logger_debug(
323-
f'use_referenced_license_expression: False for '
324-
f'resource: {referenced_resource.path} and '
325-
f'license_expression: {referenced_license_expression}',
279+
f'No references to resolve at license detection with expression: {license_detection.license_expression}',
326280
)
327281
continue
328282

329-
if TRACE_REFERENCE:
330-
logger_debug(
331-
f'use_referenced_license_expression: True for '
332-
f'resource: {referenced_resource.path} and '
333-
f'license_expression: {referenced_license_expression}',
334-
)
335-
336-
modified = True
337-
matches_to_extend = get_matches_from_detection_mappings(
338-
license_detections=referenced_detections
339-
)
340-
license_match_mappings.extend(matches_to_extend)
341-
342-
detection_log, license_expression = get_detected_license_expression(
343-
license_match_mappings=license_match_mappings,
283+
is_modified = update_detection_from_referenced_files(
284+
referenced_filenames=referenced_filenames,
285+
license_detection_mapping=license_detection_mapping,
286+
resource=resource,
287+
codebase=codebase,
344288
analysis=DetectionCategory.UNKNOWN_FILE_REFERENCE_LOCAL.value,
345-
post_scan=True,
346-
)
347-
348-
license_expression_spdx = build_spdx_license_expression(
349-
license_expression=str(license_expression),
350-
licensing=licensing,
351-
)
352-
if license_expression is not None:
353-
license_expression = str(license_expression)
354-
if license_expression_spdx is not None:
355-
license_expression_spdx = str(license_expression_spdx)
356-
license_detection_mapping["license_expression"] = license_expression
357-
license_detection_mapping["license_expression_spdx"] = license_expression_spdx
358-
license_detection_mapping["detection_log"] = detection_log
359-
license_detection_mapping["identifier"] = get_new_identifier_from_detections(
360-
initial_detection=license_detection_mapping,
361-
detections_added=referenced_detections,
362-
license_expression=license_expression,
289+
find_referenced_resource_func=find_referenced_resource,
363290
)
291+
if is_modified:
292+
modified = True
364293

365294
if modified:
366-
license_expressions = [
367-
detection["license_expression"]
368-
for detection in resource.license_detections
369-
]
370-
detected_license_expression = combine_expressions(
371-
expressions=license_expressions,
372-
relation='AND',
373-
unique=True,
374-
licensing=licensing)
375-
if detected_license_expression is not None:
376-
detected_license_expression = str(detected_license_expression)
377-
378-
resource.detected_license_expression = detected_license_expression
379-
380-
detected_license_expression_spdx = build_spdx_license_expression(
381-
license_expression=resource.detected_license_expression,
382-
licensing=licensing)
383-
384-
if detected_license_expression_spdx is not None:
385-
detected_license_expression_spdx = str(detected_license_expression_spdx)
386-
387-
resource.detected_license_expression_spdx = detected_license_expression_spdx
388-
389-
codebase.save_resource(resource)
295+
resource = update_expressions_from_license_detections(
296+
resource=resource,
297+
codebase=codebase,
298+
)
390299
return resource

0 commit comments

Comments
 (0)