Skip to content

Commit 1af3877

Browse files
Refine referenced_filenames in license rules
Reference: #3547 Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent 2ddb31c commit 1af3877

File tree

4 files changed

+180
-8
lines changed

4 files changed

+180
-8
lines changed

src/licensedcode/detection.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,25 @@ def identifier_with_expression(self):
333333
id_safe_expression = python_safe_name(s=str(self.license_expression))
334334
return "{}-{}".format(id_safe_expression, self._identifier)
335335

336+
@property
def is_unknown(self):
    """
    Return True if there are unknown license keys in the license expression
    for this detection, return False otherwise.

    A detection without a license expression (``None`` or empty) is not
    considered unknown.
    """
    license_expression = self.license_expression

    # Guard against a missing expression: the containment test below would
    # raise a TypeError on None.
    if not license_expression:
        return False

    # NOTE(review): assuming the expression is a string, this is a substring
    # test and not a comparison of parsed license keys, so "unknown" alone
    # already covers the longer entries. They are kept to document the keys
    # of interest.
    unknown_license_keys = (
        "unknown-license-reference",
        "unknown-spdx",
        "unknown",
        "free-unknown",
    )

    return any(
        license_key in license_expression
        for license_key in unknown_license_keys
    )
354+
336355
def get_start_end_line(self):
337356
"""
338357
Return start and end line for a license detection issue, from the
@@ -1355,6 +1374,83 @@ def has_references_to_local_files(license_matches):
13551374
)
13561375

13571376

1377+
def use_referenced_license_expression(referenced_license_expression, license_detection, licensing=None):
    """
    Return True if the `license_detection` LicenseDetection object should
    include the referenced LicenseMatch objects (the `referenced_license_expression`
    LicenseExpression string is the combined License Expression for these matches)
    that it references, otherwise return False if the LicenseDetection object
    should remain intact.

    `licensing` is the object used to extract the license keys of both
    expressions through its `license_keys(expression=...)` method. A new
    `Licensing()` is created when it is not provided.
    """
    if not referenced_license_expression or not license_detection:
        return False

    # We should always include referenced license matches to resolve an unknown
    # license reference
    if license_detection.is_unknown:
        return True

    # We should always include referenced license matches when the license
    # expression from the referenced license matches match the license
    # expression for the detection
    if referenced_license_expression == license_detection.license_expression:
        return True

    # Create the default lazily rather than in the signature, so that no
    # instance is built at import time and shared between calls.
    if licensing is None:
        licensing = Licensing()

    # Here for a key-value pair, the license texts for a value (for example `gpl`)
    # is often included in the license text of the key (for example `lgpl`)
    dependent_license_keys = {
        "lgpl": "gpl",
        "agpl": "gpl",
    }

    # The license keys which start with these have `or-later` licenses
    license_key_prefixes_with_or_later = ("gpl", "lgpl", "agpl")

    license_keys = set(
        licensing.license_keys(expression=license_detection.license_expression)
    )
    referenced_license_keys = set(
        licensing.license_keys(expression=referenced_license_expression)
    )

    # The two expressions are known to differ here (equal expressions returned
    # True above), so identical key sets mean the same licenses are combined
    # differently: keep the detection as is.
    if license_keys == referenced_license_keys:
        return False

    # NOTE(review): this is an exact membership test against the family names
    # ("lgpl", "gpl"), not a prefix match; it never triggers for versioned keys
    # such as `lgpl-2.1`. Confirm whether prefix matching was intended.
    for primary_key, dependent_key in dependent_license_keys.items():
        dependent_key_only_in_referenced = (
            dependent_key in referenced_license_keys
            and dependent_key not in license_keys
        )
        if primary_key in license_keys and dependent_key_only_in_referenced:
            return False

    # A key is "special" when it starts with any one of the or-later prefixes.
    # Requiring it to start with every prefix at once can never be satisfied.
    all_license_keys_special = bool(license_keys) and all(
        key.startswith(license_key_prefixes_with_or_later)
        for key in license_keys
    )
    all_referenced_license_keys_special = bool(referenced_license_keys) and all(
        key.startswith(license_key_prefixes_with_or_later)
        for key in referenced_license_keys
    )

    # The key sets are known to differ at this point. When both sides only
    # have keys from the or-later families, use the referenced expression,
    # whatever the number of referenced keys.
    if all_license_keys_special and all_referenced_license_keys_special:
        return True

    # Do not follow references that combine more than five license keys.
    if len(referenced_license_keys) > 5:
        return False

    return True
1452+
1453+
13581454
def get_detected_license_expression(
13591455
analysis,
13601456
license_matches=None,

src/licensedcode/plugin_license.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from commoncode.cliutils import PluggableCommandLineOption
1616
from commoncode.cliutils import SCAN_GROUP
1717
from commoncode.cliutils import SCAN_OPTIONS_GROUP
18+
from license_expression import combine_expressions
1819
from plugincode.scan import ScanPlugin
1920
from plugincode.scan import scan_impl
2021

@@ -30,10 +31,12 @@
3031
from licensedcode.detection import LicenseDetectionFromResult
3132
from licensedcode.detection import sort_unique_detections
3233
from licensedcode.detection import UniqueDetection
34+
from licensedcode.detection import use_referenced_license_expression
3335
from packagedcode.utils import combine_expressions
3436
from scancode.api import SCANCODE_LICENSEDB_URL
3537

3638
TRACE = os.environ.get('SCANCODE_DEBUG_PLUGIN_LICENSE', False)
39+
TRACE_REFERENCE = os.environ.get('SCANCODE_DEBUG_PLUGIN_LICENSE_REFERENCE', False)
3740

3841

3942
def logger_debug(*args):
@@ -259,6 +262,11 @@ def add_referenced_filenames_license_matches_for_detections(resource, codebase):
259262

260263
modified = False
261264

265+
if TRACE_REFERENCE:
266+
logger_debug(
267+
f'add_referenced_license_matches: resource_path: {resource.path}',
268+
)
269+
262270
for license_detection_mapping in license_detection_mappings:
263271

264272
license_detection = LicenseDetectionFromResult.from_license_detection_mapping(
@@ -271,6 +279,10 @@ def add_referenced_filenames_license_matches_for_detections(resource, codebase):
271279
referenced_filenames = get_referenced_filenames(license_detection.matches)
272280

273281
if not referenced_filenames:
282+
if TRACE_REFERENCE:
283+
logger_debug(
284+
f'No references at license detection with expression: {license_detection.license_expression}',
285+
)
274286
continue
275287

276288
for referenced_filename in referenced_filenames:
@@ -281,6 +293,31 @@ def add_referenced_filenames_license_matches_for_detections(resource, codebase):
281293
)
282294

283295
if referenced_resource and referenced_resource.license_detections:
296+
referenced_license_expression = combine_expressions(
297+
expressions=[
298+
detection["license_expression"]
299+
for detection in referenced_resource.license_detections
300+
],
301+
)
302+
if not use_referenced_license_expression(
303+
referenced_license_expression=referenced_license_expression,
304+
license_detection=license_detection,
305+
):
306+
if TRACE_REFERENCE:
307+
logger_debug(
308+
f'use_referenced_license_expression: False for '
309+
f'resource: {referenced_resource.path} and '
310+
f'license_expression: {referenced_license_expression}',
311+
)
312+
continue
313+
314+
if TRACE_REFERENCE:
315+
logger_debug(
316+
f'use_referenced_license_expression: True for '
317+
f'resource: {referenced_resource.path} and '
318+
f'license_expression: {referenced_license_expression}',
319+
)
320+
284321
modified = True
285322
detection_modified = True
286323
detections_added.extend(referenced_resource.license_detections)
@@ -301,6 +338,7 @@ def add_referenced_filenames_license_matches_for_detections(resource, codebase):
301338
analysis=DetectionCategory.UNKNOWN_FILE_REFERENCE_LOCAL.value,
302339
post_scan=True,
303340
)
341+
304342
license_expression_spdx = build_spdx_license_expression(
305343
license_expression=str(license_expression),
306344
licensing=get_cache().licensing,

src/packagedcode/licensing.py

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from licensedcode.detection import detect_licenses
2727
from licensedcode.detection import LicenseDetectionFromResult
2828
from licensedcode.detection import populate_matches_with_path
29+
from licensedcode.detection import use_referenced_license_expression
2930
from licensedcode.spans import Span
3031
from licensedcode import query
3132

@@ -93,6 +94,7 @@ def add_referenced_license_matches_for_package(resource, codebase):
9394
file_path=resource.path,
9495
)
9596

97+
detections_added = []
9698
detection_modified = False
9799
license_match_mappings = license_detection_mapping["matches"]
98100
referenced_filenames = get_referenced_filenames(license_detection_object.matches)
@@ -106,16 +108,24 @@ def add_referenced_license_matches_for_package(resource, codebase):
106108
codebase=codebase,
107109
)
108110

109-
if not referenced_resource:
110-
continue
111-
112-
referenced_license_detections = referenced_resource.license_detections
111+
if referenced_resource and referenced_resource.license_detections:
112+
referenced_license_expression = combine_expressions(
113+
expressions=[
114+
detection["license_expression"]
115+
for detection in referenced_resource.license_detections
116+
],
117+
)
118+
if not use_referenced_license_expression(
119+
referenced_license_expression=referenced_license_expression,
120+
license_detection=license_detection_object,
121+
):
122+
continue
113123

114-
if referenced_license_detections:
115124
modified = True
116125
detection_modified = True
126+
detections_added.extend(referenced_resource.license_detections)
117127
matches_to_extend = get_matches_from_detection_mappings(
118-
license_detections=referenced_license_detections
128+
license_detections=referenced_resource.license_detections
119129
)
120130
# For LicenseMatches with different resources as origin, add the
121131
# resource path to these matches as origin info
@@ -142,7 +152,7 @@ def add_referenced_license_matches_for_package(resource, codebase):
142152
license_detection_mapping["detection_log"] = detection_log
143153
license_detection_mapping["identifier"] = get_new_identifier_from_detections(
144154
initial_detection=license_detection_mapping,
145-
detections_added=referenced_license_detections,
155+
detections_added=detections_added,
146156
license_expression=license_expression,
147157
)
148158

@@ -223,7 +233,20 @@ def add_referenced_license_detection_from_package(resource, codebase):
223233
f'sibling_license_detections: {sibling_license_detections}'
224234
)
225235

236+
referenced_license_expression = combine_expressions(
237+
expressions=[
238+
detection["license_expression"]
239+
for detection in sibling_license_detections
240+
],
241+
)
242+
if not use_referenced_license_expression(
243+
referenced_license_expression=referenced_license_expression,
244+
license_detection=license_detection_object,
245+
):
246+
continue
247+
226248
for sibling_detection in sibling_license_detections:
249+
227250
modified = True
228251
detection_modified = True
229252
license_match_mappings.extend(sibling_detection["matches"])
@@ -239,6 +262,21 @@ def add_referenced_license_detection_from_package(resource, codebase):
239262
break
240263

241264
pkg_detections = codebase_package["license_detections"]
265+
if not pkg_detections:
266+
continue
267+
268+
referenced_license_expression = combine_expressions(
269+
expressions=[
270+
detection["license_expression"]
271+
for detection in pkg_detections
272+
],
273+
)
274+
if not use_referenced_license_expression(
275+
referenced_license_expression=referenced_license_expression,
276+
license_detection=license_detection_object,
277+
):
278+
continue
279+
242280
for pkg_detection in pkg_detections:
243281
modified = True
244282
detection_modified = True

tests/packagedcode/test_license_detection.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def test_license_reference_detection_in_manifest_unknown():
3838
check_json_scan(test_loc, result_file, regen=REGEN_TEST_FIXTURES)
3939

4040

41-
def test_license_reference_detection_in_manifest_known():
41+
def test_license_reference_detection_in_manifest_known_with_license():
4242
test_dir = test_env.get_test_loc('license_detection/reference-at-manifest/nanopb/', copy=True)
4343
result_file = test_env.get_temp_file('json')
4444
args = [

0 commit comments

Comments
 (0)