Skip to content

Commit 1ff605e

Browse files
committed
Improve alpine license tests debuggability
Provide details on each step of the Alpine expression cleanups. Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
1 parent 16c0e65 commit 1ff605e

File tree

2 files changed

+70
-28
lines changed

2 files changed

+70
-28
lines changed

src/packagedcode/alpine.py

Lines changed: 61 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#
99
import base64
1010
import codecs
11+
import dataclasses
1112
import email
1213
import posixpath
1314
import re
@@ -1362,32 +1363,76 @@ def source_handler(value, **kwargs):
13621363
}
13631364

13641365

1365-
def detect_declared_license(declared):
1366+
@dataclasses.dataclass
1367+
class ApkLicenseDetection:
1368+
"""
1369+
Represent the results of an Alpine license detection, including intermediate steps.
13661370
"""
1367-
Return a tuple of (cleaned declared license, detected license expression)
1368-
strings from a ``declared`` license text. Both can be None.
1371+
declared_license: str
1372+
cleaned_license: str
1373+
mapped_license: str
1374+
license_detections: list
1375+
license_expression: str
1376+
1377+
def to_dict(self):
1378+
return dict(
1379+
declared_license=self.declared_license ,
1380+
cleaned_license=self.cleaned_license ,
1381+
mapped_license=self.mapped_license ,
1382+
license_detections=self.license_detections ,
1383+
license_expression=self.license_expression ,
1384+
)
1385+
1386+
1387+
def get_alpine_license_detection(declared):
1388+
"""
1389+
Return an ApkLicenseDetection from a ``declared`` license text, or None if nothing is left after cleaning.
13691390
"""
13701391
# cleaning first to fix syntax quirks and try to get something we can parse
1371-
cleaned = normalize_and_cleanup_declared_license(declared)
1372-
if not cleaned:
1373-
return None, None
1392+
cleaned_license = normalize_and_cleanup_declared_license(declared)
1393+
if not cleaned_license:
1394+
return None
13741395

1375-
# then we apply mappings for known non-standard symbols
1396+
# then we apply mappings for known non-standard symbols.
13761397
# the output should be a proper SPDX expression
1377-
mapped = apply_expressions_mapping(cleaned)
1398+
mapped_license = apply_expressions_mapping(cleaned_license)
13781399

13791400
# Finally perform SPDX expressions detection: Alpine uses mostly SPDX, but
13801401
# with some quirks such as some non standard symbols (in addition to the
13811402
# non-standard syntax)
13821403
extra_licenses = {}
13831404
expression_symbols = get_license_symbols(extra_licenses=extra_licenses)
13841405

1385-
license_detections, detected_license_expression = get_license_detections_and_expression(
1386-
extracted_license_statement=mapped,
1406+
license_detections, license_expression = get_license_detections_and_expression(
1407+
extracted_license_statement=mapped_license,
13871408
expression_symbols=expression_symbols,
13881409
)
13891410

1390-
return cleaned, detected_license_expression, license_detections
1411+
return ApkLicenseDetection(
1412+
declared_license=declared,
1413+
cleaned_license=cleaned_license,
1414+
mapped_license=mapped_license,
1415+
license_expression=license_expression,
1416+
license_detections=license_detections,
1417+
)
1418+
1419+
1420+
def detect_declared_license(declared):
1421+
"""
1422+
Return a three-tuple of detected license data from a ``declared`` license text, with this shape:
1423+
(cleaned declared license, detected license expression, license_detections)
1424+
- cleaned declared license and detected license expression are strings.
1425+
- license_detections is a list of LicenseDetection.
1426+
- Any of these can be None.
1427+
"""
1428+
if alpine_detection := get_alpine_license_detection(declared):
1429+
return (
1430+
alpine_detection.cleaned_license,
1431+
alpine_detection.license_expression,
1432+
alpine_detection.license_detections,
1433+
)
1434+
else:
1435+
return None, None, None
13911436

13921437

13931438
def get_license_symbols(extra_licenses):
@@ -1416,25 +1461,19 @@ def get_license_symbols(extra_licenses):
14161461
def normalize_and_cleanup_declared_license(declared):
14171462
"""
14181463
Return a cleaned and normalized declared license.
1419-
1420-
The expression should be valida SPDX but are far from this in practice.
1421-
1464+
The expressions should be valid SPDX license expressions but they are far from this in practice.
14221465
Several fixes are applied:
1423-
14241466
- plain text replacements, aka. syntax fixes, are applied
14251467
to make the expression parsable
1426-
14271468
- common fixes also include handling space-separated and comma-separated
14281469
lists of licenses
14291470
"""
14301471
declared = declared or ''
14311472

1432-
# normalize spaces
1473+
# normalize spaces and case
14331474
declared = ' '.join(declared.split())
1434-
14351475
declared = declared.lower()
14361476

1437-
# performa replacements
14381477
declared = apply_syntax_fixes(declared)
14391478

14401479
# comma-separated as in gpl-2.0+, lgpl-2.1+, zlib
@@ -1516,15 +1555,15 @@ def normalize_and_cleanup_declared_license(declared):
15161555

15171556
def apply_syntax_fixes(s):
15181557
"""
1519-
Fix the expression string s by aplying replacement for various quirks.
1558+
Fix the expression string ``s`` by applying replacements for various quirks to get a clean license
1559+
expression syntax.
15201560
"""
15211561
for src, tgt in EXPRESSION_SYNTAX_FIXES.items():
15221562
s = s.replace(src, tgt)
15231563
return s
15241564

1525-
# These are parsed expression objects replacement that make the expression SPDX compliant
1526-
15271565

1566+
# These are replacements for parsed expression objects that make the expression SPDX compliant
15281567
# {alpine sub-expression: SPDX subexpression}
15291568
DECLARED_TO_SPDX = {
15301569
'openssl-exception': 'licenseref-scancode-generic-exception',

tests/packagedcode/test_alpine_license.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
from packagedcode import alpine
2626
from scancode_config import REGEN_TEST_FIXTURES
2727

28-
2928
"""
3029
Data-driven tests using tests and expectations stored in YAML files.
3130
Test functions are attached to test classes at module import time
@@ -53,8 +52,7 @@ def from_file(cls, data_file):
5352
data = saneyaml.load(df.read())
5453
data['data_file'] = data_file
5554
alptest = cls(**data)
56-
alptest.license_expression = cls.licensing.parse(
57-
alptest.license_expression).render()
55+
alptest.license_expression = cls.licensing.parse(alptest.license_expression).render()
5856
return alptest
5957

6058
def to_dict(self):
@@ -110,14 +108,19 @@ def make_test(license_test, regen=REGEN_TEST_FIXTURES):
110108

111109
def closure_test_function(*args, **kwargs):
112110
declared = license_test.declared_license
113-
_cleaned, detected, _license_detections = alpine.detect_declared_license(declared)
111+
detection = alpine.get_alpine_license_detection(declared)
114112

115113
if regen:
116-
license_test.license_expression = detected
114+
license_test.license_expression = detection.license_expression
117115
license_test.dump()
118116
return
119117

120-
assert detected == license_test.license_expression
118+
if detection.license_expression != license_test.license_expression:
119+
assert (
120+
(detection.license_expression, list(detection.to_dict().items()))
121+
==
122+
(license_test.license_expression, list(license_test.to_dict().items()))
123+
)
121124

122125
return closure_test_function
123126

0 commit comments

Comments
 (0)