Skip to content

Commit 9862f7a

Browse files
Make license references default
Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent c540cdf commit 9862f7a

File tree

3 files changed

+94
-41
lines changed

3 files changed

+94
-41
lines changed

src/formattedcode/output_debian.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from plugincode.output import output_impl
1818
from plugincode.output import OutputPlugin
1919
from licensedcode.detection import get_matches_from_detection_mappings
20-
20+
from licensedcode.plugin_licenses_reference import get_matched_text_from_reference_data
2121
from scancode import notice
2222

2323
"""
@@ -106,7 +106,7 @@ def build_copyright_paragraphs(codebase, **kwargs):
106106
if scanned_file['type'] == 'directory':
107107
continue
108108
dfiles = scanned_file['path']
109-
dlicense = build_license(scanned_file)
109+
dlicense = build_license(codebase, scanned_file)
110110
dcopyright = build_copyright_field(scanned_file)
111111

112112
file_para = CopyrightFilesParagraph.from_dict(dict(
@@ -132,7 +132,7 @@ def build_copyright_field(scanned_file):
132132
return '\n'.join(statements)
133133

134134

135-
def build_license(scanned_file):
135+
def build_license(codebase, scanned_file):
136136
"""
137137
Return Debian-like text where the first line is the expression and the
138138
remaining lines are the license text from licenses detected in
@@ -146,11 +146,11 @@ def build_license(scanned_file):
146146
return
147147

148148
licenses = scanned_file.get('license_detections', [])
149-
text = '\n'.join(get_texts(licenses))
149+
text = '\n'.join(get_texts(codebase, licenses))
150150
return f'{expression}\n{text}'
151151

152152

153-
def get_texts(detected_licenses):
153+
def get_texts(codebase, detected_licenses):
154154
"""
155155
Yield license texts detected in this file.
156156
@@ -179,8 +179,12 @@ def get_texts(detected_licenses):
179179
# set of (start line, end line, matched_rule identifier)
180180
seen = set()
181181
for lic in get_matches_from_detection_mappings(detected_licenses):
182+
matched_text = get_matched_text_from_reference_data(
183+
codebase=codebase,
184+
rule_identifier=lic['rule_identifier']
185+
)
182186
key = lic['start_line'], lic['end_line'], lic['rule_identifier']
183187
if key not in seen:
184-
yield lic['matched_text']
188+
if matched_text != None:
189+
yield matched_text
185190
seen.add(key)
186-

src/formattedcode/output_spdx.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,15 @@
2323
from spdx.utils import SPDXNone
2424
from spdx.version import Version
2525

26+
from license_expression import Licensing
2627
from commoncode.cliutils import OUTPUT_GROUP
2728
from commoncode.cliutils import PluggableCommandLineOption
2829
from commoncode.fileutils import file_name
2930
from commoncode.fileutils import parent_directory
3031
from commoncode.text import python_safe_name
3132
from formattedcode import FileOptionType
3233
from licensedcode.detection import get_matches_from_detection_mappings
34+
from licensedcode.plugin_licenses_reference import get_matched_text_from_reference_data
3335
from plugincode.output import output_impl
3436
from plugincode.output import OutputPlugin
3537
import scancode_config
@@ -170,6 +172,7 @@ def _process_codebase(
170172
package_name = build_package_name(input_path)
171173

172174
write_spdx(
175+
codebase=codebase,
173176
output_file=output_file,
174177
files=files,
175178
tool_name=tool_name,
@@ -208,6 +211,7 @@ def check_sha1(codebase):
208211

209212

210213
def write_spdx(
214+
codebase,
211215
output_file,
212216
files,
213217
tool_name,
@@ -229,6 +233,10 @@ def write_spdx(
229233
producing this SPDX document.
230234
Use ``package_name`` as a Package name and as a namespace prefix base.
231235
"""
236+
from licensedcode import cache
237+
licenses = cache.get_licenses_db()
238+
licensing = Licensing()
239+
232240
as_rdf = not as_tagvalue
233241
_patch_license_list()
234242

@@ -282,11 +290,20 @@ def write_spdx(
282290
if license_matches:
283291
all_files_have_no_license = False
284292
for match in license_matches:
285-
file_licenses = match["licenses"]
286-
for file_license in file_licenses:
287-
license_key = file_license.get('key')
288-
289-
spdx_id = file_license.get('spdx_license_key')
293+
file_license_expression = match["license_expression"]
294+
file_license_keys = licensing.license_keys(
295+
expression=file_license_expression,
296+
unique=True
297+
)
298+
matched_text = get_matched_text_from_reference_data(
299+
codebase=codebase,
300+
rule_identifier=match["rule_identifier"],
301+
)
302+
for license_key in file_license_keys:
303+
file_license = licenses.get(license_key)
304+
license_key = file_license.key
305+
306+
spdx_id = file_license.spdx_license_key
290307
if not spdx_id:
291308
spdx_id = f'LicenseRef-scancode-{license_key}'
292309
is_license_ref = spdx_id.lower().startswith('licenseref-')
@@ -295,7 +312,7 @@ def write_spdx(
295312
spdx_license = License.from_identifier(spdx_id)
296313
else:
297314
spdx_license = ExtractedLicense(spdx_id)
298-
spdx_license.name = file_license.get('short_name')
315+
spdx_license.name = file_license.short_name
299316
# FIXME: replace this with the licensedb URL
300317
comment = (
301318
f'See details at https://github.com/nexB/scancode-toolkit'

src/licensedcode/plugin_licenses_reference.py

Lines changed: 60 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -51,25 +51,33 @@ class LicensesReference(PostScanPlugin):
5151
sort_order = 500
5252

5353
options = [
54-
PluggableCommandLineOption(('--licenses-reference',),
55-
is_flag=True, default=False,
54+
PluggableCommandLineOption(('--no-licenses-reference',),
55+
is_flag=True,
56+
default=True,
5657
help='Include a reference of all the licenses referenced in this '
5758
'scan with the data details and full texts.',
5859
help_group=POST_SCAN_GROUP)
5960
]
6061

61-
def is_enabled(self, licenses_reference, **kwargs):
62-
return licenses_reference
62+
def is_enabled(self, no_licenses_reference, **kwargs):
63+
return no_licenses_reference
6364

64-
def process_codebase(self, codebase, licenses_reference, **kwargs):
65+
def process_codebase(self, codebase, no_licenses_reference, **kwargs):
6566
"""
6667
Get unique License and Rule data from all license detections in a codebase-level
6768
list and only refer to them in the resource level detections.
6869
"""
6970
licexps = []
7071
rules_data = []
7172

73+
if not hasattr(codebase.attributes, 'licenses'):
74+
return
75+
76+
has_packages = False
7277
if hasattr(codebase.attributes, 'packages'):
78+
has_packages = True
79+
80+
if has_packages:
7381
codebase_packages = codebase.attributes.packages
7482
for pkg in codebase_packages:
7583
rules_data.extend(
@@ -78,40 +86,58 @@ def process_codebase(self, codebase, licenses_reference, **kwargs):
7886
)
7987
)
8088
licexps.append(pkg['declared_license_expression'])
89+
90+
# This license rules reference data is duplicate as `licenses` is a
91+
# top level summary of all unique license detections but this function
92+
# is called as the side effect is removing the reference attributes
93+
# from license matches
94+
try:
95+
_discard = get_license_rules_reference_data(codebase.attributes.licenses)
96+
except KeyError:
97+
pass
8198

8299
for resource in codebase.walk():
83100

84101
# Get license_expressions from both package and license detections
85102
license_licexp = getattr(resource, 'detected_license_expression')
86103
if license_licexp:
87104
licexps.append(license_licexp)
88-
package_data = getattr(resource, 'package_data', []) or []
89-
package_licexps = [
90-
pkg['declared_license_expression']
91-
for pkg in package_data
92-
]
93-
licexps.extend(package_licexps)
94-
95-
# Get license matches from both package and license detections
96-
package_license_detections = []
97-
for pkg in package_data:
98-
if not pkg['license_detections']:
99-
continue
100-
101-
package_license_detections.extend(pkg['license_detections'])
102-
103-
rules_data.extend(
104-
get_license_rules_reference_data(license_detections=package_license_detections)
105-
)
105+
106+
if has_packages:
107+
package_data = getattr(resource, 'package_data', []) or []
108+
package_licexps = [
109+
pkg['declared_license_expression']
110+
for pkg in package_data
111+
]
112+
licexps.extend(package_licexps)
113+
114+
# Get license matches from both package and license detections
115+
package_license_detections = []
116+
for pkg in package_data:
117+
if not pkg['license_detections']:
118+
continue
119+
120+
package_license_detections.extend(pkg['license_detections'])
121+
122+
try:
123+
rules_data.extend(
124+
get_license_rules_reference_data(license_detections=package_license_detections)
125+
)
126+
except KeyError:
127+
pass
106128

107129
license_detections = getattr(resource, 'license_detections', []) or []
108130
license_clues = getattr(resource, 'license_clues', []) or []
109-
rules_data.extend(
110-
get_license_rules_reference_data(
111-
license_detections=license_detections,
112-
license_clues=license_clues,
131+
132+
try:
133+
rules_data.extend(
134+
get_license_rules_reference_data(
135+
license_detections=license_detections,
136+
license_clues=license_clues,
137+
)
113138
)
114-
)
139+
except KeyError:
140+
pass
115141

116142
codebase.save_resource(resource)
117143

@@ -122,6 +148,12 @@ def process_codebase(self, codebase, licenses_reference, **kwargs):
122148
codebase.attributes.rule_references.extend(rule_references)
123149

124150

151+
def get_matched_text_from_reference_data(codebase, rule_identifier):
    """
    Return the matched text recorded in the codebase-level rule references
    for the rule identified by ``rule_identifier``, or None.

    Only the first reference entry whose "rule_identifier" equals
    ``rule_identifier`` is considered. None is returned when:

    - ``codebase.attributes`` has no (or an empty) ``rule_references``,
    - no reference entry has this ``rule_identifier``, or
    - the entry found has no "matched_text" or an empty one.
    """
    rule_references = getattr(codebase.attributes, "rule_references", None) or []

    for rule_reference_data in rule_references:
        if rule_reference_data["rule_identifier"] != rule_identifier:
            continue

        # Entries are subscripted like mappings above, so the text must be
        # looked up by key too: getattr() never sees mapping keys and would
        # always fall back to None for a plain dict entry.
        try:
            matched_text = rule_reference_data["matched_text"]
        except KeyError:
            # Not stored under a key: fall back to attribute-style access.
            matched_text = getattr(rule_reference_data, "matched_text", None)

        # Normalize empty values (e.g. "") to None.
        return matched_text or None

    return None
156+
125157
def get_license_references(license_expressions, licensing=Licensing()):
126158
"""
127159
Get a list of unique License data from a list of `license_expression` strings.

0 commit comments

Comments
 (0)