Skip to content

Commit c7e503c

Browse files
authored
Merge pull request #3154 from nexB/license-detection-follow-up
More License Detection changes
2 parents e72110b + 279c305 commit c7e503c

File tree

139 files changed

+30875
-35542
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

139 files changed

+30875
-35542
lines changed

docs/source/explanations/license-detection-reference.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -528,7 +528,7 @@ After::
528528
"text": "Apache License\nVersion 2.0, {Truncated text}"
529529
}
530530
],
531-
"rule_references": [
531+
"license_rule_references": [
532532
{
533533
"license_expression": "apache-2.0",
534534
"rule_identifier": "apache-2.0_65.RULE",

setup-mini.cfg

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,6 @@ scancode_post_scan =
189189
is-license-text = licensedcode.plugin_license_text:IsLicenseText
190190
filter-clues = cluecode.plugin_filter_clues:RedundantCluesFilter
191191
consolidate = summarycode.plugin_consolidate:Consolidator
192-
licenses-reference = licensedcode.plugin_licenses_reference:LicensesReference
193192

194193

195194
# scancode_output_filter is the entry point for filter plugins executed after

setup.cfg

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,6 @@ scancode_post_scan =
190190
is-license-text = licensedcode.plugin_license_text:IsLicenseText
191191
filter-clues = cluecode.plugin_filter_clues:RedundantCluesFilter
192192
consolidate = summarycode.plugin_consolidate:Consolidator
193-
licenses-reference = licensedcode.plugin_licenses_reference:LicensesReference
194193

195194

196195
# scancode_output_filter is the entry point for filter plugins executed after

src/cluecode/plugin_copyright.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ class CopyrightScanner(ScanPlugin):
2828
('authors',attr.ib(default=attr.Factory(list))),
2929
])
3030

31-
sort_order = 4
31+
sort_order = 6
3232

3333
options = [
3434
PluggableCommandLineOption(('-c', '--copyright',),

src/cluecode/plugin_email.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class EmailScanner(ScanPlugin):
2525
"""
2626
resource_attributes = dict(emails=attr.ib(default=attr.Factory(list)))
2727

28-
sort_order = 8
28+
sort_order = 7
2929

3030
options = [
3131
PluggableCommandLineOption(('-e', '--email',),

src/cluecode/plugin_filter_clues.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,8 @@ def process_codebase(self, codebase, **kwargs):
6363

6464
from licensedcode.cache import get_index
6565

66-
rules_by_id = {r.identifier: r for r in get_index().rules_by_rid}
67-
6866
for resource in codebase.walk():
69-
filtered = filter_ignorable_resource_clues(resource, rules_by_id)
67+
filtered = filter_ignorable_resource_clues(resource, get_index().rules_by_id)
7068
if filtered:
7169
filtered.save(codebase)
7270

src/cluecode/plugin_url.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class UrlScanner(ScanPlugin):
2626

2727
resource_attributes = dict(urls=attr.ib(default=attr.Factory(list)))
2828

29-
sort_order = 10
29+
sort_order = 8
3030

3131
options = [
3232
PluggableCommandLineOption(('-u', '--url',),

src/formattedcode/output_debian.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from plugincode.output import output_impl
1818
from plugincode.output import OutputPlugin
1919
from licensedcode.detection import get_matches_from_detection_mappings
20-
20+
from licensedcode.licenses_reference import get_matched_text_from_reference_data
2121
from scancode import notice
2222

2323
"""
@@ -106,7 +106,7 @@ def build_copyright_paragraphs(codebase, **kwargs):
106106
if scanned_file['type'] == 'directory':
107107
continue
108108
dfiles = scanned_file['path']
109-
dlicense = build_license(scanned_file)
109+
dlicense = build_license(codebase, scanned_file)
110110
dcopyright = build_copyright_field(scanned_file)
111111

112112
file_para = CopyrightFilesParagraph.from_dict(dict(
@@ -132,7 +132,7 @@ def build_copyright_field(scanned_file):
132132
return '\n'.join(statements)
133133

134134

135-
def build_license(scanned_file):
135+
def build_license(codebase, scanned_file):
136136
"""
137137
Return Debian-like text where the first line is the expression and the
138138
remaining lines are the license text from licenses detected in
@@ -146,11 +146,11 @@ def build_license(scanned_file):
146146
return
147147

148148
licenses = scanned_file.get('license_detections', [])
149-
text = '\n'.join(get_texts(licenses))
149+
text = '\n'.join(get_texts(codebase, licenses))
150150
return f'{expression}\n{text}'
151151

152152

153-
def get_texts(detected_licenses):
153+
def get_texts(codebase, detected_licenses):
154154
"""
155155
Yield license texts detected in this file.
156156
@@ -179,8 +179,12 @@ def get_texts(detected_licenses):
179179
# set of (start line, end line, matched_rule identifier)
180180
seen = set()
181181
for lic in get_matches_from_detection_mappings(detected_licenses):
182+
matched_text = get_matched_text_from_reference_data(
183+
codebase=codebase,
184+
rule_identifier=lic['rule_identifier']
185+
)
182186
key = lic['start_line'], lic['end_line'], lic['rule_identifier']
183187
if key not in seen:
184-
yield lic['matched_text']
188+
if matched_text != None:
189+
yield matched_text
185190
seen.add(key)
186-

src/formattedcode/output_spdx.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,15 @@
2323
from spdx.utils import SPDXNone
2424
from spdx.version import Version
2525

26+
from license_expression import Licensing
2627
from commoncode.cliutils import OUTPUT_GROUP
2728
from commoncode.cliutils import PluggableCommandLineOption
2829
from commoncode.fileutils import file_name
2930
from commoncode.fileutils import parent_directory
3031
from commoncode.text import python_safe_name
3132
from formattedcode import FileOptionType
3233
from licensedcode.detection import get_matches_from_detection_mappings
34+
from licensedcode.licenses_reference import get_matched_text_from_reference_data
3335
from plugincode.output import output_impl
3436
from plugincode.output import OutputPlugin
3537
import scancode_config
@@ -170,6 +172,7 @@ def _process_codebase(
170172
package_name = build_package_name(input_path)
171173

172174
write_spdx(
175+
codebase=codebase,
173176
output_file=output_file,
174177
files=files,
175178
tool_name=tool_name,
@@ -208,6 +211,7 @@ def check_sha1(codebase):
208211

209212

210213
def write_spdx(
214+
codebase,
211215
output_file,
212216
files,
213217
tool_name,
@@ -229,6 +233,10 @@ def write_spdx(
229233
producing this SPDX document.
230234
Use ``package_name`` as a Package name and as a namespace prefix base.
231235
"""
236+
from licensedcode import cache
237+
licenses = cache.get_licenses_db()
238+
licensing = Licensing()
239+
232240
as_rdf = not as_tagvalue
233241
_patch_license_list()
234242

@@ -282,11 +290,20 @@ def write_spdx(
282290
if license_matches:
283291
all_files_have_no_license = False
284292
for match in license_matches:
285-
file_licenses = match["licenses"]
286-
for file_license in file_licenses:
287-
license_key = file_license.get('key')
288-
289-
spdx_id = file_license.get('spdx_license_key')
293+
file_license_expression = match["license_expression"]
294+
file_license_keys = licensing.license_keys(
295+
expression=file_license_expression,
296+
unique=True
297+
)
298+
matched_text = get_matched_text_from_reference_data(
299+
codebase=codebase,
300+
rule_identifier=match["rule_identifier"],
301+
)
302+
for license_key in file_license_keys:
303+
file_license = licenses.get(license_key)
304+
license_key = file_license.key
305+
306+
spdx_id = file_license.spdx_license_key
290307
if not spdx_id:
291308
spdx_id = f'LicenseRef-scancode-{license_key}'
292309
is_license_ref = spdx_id.lower().startswith('licenseref-')
@@ -295,7 +312,7 @@ def write_spdx(
295312
spdx_license = License.from_identifier(spdx_id)
296313
else:
297314
spdx_license = ExtractedLicense(spdx_id)
298-
spdx_license.name = file_license.get('short_name')
315+
spdx_license.name = file_license.short_name
299316
# FIXME: replace this with the licensedb URL
300317
comment = (
301318
f'See details at https://github.com/nexB/scancode-toolkit'

0 commit comments

Comments
 (0)