Skip to content

Commit feea3fa

Browse files
committed
Add new convenience tool to debug licenses
This is a minimal CSV output for key license detection data that lives in the etc directory and can be installed via pip locally. Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
1 parent 89ed9cb commit feea3fa

File tree

5 files changed

+162
-0
lines changed

5 files changed

+162
-0
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/build/
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
A ScanCode CSV output helpful to debug license detection
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# ScanCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/scancode-toolkit for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
import csv
10+
import warnings
11+
12+
import saneyaml
13+
14+
from commoncode.cliutils import PluggableCommandLineOption
15+
from commoncode.cliutils import OUTPUT_GROUP
16+
from plugincode.output import output_impl
17+
from plugincode.output import OutputPlugin
18+
19+
from formattedcode import FileOptionType
20+
21+
# Tracing flags
22+
# Set to True to emit debug traces on stdout.
TRACE = False


def logger_debug(*args):
    """Do nothing: replaced below by a real logging function when TRACE is set."""
    pass


if TRACE:
    import sys
    import logging

    logger = logging.getLogger(__name__)
    logging.basicConfig(stream=sys.stdout)
    logger.setLevel(logging.DEBUG)

    def logger_debug(*args):
        """Log `args` at DEBUG level as a single space-separated message."""
        # Strings are logged verbatim and anything else through repr().
        # A conditional expression is used rather than `cond and a or b`
        # so that an empty string stays empty instead of becoming "''".
        return logger.debug(
            ' '.join(a if isinstance(a, str) else repr(a) for a in args)
        )
40+
41+
42+
@output_impl
class LicenseCsvOutput(OutputPlugin):
    """
    Output plugin that writes license detection debug data as CSV to the
    FILE given with the --license-csv command line option.
    """

    options = [
        PluggableCommandLineOption(
            ('--license-csv',),
            type=FileOptionType(mode='w', encoding='utf-8', lazy=True),
            metavar='FILE',
            help='Write license scan debug output as CSV to FILE.',
            help_group=OUTPUT_GROUP,
            sort_order=30,
        ),
    ]

    def is_enabled(self, license_csv, **kwargs):
        """Return the `license_csv` option value: the plugin runs when it is set."""
        return license_csv

    def process_codebase(self, codebase, license_csv, **kwargs):
        """Write the scan results for the `codebase` files as CSV to `license_csv`."""
        write_csv(
            results=self.get_files(codebase, **kwargs),
            output_file=license_csv,
        )
60+
61+
62+
def write_csv(results, output_file):
    """
    Write the license matches found in the `results` iterable of scanned
    file mappings as CSV rows to the `output_file` file-like object.
    """
    scanned_files = list(results)
    headers = {'license': []}

    # Build every row before writing anything: flatten_scan() fills in
    # `headers` as a side effect while it is being consumed, so the column
    # names are only complete once the generator is exhausted.
    rows = list(flatten_scan(scanned_files, headers))

    fieldnames = [key for key_group in headers.values() for key in key_group]

    writer = csv.DictWriter(output_file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(rows)
80+
81+
82+
def flatten_scan(scan, headers):
    """
    Yield one flat mapping per unique license match found in the files of
    a ScanCode `scan` results list (an iterable of scanned file mappings).

    Each yielded mapping is keyed by the file path plus the match details
    (score, lines, rule identifier, license expression, matcher, lengths,
    coverage and relevance). Directories are skipped and exact duplicate
    rows are yielded only once.

    Update the `headers` mapping of {key group: list of keys} with the keys
    seen in yielded rows as a side effect: `headers['license']` must exist
    and is only complete once this generator is exhausted.

    The `scan` mappings are not modified.
    """
    # Column names and de-duplication fingerprints are tracked in two
    # separate sets so that they can never be mixed up.
    seen_keys = set()
    seen_rows = set()

    def collect_keys(mapping, key_group):
        """Append the not-yet-seen keys of `mapping` to `headers[key_group]`."""
        new_keys = [k for k in mapping if k not in seen_keys]
        headers[key_group].extend(new_keys)
        seen_keys.update(new_keys)

    for scanned_file in scan:
        # Read rather than pop the path: the caller's data is left intact.
        path = scanned_file['path']

        # only files are reported: directories are skipped entirely
        if scanned_file.get('type') == 'directory':
            continue

        # remove any slash at the beginning of the path
        path = path.lstrip('/')

        # tolerate a missing or null list of licenses
        for licensing in scanned_file.get('licenses') or []:
            matched_rule = licensing['matched_rule']
            # NOTE: the insertion order of these keys is the CSV column order
            lic = dict(
                path=path,
                score=with_two_decimals(licensing['score']),
                start_line=licensing['start_line'],
                end_line=licensing['end_line'],
                identifier=matched_rule['identifier'],
                license_expression=matched_rule['license_expression'],
                matcher=matched_rule['matcher'],
                rule_length=matched_rule['rule_length'],
                matched_length=matched_rule['matched_length'],
                match_coverage=with_two_decimals(matched_rule['match_coverage']),
                rule_relevance=with_two_decimals(matched_rule['rule_relevance']),
            )

            # skip rows that are exact duplicates of an already yielded row
            fingerprint = tuple(lic.items())
            if fingerprint in seen_rows:
                continue
            seen_rows.add(fingerprint)

            collect_keys(lic, 'license')
            yield lic
130+
131+
def with_two_decimals(val):
    """
    Return `val` as a string. Numbers (int or float) are formatted with
    exactly two decimal places; strings are returned unchanged and any
    other value is converted with str().
    """
    if isinstance(val, (int, float)):
        return format(val, '.2f')
    return val if isinstance(val, str) else str(val)
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
[metadata]
2+
name = scancode-debug-license-csv
3+
version = 0.0.1
4+
license = Apache-2.0
5+
description = A plugin for license detection debug output as CSV
6+
7+
[options]
8+
py_modules =
9+
scancode_debug_license_csv
10+
install_requires =
11+
scancode-toolkit
12+
13+
[options.entry_points]
14+
scancode_output =
15+
license_csv = scancode_debug_license_csv:LicenseCsvOutput
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/usr/bin/env python
# Minimal setuptools shim: setup() is called without arguments, so the
# package metadata is expected to come from a declarative setup.cfg.

from setuptools import setup

if __name__ == "__main__":
    setup()

0 commit comments

Comments
 (0)