Skip to content

Commit 7f730f7

Browse files
committed
Add test for extra-words and improve detection_log
Reference: #4400 Signed-off-by: Alok Kumar <alokkumarjipura9973@gmail.com>
1 parent a4415e7 commit 7f730f7

File tree

4 files changed

+127
-0
lines changed

4 files changed

+127
-0
lines changed

src/licensedcode/detection.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ class DetectionRule(Enum):
124124
These are logged in LicenseDetection.detection_log for verbosity.
125125
"""
126126
UNKNOWN_MATCH = 'unknown-match'
127+
EXTRA_WORDS = 'extra-words'
127128
LICENSE_CLUES = 'license-clues'
128129
LOW_QUALITY_MATCH_FRAGMENTS = 'low-quality-matches'
129130
FALSE_POSITIVE = 'possible-false-positive'
@@ -1545,6 +1546,13 @@ def get_detected_license_expression(
15451546
# in detections but ideally we should return synthetic unknowns for these
15461547
detection_log.append(DetectionRule.LOW_QUALITY_MATCH_FRAGMENTS.value)
15471548
return detection_log, combined_expression
1549+
1550+
elif analysis == DetectionCategory.EXTRA_WORDS.value:
1551+
if TRACE_ANALYSIS:
1552+
logger_debug(f'analysis {DetectionCategory.EXTRA_WORDS.value}')
1553+
# No filtering is applied for extra-words: all license matches are kept for the expression
1554+
matches_for_expression = license_matches
1555+
detection_log.append(DetectionRule.EXTRA_WORDS.value)
15481556

15491557
else:
15501558
if TRACE_ANALYSIS:
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
{
2+
"license_detections": [
3+
{
4+
"identifier": "bsd_new-95249a8d-f533-e7c7-159a-9b6e173cba42",
5+
"license_expression": "bsd-new",
6+
"license_expression_spdx": "BSD-3-Clause",
7+
"detection_count": 1,
8+
"detection_log": [
9+
"extra-words"
10+
],
11+
"reference_matches": [
12+
{
13+
"license_expression": "bsd-new",
14+
"license_expression_spdx": "BSD-3-Clause",
15+
"from_file": "scan-extra-words-3-seq-license/LICENSE",
16+
"start_line": 1,
17+
"end_line": 31,
18+
"matcher": "3-seq",
19+
"score": 93.89,
20+
"matched_length": 215,
21+
"match_coverage": 100.0,
22+
"rule_relevance": 100,
23+
"rule_identifier": "bsd-new_578.RULE",
24+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/bsd-new_578.RULE",
25+
"matched_text": "Software License Agreement (BSD License)\n\nCopyright (c) 2009-2015, Kevin Decker <kpdecker@gmail.com>\n\nAll rights reserved.\n\nRedistribution and use of this software in source and binary forms, with or without modification,\nare permitted provided that the following conditions are met:\n\n* Redistributions of source code must retain the above\n copyright notice, this list of conditions and the\n following disclaimer.\n\n* Redistributions in binary form must reproduce the above\n copyright notice, this list of conditions and the\n following disclaimer in the documentation and/or other\n materials provided with the distribution.\n\n* Neither the name of Kevin Decker nor the names of its\n contributors may be used to endorse or promote products\n derived from this software without specific prior\n written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR\nIMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\nDATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER\nIN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT\nOF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.",
26+
"matched_text_diagnostics": "Software License Agreement (BSD License)\n\n[Copyright] ([c]) [2009]-[2015], [Kevin] [Decker] <[kpdecker]@[gmail].[com]>\n\n[All] [rights] [reserved].\n\nRedistribution and use [of] [this] [software] in source and binary forms, with or without modification,\nare permitted provided that the following conditions are met:\n\n* Redistributions of source code must retain the above\n copyright notice, this list of conditions and the\n following disclaimer.\n\n* Redistributions in binary form must reproduce the above\n copyright notice, this list of conditions and the\n following disclaimer in the documentation and/or other\n materials provided with the distribution.\n\n* Neither the name of [Kevin] [Decker] nor the names of its\n contributors may be used to endorse or promote products\n derived from this software without specific prior\n written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR\nIMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\nDATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER\nIN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT\nOF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
27+
}
28+
]
29+
}
30+
],
31+
"files": [
32+
{
33+
"path": "LICENSE",
34+
"type": "file",
35+
"detected_license_expression": "bsd-new",
36+
"detected_license_expression_spdx": "BSD-3-Clause",
37+
"license_detections": [
38+
{
39+
"license_expression": "bsd-new",
40+
"license_expression_spdx": "BSD-3-Clause",
41+
"matches": [
42+
{
43+
"license_expression": "bsd-new",
44+
"license_expression_spdx": "BSD-3-Clause",
45+
"from_file": "scan-extra-words-3-seq-license/LICENSE",
46+
"start_line": 1,
47+
"end_line": 31,
48+
"matcher": "3-seq",
49+
"score": 93.89,
50+
"matched_length": 215,
51+
"match_coverage": 100.0,
52+
"rule_relevance": 100,
53+
"rule_identifier": "bsd-new_578.RULE",
54+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/bsd-new_578.RULE",
55+
"matched_text": "Software License Agreement (BSD License)\n\nCopyright (c) 2009-2015, Kevin Decker <kpdecker@gmail.com>\n\nAll rights reserved.\n\nRedistribution and use of this software in source and binary forms, with or without modification,\nare permitted provided that the following conditions are met:\n\n* Redistributions of source code must retain the above\n copyright notice, this list of conditions and the\n following disclaimer.\n\n* Redistributions in binary form must reproduce the above\n copyright notice, this list of conditions and the\n following disclaimer in the documentation and/or other\n materials provided with the distribution.\n\n* Neither the name of Kevin Decker nor the names of its\n contributors may be used to endorse or promote products\n derived from this software without specific prior\n written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR\nIMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\nDATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER\nIN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT\nOF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.",
56+
"matched_text_diagnostics": "Software License Agreement (BSD License)\n\n[Copyright] ([c]) [2009]-[2015], [Kevin] [Decker] <[kpdecker]@[gmail].[com]>\n\n[All] [rights] [reserved].\n\nRedistribution and use [of] [this] [software] in source and binary forms, with or without modification,\nare permitted provided that the following conditions are met:\n\n* Redistributions of source code must retain the above\n copyright notice, this list of conditions and the\n following disclaimer.\n\n* Redistributions in binary form must reproduce the above\n copyright notice, this list of conditions and the\n following disclaimer in the documentation and/or other\n materials provided with the distribution.\n\n* Neither the name of [Kevin] [Decker] nor the names of its\n contributors may be used to endorse or promote products\n derived from this software without specific prior\n written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR\nIMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\nDATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER\nIN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT\nOF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
57+
}
58+
],
59+
"detection_log": [
60+
"extra-words"
61+
],
62+
"identifier": "bsd_new-95249a8d-f533-e7c7-159a-9b6e173cba42"
63+
}
64+
],
65+
"license_clues": [],
66+
"percentage_of_license_text": 92.67,
67+
"scan_errors": []
68+
}
69+
]
70+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
Software License Agreement (BSD License)
2+
3+
Copyright (c) 2009-2015, Kevin Decker <kpdecker@gmail.com>
4+
5+
All rights reserved.
6+
7+
Redistribution and use of this software in source and binary forms, with or without modification,
8+
are permitted provided that the following conditions are met:
9+
10+
* Redistributions of source code must retain the above
11+
copyright notice, this list of conditions and the
12+
following disclaimer.
13+
14+
* Redistributions in binary form must reproduce the above
15+
copyright notice, this list of conditions and the
16+
following disclaimer in the documentation and/or other
17+
materials provided with the distribution.
18+
19+
* Neither the name of Kevin Decker nor the names of its
20+
contributors may be used to endorse or promote products
21+
derived from this software without specific prior
22+
written permission.
23+
24+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
25+
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
26+
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
27+
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
30+
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31+
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

tests/licensedcode/test_plugin_license_detection.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,24 @@ def test_license_match_unknown_clues_is_not_in_expression():
9393
check_json_scan(test_loc, result_file, regen=REGEN_TEST_FIXTURES)
9494

9595

96+
def test_license_match_extra_words_3_seq():
    """
    Run a license scan with diagnostics on the extra-words 3-seq fixture
    directory and check the JSON output against the expected result file.
    """
    scan_input = test_env.get_test_loc(
        'plugin_license/extra-words/scan-extra-words-3-seq-license/',
        copy=True,
    )
    scan_output = test_env.get_temp_file('json')

    # The diagnostics options are what expose "detection_log" and
    # "matched_text_diagnostics" in the expected JSON.
    run_scan_click([
        '--license',
        '--license-text',
        '--license-text-diagnostics',
        '--license-diagnostics',
        '--strip-root',
        '--verbose',
        '--json', scan_output,
        scan_input,
    ])

    expected_json = test_env.get_test_loc(
        'plugin_license/extra-words/scan-extra-words-3-seq-license.expected.json'
    )
    check_json_scan(expected_json, scan_output, regen=REGEN_TEST_FIXTURES)
112+
113+
96114
def test_license_match_unknown_license_intro_eclipse_foundation():
97115
test_dir = test_env.get_test_loc('plugin_license/unknown_intro/scan-unknown-intro-eclipse-foundation/', copy=True)
98116
result_file = test_env.get_temp_file('json')

0 commit comments

Comments
 (0)