Skip to content

Commit 36a5bc2

Browse files
Merge pull request #4402 from alok1304/improve-detection-log-for-extra-words
Display `extra-words` in `detection_log` if present
2 parents a4415e7 + 7eb8db7 commit 36a5bc2

File tree

7 files changed

+265
-9
lines changed

7 files changed

+265
-9
lines changed

src/licensedcode/detection.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ class DetectionRule(Enum):
124124
These are logged in LicenseDetection.detection_log for verbosity.
125125
"""
126126
UNKNOWN_MATCH = 'unknown-match'
127+
EXTRA_WORDS = 'extra-words'
127128
LICENSE_CLUES = 'license-clues'
128129
LOW_QUALITY_MATCH_FRAGMENTS = 'low-quality-matches'
129130
FALSE_POSITIVE = 'possible-false-positive'
@@ -1056,7 +1057,8 @@ def is_correct_detection_non_unknown(license_matches):
10561057
return (
10571058
is_correct_detection(license_matches)
10581059
and not has_unknown_matches(license_matches)
1059-
)
1060+
and not has_extra_words(license_matches)
1061+
)
10601062

10611063

10621064
def is_correct_detection(license_matches):
@@ -1380,7 +1382,7 @@ def has_references_to_local_files(license_matches):
13801382
Return True if any matched Rule in the ``license_matches`` has a
13811383
non-empty `referenced_filenames` and `has_extra_words` is False for these matches; otherwise return False.
13821384
"""
1383-
return any(
1385+
return not has_extra_words(license_matches) and any(
13841386
bool(match.rule.referenced_filenames)
13851387
for match in license_matches
13861388
)
@@ -1545,6 +1547,13 @@ def get_detected_license_expression(
15451547
# in detections but ideally we should return synthetic unknowns for these
15461548
detection_log.append(DetectionRule.LOW_QUALITY_MATCH_FRAGMENTS.value)
15471549
return detection_log, combined_expression
1550+
1551+
elif analysis == DetectionCategory.EXTRA_WORDS.value:
1552+
if TRACE_ANALYSIS:
1553+
logger_debug(f'analysis {DetectionCategory.EXTRA_WORDS.value}')
1554+
# No filtering is applied here: all matches are kept for the expression
1555+
matches_for_expression = license_matches
1556+
detection_log.append(DetectionRule.EXTRA_WORDS.value)
15481557

15491558
else:
15501559
if TRACE_ANALYSIS:
@@ -1726,7 +1735,7 @@ def analyze_detection(license_matches, package_license=False):
17261735
):
17271736
return DetectionCategory.LICENSE_CLUES.value
17281737

1729-
# Case where all matches have `matcher` as `1-hash` or `4-spdx-id`
1738+
# Case where all matches have `matcher` as `1-hash` or `1-spdx-id`
17301739
elif is_correct_detection_non_unknown(license_matches=license_matches):
17311740
return DetectionCategory.PERFECT_DETECTION.value
17321741

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
{
2+
"license_detections": [
3+
{
4+
"identifier": "bsd_new-fbfc5955-0c63-4c98-2ce9-08e1e1796f50",
5+
"license_expression": "bsd-new",
6+
"license_expression_spdx": "BSD-3-Clause",
7+
"detection_count": 1,
8+
"detection_log": [
9+
"extra-words"
10+
],
11+
"reference_matches": [
12+
{
13+
"license_expression": "bsd-new",
14+
"license_expression_spdx": "BSD-3-Clause",
15+
"from_file": "scan-extra-words-2-aho-license/LICENSE",
16+
"start_line": 4,
17+
"end_line": 27,
18+
"matcher": "2-aho",
19+
"score": 99.53,
20+
"matched_length": 210,
21+
"match_coverage": 100.0,
22+
"rule_relevance": 100,
23+
"rule_identifier": "bsd-new_158.RULE",
24+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/bsd-new_158.RULE",
25+
"matched_text": "Redistribution and use in source and binary forms, with or without\r\nmodification, are permitted provided that the following conditions are met:\r\n\r\n* Redistributions of source code must retain the above copyright notice, this\r\n list of conditions and the following disclaimer.\r\n\r\n* Redistributions in binary form must reproduce the above copyright notice,\r\n this list of conditions and the following disclaimer in the documentation\r\n and/or other materials provided with the distribution.\r\n\r\n* Neither the name of filesize nor the names of its\r\n contributors may be used to endorse or promote products derived from\r\n this software without specific prior written permission.\r\n\r\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\r\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\r\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE\r\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\r\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\r\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\r\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\r\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.",
26+
"matched_text_diagnostics": "Redistribution and use in source and binary forms, with or without\r\nmodification, are permitted provided that the following conditions are met:\r\n\r\n* Redistributions of source code must retain the above copyright notice, this\r\n list of conditions and the following disclaimer.\r\n\r\n* Redistributions in binary form must reproduce the above copyright notice,\r\n this list of conditions and the following disclaimer in the documentation\r\n and/or other materials provided with the distribution.\r\n\r\n* Neither the name of [filesize] nor the names of its\r\n contributors may be used to endorse or promote products derived from\r\n this software without specific prior written permission.\r\n\r\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\r\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\r\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE\r\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\r\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\r\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\r\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\r\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
27+
}
28+
]
29+
}
30+
],
31+
"files": [
32+
{
33+
"path": "LICENSE",
34+
"type": "file",
35+
"detected_license_expression": "bsd-new",
36+
"detected_license_expression_spdx": "BSD-3-Clause",
37+
"license_detections": [
38+
{
39+
"license_expression": "bsd-new",
40+
"license_expression_spdx": "BSD-3-Clause",
41+
"matches": [
42+
{
43+
"license_expression": "bsd-new",
44+
"license_expression_spdx": "BSD-3-Clause",
45+
"from_file": "scan-extra-words-2-aho-license/LICENSE",
46+
"start_line": 4,
47+
"end_line": 27,
48+
"matcher": "2-aho",
49+
"score": 99.53,
50+
"matched_length": 210,
51+
"match_coverage": 100.0,
52+
"rule_relevance": 100,
53+
"rule_identifier": "bsd-new_158.RULE",
54+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/bsd-new_158.RULE",
55+
"matched_text": "Redistribution and use in source and binary forms, with or without\r\nmodification, are permitted provided that the following conditions are met:\r\n\r\n* Redistributions of source code must retain the above copyright notice, this\r\n list of conditions and the following disclaimer.\r\n\r\n* Redistributions in binary form must reproduce the above copyright notice,\r\n this list of conditions and the following disclaimer in the documentation\r\n and/or other materials provided with the distribution.\r\n\r\n* Neither the name of filesize nor the names of its\r\n contributors may be used to endorse or promote products derived from\r\n this software without specific prior written permission.\r\n\r\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\r\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\r\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE\r\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\r\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\r\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\r\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\r\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.",
56+
"matched_text_diagnostics": "Redistribution and use in source and binary forms, with or without\r\nmodification, are permitted provided that the following conditions are met:\r\n\r\n* Redistributions of source code must retain the above copyright notice, this\r\n list of conditions and the following disclaimer.\r\n\r\n* Redistributions in binary form must reproduce the above copyright notice,\r\n this list of conditions and the following disclaimer in the documentation\r\n and/or other materials provided with the distribution.\r\n\r\n* Neither the name of [filesize] nor the names of its\r\n contributors may be used to endorse or promote products derived from\r\n this software without specific prior written permission.\r\n\r\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\r\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\r\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE\r\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\r\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\r\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\r\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\r\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
57+
}
58+
],
59+
"detection_log": [
60+
"extra-words"
61+
],
62+
"identifier": "bsd_new-fbfc5955-0c63-4c98-2ce9-08e1e1796f50"
63+
}
64+
],
65+
"license_clues": [],
66+
"percentage_of_license_text": 95.89,
67+
"scan_errors": []
68+
}
69+
]
70+
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
Copyright (c) 2022, Jason Mulligan
2+
All rights reserved.
3+
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are met:
6+
7+
* Redistributions of source code must retain the above copyright notice, this
8+
list of conditions and the following disclaimer.
9+
10+
* Redistributions in binary form must reproduce the above copyright notice,
11+
this list of conditions and the following disclaimer in the documentation
12+
and/or other materials provided with the distribution.
13+
14+
* Neither the name of filesize nor the names of its
15+
contributors may be used to endorse or promote products derived from
16+
this software without specific prior written permission.
17+
18+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+

0 commit comments

Comments
 (0)