Skip to content

Commit 3a97066

Browse files
Add misc license detection improvements
Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent 64bf18c commit 3a97066

File tree

14 files changed

+390
-120
lines changed

14 files changed

+390
-120
lines changed

src/licensedcode/data/rules/lead-in_unknown_43.RULE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
---
22
license_expression: unknown-license-reference
3-
is_license_tag: yes
3+
is_license_clue: yes
44
relevance: 60
55
notes: Creative commons tag seen in RDF or XML documents
66
---

src/licensedcode/detection.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
from commoncode.resource import clean_path
2525
from commoncode.text import python_safe_name
26+
from commoncode.fileutils import as_posixpath
2627
from licensedcode.cache import build_spdx_license_expression
2728
from licensedcode.cache import get_cache
2829
from licensedcode.cache import get_index
@@ -130,6 +131,7 @@ class DetectionRule(Enum):
130131
EXTRA_WORDS = 'extra-words'
131132
LICENSE_CLUES = 'license-clues'
132133
LOW_QUALITY_MATCH_FRAGMENTS = 'low-quality-matches'
134+
IMPERFECT_COVERAGE = 'imperfect-match-coverage'
133135
FALSE_POSITIVE = 'possible-false-positive'
134136
NOT_LICENSE_CLUES = 'not-license-clues-as-more-detections-present'
135137
UNKNOWN_REFERENCE_TO_LOCAL_FILE = 'unknown-reference-to-local-file'
@@ -1139,10 +1141,10 @@ def has_extra_words(license_matches):
11391141

11401142
def has_low_rule_relevance(license_matches):
11411143
"""
1142-
Return True if any on the matches in ``license_matches`` List of LicenseMatch
1144+
Return True if all of the matches in ``license_matches`` List of LicenseMatch
11431145
objects has a match with low score because of low rule relevance.
11441146
"""
1145-
return any(
1147+
return all(
11461148
license_match.rule.relevance < LOW_RELEVANCE_THRESHOLD
11471149
for license_match in license_matches
11481150
)
@@ -1238,11 +1240,16 @@ def has_unknown_matches(license_matches):
12381240

12391241
def is_unknown_intro(license_match):
12401242
"""
1241-
Return True if the LicenseMatch is an unknown license intro.
1243+
Return True if the LicenseMatch is unknown and can be considered
1244+
as a license intro to other license matches.
1245+
I.e. this is not an unknown when followed by other proper matches.
12421246
"""
12431247
return (
12441248
license_match.rule.has_unknown and
1245-
license_match.rule.is_license_intro
1249+
(
1250+
license_match.rule.is_license_intro or license_match.rule.is_license_clue or
1251+
license_match.rule.license_expression == 'free-unknown'
1252+
)
12461253
)
12471254

12481255

@@ -1338,7 +1345,10 @@ def is_license_intro(license_match):
13381345
from licensedcode.match_aho import MATCH_AHO_EXACT
13391346

13401347
return (
1341-
license_match.rule.is_license_intro
1348+
(
1349+
license_match.rule.is_license_intro or license_match.rule.is_license_clue or
1350+
license_match.rule.license_expression == 'free-unknown'
1351+
)
13421352
and (
13431353
license_match.matcher == MATCH_AHO_EXACT
13441354
or license_match.coverage() == 100
@@ -1554,10 +1564,16 @@ def get_detected_license_expression(
15541564
elif analysis == DetectionCategory.EXTRA_WORDS.value:
15551565
if TRACE_ANALYSIS:
15561566
logger_debug(f'analysis {DetectionCategory.EXTRA_WORDS.value}')
1557-
# Apply filtering or handling logic if needed
1567+
# TODO: Fix score if extra words allowed in rules
15581568
matches_for_expression = license_matches
15591569
detection_log.append(DetectionRule.EXTRA_WORDS.value)
15601570

1571+
elif analysis == DetectionCategory.IMPERFECT_COVERAGE.value:
1572+
if TRACE_ANALYSIS:
1573+
logger_debug(f'analysis {DetectionCategory.IMPERFECT_COVERAGE.value}')
1574+
matches_for_expression = license_matches
1575+
detection_log.append(DetectionRule.IMPERFECT_COVERAGE.value)
1576+
15611577
else:
15621578
if TRACE_ANALYSIS:
15631579
logger_debug(f'analysis not-combined')
@@ -1903,7 +1919,11 @@ def find_referenced_resource_from_package(referenced_filename, resource, codebas
19031919

19041920
datafile_paths = datafile_paths_by_package_uid.get(package_uid)
19051921
for path in datafile_paths:
1906-
datafile_path = posixpath.join(root_path, path)
1922+
# support strip_root and normal cases
1923+
if not as_posixpath(path).startswith(f"{as_posixpath(root_path)}/"):
1924+
datafile_path = posixpath.join(root_path, path)
1925+
else:
1926+
datafile_path = path
19071927
datafile_resource = codebase.get_resource(path=datafile_path)
19081928
if not datafile_resource or not datafile_resource.parent_path():
19091929
continue
@@ -1941,8 +1961,6 @@ def find_referenced_resource(referenced_filename, resource, codebase, **kwargs):
19411961
return resource
19421962

19431963
# Also look at codebase root for referenced file
1944-
# TODO: look at project root identified by key-files
1945-
# instead of codebase scan root
19461964
root_path = codebase.root.path
19471965
path = posixpath.join(root_path, referenced_filename)
19481966
resource = codebase.get_resource(path=path)

tests/formattedcode/data/common/manifests-expected.yaml

Lines changed: 43 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@ headers:
2929
system_environment:
3030
operating_system: linux
3131
cpu_architecture: 64
32-
platform: Linux-6.8.0-52-generic-x86_64-with-glibc2.35
33-
platform_version: '#53~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Wed Jan 15 19:18:46 UTC 2'
34-
python_version: 3.12.9 (main, Feb 20 2025, 19:13:20) [GCC 11.4.0]
32+
platform: Linux-5.15.0-141-generic-x86_64-with-glibc2.35
33+
platform_version: '#151-Ubuntu SMP Sun May 18 21:35:19 UTC 2025'
34+
python_version: 3.10.12 (main, Feb 4 2025, 14:57:36) [GCC 11.4.0]
3535
spdx_license_list_version: '3.26'
3636
files_count: 4
3737
summary:
@@ -1882,13 +1882,14 @@ files:
18821882
is_media: no
18831883
is_source: no
18841884
is_script: no
1885-
package_data: []
1886-
for_packages: []
18871885
is_legal: no
18881886
is_manifest: no
18891887
is_readme: no
18901888
is_top_level: yes
18911889
is_key_file: no
1890+
is_community: no
1891+
package_data: []
1892+
for_packages: []
18921893
detected_license_expression:
18931894
detected_license_expression_spdx:
18941895
license_detections: []
@@ -1921,14 +1922,15 @@ files:
19211922
is_media: no
19221923
is_source: no
19231924
is_script: no
1924-
package_data: []
1925-
for_packages:
1926-
- pkg:maven/javax.persistence/persistence-api@1.0?uuid=fixed-uid-done-for-testing-5642512d1758
19271925
is_legal: no
19281926
is_manifest: no
19291927
is_readme: no
19301928
is_top_level: yes
19311929
is_key_file: no
1930+
is_community: no
1931+
package_data: []
1932+
for_packages:
1933+
- pkg:maven/javax.persistence/persistence-api@1.0?uuid=fixed-uid-done-for-testing-5642512d1758
19321934
detected_license_expression:
19331935
detected_license_expression_spdx:
19341936
license_detections: []
@@ -1961,6 +1963,12 @@ files:
19611963
is_media: no
19621964
is_source: no
19631965
is_script: no
1966+
is_legal: no
1967+
is_manifest: no
1968+
is_readme: no
1969+
is_top_level: no
1970+
is_key_file: no
1971+
is_community: no
19641972
package_data:
19651973
- type: maven
19661974
namespace: javax.persistence
@@ -2032,11 +2040,6 @@ files:
20322040
purl: pkg:maven/javax.persistence/persistence-api@1.0
20332041
for_packages:
20342042
- pkg:maven/javax.persistence/persistence-api@1.0?uuid=fixed-uid-done-for-testing-5642512d1758
2035-
is_legal: no
2036-
is_manifest: no
2037-
is_readme: no
2038-
is_top_level: no
2039-
is_key_file: no
20402043
detected_license_expression: cddl-1.0
20412044
detected_license_expression_spdx: CDDL-1.0
20422045
license_detections:
@@ -2096,13 +2099,14 @@ files:
20962099
is_media: no
20972100
is_source: no
20982101
is_script: no
2099-
package_data: []
2100-
for_packages: []
21012102
is_legal: no
21022103
is_manifest: no
21032104
is_readme: no
21042105
is_top_level: yes
21052106
is_key_file: no
2107+
is_community: no
2108+
package_data: []
2109+
for_packages: []
21062110
detected_license_expression:
21072111
detected_license_expression_spdx:
21082112
license_detections: []
@@ -2135,6 +2139,12 @@ files:
21352139
is_media: no
21362140
is_source: no
21372141
is_script: no
2142+
is_legal: no
2143+
is_manifest: no
2144+
is_readme: no
2145+
is_top_level: no
2146+
is_key_file: no
2147+
is_community: no
21382148
package_data:
21392149
- type: npm
21402150
namespace:
@@ -2315,11 +2325,6 @@ files:
23152325
purl: pkg:npm/grunt-esvm@3.2.8
23162326
for_packages:
23172327
- pkg:npm/grunt-esvm@3.2.8?uuid=fixed-uid-done-for-testing-5642512d1758
2318-
is_legal: no
2319-
is_manifest: no
2320-
is_readme: no
2321-
is_top_level: no
2322-
is_key_file: no
23232328
detected_license_expression: apache-2.0
23242329
detected_license_expression_spdx: Apache-2.0
23252330
license_detections:
@@ -2407,13 +2412,14 @@ files:
24072412
is_media: no
24082413
is_source: no
24092414
is_script: no
2410-
package_data: []
2411-
for_packages: []
24122415
is_legal: no
24132416
is_manifest: no
24142417
is_readme: no
24152418
is_top_level: yes
24162419
is_key_file: no
2420+
is_community: no
2421+
package_data: []
2422+
for_packages: []
24172423
detected_license_expression:
24182424
detected_license_expression_spdx:
24192425
license_detections: []
@@ -2446,6 +2452,12 @@ files:
24462452
is_media: no
24472453
is_source: no
24482454
is_script: no
2455+
is_legal: no
2456+
is_manifest: no
2457+
is_readme: no
2458+
is_top_level: no
2459+
is_key_file: no
2460+
is_community: no
24492461
package_data:
24502462
- type: npm
24512463
namespace:
@@ -2559,11 +2571,6 @@ files:
25592571
purl: pkg:npm/angular-compare-validator@0.1.1
25602572
for_packages:
25612573
- pkg:npm/angular-compare-validator@0.1.1?uuid=fixed-uid-done-for-testing-5642512d1758
2562-
is_legal: no
2563-
is_manifest: no
2564-
is_readme: no
2565-
is_top_level: no
2566-
is_key_file: no
25672574
detected_license_expression: mit
25682575
detected_license_expression_spdx: MIT
25692576
license_detections:
@@ -2628,13 +2635,14 @@ files:
26282635
is_media: no
26292636
is_source: no
26302637
is_script: no
2631-
package_data: []
2632-
for_packages: []
26332638
is_legal: no
26342639
is_manifest: no
26352640
is_readme: no
26362641
is_top_level: yes
26372642
is_key_file: no
2643+
is_community: no
2644+
package_data: []
2645+
for_packages: []
26382646
detected_license_expression:
26392647
detected_license_expression_spdx:
26402648
license_detections: []
@@ -2667,6 +2675,12 @@ files:
26672675
is_media: no
26682676
is_source: yes
26692677
is_script: yes
2678+
is_legal: no
2679+
is_manifest: no
2680+
is_readme: no
2681+
is_top_level: no
2682+
is_key_file: no
2683+
is_community: no
26702684
package_data:
26712685
- type: pypi
26722686
namespace:
@@ -2863,11 +2877,6 @@ files:
28632877
purl: pkg:pypi/bluepyopt
28642878
for_packages:
28652879
- pkg:pypi/bluepyopt?uuid=fixed-uid-done-for-testing-5642512d1758
2866-
is_legal: no
2867-
is_manifest: no
2868-
is_readme: no
2869-
is_top_level: no
2870-
is_key_file: no
28712880
detected_license_expression: lgpl-3.0
28722881
detected_license_expression_spdx: LGPL-3.0-only
28732882
license_detections:

0 commit comments

Comments
 (0)