Skip to content

Commit 820d7a7

Browse files
authored
Introduce "License Clarity" thresholds mechanism (#1689)
Signed-off-by: NucleonGodX <racerpro41@gmail.com>
1 parent 16d3f55 commit 820d7a7

File tree

3 files changed

+331
-0
lines changed

3 files changed

+331
-0
lines changed

scanpipe/pipes/license_clarity.py

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
#
3+
# http://nexb.com and https://github.com/nexB/scancode.io
4+
# The ScanCode.io software is licensed under the Apache License version 2.0.
5+
# Data generated with ScanCode.io is provided as-is without warranties.
6+
# ScanCode is a trademark of nexB Inc.
7+
#
8+
# You may not use this software except in compliance with the License.
9+
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10+
# Unless required by applicable law or agreed to in writing, software distributed
11+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
# specific language governing permissions and limitations under the License.
14+
#
15+
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
16+
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
17+
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
18+
# for any legal advice.
19+
#
20+
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
21+
# Visit https://github.com/nexB/scancode.io for support and download.
22+
23+
"""
24+
License Clarity Thresholds Management
25+
26+
This module provides an independent mechanism to read, validate, and evaluate
27+
license clarity score thresholds from policy files. Unlike license policies
28+
which are applied during scan processing, clarity thresholds are evaluated
29+
post-scan during summary generation.
30+
31+
The clarity thresholds system uses a simple key-value mapping where:
32+
- Keys are integer threshold values (minimum scores)
33+
- Values are compliance alert levels ('ok', 'warning', 'error')
34+
35+
Example policies.yml structure:
36+
37+
license_clarity_thresholds:
38+
80: ok # Scores >= 80 get 'ok' alert
39+
50: warning # Scores 50-79 get 'warning' alert; scores below 50 get 'error'
40+
"""
41+
42+
from pathlib import Path
43+
44+
from django.core.exceptions import ValidationError
45+
46+
import saneyaml
47+
48+
49+
def load_yaml_content(yaml_content):
    """
    Load and parse YAML content into a Python data structure.

    The result is whatever ``saneyaml.load`` produces for ``yaml_content``;
    no check on its type is done here.

    Raises:
        ValidationError: If ``saneyaml`` raises a ``YAMLError`` while parsing.
            The parser error is chained as the cause.

    """
    try:
        return saneyaml.load(yaml_content)
    except saneyaml.YAMLError as e:
        # Chain explicitly so the traceback shows the parser error as the cause.
        raise ValidationError(f"Policies file format error: {e}") from e
55+
56+
57+
class ClarityThresholdsPolicy:
    """
    Manages clarity score thresholds and compliance evaluation.

    This class reads clarity thresholds from a dictionary, validates them
    against threshold configurations and determines compliance alerts based on
    clarity scores.
    """

    def __init__(self, threshold_dict):
        """Initialize with validated threshold dictionary."""
        self.thresholds = self.validate_thresholds(threshold_dict)

    @staticmethod
    def validate_thresholds(threshold_dict):
        """
        Validate ``threshold_dict`` and return it with integer keys.

        Each key is converted with ``int()``, each value must be one of
        'ok', 'warning' or 'error', and the converted keys must appear in
        strictly descending order.

        Returns:
            dict: Mapping of integer threshold to compliance alert level, in
                the same order as the input.

        Raises:
            ValidationError: If the input is not a dictionary, a key cannot be
                converted to an integer, two keys convert to the same integer,
                a value is not a known alert level, or the keys are not
                listed in descending order.

        """
        if not isinstance(threshold_dict, dict):
            raise ValidationError(
                "The `license_clarity_thresholds` must be a dictionary"
            )

        # A tuple (not a set) so that unhashable values are simply rejected by
        # the membership test below instead of raising TypeError.
        valid_alerts = ("ok", "warning", "error")

        validated = {}
        for key, value in threshold_dict.items():
            try:
                threshold = int(key)
            except (ValueError, TypeError, OverflowError) as e:
                # OverflowError: int() of an infinite float.
                raise ValidationError(
                    f"Threshold keys must be integers, got: {key}"
                ) from e

            # Distinct input keys such as 80 and "80" convert to the same
            # integer; `validated` already holds every threshold seen so far.
            if threshold in validated:
                raise ValidationError(f"Duplicate threshold key: {threshold}")

            if value not in valid_alerts:
                raise ValidationError(
                    f"Compliance alert must be one of 'ok', 'warning', 'error', "
                    f"got: {value}"
                )
            validated[threshold] = value

        # Duplicates were rejected above, so matching the reverse-sorted order
        # means the keys are strictly descending.
        ordered = list(validated)
        if ordered != sorted(ordered, reverse=True):
            raise ValidationError("Thresholds must be strictly descending")
        return validated

    def get_alert_for_score(self, score):
        """
        Determine compliance alert level for a given clarity score

        The alert of the highest threshold that ``score`` meets or exceeds is
        returned. A ``None`` score, or a score below every configured
        threshold, yields 'error'.

        Returns:
            str: Compliance alert level ('ok', 'warning', 'error')

        """
        if score is None:
            return "error"

        # Find the highest threshold that the score meets or exceeds
        best_match = max(
            (threshold for threshold in self.thresholds if score >= threshold),
            default=None,
        )
        if best_match is None:
            return "error"

        return self.thresholds[best_match]

    def get_thresholds_summary(self):
        """
        Get a summary of configured thresholds for reporting

        Returns:
            dict: Summary of thresholds and their alert levels, ordered from
                the highest threshold to the lowest

        """
        return dict(sorted(self.thresholds.items(), reverse=True))
125+
126+
127+
def load_clarity_thresholds_from_yaml(yaml_content):
    """
    Build a clarity thresholds policy from the text of a policies file.

    The text is parsed with ``load_yaml_content`` and the value stored under
    the ``license_clarity_thresholds`` key is handed to
    ``ClarityThresholdsPolicy``.

    Returns:
        ClarityThresholdsPolicy: Configured policy object

    Raises:
        ValidationError: If the parsed content is not a dictionary or has no
            ``license_clarity_thresholds`` key. Errors raised by
            ``load_yaml_content`` and ``ClarityThresholdsPolicy`` propagate
            unchanged.

    """
    policies = load_yaml_content(yaml_content)
    if not isinstance(policies, dict):
        raise ValidationError("YAML content must be a dictionary.")

    section_name = "license_clarity_thresholds"
    if section_name in policies:
        return ClarityThresholdsPolicy(policies[section_name])

    raise ValidationError(
        "Missing 'license_clarity_thresholds' key in policies file."
    )
146+
147+
148+
def load_clarity_thresholds_from_file(file_path):
    """
    Load clarity thresholds from a YAML file.

    Returns:
        ClarityThresholdsPolicy: Configured policy object or None if file not found

    Raises:
        ValidationError: If the file exists but cannot be read or decoded as
            UTF-8. Validation errors raised while parsing its content
            propagate unchanged.

    """
    file_path = Path(file_path)

    if not file_path.exists():
        return None

    # Only the read is guarded, so that parsing problems are never reported
    # as file reading errors.
    try:
        yaml_content = file_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        # The file was removed between the existence check and the read;
        # treat it like any other missing file.
        return None
    except (OSError, UnicodeDecodeError) as e:
        raise ValidationError(f"Error reading file {file_path}: {e}") from e

    return load_clarity_thresholds_from_yaml(yaml_content)
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
license_clarity_thresholds:
2+
90: ok
3+
70: warning
4+
40: error
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
#
3+
# http://nexb.com and https://github.com/nexB/scancode.io
4+
# The ScanCode.io software is licensed under the Apache License version 2.0.
5+
# Data generated with ScanCode.io is provided as-is without warranties.
6+
# ScanCode is a trademark of nexB Inc.
7+
#
8+
# You may not use this software except in compliance with the License.
9+
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10+
# Unless required by applicable law or agreed to in writing, software distributed
11+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
# specific language governing permissions and limitations under the License.
14+
#
15+
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
16+
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
17+
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
18+
# for any legal advice.
19+
#
20+
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
21+
# Visit https://github.com/nexB/scancode.io for support and download.
22+
23+
from pathlib import Path
24+
25+
from django.core.exceptions import ValidationError
26+
from django.test import TestCase
27+
28+
from scanpipe.pipes.license_clarity import ClarityThresholdsPolicy
29+
from scanpipe.pipes.license_clarity import load_clarity_thresholds_from_file
30+
from scanpipe.pipes.license_clarity import load_clarity_thresholds_from_yaml
31+
32+
33+
class ClarityThresholdsPolicyTest(TestCase):
    """Test ClarityThresholdsPolicy class functionality."""

    # Directory holding the test fixtures, resolved relative to this module.
    data = Path(__file__).parent.parent / "data"

    def test_valid_thresholds_initialization(self):
        thresholds = {80: "ok", 50: "warning", 20: "error"}
        policy = ClarityThresholdsPolicy(thresholds)
        self.assertEqual(policy.thresholds, thresholds)

    def test_string_keys_converted_to_integers(self):
        thresholds = {"80": "ok", "50": "warning"}
        policy = ClarityThresholdsPolicy(thresholds)
        expected = {80: "ok", 50: "warning"}
        self.assertEqual(policy.thresholds, expected)

    def test_invalid_threshold_key_raises_error(self):
        with self.assertRaises(ValidationError) as cm:
            ClarityThresholdsPolicy({"invalid": "ok"})
        self.assertIn("must be integers", str(cm.exception))

    def test_invalid_alert_value_raises_error(self):
        with self.assertRaises(ValidationError) as cm:
            ClarityThresholdsPolicy({80: "invalid"})
        self.assertIn("must be one of 'ok', 'warning', 'error'", str(cm.exception))

    def test_non_dict_input_raises_error(self):
        with self.assertRaises(ValidationError) as cm:
            ClarityThresholdsPolicy([80, 50])
        self.assertIn("must be a dictionary", str(cm.exception))

    def test_duplicate_threshold_keys_raise_error(self):
        # 80 and "80" are distinct dict keys that convert to the same integer.
        with self.assertRaises(ValidationError) as cm:
            ClarityThresholdsPolicy({80: "ok", "80": "warning"})
        self.assertIn("Duplicate threshold key", str(cm.exception))

    def test_overlapping_thresholds_wrong_order(self):
        with self.assertRaises(ValidationError) as cm:
            ClarityThresholdsPolicy({70: "ok", 80: "warning"})
        self.assertIn("Thresholds must be strictly descending", str(cm.exception))

    def test_float_threshold_keys(self):
        # Float keys are truncated by the int() conversion, not rounded.
        thresholds = {80.5: "ok", 50.9: "warning"}
        policy = ClarityThresholdsPolicy(thresholds)
        expected = {80: "ok", 50: "warning"}
        self.assertEqual(policy.thresholds, expected)

    def test_negative_threshold_values(self):
        thresholds = {50: "ok", 0: "warning", -10: "error"}
        policy = ClarityThresholdsPolicy(thresholds)
        self.assertEqual(policy.get_alert_for_score(60), "ok")
        self.assertEqual(policy.get_alert_for_score(25), "warning")
        self.assertEqual(policy.get_alert_for_score(-5), "error")
        self.assertEqual(policy.get_alert_for_score(-20), "error")

    def test_empty_thresholds_dict(self):
        # With no thresholds configured, every score falls back to "error".
        policy = ClarityThresholdsPolicy({})
        self.assertEqual(policy.get_alert_for_score(100), "error")
        self.assertEqual(policy.get_alert_for_score(50), "error")
        self.assertEqual(policy.get_alert_for_score(0), "error")
        self.assertEqual(policy.get_alert_for_score(None), "error")

    def test_very_high_threshold_values(self):
        thresholds = {150: "ok", 100: "warning"}
        policy = ClarityThresholdsPolicy(thresholds)
        self.assertEqual(policy.get_alert_for_score(100), "warning")
        self.assertEqual(policy.get_alert_for_score(90), "error")
        self.assertEqual(policy.get_alert_for_score(50), "error")
        self.assertEqual(policy.get_alert_for_score(99), "error")

    # Policy logic via YAML string (mock policies.yml content)
    def test_yaml_string_ok_and_warning(self):
        yaml_content = """
license_clarity_thresholds:
  90: ok
  30: warning
"""
        policy = load_clarity_thresholds_from_yaml(yaml_content)
        self.assertEqual(policy.get_alert_for_score(95), "ok")
        self.assertEqual(policy.get_alert_for_score(60), "warning")
        self.assertEqual(policy.get_alert_for_score(20), "error")

    def test_yaml_string_single_threshold(self):
        yaml_content = """
license_clarity_thresholds:
  80: ok
"""
        policy = load_clarity_thresholds_from_yaml(yaml_content)
        self.assertEqual(policy.get_alert_for_score(90), "ok")
        self.assertEqual(policy.get_alert_for_score(79), "error")

    def test_yaml_string_invalid_alert(self):
        yaml_content = """
license_clarity_thresholds:
  80: great
"""
        with self.assertRaises(ValidationError):
            load_clarity_thresholds_from_yaml(yaml_content)

    def test_yaml_string_invalid_key(self):
        yaml_content = """
license_clarity_thresholds:
  eighty: ok
"""
        with self.assertRaises(ValidationError):
            load_clarity_thresholds_from_yaml(yaml_content)

    def test_yaml_string_missing_key(self):
        yaml_content = """
license_policies:
  - license_key: mit
"""
        with self.assertRaises(ValidationError):
            load_clarity_thresholds_from_yaml(yaml_content)

    def test_yaml_string_invalid_yaml(self):
        yaml_content = "license_clarity_thresholds: [80, 50"
        with self.assertRaises(ValidationError):
            load_clarity_thresholds_from_yaml(yaml_content)

    def test_load_from_existing_file(self):
        test_file = self.data / "license_clarity" / "sample_thresholds.yml"
        policy = load_clarity_thresholds_from_file(test_file)
        self.assertIsNotNone(policy)
        self.assertEqual(policy.get_alert_for_score(95), "ok")
        self.assertEqual(policy.get_alert_for_score(75), "warning")
        self.assertEqual(policy.get_alert_for_score(50), "error")

    def test_load_from_nonexistent_file(self):
        policy = load_clarity_thresholds_from_file("/nonexistent/file.yml")
        self.assertIsNone(policy)

0 commit comments

Comments
 (0)