Introduce "License Clarity" thresholds mechanism (#1689)

NucleonGodX · web-flow · commit 820d7a7896c7 · 2025-06-30T13:16:24.000+04:00
Signed-off-by: NucleonGodX &lt;racerpro41@gmail.com&gt;
diff --git a/scanpipe/pipes/license_clarity.py b/scanpipe/pipes/license_clarity.py
@@ -0,0 +1,165 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# http://nexb.com and https://github.com/nexB/scancode.io
+# The ScanCode.io software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode.io is provided as-is without warranties.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+# ScanCode.io should be considered or used as legal advice. Consult an Attorney
+# for any legal advice.
+#
+# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
+# Visit https://github.com/nexB/scancode.io for support and download.
+
+"""
+License Clarity Thresholds Management
+
+This module provides an independent mechanism to read, validate, and evaluate
+license clarity score thresholds from policy files. Unlike license policies
+which are applied during scan processing, clarity thresholds are evaluated
+post-scan during summary generation.
+
+The clarity thresholds system uses a simple key-value mapping where:
+- Keys are integer threshold values (minimum scores)
+- Values are compliance alert levels ('ok', 'warning', 'error')
+
+Example policies.yml structure:
+
+license_clarity_thresholds:
+  80: ok # Scores >= 80 get 'ok' alert
+  50: warning # Scores 50-79 get 'warning' alert
+"""
+
+from pathlib import Path
+
+from django.core.exceptions import ValidationError
+
+import saneyaml
+
+
+def load_yaml_content(yaml_content):
+    """Load and parse YAML content into a Python dictionary."""
+    try:
+        return saneyaml.load(yaml_content)
+    except saneyaml.YAMLError as e:
+        raise ValidationError(f"Policies file format error: {e}")
+
+
+class ClarityThresholdsPolicy:
+    """
+    Manages clarity score thresholds and compliance evaluation.
+
+    This class reads clarity thresholds from a dictionary, validates them
+    against threshold configurations and determines compliance alerts based on
+    clarity scores.
+    """
+
+    def __init__(self, threshold_dict):
+        """Initialize with validated threshold dictionary."""
+        self.thresholds = self.validate_thresholds(threshold_dict)
+
+    @staticmethod
+    def validate_thresholds(threshold_dict):
+        if not isinstance(threshold_dict, dict):
+            raise ValidationError(
+                "The `license_clarity_thresholds` must be a dictionary"
+            )
+        validated = {}
+        seen = set()
+        for key, value in threshold_dict.items():
+            try:
+                threshold = int(key)
+            except (ValueError, TypeError):
+                raise ValidationError(f"Threshold keys must be integers, got: {key}")
+            if threshold in seen:
+                raise ValidationError(f"Duplicate threshold key: {threshold}")
+            seen.add(threshold)
+            if value not in ["ok", "warning", "error"]:
+                raise ValidationError(
+                    f"Compliance alert must be one of 'ok', 'warning', 'error', "
+                    f"got: {value}"
+                )
+            validated[threshold] = value
+        sorted_keys = sorted(validated.keys(), reverse=True)
+        if list(validated.keys()) != sorted_keys:
+            raise ValidationError("Thresholds must be strictly descending")
+        return validated
+
+    def get_alert_for_score(self, score):
+        """
+        Determine compliance alert level for a given clarity score
+
+        Returns:
+            str: Compliance alert level ('ok', 'warning', 'error')
+
+        """
+        if score is None:
+            return "error"
+
+        # Find the highest threshold that the score meets or exceeds
+        applicable_thresholds = [t for t in self.thresholds if score >= t]
+        if not applicable_thresholds:
+            return "error"
+
+        max_threshold = max(applicable_thresholds)
+        return self.thresholds[max_threshold]
+
+    def get_thresholds_summary(self):
+        """
+        Get a summary of configured thresholds for reporting
+
+        Returns:
+            dict: Summary of thresholds and their alert levels
+
+        """
+        return dict(sorted(self.thresholds.items(), reverse=True))
+
+
+def load_clarity_thresholds_from_yaml(yaml_content):
+    """
+    Load clarity thresholds from YAML content.
+
+    Returns:
+        ClarityThresholdsPolicy: Configured policy object
+
+    """
+    data = load_yaml_content(yaml_content)
+
+    if not isinstance(data, dict):
+        raise ValidationError("YAML content must be a dictionary.")
+
+    if "license_clarity_thresholds" not in data:
+        raise ValidationError(
+            "Missing 'license_clarity_thresholds' key in policies file."
+        )
+
+    return ClarityThresholdsPolicy(data["license_clarity_thresholds"])
+
+
+def load_clarity_thresholds_from_file(file_path):
+    """
+    Load clarity thresholds from a YAML file.
+
+    Returns:
+        ClarityThresholdsPolicy: Configured policy object or None if file not found
+
+    """
+    file_path = Path(file_path)
+
+    if not file_path.exists():
+        return None
+
+    try:
+        yaml_content = file_path.read_text(encoding="utf-8")
+        return load_clarity_thresholds_from_yaml(yaml_content)
+    except (OSError, UnicodeDecodeError) as e:
+        raise ValidationError(f"Error reading file {file_path}: {e}")
diff --git a/scanpipe/tests/data/license_clarity/sample_thresholds.yml b/scanpipe/tests/data/license_clarity/sample_thresholds.yml
@@ -0,0 +1,4 @@
+license_clarity_thresholds:
+  90: ok
+  70: warning
+  40: error
diff --git a/scanpipe/tests/pipes/test_license_clarity.py b/scanpipe/tests/pipes/test_license_clarity.py
@@ -0,0 +1,162 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# http://nexb.com and https://github.com/nexB/scancode.io
+# The ScanCode.io software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode.io is provided as-is without warranties.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+# ScanCode.io should be considered or used as legal advice. Consult an Attorney
+# for any legal advice.
+#
+# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
+# Visit https://github.com/nexB/scancode.io for support and download.
+
+from pathlib import Path
+
+from django.core.exceptions import ValidationError
+from django.test import TestCase
+
+from scanpipe.pipes.license_clarity import ClarityThresholdsPolicy
+from scanpipe.pipes.license_clarity import load_clarity_thresholds_from_file
+from scanpipe.pipes.license_clarity import load_clarity_thresholds_from_yaml
+
+
+class ClarityThresholdsPolicyTest(TestCase):
+    data = Path(__file__).parent.parent / "data"
+    """Test ClarityThresholdsPolicy class functionality."""
+
+    def test_valid_thresholds_initialization(self):
+        thresholds = {80: "ok", 50: "warning", 20: "error"}
+        policy = ClarityThresholdsPolicy(thresholds)
+        self.assertEqual(policy.thresholds, thresholds)
+
+    def test_string_keys_converted_to_integers(self):
+        thresholds = {"80": "ok", "50": "warning"}
+        policy = ClarityThresholdsPolicy(thresholds)
+        expected = {80: "ok", 50: "warning"}
+        self.assertEqual(policy.thresholds, expected)
+
+    def test_invalid_threshold_key_raises_error(self):
+        with self.assertRaises(ValidationError) as cm:
+            ClarityThresholdsPolicy({"invalid": "ok"})
+        self.assertIn("must be integers", str(cm.exception))
+
+    def test_invalid_alert_value_raises_error(self):
+        with self.assertRaises(ValidationError) as cm:
+            ClarityThresholdsPolicy({80: "invalid"})
+        self.assertIn("must be one of 'ok', 'warning', 'error'", str(cm.exception))
+
+    def test_non_dict_input_raises_error(self):
+        with self.assertRaises(ValidationError) as cm:
+            ClarityThresholdsPolicy([80, 50])
+        self.assertIn("must be a dictionary", str(cm.exception))
+
+    def test_duplicate_threshold_keys_raise_error(self):
+        with self.assertRaises(ValidationError) as cm:
+            ClarityThresholdsPolicy({80: "ok", "80": "warning"})
+        self.assertIn("Duplicate threshold key", str(cm.exception))
+
+    def test_overlapping_thresholds_wrong_order(self):
+        with self.assertRaises(ValidationError) as cm:
+            ClarityThresholdsPolicy({70: "ok", 80: "warning"})
+        self.assertIn("Thresholds must be strictly descending", str(cm.exception))
+
+    def test_float_threshold_keys(self):
+        thresholds = {80.5: "ok", 50.9: "warning"}
+        policy = ClarityThresholdsPolicy(thresholds)
+        expected = {80: "ok", 50: "warning"}
+        self.assertEqual(policy.thresholds, expected)
+
+    def test_negative_threshold_values(self):
+        thresholds = {50: "ok", 0: "warning", -10: "error"}
+        policy = ClarityThresholdsPolicy(thresholds)
+        self.assertEqual(policy.get_alert_for_score(60), "ok")
+        self.assertEqual(policy.get_alert_for_score(25), "warning")
+        self.assertEqual(policy.get_alert_for_score(-5), "error")
+        self.assertEqual(policy.get_alert_for_score(-20), "error")
+
+    def test_empty_thresholds_dict(self):
+        policy = ClarityThresholdsPolicy({})
+        self.assertEqual(policy.get_alert_for_score(100), "error")
+        self.assertEqual(policy.get_alert_for_score(50), "error")
+        self.assertEqual(policy.get_alert_for_score(0), "error")
+        self.assertEqual(policy.get_alert_for_score(None), "error")
+
+    def test_very_high_threshold_values(self):
+        thresholds = {150: "ok", 100: "warning"}
+        policy = ClarityThresholdsPolicy(thresholds)
+        self.assertEqual(policy.get_alert_for_score(100), "warning")
+        self.assertEqual(policy.get_alert_for_score(90), "error")
+        self.assertEqual(policy.get_alert_for_score(50), "error")
+        self.assertEqual(policy.get_alert_for_score(99), "error")
+
+    # Policy logic via YAML string (mock policies.yml content)
+    def test_yaml_string_ok_and_warning(self):
+        yaml_content = """
+license_clarity_thresholds:
+  90: ok
+  30: warning
+"""
+        policy = load_clarity_thresholds_from_yaml(yaml_content)
+        self.assertEqual(policy.get_alert_for_score(95), "ok")
+        self.assertEqual(policy.get_alert_for_score(60), "warning")
+        self.assertEqual(policy.get_alert_for_score(20), "error")
+
+    def test_yaml_string_single_threshold(self):
+        yaml_content = """
+license_clarity_thresholds:
+  80: ok
+"""
+        policy = load_clarity_thresholds_from_yaml(yaml_content)
+        self.assertEqual(policy.get_alert_for_score(90), "ok")
+        self.assertEqual(policy.get_alert_for_score(79), "error")
+
+    def test_yaml_string_invalid_alert(self):
+        yaml_content = """
+license_clarity_thresholds:
+  80: great
+"""
+        with self.assertRaises(ValidationError):
+            load_clarity_thresholds_from_yaml(yaml_content)
+
+    def test_yaml_string_invalid_key(self):
+        yaml_content = """
+license_clarity_thresholds:
+  eighty: ok
+"""
+        with self.assertRaises(ValidationError):
+            load_clarity_thresholds_from_yaml(yaml_content)
+
+    def test_yaml_string_missing_key(self):
+        yaml_content = """
+license_policies:
+  - license_key: mit
+"""
+        with self.assertRaises(ValidationError):
+            load_clarity_thresholds_from_yaml(yaml_content)
+
+    def test_yaml_string_invalid_yaml(self):
+        yaml_content = "license_clarity_thresholds: [80, 50"
+        with self.assertRaises(ValidationError):
+            load_clarity_thresholds_from_yaml(yaml_content)
+
+    def test_load_from_existing_file(self):
+        test_file = self.data / "license_clarity" / "sample_thresholds.yml"
+        policy = load_clarity_thresholds_from_file(test_file)
+        self.assertIsNotNone(policy)
+        self.assertEqual(policy.get_alert_for_score(95), "ok")
+        self.assertEqual(policy.get_alert_for_score(75), "warning")
+        self.assertEqual(policy.get_alert_for_score(50), "error")
+
+    def test_load_from_nonexistent_file(self):
+        policy = load_clarity_thresholds_from_file("/nonexistent/file.yml")
+        self.assertIsNone(policy)