Adding V2 API with support for conciseness, completeness and toxicity checks (#1)

pjoshi30 · Preetam Joshi · web-flow · commit 935b94afa426 · 2024-03-26T10:36:46.000-07:00
* Adding V2 API with support for conciseness, completeness and toxicity checks.

* Removing prints and updating config for the example application.

* Updating README

---------

Co-authored-by: Preetam Joshi <info@aimon.ai>
diff --git a/README.md b/README.md
@@ -20,10 +20,10 @@ available in a future release.
 | Metric                                           | Status                                                       |
 |--------------------------------------------------|--------------------------------------------------------------|
 | Model Hallucination (Passage and Sentence Level) | <span style="font-size: 24px; color: green;">&#10003;</span> | 
+| Completeness                                     | <span style="font-size: 24px; color: green;">&#10003;</span>                     |
+| Conciseness                                      | <span style="font-size: 24px; color: green;">&#10003;</span>                    |
+| Toxicity                                         | <span style="font-size: 24px; color: green;">&#10003;</span>                      |
 | Semantic Similarity                              | <span style="font-size: 24px;">⌛</span>                      |
-| Completeness                                     | <span style="font-size: 24px;">⌛</span>                      |
-| Conciseness                                      | <span style="font-size: 24px;">⌛</span>                      |
-| Toxicity                                         | <span style="font-size: 24px;">⌛</span>                      |
 | Sentiment                                        | <span style="font-size: 24px;">⌛</span>                      |
 | Coherence                                        | <span style="font-size: 24px;">⌛</span>                      |
 | Sensitive Data (PII/PHI/PCI)                     | <span style="font-size: 24px;">⌛</span>                      |
@@ -56,6 +56,7 @@ to demonstrate the ease of integration.**
 
 ## Benchmarks
 
+### Hallucination Detection
 To demonstrate the effectiveness of our system, we benchmarked it against popular industry benchmarks for the
 hallucination detection task. The table below shows our results.
 
@@ -76,6 +77,11 @@ making it a suitable choice for both offline and online detection of hallucinati
     <img src="images/hallucination-benchmarks.png" alt="Hallucination Benchmarks">
 </div>
 
+### Completeness and Conciseness Detection
+
+There are currently no industry-standard benchmark datasets for these tasks. We will be publishing an evaluation dataset soon.
+Stay tuned! <span style="font-size: 16px;">⌛</span>
+
 ## Pricing
 
 We offer a generous free tier and an attractive low cost, low latency API.
diff --git a/images/aimon-rely-app-example.gif b/images/aimon-rely-app-example.gif
diff --git a/postman_collections/aimon_hallucination_detection_beta.postman_collection.march2024.json b/postman_collections/aimon_hallucination_detection_beta.postman_collection.march2024.json
diff --git a/src/aimon_rely_client/simple_client.py b/src/aimon_rely_client/simple_client.py
@@ -1,19 +1,20 @@
 from functools import wraps
 import logging
-from typing import Callable, Type, Union, Tuple, Optional, List, Dict
+from typing import Callable, Type, Union, Tuple, Optional, List, Dict, Any
 import random
 import time
 import requests
 
+
 def retry(
-    exception_to_check: Union[Type[BaseException], Tuple[Type[BaseException], ...]],
-    tries: int = 5,
-    delay: int = 3,
-    backoff: int = 2,
-    logger: Optional[logging.Logger] = None,
-    log_level: int = logging.WARNING,
-    re_raise: bool = True,
-    jitter: float = 0.1
+        exception_to_check: Union[Type[BaseException], Tuple[Type[BaseException], ...]],
+        tries: int = 5,
+        delay: int = 3,
+        backoff: int = 2,
+        logger: Optional[logging.Logger] = None,
+        log_level: int = logging.WARNING,
+        re_raise: bool = True,
+        jitter: float = 0.1
 ) -> Callable:
     """
     Retry calling the decorated function using an exponential backoff.
@@ -27,6 +28,7 @@ def retry(
     :param re_raise: Whether to re-raise the exception after the last retry.
     :param jitter: The maximum jitter to apply to the delay as a fraction of the delay.
     """
+
     def deco_retry(func: Callable) -> Callable:
         @wraps(func)
         def f_retry(*args, **kwargs):
@@ -56,50 +58,109 @@ def f_retry(*args, **kwargs):
                     raise
 
         return f_retry
+
     return deco_retry
 
+
 class RetryableError(Exception):
     pass
 
+
 class InvalidAPIKeyError(Exception):
     pass
 
+
+class Config:
+    SUPPORTED_DETECTORS = {'hallucination': 'default', 'toxicity': 'default', 'conciseness': 'default',
+                           'completeness': 'default'}
+    SUPPORTED_VALUES = {'default'}
+
+    def __init__(self, detectors: Dict[str, str] = None):
+        """
+        A Config object for detectors to be used in the Aimon API.
+
+        :param detectors: A dictionary containing names of detectors and the kind of detector to use.
+        """
+        detectors_enabled = {}
+        if detectors is None or len(detectors) == 0:
+            detectors_enabled = {'hallucination': 'default'}
+        else:
+            for key, value in detectors.items():
+                if value not in self.SUPPORTED_VALUES:
+                    raise Exception(
+                        "Value {} not supported, please contact the Aimon team on info@aimon.ai or on Discord for help".format(
+                            value))
+                if key in self.SUPPORTED_DETECTORS:
+                    detectors_enabled[key] = value
+        self.detectors = {}
+        for key, value in detectors_enabled.items():
+            self.detectors[key] = {'detector_name': value}
+
+
 class SimpleAimonRelyClient(object):
     """
     A simple client that sends detection requests to the Aimon Rely inference API.
     """
-    URL = "https://api.aimon.ai/v1/inference"
+    URL = "https://api.aimon.ai/v2/inference"
+    DEFAULT_CONFIG = Config()
 
-    def __init__(self, api_key: str):
+    def __init__(self, api_key: str, config: Config = DEFAULT_CONFIG):
         """
         :param api_key: the Aimon Rely API key. If you don't have one, request one by sending an email to info@aimon.ai
+        :param config: The default detector configuration, applied to each request item that does not supply its own "config".
         """
         if len(api_key) == 0 or "YOUR API KEY" in api_key:
             raise InvalidAPIKeyError("Enter a valid Aimon API key. Request it at info@aimon.ai or on Discord.")
         self.api_key = api_key
+        self.config = config
 
     @retry(RetryableError)
-    def detect(self, data_to_send: List[Dict[str, str]]):
+    def detect(self, data_to_send: List[Dict[str, Any]]):
         """
         Sends an HTTP POST request to the Aimon Rely Hallucination Detection API
-        :param data_to_send: An array of dict objects where each dict contains a "context" and "generated_text"
-        :return: A JSON object containing the following fields:
+        :param data_to_send: An array of dict objects where each dict contains a "context", a "generated_text" and
+                             optionally a "config" object
+        :return: A JSON object containing the following fields (if applicable):
+                "hallucination": Indicates whether the response consisted of intrinsic or extrinsic hallucinations.
                     "is_hallucinated": top level string indicating if hallucinated or not,
                     "score": A score indicating the probability that the whole "generated_text" is hallucinated
                     "sentences": An array of objects where each object contains a sentence level hallucination "score" and
                                  the "text" of the sentence.
+                "quality_metrics": A collection of quality metrics for the response of the LLM
+                    "results": A dict containing results of response quality detectors like conciseness and completeness
+                        "conciseness": This detector checks whether or not the response had unnecessary information
+                                       for the given query and the context documents
+                            "reasoning": An explanation of the score that was provided.
+                            "score": A probability score of how concise the response is for the user query and context documents.
+                        "completeness": This detector checks whether or not the response was complete enough for the
+                                        given query and context documents
+                            "reasoning": An explanation of the score that was provided.
+                            "score": A probability score of how complete the response is for the user query and context documents.
+                "toxicity": Indicates whether there was toxic content in the response. It uses 6 different label types for this.
+                    "identity_hate": The response contained hateful content that calls out real or perceived "identity factors" of an individual or a group.
+                    "insult": The response contained insulting content.
+                    "obscene": The response contained lewd or disgusting words.
+                    "threat": The response contained comments that threatened an individual or a group.
+                    "severe_toxic", "toxic": The response did not fall into the above 4 labels but is still considered
+                                             either severely toxic or generally toxic content.
         """
         headers = {
             "Authorization": f"Bearer {self.api_key}",
             'Content-Type': 'application/json'
         }
-        response = requests.post(self.URL, json=data_to_send, headers=headers, timeout=30)
+        payload = []
+        for item in data_to_send:
+            if 'config' not in item:
+                item['config'] = self.config.detectors
+            payload.append(item)
+        response = requests.post(self.URL, json=payload, headers=headers, timeout=30)
         if response.status_code in [503, 504]:
             raise RetryableError("Status code: {} received".format(response.status_code))
         if response.status_code == 401:
             raise InvalidAPIKeyError("Use a valid Aimon API key. Request it at info@aimon.ai or on Discord.")
         if response.status_code != 200:
             raise Exception(f"Error, bad response: {response}")
         if len(response.json()) == 0 or 'error' in response.json() or 'error' in response.json()[0]:
-            raise Exception(f"Received an error in the response: {response if len(response.json()) == 0 else response.json()}")
+            raise Exception(
+                f"Received an error in the response: {response if len(response.json()) == 0 else response.json()}")
         return response.json()[0]
diff --git a/src/aimon_rely_client/simple_client_test.py b/src/aimon_rely_client/simple_client_test.py
@@ -3,42 +3,97 @@
 # Generated by CodiumAI
 
 import pytest
-from aimon_rely_client.simple_client import SimpleAimonRelyClient, RetryableError
+from aimon_rely_client.simple_client import SimpleAimonRelyClient, RetryableError, Config
 
 API_KEY = "YOUR API KEY HERE"
 class TestSimpleAimonRelyClient:
 
     #  Sends an HTTP POST request to the Aimon Rely Hallucination Detection API with valid data and receives a valid response
     def test_valid_data_valid_response(self):
-        client = SimpleAimonRelyClient(api_key=API_KEY)
+        config = Config({'hallucination': 'default'})
+        client = SimpleAimonRelyClient(api_key=API_KEY, config=config)
         data_to_send = [{"context": "This is the context", "generated_text": "This is the context"}]
         response = client.detect(data_to_send)
-        assert "is_hallucinated" in response
-        assert response["is_hallucinated"] == "False"
-        assert "score" in response
-        assert "sentences" in response
-        assert len(response["sentences"]) == 1
+        assert "hallucination" in response
+        assert "is_hallucinated" in response['hallucination']
+        assert response['hallucination']["is_hallucinated"] == "False"
+        assert "score" in response['hallucination']
+        assert "sentences" in response['hallucination']
+        assert len(response['hallucination']["sentences"]) == 1
 
     #  Sends an HTTP POST request to the Aimon Rely Hallucination Detection API with a single dict object containing a valid "context" and "generated_text" but with a very short text and receives a valid response
     def test_short_text_valid_response(self):
-        client = SimpleAimonRelyClient(api_key=API_KEY)
+        config = Config({'hallucination': 'default'})
+        client = SimpleAimonRelyClient(api_key=API_KEY, config=config)
         short_text = "a"
         data_to_send = [{"context": "This is the context", "generated_text": short_text}]
         response = client.detect(data_to_send)
-        assert "is_hallucinated" in response
-        assert response["is_hallucinated"] == "True"
-        assert "score" in response
-        assert "sentences" in response
-        assert len(response["sentences"]) == 1
+        assert "hallucination" in response
+        assert "is_hallucinated" in response['hallucination']
+        assert response['hallucination']["is_hallucinated"] == "True"
+        assert "score" in response['hallucination']
+        assert "sentences" in response['hallucination']
+        assert len(response['hallucination']["sentences"]) == 1
 
     #  Sends an HTTP POST request to the Aimon Rely Hallucination Detection API with a single dict object containing a valid "context" and "generated_text" but with a text containing special characters and receives a valid response
     def test_special_characters_valid_response(self):
         client = SimpleAimonRelyClient(api_key=API_KEY)
         special_text = "!@#$%^&*()_+"
         data_to_send = [{"context": "This is the context", "generated_text": special_text}]
         response = client.detect(data_to_send)
-        assert "is_hallucinated" in response
-        assert response["is_hallucinated"] == "True"
-        assert "score" in response
-        assert "sentences" in response
-        assert len(response["sentences"]) == 12
+        assert "hallucination" in response
+        assert "is_hallucinated" in response['hallucination']
+        assert response['hallucination']["is_hallucinated"] == "True"
+        assert "score" in response['hallucination']
+        assert "sentences" in response['hallucination']
+        assert len(response['hallucination']["sentences"]) == 12
+
+    def test_valid_data_valid_response_conciseness(self):
+        config = Config({'conciseness': 'default'})
+        client = SimpleAimonRelyClient(api_key=API_KEY, config=config)
+        data_to_send = [{
+                            "context": "the abc have reported that those who receive centrelink payments made up half of radio rental's income last year. Centrelink payments themselves were up 20%.",
+                            "generated_text": "those who receive centrelink payments made up half of radio rental's income last year. The Centrelink payments were 20% up."}]
+        response = client.detect(data_to_send)
+        print(response["quality_metrics"])
+        assert "quality_metrics" in response
+        assert "results" in response["quality_metrics"]
+        assert "conciseness" in response["quality_metrics"]["results"]
+        assert "reasoning" in response["quality_metrics"]["results"]["conciseness"]
+        assert "score" in response["quality_metrics"]["results"]["conciseness"]
+        assert response["quality_metrics"]["results"]["conciseness"]["score"] >= 0.7
+
+    def test_valid_data_valid_response_completeness(self):
+        config = Config({'completeness': 'default'})
+        client = SimpleAimonRelyClient(api_key=API_KEY, config=config)
+        data_to_send = [{
+                            "context": "the abc have reported that those who receive centrelink payments made up half of radio rental's income last year. Centrelink payments themselves were up 20%.",
+                            "generated_text": "those who receive centrelink payments made up half of radio rental's income last year. The Centrelink payments were 20% up."}]
+        response = client.detect(data_to_send)
+        assert "quality_metrics" in response
+        print(response["quality_metrics"])
+        assert "results" in response["quality_metrics"]
+        assert "completeness" in response["quality_metrics"]["results"]
+        assert "reasoning" in response["quality_metrics"]["results"]["completeness"]
+        assert "score" in response["quality_metrics"]["results"]["completeness"]
+        assert response["quality_metrics"]["results"]["completeness"]["score"] > 0.7
+
+    def test_valid_data_valid_response_conciseness_completeness(self):
+        config = Config({'conciseness': 'default', 'completeness': 'default'})
+        client = SimpleAimonRelyClient(api_key=API_KEY, config=config)
+        data_to_send = [{"context": "the abc have reported that those who receive centrelink payments made up half of radio rental's income last year. Centrelink payments themselves were up 20%.",
+                         "generated_text": "those who receive centrelink payments made up half of radio rental's income last year. The Centrelink payments were 20% up."}]
+        response = client.detect(data_to_send)
+        assert "quality_metrics" in response
+        print(response["quality_metrics"])
+        assert "results" in response["quality_metrics"]
+        assert "completeness" in response["quality_metrics"]["results"]
+        assert "reasoning" in response["quality_metrics"]["results"]["completeness"]
+        assert "score" in response["quality_metrics"]["results"]["completeness"]
+        assert response["quality_metrics"]["results"]["completeness"]["score"] > 0.7
+        assert "quality_metrics" in response
+        assert "results" in response["quality_metrics"]
+        assert "conciseness" in response["quality_metrics"]["results"]
+        assert "reasoning" in response["quality_metrics"]["results"]["conciseness"]
+        assert "score" in response["quality_metrics"]["results"]["conciseness"]
+        assert response["quality_metrics"]["results"]["conciseness"]["score"] > 0.7
diff --git a/src/examples/langchain_summarization_app.py b/src/examples/langchain_summarization_app.py
@@ -9,7 +9,7 @@
 from langchain.llms.openai import OpenAI
 from langchain.chains.summarize import load_summarize_chain
 
-from aimon_rely_client.simple_client import SimpleAimonRelyClient, InvalidAPIKeyError
+from aimon_rely_client.simple_client import SimpleAimonRelyClient, Config, InvalidAPIKeyError
 
 # Streamlit app
 st.title('LangChain Text Summarizer')
@@ -26,7 +26,8 @@
         st.write(f"Please complete the missing fields.")
     else:
         try:
-            aimon_rely_client = SimpleAimonRelyClient(aimon_api_key)
+            aimon_rely_client = SimpleAimonRelyClient(aimon_api_key, config=Config(
+                {'hallucination': 'default', 'conciseness': 'default', 'completeness': 'default'}))
             # Split the source text
             text_splitter = CharacterTextSplitter()
             texts = text_splitter.split_text(source_text)
@@ -55,7 +56,12 @@
 
             # Display the Aimon Rely response
             st.header('Aimon Rely - Hallucination Detector Response')
-            st.json(ar_response)
+            st.json(ar_response['hallucination'])
+
+            st.header('Aimon Rely - Model Quality Detector Response')
+            st.json(ar_response['quality_metrics']['results'])
+
+
         except InvalidAPIKeyError as ivk:
             st.header(":red[ERROR: Add a valid Aimon API key.]")
             st.write("Request it at info@aimon.ai or on Discord.")
diff --git a/src/setup.py b/src/setup.py
@@ -3,5 +3,5 @@
     name = 'aimon_rely',
     python_requires='>3.8.0',
     packages = find_packages(),
-    version = "0.0.1",
+    version = "0.1.0",
 )

Original file line number	Diff line number	Diff line change
`@@ -3,5 +3,5 @@`
`3`	`3`	`name = 'aimon_rely',`
`4`	`4`	`python_requires='>3.8.0',`
`5`	`5`	`packages = find_packages(),`
`6`		`- version = "0.0.1",`
	`6`	`+ version = "0.1.0",`
`7`	`7`	`)`