Skip to content

Commit 935b94a

Browse files
pjoshi30Preetam Joshi
andauthored
Adding V2 API with support for conciseness, completeness and toxicity checks (#1)
* Adding V2 API with support for conciseness, completeness and toxicity checks. * Removing prints and updating config for the example application. * Updating README --------- Co-authored-by: Preetam Joshi <info@aimon.ai>
1 parent eecf944 commit 935b94a

File tree

7 files changed

+298
-58
lines changed

7 files changed

+298
-58
lines changed

README.md

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@ available in a future release.
2020
| Metric | Status |
2121
|--------------------------------------------------|--------------------------------------------------------------|
2222
| Model Hallucination (Passage and Sentence Level) | <span style="font-size: 24px; color: green;">&#10003;</span> |
23+
| Completeness | <span style="font-size: 24px; color: green;">&#10003;</span> |
24+
| Conciseness | <span style="font-size: 24px; color: green;">&#10003;</span> |
25+
| Toxicity | <span style="font-size: 24px; color: green;">&#10003;</span> |
2326
| Semantic Similarity | <span style="font-size: 24px;">⌛</span> |
24-
| Completeness | <span style="font-size: 24px;">⌛</span> |
25-
| Conciseness | <span style="font-size: 24px;">⌛</span> |
26-
| Toxicity | <span style="font-size: 24px;">⌛</span> |
2727
| Sentiment | <span style="font-size: 24px;">⌛</span> |
2828
| Coherence | <span style="font-size: 24px;">⌛</span> |
2929
| Sensitive Data (PII/PHI/PCI) | <span style="font-size: 24px;">⌛</span> |
@@ -56,6 +56,7 @@ to demonstrate the ease of integration.**
5656

5757
## Benchmarks
5858

59+
### Hallucination Detection
5960
To demonstrate the effectiveness of our system, we benchmarked it against popular industry benchmarks for the
6061
hallucination detection task. The table below shows our results.
6162

@@ -76,6 +77,11 @@ making it a suitable choice for both offline and online detection of hallucinati
7677
<img src="images/hallucination-benchmarks.png" alt="Hallucination Benchmarks">
7778
</div>
7879

80+
### Completeness, Conciseness Detection
81+
82+
There is currently a lack of industry-standard benchmark datasets for these tasks. We will be publishing an evaluation dataset soon.
83+
Stay Tuned! <span style="font-size: 16px;">⌛</span>
84+
7985
## Pricing
8086

8187
We offer a generous free tier and an attractive low-cost, low-latency API.

images/aimon-rely-app-example.gif

8.98 KB
Loading

postman_collections/aimon_hallucination_detection_beta.postman_collection.march2024.json

Lines changed: 129 additions & 17 deletions
Large diffs are not rendered by default.

src/aimon_rely_client/simple_client.py

Lines changed: 77 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,20 @@
11
from functools import wraps
22
import logging
3-
from typing import Callable, Type, Union, Tuple, Optional, List, Dict
3+
from typing import Callable, Type, Union, Tuple, Optional, List, Dict, Any
44
import random
55
import time
66
import requests
77

8+
89
def retry(
9-
exception_to_check: Union[Type[BaseException], Tuple[Type[BaseException], ...]],
10-
tries: int = 5,
11-
delay: int = 3,
12-
backoff: int = 2,
13-
logger: Optional[logging.Logger] = None,
14-
log_level: int = logging.WARNING,
15-
re_raise: bool = True,
16-
jitter: float = 0.1
10+
exception_to_check: Union[Type[BaseException], Tuple[Type[BaseException], ...]],
11+
tries: int = 5,
12+
delay: int = 3,
13+
backoff: int = 2,
14+
logger: Optional[logging.Logger] = None,
15+
log_level: int = logging.WARNING,
16+
re_raise: bool = True,
17+
jitter: float = 0.1
1718
) -> Callable:
1819
"""
1920
Retry calling the decorated function using an exponential backoff.
@@ -27,6 +28,7 @@ def retry(
2728
:param re_raise: Whether to re-raise the exception after the last retry.
2829
:param jitter: The maximum jitter to apply to the delay as a fraction of the delay.
2930
"""
31+
3032
def deco_retry(func: Callable) -> Callable:
3133
@wraps(func)
3234
def f_retry(*args, **kwargs):
@@ -56,50 +58,109 @@ def f_retry(*args, **kwargs):
5658
raise
5759

5860
return f_retry
61+
5962
return deco_retry
6063

64+
6165
class RetryableError(Exception):
    """
    Signals a failure that is worth retrying.

    ``SimpleAimonRelyClient.detect`` raises it when the API answers with HTTP 503 or 504,
    and that method is wrapped with ``@retry(RetryableError)``.
    """
6367

68+
6469
class InvalidAPIKeyError(Exception):
    """
    Signals that the Aimon API key is missing, a placeholder, or rejected.

    ``SimpleAimonRelyClient`` raises it from its constructor when the key is empty or still
    contains the "YOUR API KEY" placeholder, and from ``detect`` when the API answers HTTP 401.
    """
6671

72+
73+
class Config:
    """
    Detector configuration that is attached to requests sent to the Aimon API.

    After construction, ``self.detectors`` maps each enabled detector name to a dict of the
    form ``{'detector_name': <kind>}``.
    """
    # Detector names accepted by this client (the mapped value is the default kind).
    SUPPORTED_DETECTORS = {'hallucination': 'default', 'toxicity': 'default', 'conciseness': 'default',
                           'completeness': 'default'}
    # Detector kinds accepted by this client.
    SUPPORTED_VALUES = {'default'}

    def __init__(self, detectors: Optional[Dict[str, str]] = None):
        """
        A Config object for detectors to be used in the Aimon API.

        :param detectors: A dictionary containing names of detectors and the kind of detector to use.
                          When it is None or empty, only the hallucination detector is enabled.
                          Names that are not in SUPPORTED_DETECTORS are ignored.
        :raises ValueError: if any value (for any key) is not in SUPPORTED_VALUES.
        """
        if not detectors:
            detectors_enabled = {'hallucination': 'default'}
        else:
            detectors_enabled = {}
            for key, value in detectors.items():
                # Values are validated for every entry, even when the key itself is ignored below.
                if value not in self.SUPPORTED_VALUES:
                    raise ValueError(
                        "Value {} not supported, please contact the Aimon team on info@aimon.ai or on Discord for help".format(
                            value))
                if key in self.SUPPORTED_DETECTORS:
                    detectors_enabled[key] = value
        self.detectors = {key: {'detector_name': value} for key, value in detectors_enabled.items()}
98+
99+
67100
class SimpleAimonRelyClient(object):
    """
    A simple client that sends detection requests to the Aimon Rely inference API.
    """
    URL = "https://api.aimon.ai/v2/inference"
    DEFAULT_CONFIG = Config()

    def __init__(self, api_key: str, config: Config = DEFAULT_CONFIG):
        """
        :param api_key: the Aimon Rely API key. If you don't have one, request one by sending an email to info@aimon.ai
        :param config: The detector configuration that will be applied to every single request.
        :raises InvalidAPIKeyError: if the key is empty, None, or still contains the "YOUR API KEY" placeholder.
        """
        # `not api_key` also covers None, which would otherwise fail with a TypeError in len().
        if not api_key or "YOUR API KEY" in api_key:
            raise InvalidAPIKeyError("Enter a valid Aimon API key. Request it at info@aimon.ai or on Discord.")
        self.api_key = api_key
        self.config = config

    @retry(RetryableError)
    def detect(self, data_to_send: List[Dict[str, Any]]):
        """
        Sends an HTTP POST request to the Aimon Rely detection API.

        Items that do not carry their own "config" are sent with this client's detector
        configuration. The caller's dicts are not modified.

        :param data_to_send: An array of dict objects where each dict contains a "context", a "generated_text" and
                             optionally a "config" object
        :return: The first element of the JSON response, containing the following fields (if applicable):
                "hallucination": Indicates whether the response consisted of intrinsic or extrinsic hallucinations.
                    "is_hallucinated": top level string indicating if hallucinated or not,
                    "score": A score indicating the probability that the whole "generated_text" is hallucinated
                    "sentences": An array of objects where each object contains a sentence level hallucination "score" and
                                 the "text" of the sentence.
                "quality_metrics": A collection of quality metrics for the response of the LLM
                    "results": A dict containing results of response quality detectors like conciseness and completeness
                        "conciseness": This detector checks whether or not the response had un-necessary information
                                       for the given query and the context documents
                            "reasoning": An explanation of the score that was provided.
                            "score": A probability score of how concise the response is for the user query and context documents.
                        "completeness": This detector checks whether or not the response was complete enough for the
                                        given query and context documents
                            "reasoning": An explanation of the score that was provided.
                            "score": A probability score of how complete the response is for the user query and context documents.
                "toxicity": Indicates whether there was toxic content in the response. It uses 6 different label types for this.
                    "identity_hate": The response contained hateful content that calls out real or perceived "identity factors" of an individual or a group.
                    "insult": The response contained insulting content.
                    "obscene": The response contained lewd or disgusting words.
                    "threat": The response contained comments that threatened an individual or a group.
                    "severe_toxic", "toxic": The response did not fall into the above 4 labels but is still considered
                                             either severely toxic or generally toxic content.
        :raises RetryableError: on HTTP 503/504 (the retry decorator handles these).
        :raises InvalidAPIKeyError: on HTTP 401.
        :raises Exception: on any other non-200 status, or when the response body is empty or reports an error.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            'Content-Type': 'application/json'
        }
        # Build new dicts instead of writing 'config' into the caller's items.
        payload = [
            item if 'config' in item else {**item, 'config': self.config.detectors}
            for item in data_to_send
        ]
        response = requests.post(self.URL, json=payload, headers=headers, timeout=30)
        if response.status_code in [503, 504]:
            raise RetryableError("Status code: {} received".format(response.status_code))
        if response.status_code == 401:
            raise InvalidAPIKeyError("Use a valid Aimon API key. Request it at info@aimon.ai or on Discord.")
        if response.status_code != 200:
            raise Exception(f"Error, bad response: {response}")
        # Parse the body once; every check below works on the same decoded object.
        body = response.json()
        if len(body) == 0 or 'error' in body or 'error' in body[0]:
            raise Exception(
                f"Received an error in the response: {response if len(body) == 0 else body}")
        return body[0]

src/aimon_rely_client/simple_client_test.py

Lines changed: 73 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,42 +3,97 @@
33
# Generated by CodiumAI
44

55
import pytest
6-
from aimon_rely_client.simple_client import SimpleAimonRelyClient, RetryableError
6+
from aimon_rely_client.simple_client import SimpleAimonRelyClient, RetryableError, Config
77

88
API_KEY = "YOUR API KEY HERE"
99
class TestSimpleAimonRelyClient:
    """
    Integration tests for SimpleAimonRelyClient.

    Each test calls ``client.detect`` against the real service, so a valid key must be
    placed in the module-level ``API_KEY`` before running them.
    """

    # Shared context / generated text pair used by the quality-metric tests.
    CONTEXT = "the abc have reported that those who receive centrelink payments made up half of radio rental's income last year. Centrelink payments themselves were up 20%."
    GENERATED_TEXT = "those who receive centrelink payments made up half of radio rental's income last year. The Centrelink payments were 20% up."

    @staticmethod
    def _assert_hallucination(response, expected_flag, expected_sentences):
        """Check the shape of the "hallucination" section and its top-level verdict."""
        assert "hallucination" in response
        hallucination = response['hallucination']
        assert "is_hallucinated" in hallucination
        assert hallucination["is_hallucinated"] == expected_flag
        assert "score" in hallucination
        assert "sentences" in hallucination
        assert len(hallucination["sentences"]) == expected_sentences

    @staticmethod
    def _quality_score(response, metric):
        """Check the shape of one quality metric and return its score."""
        # Assert key presence before touching the value so failures are assertion errors, not KeyErrors.
        assert "quality_metrics" in response
        assert "results" in response["quality_metrics"]
        results = response["quality_metrics"]["results"]
        assert metric in results
        assert "reasoning" in results[metric]
        assert "score" in results[metric]
        return results[metric]["score"]

    # Sends an HTTP POST request to the Aimon Rely Hallucination Detection API with valid data and receives a valid response
    def test_valid_data_valid_response(self):
        config = Config({'hallucination': 'default'})
        client = SimpleAimonRelyClient(api_key=API_KEY, config=config)
        data_to_send = [{"context": "This is the context", "generated_text": "This is the context"}]
        response = client.detect(data_to_send)
        self._assert_hallucination(response, "False", 1)

    # Sends an HTTP POST request to the Aimon Rely Hallucination Detection API with a single dict object containing a valid "context" and "generated_text" but with a very short text and receives a valid response
    def test_short_text_valid_response(self):
        config = Config({'hallucination': 'default'})
        client = SimpleAimonRelyClient(api_key=API_KEY, config=config)
        short_text = "a"
        data_to_send = [{"context": "This is the context", "generated_text": short_text}]
        response = client.detect(data_to_send)
        self._assert_hallucination(response, "True", 1)

    # Sends an HTTP POST request to the Aimon Rely Hallucination Detection API with a single dict object containing a valid "context" and "generated_text" but with a text containing special characters and receives a valid response
    def test_special_characters_valid_response(self):
        # Deliberately uses the client's default configuration.
        client = SimpleAimonRelyClient(api_key=API_KEY)
        special_text = "!@#$%^&*()_+"
        data_to_send = [{"context": "This is the context", "generated_text": special_text}]
        response = client.detect(data_to_send)
        self._assert_hallucination(response, "True", 12)

    def test_valid_data_valid_response_conciseness(self):
        config = Config({'conciseness': 'default'})
        client = SimpleAimonRelyClient(api_key=API_KEY, config=config)
        data_to_send = [{"context": self.CONTEXT, "generated_text": self.GENERATED_TEXT}]
        response = client.detect(data_to_send)
        assert self._quality_score(response, "conciseness") >= 0.7

    def test_valid_data_valid_response_completeness(self):
        config = Config({'completeness': 'default'})
        client = SimpleAimonRelyClient(api_key=API_KEY, config=config)
        data_to_send = [{"context": self.CONTEXT, "generated_text": self.GENERATED_TEXT}]
        response = client.detect(data_to_send)
        assert self._quality_score(response, "completeness") > 0.7

    def test_valid_data_valid_response_conciseness_completeness(self):
        config = Config({'conciseness': 'default', 'completeness': 'default'})
        client = SimpleAimonRelyClient(api_key=API_KEY, config=config)
        data_to_send = [{"context": self.CONTEXT, "generated_text": self.GENERATED_TEXT}]
        response = client.detect(data_to_send)
        assert self._quality_score(response, "completeness") > 0.7
        assert self._quality_score(response, "conciseness") > 0.7

src/examples/langchain_summarization_app.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from langchain.llms.openai import OpenAI
1010
from langchain.chains.summarize import load_summarize_chain
1111

12-
from aimon_rely_client.simple_client import SimpleAimonRelyClient, InvalidAPIKeyError
12+
from aimon_rely_client.simple_client import SimpleAimonRelyClient, Config, InvalidAPIKeyError
1313

1414
# Streamlit app
1515
st.title('LangChain Text Summarizer')
@@ -26,7 +26,8 @@
2626
st.write(f"Please complete the missing fields.")
2727
else:
2828
try:
29-
aimon_rely_client = SimpleAimonRelyClient(aimon_api_key)
29+
aimon_rely_client = SimpleAimonRelyClient(aimon_api_key, config=Config(
30+
{'hallucination': 'default', 'conciseness': 'default', 'completeness': 'default'}))
3031
# Split the source text
3132
text_splitter = CharacterTextSplitter()
3233
texts = text_splitter.split_text(source_text)
@@ -55,7 +56,12 @@
5556

5657
# Display the Aimon Rely response
5758
st.header('Aimon Rely - Hallucination Detector Response')
58-
st.json(ar_response)
59+
st.json(ar_response['hallucination'])
60+
61+
st.header('Aimon Rely - Model Quality Detector Response')
62+
st.json(ar_response['quality_metrics']['results'])
63+
64+
5965
except InvalidAPIKeyError as ivk:
6066
st.header(":red[ERROR: Add a valid Aimon API key.]")
6167
st.write("Request it at info@aimon.ai or on Discord.")

src/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,5 @@
33
name = 'aimon_rely',
44
python_requires='>3.8.0',
55
packages = find_packages(),
6-
version = "0.0.1",
6+
version = "0.1.0",
77
)

0 commit comments

Comments
 (0)