Skip to content

Commit 4d42d33

Browse files
pjoshi30 and Preetam Joshi
authored
Adding support for an output field in the analyze_eval decorator. Updating example notebooks. (#37)
Co-authored-by: Preetam Joshi <info@aimon.ai>
1 parent 2cb551f commit 4d42d33

File tree

6 files changed

+83
-60
lines changed

6 files changed

+83
-60
lines changed

.DS_Store

-6 KB
Binary file not shown.

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
# Pytest
22
*.pytest_cache
33

4+
*.DS_Store
5+
.DS_Store
6+
47
# pycache
58
*__pycache__*
69

aimon/decorators/analyze.py

Lines changed: 42 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from functools import wraps
22
from .common import AimonClientSingleton
3-
3+
import inspect
44

55
class Application:
66
def __init__(self, name, stage="evaluation", type="text", metadata={}):
@@ -18,7 +18,6 @@ def __init__(self, name, model_type, metadata={}):
1818

1919

2020
class AnalyzeBase:
21-
2221
DEFAULT_CONFIG = {'hallucination': {'detector_name': 'default'}}
2322

2423
def __init__(self, application, model, api_key=None, config=None):
@@ -54,30 +53,35 @@ def initialize(self):
5453

5554
class AnalyzeEval(AnalyzeBase):
5655

57-
def __init__(self, application, model, evaluation_name, dataset_collection_name,
56+
def __init__(self, application, model, evaluation_name, dataset_collection_name, headers,
5857
api_key=None, eval_tags=None, config=None):
5958
"""
6059
The wrapped function should have a signature as follows:
61-
62-
def func(context_docs, user_query, prompt, *args, **kwargs):
60+
def func(context_docs, user_query, prompt, instructions, *args, **kwargs):
6361
# Your code here
6462
return output
65-
The first argument must be a context_docs which is of type List[str]
66-
The second argument must be a user_query which is of type str
67-
The third argument must be a prompt which is of type str
63+
[Required] The first argument must be a 'context_docs' which is of type List[str].
64+
[Required] The second argument must be a 'user_query' which is of type str.
65+
[Optional] The third argument must be a 'prompt' which is of type str.
66+
[Optional] If an 'instructions' column is present in the dataset, then the fourth argument
67+
must be 'instructions' which is of type str
68+
[Optional] If an 'output' column is present in the dataset, then the fifth argument
69+
must be 'output' which is of type str
6870
Return: The function must return an output which is of type str
6971
7072
:param application: An Application object
7173
:param model: A Model object
7274
:param evaluation_name: The name of the evaluation
7375
:param dataset_collection_name: The name of the dataset collection
76+
:param headers: A list containing the headers to be used for the evaluation
7477
:param api_key: The API key to use for the AIMon client
7578
:param eval_tags: A list of tags to associate with the evaluation
7679
:param config: A dictionary containing the AIMon configuration for the evaluation
7780
7881
7982
"""
8083
super().__init__(application, model, api_key, config)
84+
self.headers = headers
8185
self.evaluation_name = evaluation_name
8286
self.dataset_collection_name = dataset_collection_name
8387
self.eval_tags = eval_tags
@@ -111,11 +115,27 @@ def _run_eval(self, func, args, kwargs):
111115
dataset_collection_records.extend(dataset_records)
112116
results = []
113117
for record in dataset_collection_records:
114-
if "instructions" in record and "instruction_adherence" in self.config:
115-
# Only pass instructions if instruction_adherence is specified in the config
116-
result = func(record["context_docs"], record["user_query"], record["prompt"], record["instructions"], *args, **kwargs)
117-
else:
118-
result = func(record["context_docs"], record["user_query"], record["prompt"], *args, **kwargs)
118+
# The record must contain the context_docs and user_query fields.
119+
# The prompt, output and instructions fields are optional.
120+
# Inspect the record and call the function with the appropriate arguments
121+
arguments = []
122+
for ag in self.headers:
123+
if ag not in record:
124+
raise ValueError("Record must contain the column '{}' as specified in the 'headers'"
125+
" argument in the decorator".format(ag))
126+
arguments.append(record[ag])
127+
# Inspect the function signature to ensure that it accepts the correct arguments
128+
sig = inspect.signature(func)
129+
params = sig.parameters
130+
if len(params) < len(arguments):
131+
raise ValueError("Function must accept at least {} arguments".format(len(arguments)))
132+
# Ensure that the first len(arguments) parameters are named correctly
133+
param_names = list(params.keys())
134+
if param_names[:len(arguments)] != self.headers:
135+
raise ValueError("Function arguments must be named as specified by the 'headers' argument: {}".format(
136+
self.headers))
137+
138+
result = func(*arguments, *args, **kwargs)
119139
_context = record['context_docs'] if isinstance(record['context_docs'], list) else [record['context_docs']]
120140
payload = {
121141
"application_id": self._am_app.id,
@@ -127,6 +147,9 @@ def _run_eval(self, func, args, kwargs):
127147
"evaluation_id": self._eval.id,
128148
"evaluation_run_id": eval_run.id,
129149
}
150+
if "instruction_adherence" in self.config and "instructions" not in record:
151+
raise ValueError("When instruction_adherence is specified in the config, "
152+
"'instructions' must be present in the dataset")
130153
if "instructions" in record and "instruction_adherence" in self.config:
131154
# Only pass instructions if instruction_adherence is specified in the config
132155
payload["instructions"] = record["instructions"] or ""
@@ -138,6 +161,7 @@ def __call__(self, func):
138161
@wraps(func)
139162
def wrapper(*args, **kwargs):
140163
return self._run_eval(func, args, kwargs)
164+
141165
return wrapper
142166

143167

@@ -164,9 +188,11 @@ def __init__(self, application, model, values_returned, api_key=None, config=Non
164188
if "context" not in self.values_returned:
165189
raise ValueError("values_returned must contain 'context'")
166190
if "instruction_adherence" in self.config and "instructions" not in self.values_returned:
167-
raise ValueError("When instruction_adherence is specified in the config, 'instructions' must be returned by the decorated function")
191+
raise ValueError(
192+
"When instruction_adherence is specified in the config, 'instructions' must be returned by the decorated function")
168193
if "instructions" in self.values_returned and "instruction_adherence" not in self.config:
169-
raise ValueError("instruction_adherence must be specified in the config for returning 'instructions' by the decorated function")
194+
raise ValueError(
195+
"instruction_adherence must be specified in the config for returning 'instructions' by the decorated function")
170196
self.config = config if config else self.DEFAULT_CONFIG
171197

172198
def _run_production_analysis(self, func, args, kwargs):
@@ -197,7 +223,7 @@ def _run_production_analysis(self, func, args, kwargs):
197223
aimon_payload['instructions'] = result_dict['instructions']
198224
if 'actual_request_timestamp' in result_dict:
199225
aimon_payload["actual_request_timestamp"] = result_dict['actual_request_timestamp']
200-
226+
201227
aimon_payload['config'] = self.config
202228
aimon_response = self.client.analyze.create(body=[aimon_payload])
203229
return result + (aimon_response,)

examples/notebooks/aimon_evaluation_decorators_langchain_summarization.ipynb

Lines changed: 18 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@
77
"metadata": {},
88
"outputs": [],
99
"source": [
10-
"!pip install langchain langchain-community openai tiktoken --quiet"
10+
"!pip install langchain langchain-community langchain-openai tiktoken --quiet"
1111
]
1212
},
1313
{
1414
"cell_type": "code",
15-
"execution_count": 2,
15+
"execution_count": 10,
1616
"id": "6e6a72d8-c7ab-4393-ad0d-9edc06159be8",
1717
"metadata": {},
1818
"outputs": [],
@@ -22,7 +22,7 @@
2222
},
2323
{
2424
"cell_type": "code",
25-
"execution_count": 3,
25+
"execution_count": 11,
2626
"id": "244a9542-8b05-440f-996d-4ef6919f6c08",
2727
"metadata": {},
2828
"outputs": [],
@@ -33,7 +33,7 @@
3333
},
3434
{
3535
"cell_type": "code",
36-
"execution_count": 4,
36+
"execution_count": 12,
3737
"id": "a7cf7522-c09c-4c4a-b714-f57527de6974",
3838
"metadata": {},
3939
"outputs": [],
@@ -59,7 +59,7 @@
5959
},
6060
{
6161
"cell_type": "code",
62-
"execution_count": 5,
62+
"execution_count": 13,
6363
"id": "b73cf729-d349-4bbe-99bc-bbcfb8101bf0",
6464
"metadata": {},
6565
"outputs": [],
@@ -100,7 +100,7 @@
100100
},
101101
{
102102
"cell_type": "code",
103-
"execution_count": 6,
103+
"execution_count": 14,
104104
"id": "59cc29c4-1cde-4b03-a489-7f055fedf2de",
105105
"metadata": {},
106106
"outputs": [],
@@ -125,14 +125,15 @@
125125
},
126126
{
127127
"cell_type": "code",
128-
"execution_count": 7,
128+
"execution_count": 19,
129129
"id": "790a85fa-3026-4ea9-94e4-8bbb22cdb6b8",
130130
"metadata": {},
131131
"outputs": [],
132132
"source": [
133133
"analyze_eval = AnalyzeEval(\n",
134-
" Application(\"llm_marketing_summarization_app_v2\"),\n",
134+
" Application(\"llm_marketing_summarization_app_v3\"),\n",
135135
" Model(\"my_gpt4_model_fine_tuned\", \"GPT-4\"), \n",
136+
" headers=['context_docs', 'user_query', 'prompt', 'instructions'],\n",
136137
" api_key=os.getenv(\"AIMON_API_KEY\"),\n",
137138
" evaluation_name=\"simple_eval\",\n",
138139
" dataset_collection_name=\"my_first_dataset_collection_aug_9_2024\",\n",
@@ -142,7 +143,7 @@
142143
},
143144
{
144145
"cell_type": "code",
145-
"execution_count": 8,
146+
"execution_count": 20,
146147
"id": "af98abfc-c04d-4cc7-ba3c-62f550de0c99",
147148
"metadata": {},
148149
"outputs": [],
@@ -153,6 +154,9 @@
153154
"from langchain.llms.openai import OpenAI\n",
154155
"from langchain.chains.summarize import load_summarize_chain\n",
155156
"\n",
157+
"# eval = Eval(aimon_config)\n",
158+
"# eval.dataset_collection(data_coll_name)\n",
159+
"\n",
156160
"# The analyze_eval decorator will automatically stream through\n",
157161
"# records in the specified data collection and run it against \n",
158162
"# this function. The signature of this function should necessarily \n",
@@ -172,41 +176,30 @@
172176
" # Initialize the OpenAI module, load and run the summarize chain\n",
173177
" llm = OpenAI(temperature=0, openai_api_key=openai_api_key)\n",
174178
" chain = load_summarize_chain(llm, chain_type=\"map_reduce\")\n",
175-
" return chain.run(docs)"
179+
" return chain.run(docs) "
176180
]
177181
},
178182
{
179183
"cell_type": "code",
180-
"execution_count": 9,
184+
"execution_count": 21,
181185
"id": "bcdddfa8-43c7-446a-9337-3ad0f16a015e",
182186
"metadata": {},
183-
"outputs": [
184-
{
185-
"name": "stderr",
186-
"output_type": "stream",
187-
"text": [
188-
"/var/folders/7l/300zf44j5v9c43jpqdbhkl0h0000gn/T/ipykernel_21311/1548092027.py:24: LangChainDeprecationWarning: The class `OpenAI` was deprecated in LangChain 0.0.10 and will be removed in 1.0. An updated version of the class exists in the langchain-openai package and should be used instead. To use it run `pip install -U langchain-openai` and import as `from langchain_openai import OpenAI`.\n",
189-
" llm = OpenAI(temperature=0, openai_api_key=openai_api_key)\n",
190-
"/var/folders/7l/300zf44j5v9c43jpqdbhkl0h0000gn/T/ipykernel_21311/1548092027.py:26: LangChainDeprecationWarning: The method `Chain.run` was deprecated in langchain 0.1.0 and will be removed in 1.0. Use invoke instead.\n",
191-
" return chain.run(docs)\n"
192-
]
193-
}
194-
],
187+
"outputs": [],
195188
"source": [
196189
"aimon_eval_res = run_application_eval_mode()"
197190
]
198191
},
199192
{
200193
"cell_type": "code",
201-
"execution_count": 10,
194+
"execution_count": 22,
202195
"id": "d6e94b08-0e2a-4860-a4f2-30d9ddea17af",
203196
"metadata": {},
204197
"outputs": [
205198
{
206199
"name": "stdout",
207200
"output_type": "stream",
208201
"text": [
209-
"[(' Acme recently launched version 2.1 of their Python library, which has deep integrations with the Python ecosystem and has been proven to be beneficial for developers. This new version includes features like async support and improved error handling.', AnalyzeCreateResponse(message='Data successfully sent to AIMon.', status=200)), ('\\n\\nTo configure the Acme python client, follow the official documentation which includes setting up environment variables and installing dependencies for both basic and advanced setups.', AnalyzeCreateResponse(message='Data successfully sent to AIMon.', status=200)), (' The Acme python client is compatible with Python 3.6+ and multiple databases, including MySQL, PostgreSQL, and MongoDB. It is also suitable for cross-language projects with Node.js.', AnalyzeCreateResponse(message='Data successfully sent to AIMon.', status=200)), (' The Acme python client may have installation, package conflicts, and connectivity issues. Troubleshooting involves checking the Python environment, dependencies, and log files, with specific error resolutions available in the online help section.', AnalyzeCreateResponse(message='Data successfully sent to AIMon.', status=200)), (' Acme recently launched version 2.1 of their Python library, which has deep integrations with the Python ecosystem and has been proven to be beneficial for developers. This new version includes features like async support and improved error handling.', AnalyzeCreateResponse(message='Data successfully sent to AIMon.', status=200)), ('\\n\\nTo configure the Acme python client, environment variables must be set up and dependencies must be installed. Detailed instructions for both basic and advanced setups can be found in the official documentation.', AnalyzeCreateResponse(message='Data successfully sent to AIMon.', status=200)), (' The Acme python client is compatible with Python 3.6+ and multiple databases, including MySQL, PostgreSQL, and MongoDB. 
It is also suitable for cross-language projects with Node.js.', AnalyzeCreateResponse(message='Data successfully sent to AIMon.', status=200)), (' The Acme python client may have installation, package conflicts, and connectivity issues. Troubleshooting involves checking the Python environment, dependencies, and log files, with specific error resolutions available in the online help section.', AnalyzeCreateResponse(message='Data successfully sent to AIMon.', status=200))]\n"
202+
"[(' Acme recently launched version 2.1 of their Python library, which has deep integrations with the Python ecosystem and has been proven to be valuable for developers. This new version includes features like async support and improved error handling. Acme also supports Javascript and Java.', AnalyzeCreateResponse(message='Data successfully sent to AIMon.', status=200)), ('\\n\\nTo configure the Acme python client, follow the official documentation which includes setting up environment variables and installing dependencies for both basic and advanced setups.', AnalyzeCreateResponse(message='Data successfully sent to AIMon.', status=200)), (' The Acme python client is compatible with Python 3.6+ and multiple databases, including MySQL, PostgreSQL, and MongoDB. It is also suitable for cross-language projects with Node.js.', AnalyzeCreateResponse(message='Data successfully sent to AIMon.', status=200)), (' The Acme python client may have installation, package conflicts, and connectivity issues. Troubleshooting involves checking the Python environment, dependencies, and log files, with specific error resolutions available in the online help section.', AnalyzeCreateResponse(message='Data successfully sent to AIMon.', status=200)), (' Acme recently launched version 2.1 of their Python library, which has deep integrations with the Python ecosystem and has been proven to be valuable for developers. This new version includes features like async support and improved error handling.', AnalyzeCreateResponse(message='Data successfully sent to AIMon.', status=200)), ('\\n\\nTo configure the Acme python client, follow the official documentation which includes setting up environment variables and installing dependencies for both basic and advanced setups.', AnalyzeCreateResponse(message='Data successfully sent to AIMon.', status=200)), (' The Acme python client is compatible with Python 3.6+ and multiple databases, including MySQL, PostgreSQL, and MongoDB. 
It is also suitable for cross-language projects with Node.js.', AnalyzeCreateResponse(message='Data successfully sent to AIMon.', status=200)), (' The Acme python client may have installation, package conflicts, and connectivity issues. Troubleshooting involves checking the Python environment, dependencies, and log files, with specific error resolutions available in the online help section.', AnalyzeCreateResponse(message='Data successfully sent to AIMon.', status=200))]\n"
210203
]
211204
}
212205
],

0 commit comments

Comments
 (0)