Skip to content

Commit 1bb309c

Browse files
committed
add code for v2.1.4
Signed-off-by: kiranscaria <kiranscaria@outlook.com>
1 parent e329340 commit 1bb309c

11 files changed

+128
-178
lines changed

pyproject.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,12 @@ requires-python = ">=3.9,<3.13"
1111

1212
version = "2.1.4"
1313
authors = [
14-
{name = "Kiran Scaria", email = "kiranscaria@outlook.com"},
14+
{name = "Kiran Scaria", email = "kiran.scaria@raga.ai"},
15+
{name = "Kedar Gaikwad", email = "kedar.gaikwad@raga.ai"},
16+
{name = "Dushyant Mahajan", email = "dushyant.mahajan@raga.ai"},
1517
{name = "Siddhartha Kosti", email = "siddhartha.kosti@raga.ai"},
1618
{name = "Ritika Goel", email = "ritika.goel@raga.ai"},
17-
{name = "Vijay Chaurasia", email="vijay.chaurasia@raga.ai"},
18-
{name = "Tanaya Pakhale", email="tanaya.pakhale@raga.ai"},
19-
{name = "Tushar Kumar", email="tushar.kumar@raga.ai"},
19+
{name = "Vijay Chaurasia", email="vijay.chaurasia@raga.ai"}
2020
]
2121

2222
dependencies = [

ragaai_catalyst/dataset.py

Lines changed: 4 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -10,33 +10,6 @@
1010

1111

1212
class Dataset:
13-
"""
14-
A class to manage datasets within a RagaAI Catalyst project.
15-
16-
This class provides functionality to interact with datasets in a RagaAI project,
17-
including listing datasets, retrieving schema mappings, managing dataset columns,
18-
and creating new datasets from CSV files.
19-
20-
Attributes:
21-
BASE_URL (str): Base URL for the RagaAI Catalyst API endpoint
22-
TIMEOUT (int): Request timeout in seconds
23-
project_name (str): Name of the project to work with
24-
project_id (str): ID of the project retrieved from the API
25-
num_projects (int): Maximum number of projects to retrieve
26-
27-
Args:
28-
project_name (str): The name of the project to initialize the Dataset instance with
29-
30-
Raises:
31-
ValueError: If the provided project name is not found
32-
requests.exceptions.RequestException: If there are any issues with API communication
33-
34-
Example:
35-
>>> dataset = Dataset("my_project")
36-
>>> available_datasets = dataset.list_datasets()
37-
>>> schema = dataset.get_schema_mapping()
38-
"""
39-
4013
BASE_URL = None
4114
TIMEOUT = 30
4215

@@ -106,8 +79,8 @@ def make_request():
10679
response = make_request()
10780
response_checker(response, "Dataset.list_datasets")
10881
if response.status_code == 401:
109-
get_token()
110-
response = make_request()
82+
get_token() # Fetch a new token and set it in the environment
83+
response = make_request() # Retry the request
11184
if response.status_code != 200:
11285
return {
11386
"status_code": response.status_code,
@@ -121,13 +94,6 @@ def make_request():
12194
raise
12295

12396
def get_schema_mapping(self):
124-
"""
125-
Retrieves the schema mapping elements for the project.
126-
127-
Returns:
128-
dict: A dictionary containing the schema elements for the project.
129-
"""
130-
13197
headers = {
13298
"Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
13399
"X-Project-Name": self.project_name,
@@ -147,19 +113,9 @@ def get_schema_mapping(self):
147113
logger.error(f"Failed to get CSV schema: {e}")
148114
raise
149115

150-
116+
###################### CSV Upload APIs ###################
151117

152118
def get_dataset_columns(self, dataset_name):
153-
"""
154-
Retrieves the column names for a specific dataset.
155-
156-
Args:
157-
dataset_name (str): Name of the dataset to retrieve columns for
158-
159-
Returns:
160-
list: A list of column names in the dataset.
161-
"""
162-
163119
list_dataset = self.list_datasets()
164120
if dataset_name not in list_dataset:
165121
raise ValueError(f"Dataset {dataset_name} does not exists. Please enter a valid dataset name")
@@ -206,15 +162,6 @@ def get_dataset_columns(self, dataset_name):
206162
raise
207163

208164
def create_from_csv(self, csv_path, dataset_name, schema_mapping):
209-
"""
210-
Creates a new dataset from a CSV file.
211-
212-
Args:
213-
csv_path (str): Path to the CSV file to upload
214-
dataset_name (str): Name for the new dataset
215-
schema_mapping (dict): Mapping of column names to their schema types
216-
"""
217-
218165
list_dataset = self.list_datasets()
219166
if dataset_name in list_dataset:
220167
raise ValueError(f"Dataset name {dataset_name} already exists. Please enter a unique dataset name")
@@ -278,6 +225,7 @@ def put_csv_to_presignedUrl(url):
278225
logger.error(f"Error in put_csv_to_presignedUrl: {e}")
279226
raise
280227

228+
## Upload csv to elastic
281229
def upload_csv_to_elastic(data):
282230
header = {
283231
'Content-Type': 'application/json',

ragaai_catalyst/evaluation.py

Lines changed: 7 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -8,32 +8,6 @@
88
logger = logging.getLogger(__name__)
99

1010
class Evaluation:
11-
"""
12-
A class to handle LLM evaluation tasks using the RagaAI Catalyst API.
13-
14-
This class provides functionality to:
15-
- Initialize evaluation projects and datasets
16-
- List available metrics
17-
- Add new metrics for evaluation
18-
- Track evaluation job status
19-
- Retrieve evaluation results
20-
21-
Example:
22-
>>> evaluation = Evaluation("my_project", "my_dataset")
23-
>>> metrics = evaluation.list_metrics()
24-
>>> evaluation.add_metrics([{
25-
... "name": "metric_name",
26-
... "config": {"provider": "openai"},
27-
... "column_name": "result_column",
28-
... "schema_mapping": {"input": "user_input"}
29-
... }])
30-
>>> evaluation.get_status()
31-
>>> results = evaluation.get_results()
32-
33-
Notes:
34-
- Requires RAGAAI_CATALYST_TOKEN environment variable to be set
35-
- API calls are made with a default timeout of 10 seconds
36-
"""
3711

3812
def __init__(self, project_name, dataset_name):
3913
self.project_name = project_name
@@ -98,13 +72,6 @@ def __init__(self, project_name, dataset_name):
9872

9973

10074
def list_metrics(self):
101-
"""
102-
Retrieve list of available metrics for evaluation.
103-
104-
Returns:
105-
list: List of metric names available for evaluation
106-
"""
107-
10875
headers = {
10976
"Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
11077
'X-Project-Id': str(self.project_id),
@@ -157,6 +124,7 @@ def _get_dataset_id_based_on_dataset_type(self, metric_to_evaluate):
157124

158125

159126
def _get_dataset_schema(self, metric_to_evaluate=None):
127+
# dataset_id depends on the type of metric_to_evaluate
160128
data_set_id=self._get_dataset_id_based_on_dataset_type(metric_to_evaluate)
161129
self.dataset_id=data_set_id
162130

@@ -215,6 +183,7 @@ def _get_mapping(self, metric_name, metrics_schema, schema_mapping):
215183
if schema["name"]==metric_name:
216184
requiredFields = schema["config"]["requiredFields"]
217185

186+
#this is added to check if "Chat" column is required for metric evaluation
218187
required_variables = [_["name"].lower() for _ in requiredFields]
219188
if "chat" in required_variables:
220189
metric_to_evaluate = "chat"
@@ -278,7 +247,9 @@ def _update_base_json(self, metrics):
278247
base_json = self._get_metricParams()
279248
base_json["metricSpec"]["name"] = metric["name"]
280249

250+
#parsing model configuration
281251
for key, value in metric["config"].items():
252+
#checking if provider is one of the allowed providers
282253
if key.lower()=="provider" and value.lower() not in sub_providers:
283254
raise ValueError("Enter a valid provider name. The following Provider names are supported: openai, azure, gemini, groq, anthropic, bedrock")
284255

@@ -290,6 +261,8 @@ def _update_base_json(self, metrics):
290261
base_json["metricSpec"]["config"]["params"][key] = {f"{key_thres}":value_thres}
291262
else:
292263
base_json["metricSpec"]["config"]["params"][key] = {"value": value}
264+
265+
293266
# if metric["config"]["model"]:
294267
# base_json["metricSpec"]["config"]["params"]["model"]["value"] = metric["config"]["model"]
295268
base_json["metricSpec"]["displayName"] = metric["column_name"]
@@ -330,31 +303,7 @@ def _get_executed_metrics_list(self):
330303
return []
331304

332305
def add_metrics(self, metrics):
333-
"""
334-
Add metrics for evaluation.
335-
336-
Args:
337-
metrics (list): List of metric configurations. Each metric should be a dict with:
338-
- name (str): Name of the metric
339-
- config (dict): Metric configuration including provider and parameters
340-
- column_name (str): Name for the results column
341-
- schema_mapping (dict): Mapping between dataset columns and metric fields
342-
343-
Example:
344-
>>> metrics = [{
345-
... "name": "answer_relevancy",
346-
... "config": {
347-
... "provider": "openai",
348-
... "threshold": {"gte": 0.7}
349-
... },
350-
... "column_name": "relevancy_score",
351-
... "schema_mapping": {
352-
... "question": "user_question",
353-
... "answer": "model_response"
354-
... }
355-
... }]
356-
>>> evaluation.add_metrics(metrics)
357-
"""
306+
#Check each metric for missing required keys
358307
required_keys = {"name", "config", "column_name", "schema_mapping"}
359308
for metric in metrics:
360309
missing_keys = required_keys - metric.keys()
@@ -404,13 +353,6 @@ def add_metrics(self, metrics):
404353
logger.error(f"An unexpected error occurred: {e}")
405354

406355
def get_status(self):
407-
"""
408-
Check the status of the current evaluation job.
409-
410-
Prints the current status and provides a URL to track progress.
411-
Status can be: "Failed", "In Progress", or "Completed"
412-
"""
413-
414356
headers = {
415357
'Content-Type': 'application/json',
416358
"Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
@@ -442,9 +384,6 @@ def get_status(self):
442384
logger.error(f"An unexpected error occurred: {e}")
443385

444386
def get_results(self):
445-
"""
446-
Retrieve the results of the evaluation.
447-
"""
448387

449388
def get_presignedUrl():
450389
headers = {

ragaai_catalyst/experiment.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,9 @@ def __init__(
5555
timeout=10,
5656
)
5757
response.raise_for_status()
58+
# logger.debug("Projects list retrieved successfully")
5859
experiment_list = [exp["name"] for project in response.json()["data"]["content"] if project["name"] == self.project_name for exp in project["experiments"]]
60+
# print(experiment_list)
5961
if self.experiment_name in experiment_list:
6062
raise ValueError("The experiment name already exists in the project. Enter a unique experiment name.")
6163

@@ -103,6 +105,7 @@ def _check_if_dataset_exists(self,project_name,dataset_name):
103105

104106

105107
def _check_if_project_exists(self,project_name,num_projects=100):
108+
# TODO: 1. List All projects
106109
params = {
107110
"size": str(num_projects),
108111
"page": "0",
@@ -124,6 +127,8 @@ def _check_if_project_exists(self,project_name,num_projects=100):
124127
project["name"] for project in response.json()["data"]["content"]
125128
]
126129

130+
# TODO: 2. Check if the given project_name exists
131+
# TODO: 3. Return bool (True / False output)
127132
exists = project_name in project_list
128133
if exists:
129134
logger.info(f"Project '{project_name}' exists.")
@@ -162,8 +167,8 @@ def make_request():
162167
response = make_request()
163168
response_checker(response, "Experiment.list_experiments")
164169
if response.status_code == 401:
165-
get_token()
166-
response = make_request()
170+
get_token() # Fetch a new token and set it in the environment
171+
response = make_request() # Retry the request
167172
if response.status_code != 200:
168173
return {
169174
"status_code": response.status_code,
@@ -466,6 +471,7 @@ def parse_response(self, response):
466471
x.columns = x.columns.str.replace("_reason_status", "_status")
467472

468473
columns_list = x.columns.tolist()
474+
#remove trace_uri from columns_list if it exists
469475
columns_list = list(set(columns_list) - {"trace_uri"})
470476
x = x[columns_list]
471477

ragaai_catalyst/internal_api_completion.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def api_completion(messages, model_config, kwargs):
4242
df = pd.DataFrame(json_data)
4343
return(df)
4444
except json.JSONDecodeError:
45-
attempts += 1
45+
attempts += 1 # Increment attempts if JSON parsing fails
4646
if attempts == 3:
4747
raise Exception("Failed to generate a valid response after multiple attempts.")
4848

ragaai_catalyst/prompt_manager.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def __init__(self, project_name):
2323
self.project_name = project_name
2424
self.base_url = f"{RagaAICatalyst.BASE_URL}/playground/prompt"
2525
self.timeout = 10
26-
self.size = 99999
26+
self.size = 99999 #Number of projects to fetch
2727

2828
try:
2929
response = requests.get(
@@ -423,7 +423,7 @@ def _convert_value(self, value, type_):
423423
return float(value)
424424
elif type_ == "int":
425425
return int(value)
426-
return value
426+
return value # Default case, return as is
427427

428428
def get_model_parameters(self):
429429
"""

ragaai_catalyst/ragaai_catalyst.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
class RagaAICatalyst:
1010
BASE_URL = None
11-
TIMEOUT = 10
11+
TIMEOUT = 10 # Default timeout in seconds
1212

1313
def __init__(
1414
self,
@@ -64,8 +64,10 @@ def __init__(
6464
"The provided base_url is not accessible. Please re-check the base_url."
6565
)
6666
else:
67+
# Get the token from the server
6768
self.get_token()
6869

70+
# Set the API keys, if available
6971
if self.api_keys:
7072
self._upload_keys()
7173

@@ -158,6 +160,7 @@ def get_token() -> Union[str, None]:
158160
timeout=RagaAICatalyst.TIMEOUT,
159161
)
160162

163+
# Handle specific status codes before raising an error
161164
if response.status_code == 400:
162165
token_response = response.json()
163166
if token_response.get("message") == "Please enter valid credentials":
@@ -195,7 +198,7 @@ def project_use_cases(self):
195198
headers=headers,
196199
timeout=self.TIMEOUT
197200
)
198-
response.raise_for_status()
201+
response.raise_for_status() # Use raise_for_status to handle HTTP errors
199202
usecase = response.json()["data"]["usecase"]
200203
return usecase
201204
except requests.exceptions.RequestException as e:
@@ -214,7 +217,7 @@ def create_project(self, project_name, usecase="Q/A", type="llm"):
214217
Returns:
215218
str: A message indicating the success or failure of the project creation.
216219
"""
217-
220+
# Check if the project already exists
218221
existing_projects = self.list_projects()
219222
if project_name in existing_projects:
220223
raise ValueError(f"Project name '{project_name}' already exists. Please choose a different name.")
@@ -375,6 +378,7 @@ def list_metrics():
375378
logger.debug("Metrics list retrieved successfully")
376379

377380
metrics = response.json()["data"]["metrics"]
381+
# For each metric dict, return only its `name`
378382
sub_metrics = [metric["name"] for metric in metrics]
379383
return sub_metrics
380384

@@ -399,6 +403,7 @@ def list_metrics():
399403
project["name"]
400404
for project in response.json()["data"]["metrics"]
401405
]
406+
# For each dict in metric only return the keys: `name`, `category`
402407
sub_metrics = [
403408
{
404409
"name": metric["name"],

0 commit comments

Comments
 (0)