raga-ai-hub
diff --git a/‎pyproject.toml
Lines changed: 4 additions & 4 deletions b/‎pyproject.toml
Lines changed: 4 additions & 4 deletions
diff --git a/‎ragaai_catalyst/dataset.py
Lines changed: 4 additions & 56 deletions b/‎ragaai_catalyst/dataset.py
Lines changed: 4 additions & 56 deletions
diff --git a/‎ragaai_catalyst/evaluation.py
Lines changed: 7 additions & 68 deletions b/‎ragaai_catalyst/evaluation.py
Lines changed: 7 additions & 68 deletions
diff --git a/‎ragaai_catalyst/experiment.py
Lines changed: 8 additions & 2 deletions b/‎ragaai_catalyst/experiment.py
Lines changed: 8 additions & 2 deletions
diff --git a/‎ragaai_catalyst/internal_api_completion.py
Lines changed: 1 addition & 1 deletion b/‎ragaai_catalyst/internal_api_completion.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎ragaai_catalyst/prompt_manager.py
Lines changed: 2 additions & 2 deletions b/‎ragaai_catalyst/prompt_manager.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎ragaai_catalyst/ragaai_catalyst.py
Lines changed: 8 additions & 3 deletions b/‎ragaai_catalyst/ragaai_catalyst.py
Lines changed: 8 additions & 3 deletions
@@ -11,12 +11,12 @@ requires-python = ">=3.9,<3.13"
 
 version = "2.1.4"
 authors = [
-    {name = "Kiran Scaria", email = "kiranscaria@outlook.com"},
+    {name = "Kiran Scaria", email = "kiran.scaria@raga.ai"},
+    {name = "Kedar Gaikwad", email = "kedar.gaikwad@raga.ai"},
+    {name = "Dushyant Mahajan", email = "dushyant.mahajan@raga.ai"},
     {name = "Siddhartha Kosti", email = "siddhartha.kosti@raga.ai"},
     {name = "Ritika Goel", email = "ritika.goel@raga.ai"},
-    {name = "Vijay Chaurasia", email="vijay.chaurasia@raga.ai"},
-    {name = "Tanaya Pakhale", email="tanaya.pakhale@raga.ai"},
-    {name = "Tushar Kumar", email="tushar.kumar@raga.ai"},
+    {name = "Vijay Chaurasia", email="vijay.chaurasia@raga.ai"}
 ]
 
 dependencies = [
 
@@ -10,33 +10,6 @@
 
 
 class Dataset:
-    """
-    A class to manage datasets within a RagaAI Catalyst project.
-
-    This class provides functionality to interact with datasets in a RagaAI project,
-    including listing datasets, retrieving schema mappings, managing dataset columns,
-    and creating new datasets from CSV files.
-
-    Attributes:
-        BASE_URL (str): Base URL for the RagaAI Catalyst API endpoint
-        TIMEOUT (int): Request timeout in seconds
-        project_name (str): Name of the project to work with
-        project_id (str): ID of the project retrieved from the API
-        num_projects (int): Maximum number of projects to retrieve
-
-    Args:
-        project_name (str): The name of the project to initialize the Dataset instance with
-
-    Raises:
-        ValueError: If the provided project name is not found
-        requests.exceptions.RequestException: If there are any issues with API communication
-
-    Example:
-        >>> dataset = Dataset("my_project")
-        >>> available_datasets = dataset.list_datasets()
-        >>> schema = dataset.get_schema_mapping()
-    """
-
     BASE_URL = None
     TIMEOUT = 30
 
@@ -106,8 +79,8 @@ def make_request():
             response = make_request()
             response_checker(response, "Dataset.list_datasets")
             if response.status_code == 401:
-                get_token()  
-                response = make_request()  
+                get_token()  # Fetch a new token and set it in the environment
+                response = make_request()  # Retry the request
             if response.status_code != 200:
                 return {
                     "status_code": response.status_code,
@@ -121,13 +94,6 @@ def make_request():
             raise
 
     def get_schema_mapping(self):
-        """
-        Retrieves the schema mapping elements for the project.
-
-        Returns:
-            dict: A dictionary containing the schema elements for the project.
-        """
-
         headers = {
             "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
             "X-Project-Name": self.project_name,
@@ -147,19 +113,9 @@ def get_schema_mapping(self):
             logger.error(f"Failed to get CSV schema: {e}")
             raise
 
-    
+    ###################### CSV Upload APIs ###################
 
     def get_dataset_columns(self, dataset_name):
-        """
-        Retrieves the column names for a specific dataset.
-
-        Args:
-            dataset_name (str): Name of the dataset to retrieve columns for
-
-        Returns:
-            list: A list of column names in the dataset.
-        """
-
         list_dataset = self.list_datasets()
         if dataset_name not in list_dataset:
             raise ValueError(f"Dataset {dataset_name} does not exists. Please enter a valid dataset name")
@@ -206,15 +162,6 @@ def get_dataset_columns(self, dataset_name):
             raise
 
     def create_from_csv(self, csv_path, dataset_name, schema_mapping):
-        """
-        Creates a new dataset from a CSV file.
-
-        Args:
-            csv_path (str): Path to the CSV file to upload
-            dataset_name (str): Name for the new dataset
-            schema_mapping (dict): Mapping of column names to their schema types
-        """
-
         list_dataset = self.list_datasets()
         if dataset_name in list_dataset:
             raise ValueError(f"Dataset name {dataset_name} already exists. Please enter a unique dataset name")
@@ -278,6 +225,7 @@ def put_csv_to_presignedUrl(url):
             logger.error(f"Error in put_csv_to_presignedUrl: {e}")
             raise
 
+        ## Upload csv to elastic
         def upload_csv_to_elastic(data):
             header = {
                 'Content-Type': 'application/json',
 
@@ -8,32 +8,6 @@
 logger = logging.getLogger(__name__)
 
 class Evaluation:
-    """
-    A class to handle LLM evaluation tasks using the RagaAI Catalyst API.
-
-    This class provides functionality to:
-    - Initialize evaluation projects and datasets
-    - List available metrics
-    - Add new metrics for evaluation
-    - Track evaluation job status
-    - Retrieve evaluation results
-
-    Example:
-        >>> evaluation = Evaluation("my_project", "my_dataset")
-        >>> metrics = evaluation.list_metrics()
-        >>> evaluation.add_metrics([{
-        ...     "name": "metric_name",
-        ...     "config": {"provider": "openai"},
-        ...     "column_name": "result_column",
-        ...     "schema_mapping": {"input": "user_input"}
-        ... }])
-        >>> evaluation.get_status()
-        >>> results = evaluation.get_results()
-
-    Notes:
-        - Requires RAGAAI_CATALYST_TOKEN environment variable to be set
-        - API calls are made with a default timeout of 10 seconds
-    """
 
     def __init__(self, project_name, dataset_name):
         self.project_name = project_name
@@ -98,13 +72,6 @@ def __init__(self, project_name, dataset_name):
 
 
     def list_metrics(self):
-        """
-        Retrieve list of available metrics for evaluation.
-
-        Returns:
-            list: List of metric names available for evaluation
-        """
-
         headers = {
             "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
             'X-Project-Id': str(self.project_id),
@@ -157,6 +124,7 @@ def _get_dataset_id_based_on_dataset_type(self, metric_to_evaluate):
 
 
     def _get_dataset_schema(self, metric_to_evaluate=None):
+        # The dataset_id used here depends on the type of metric_to_evaluate
         data_set_id=self._get_dataset_id_based_on_dataset_type(metric_to_evaluate)
         self.dataset_id=data_set_id
 
@@ -215,6 +183,7 @@ def _get_mapping(self, metric_name, metrics_schema, schema_mapping):
             if schema["name"]==metric_name:
                 requiredFields = schema["config"]["requiredFields"]
 
+                #this is added to check if "Chat" column is required for metric evaluation
                 required_variables = [_["name"].lower() for _ in requiredFields]
                 if "chat" in required_variables:
                     metric_to_evaluate = "chat"
@@ -278,7 +247,9 @@ def _update_base_json(self, metrics):
             base_json = self._get_metricParams()
             base_json["metricSpec"]["name"] = metric["name"]
 
+            # Parsing model configuration
             for key, value in metric["config"].items():
+                #checking if provider is one of the allowed providers
                 if key.lower()=="provider" and value.lower() not in sub_providers:
                     raise ValueError("Enter a valid provider name. The following Provider names are supported: openai, azure, gemini, groq, anthropic, bedrock")
 
@@ -290,6 +261,8 @@ def _update_base_json(self, metrics):
                             base_json["metricSpec"]["config"]["params"][key] = {f"{key_thres}":value_thres}
                 else:
                     base_json["metricSpec"]["config"]["params"][key] = {"value": value}
+
+
             # if metric["config"]["model"]:
             #     base_json["metricSpec"]["config"]["params"]["model"]["value"] = metric["config"]["model"]
             base_json["metricSpec"]["displayName"] = metric["column_name"]
@@ -330,31 +303,7 @@ def _get_executed_metrics_list(self):
             return []
 
     def add_metrics(self, metrics):
-        """
-        Add metrics for evaluation.
-
-        Args:
-            metrics (list): List of metric configurations. Each metric should be a dict with:
-                - name (str): Name of the metric
-                - config (dict): Metric configuration including provider and parameters
-                - column_name (str): Name for the results column
-                - schema_mapping (dict): Mapping between dataset columns and metric fields
-
-        Example:
-            >>> metrics = [{
-            ...     "name": "answer_relevancy",
-            ...     "config": {
-            ...         "provider": "openai",
-            ...         "threshold": {"gte": 0.7}
-            ...     },
-            ...     "column_name": "relevancy_score",
-            ...     "schema_mapping": {
-            ...         "question": "user_question",
-            ...         "answer": "model_response"
-            ...     }
-            ... }]
-            >>> evaluation.add_metrics(metrics)
-        """
+        # Check each metric for missing required keys
         required_keys = {"name", "config", "column_name", "schema_mapping"}
         for metric in metrics:
             missing_keys = required_keys - metric.keys()
@@ -404,13 +353,6 @@ def add_metrics(self, metrics):
             logger.error(f"An unexpected error occurred: {e}")
 
     def get_status(self):
-        """
-        Check the status of the current evaluation job.
-
-        Prints the current status and provides a URL to track progress.
-        Status can be: "Failed", "In Progress", or "Completed"
-        """
-
         headers = {
             'Content-Type': 'application/json',
             "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
@@ -442,9 +384,6 @@ def get_status(self):
             logger.error(f"An unexpected error occurred: {e}")
 
     def get_results(self):
-        """
-        Retrieve the results of the evaluation.
-        """
 
         def get_presignedUrl():
             headers = {
 
@@ -55,7 +55,9 @@ def __init__(
             timeout=10,
         )
         response.raise_for_status()
+        # logger.debug("Projects list retrieved successfully")
         experiment_list = [exp["name"] for project in response.json()["data"]["content"] if project["name"] == self.project_name for exp in project["experiments"]]
+        # print(experiment_list)
         if self.experiment_name in experiment_list:
             raise ValueError("The experiment name already exists in the project. Enter a unique experiment name.")
 
@@ -103,6 +105,7 @@ def _check_if_dataset_exists(self,project_name,dataset_name):
 
 
     def _check_if_project_exists(self,project_name,num_projects=100):
+        # TODO: 1. List All projects
         params = {
             "size": str(num_projects),
             "page": "0",
@@ -124,6 +127,8 @@ def _check_if_project_exists(self,project_name,num_projects=100):
             project["name"] for project in response.json()["data"]["content"]
         ]
 
+        # TODO: 2. Check if the given project_name exists
+        # TODO: 3. Return bool (True / False output)
         exists = project_name in project_list
         if exists:
             logger.info(f"Project '{project_name}' exists.")
@@ -162,8 +167,8 @@ def make_request():
         response = make_request()
         response_checker(response, "Experiment.list_experiments")
         if response.status_code == 401:
-            get_token()  
-            response = make_request()  
+            get_token()  # Fetch a new token and set it in the environment
+            response = make_request()  # Retry the request
         if response.status_code != 200:
             return {
                 "status_code": response.status_code,
@@ -466,6 +471,7 @@ def parse_response(self, response):
             x.columns = x.columns.str.replace("_reason_status", "_status")
 
             columns_list = x.columns.tolist()
+            #remove trace_uri from columns_list if it exists
             columns_list = list(set(columns_list) - {"trace_uri"})
             x = x[columns_list]
 
 
@@ -42,7 +42,7 @@ def api_completion(messages, model_config, kwargs):
                         df = pd.DataFrame(json_data)
                         return(df)
                     except json.JSONDecodeError:
-                        attempts += 1  
+                        attempts += 1  # Increment attempts if JSON parsing fails
                         if attempts == 3:
                             raise Exception("Failed to generate a valid response after multiple attempts.")
 
 
@@ -23,7 +23,7 @@ def __init__(self, project_name):
         self.project_name = project_name
         self.base_url = f"{RagaAICatalyst.BASE_URL}/playground/prompt"
         self.timeout = 10
-        self.size = 99999
+        self.size = 99999 #Number of projects to fetch
 
         try:
             response = requests.get(
@@ -423,7 +423,7 @@ def _convert_value(self, value, type_):
             return float(value)
         elif type_ == "int":
             return int(value)
-        return value 
+        return value  # Default case, return as is
 
     def get_model_parameters(self):
         """
 
@@ -8,7 +8,7 @@
 
 class RagaAICatalyst:
     BASE_URL = None
-    TIMEOUT = 10  
+    TIMEOUT = 10  # Default timeout in seconds
 
     def __init__(
         self,
@@ -64,8 +64,10 @@ def __init__(
                     "The provided base_url is not accessible. Please re-check the base_url."
                 )
         else:
+            # Get the token from the server
             self.get_token()
 
+        # Set the API keys, if available
         if self.api_keys:
             self._upload_keys()
 
@@ -158,6 +160,7 @@ def get_token() -> Union[str, None]:
             timeout=RagaAICatalyst.TIMEOUT,
         )
 
+        # Handle specific status codes before raising an error
         if response.status_code == 400:
             token_response = response.json()
             if token_response.get("message") == "Please enter valid credentials":
@@ -195,7 +198,7 @@ def project_use_cases(self):
                 headers=headers,
                 timeout=self.TIMEOUT
             )
-            response.raise_for_status() 
+            response.raise_for_status()  # Use raise_for_status to handle HTTP errors
             usecase = response.json()["data"]["usecase"]
             return usecase
         except requests.exceptions.RequestException as e:
@@ -214,7 +217,7 @@ def create_project(self, project_name, usecase="Q/A", type="llm"):
         Returns:
             str: A message indicating the success or failure of the project creation.
         """
-        
+        # Check if the project already exists
         existing_projects = self.list_projects()
         if project_name in existing_projects:
             raise ValueError(f"Project name '{project_name}' already exists. Please choose a different name.")
@@ -375,6 +378,7 @@ def list_metrics():
             logger.debug("Metrics list retrieved successfully")
 
             metrics = response.json()["data"]["metrics"]
+            # For each metric dict, keep only its `name`
             sub_metrics = [metric["name"] for metric in metrics]
             return sub_metrics
 
@@ -399,6 +403,7 @@ def list_metrics():
                         project["name"]
                         for project in response.json()["data"]["metrics"]
                     ]
+                    # For each dict in metric only return the keys: `name`, `category`
                     sub_metrics = [
                         {
                             "name": metric["name"],