Commit 8a169ca

Merge pull request #201 from raga-ai-hub/v2.1.6
v2.1.6.2
2 parents adc75a8 + 88cc2b3 commit 8a169ca

File tree: 9 files changed (+305, -16 lines)

examples/crewai/scifi_writer/scifi_writer.py

Lines changed: 3 additions & 7 deletions
@@ -1,15 +1,11 @@
-import sys
-sys.path.append('.')
-
-from ragaai_catalyst import RagaAICatalyst, init_tracing
-from ragaai_catalyst.tracers import Tracer
-
 import os
 from dotenv import load_dotenv
 from crewai import Agent, Task, Crew, Process
 from crewai.tools import tool
 from typing import Any
 
+from ragaai_catalyst import RagaAICatalyst, init_tracing
+from ragaai_catalyst.tracers import Tracer
 
 load_dotenv()
 
@@ -98,4 +94,4 @@ def write_to_file(filename: str, content: str) -> str:
         print("\nGenerated Story Content:")
         print(file.read())
 except FileNotFoundError:
-    print("Story file not found. Check the writer agent's execution.")
+    print("Story file not found. Check the writer agent's execution.")

examples/openai_agents_sdk/README.md

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
# Email Data Extraction with OpenAI Agents SDK

This example demonstrates how to use the OpenAI Agents SDK with RagaAI Catalyst to extract structured information from emails.

## Overview

The application uses OpenAI's Agents SDK to parse unstructured email text and extract key information such as:
- Email subject and sender details
- Main discussion points
- Meeting information (date, time, location)
- Action items and tasks with assignees
- Next steps

The extracted data is structured using Pydantic models for easy manipulation and validation.
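A trimmed sketch of those models gives a sense of the output shape; the full `Person`, `Meeting`, `Task`, and `EmailData` definitions live in `data_extraction_email.py`:

```python
# Trimmed sketch of the structured output models; see data_extraction_email.py
# in this example for the complete definitions (Meeting and Task are omitted here).
from typing import List, Optional
from pydantic import BaseModel

class Person(BaseModel):
    name: str
    role: Optional[str] = None
    contact: Optional[str] = None

class EmailData(BaseModel):
    subject: str
    sender: Person
    main_points: List[str]
    next_steps: Optional[str] = None
```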
## Requirements

- Python 3.8+
- OpenAI API key
- RagaAI Catalyst credentials

## Installation

1. Clone the repository
2. Install the required dependencies:
```bash
pip install -r requirements.txt
```
3. Copy `sample.env` to `.env` and fill in your API keys:
```bash
cp sample.env .env
```

## Environment Variables

Configure the following environment variables in your `.env` file:

- `OPENAI_API_KEY`: Your OpenAI API key
- `CATALYST_ACCESS_KEY`: Your RagaAI Catalyst access key
- `CATALYST_SECRET_KEY`: Your RagaAI Catalyst secret key
- `CATALYST_BASE_URL`: RagaAI Catalyst base URL
- `PROJECT_NAME`: Name for your project in RagaAI Catalyst (default: 'email-extraction')
- `DATASET_NAME`: Name for your dataset in RagaAI Catalyst (default: 'email-data')
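The script reads these values with `python-dotenv`; a minimal sketch of how they are consumed, mirroring `data_extraction_email.py` (no new configuration surface is implied):

```python
# Minimal sketch of how the example consumes the variables above; PROJECT_NAME
# and DATASET_NAME fall back to their documented defaults.
import os
from dotenv import load_dotenv

load_dotenv()  # copies the values from .env into the process environment

openai_api_key = os.getenv('OPENAI_API_KEY')
access_key = os.getenv('CATALYST_ACCESS_KEY')
project_name = os.environ.get('PROJECT_NAME', 'email-extraction')
dataset_name = os.environ.get('DATASET_NAME', 'email-data')
```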
## Usage

Run the example script:

```bash
python data_extraction_email.py
```
The script will:

1. Initialize the RagaAI Catalyst client for tracing
2. Set up an OpenAI Agent with appropriate instructions
3. Process a sample email to extract structured data
4. Display the extracted information
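The four steps above correspond roughly to this condensed sketch, which drives the example from your own code rather than the command line (assuming it is run from the `examples/openai_agents_sdk` directory so the import resolves and the `.env` values are present):

```python
# Condensed sketch of the script's flow, driven from a separate file; assumes
# it runs from the examples/openai_agents_sdk directory with .env configured.
import asyncio
import os

from data_extraction_email import (
    initialize_catalyst,
    extract_email_data,
    display_email_data,
    sample_email,
)

async def main():
    if os.getenv("CATALYST_ACCESS_KEY"):
        initialize_catalyst()                                 # 1. tracing setup
    # 2. the module builds its `email_extractor` agent at import time
    email_data = await extract_email_data(sample_email)      # 3. structured extraction
    display_email_data(email_data)                            # 4. print the results

asyncio.run(main())
```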
## Customization

You can modify the `sample_email` variable in the script to process different emails, or adapt the code to read emails from files or an API.

The Pydantic models (`Person`, `Meeting`, `Task`, `EmailData`) can be extended to capture additional information as needed.
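As one possible adaptation (the file name below is hypothetical, not part of this example), an email stored as plain text can be fed through the existing `process_email` helper:

```python
# Hypothetical adaptation: process an email read from a plain-text file.
# my_email.txt is an assumed file name; process_email comes from the example script.
import asyncio
from pathlib import Path

from data_extraction_email import process_email

email_text = Path("my_email.txt").read_text(encoding="utf-8")
asyncio.run(process_email(email_text))
```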
## Integration with RagaAI Catalyst

This example integrates with RagaAI Catalyst for tracing and monitoring agent interactions. The integration helps with:

- Tracking agent performance
- Debugging complex agent workflows
- Collecting data for future improvements
examples/openai_agents_sdk/data_extraction_email.py

Lines changed: 189 additions & 0 deletions
@@ -0,0 +1,189 @@
import os
import time
from typing import List, Optional, Callable, Any
from pydantic import BaseModel
from dotenv import load_dotenv

from agents import Agent, Runner, ModelSettings, set_tracing_export_api_key

from ragaai_catalyst import RagaAICatalyst, init_tracing
from ragaai_catalyst.tracers import Tracer

load_dotenv()
set_tracing_export_api_key(os.getenv('OPENAI_API_KEY'))

def initialize_catalyst():
    """Initialize RagaAI Catalyst using environment credentials."""
    catalyst = RagaAICatalyst(
        access_key=os.getenv('CATALYST_ACCESS_KEY'),
        secret_key=os.getenv('CATALYST_SECRET_KEY'),
        base_url=os.getenv('CATALYST_BASE_URL')
    )

    tracer = Tracer(
        project_name=os.environ.get('PROJECT_NAME', 'email-extraction'),
        dataset_name=os.environ.get('DATASET_NAME', 'email-data'),
        tracer_type="agentic/openai_agents",
    )

    init_tracing(catalyst=catalyst, tracer=tracer)

class Person(BaseModel):
    """Person data model for email sender and recipients."""
    name: str
    role: Optional[str] = None
    contact: Optional[str] = None

class Meeting(BaseModel):
    """Meeting data model for scheduled meetings in emails."""
    date: str
    time: str
    location: Optional[str] = None
    duration: Optional[str] = None

class Task(BaseModel):
    """Task data model for action items in emails."""
    description: str
    assignee: Optional[str] = None
    deadline: Optional[str] = None
    priority: Optional[str] = None

class EmailData(BaseModel):
    """Complete email data model with structured information."""
    subject: str
    sender: Person
    recipients: List[Person]
    main_points: List[str]
    meetings: List[Meeting]
    tasks: List[Task]
    next_steps: Optional[str] = None

def initialize_agent(agent_name: str, agent_instructions: str|Callable, handoff_description: Optional[str]=None, handoffs: List[Agent]=list(), model_name: str='gpt-4o', temperature: float=0.3, max_tokens: int=1000, output_type: Optional[type[Any]]=None):
    """Initialize the OpenAI agent for email extraction."""
    # Initialize the agent with appropriate configuration
    # This could include model selection, temperature settings, etc.
    model_settings = ModelSettings(
        temperature=temperature,
        max_tokens=max_tokens
    )
    agent = Agent(
        name=agent_name,
        instructions=agent_instructions,
        handoff_description=handoff_description,
        handoffs=handoffs,
        model=model_name,
        model_settings=model_settings,
        output_type=output_type
    )
    return agent

email_extractor = initialize_agent(
    agent_name="Email Extractor",
    agent_instructions="You are an expert at extracting structured information from emails.",
    model_name="gpt-4o",
    temperature=0.2,
    output_type=EmailData
)

async def extract_email_data(email_text: str) -> EmailData:
    """
    Extract structured data from an email using an OpenAI agent.

    Args:
        email_text: The raw email text to process

    Returns:
        EmailData object containing structured information from the email
    """
    runner = Runner()
    extraction_prompt = f"Please extract information from this email:\n\n{email_text}"
    result = await runner.run(
        email_extractor,
        extraction_prompt
    )
    return result.final_output

sample_email = """
From: Alex Johnson <alex.j@techcorp.com>
To: Team Development <team-dev@techcorp.com>
CC: Sarah Wong <sarah.w@techcorp.com>, Miguel Fernandez <miguel.f@techcorp.com>
Subject: Project Phoenix Update and Next Steps

Hi team,

I wanted to follow up on yesterday's discussion about Project Phoenix and outline our next steps.

Key points from our discussion:
- The beta testing phase has shown promising results with 85% positive feedback
- We're still facing some performance issues on mobile devices
- The client has requested additional features for the dashboard

Let's schedule a follow-up meeting this Friday, June 15th at 2:00 PM in Conference Room B. The meeting should last about 1.5 hours, and we'll need to prepare the updated project timeline.

Action items:
1. Sarah to address the mobile performance issues by June 20th (High priority)
2. Miguel to create mock-ups for the new dashboard features by next Monday
3. Everyone to review the beta testing feedback document and add comments by EOD tomorrow

If you have any questions before Friday's meeting, feel free to reach out.

Best regards,
Alex Johnson
Senior Project Manager
(555) 123-4567
"""

def display_email_data(email_data: EmailData):
    """
    Display the extracted email data in a formatted way.

    Args:
        email_data: The structured EmailData object to display
    """
    print(f"Subject: {email_data.subject}")
    print(f"From: {email_data.sender.name} ({email_data.sender.role})")

    print("\nMain points:")
    for point in email_data.main_points:
        print(f"- {point}")

    print("\nMeetings:")
    for meeting in email_data.meetings:
        print(f"- {meeting.date} at {meeting.time}, Location: {meeting.location}")

    print("\nTasks:")
    for task in email_data.tasks:
        print(f"- {task.description}")
        print(
            f" Assignee: {task.assignee}, Deadline: {task.deadline}, Priority: {task.priority}"
        )

    if email_data.next_steps:
        print(f"\nNext Steps: {email_data.next_steps}")

async def process_email(email_text: str):
    """
    Process an email to extract structured data and display the results.

    Args:
        email_text: The raw email text to process

    Returns:
        The structured EmailData object
    """
    if os.getenv('CATALYST_ACCESS_KEY'):
        initialize_catalyst()

    start_time = time.time()
    email_data = await extract_email_data(email_text)
    duration = time.time() - start_time

    print(f"Email processing completed in {duration:.2f} seconds")
    display_email_data(email_data)

    return email_data

if __name__ == "__main__":
    import asyncio

    asyncio.run(process_email(sample_email))
examples/openai_agents_sdk/requirements.txt

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
openai-agents
python-dotenv
ragaai_catalyst

examples/openai_agents_sdk/sample.env

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
OPENAI_API_KEY=your_openai_api_key
CATALYST_ACCESS_KEY=your_catalyst_access_key
CATALYST_SECRET_KEY=your_catalyst_secret_key
CATALYST_BASE_URL=your_catalyst_base_url
PROJECT_NAME=your_project_name
DATASET_NAME=your_dataset_name

pyproject.toml

Lines changed: 6 additions & 6 deletions
@@ -6,9 +6,9 @@ build-backend = "setuptools.build_meta"
 name = "ragaai_catalyst"
 description = "RAGA AI CATALYST"
 readme = "README.md"
-requires-python = ">=3.9,<3.13"
+requires-python = ">=3.10,<3.13"
 # license = {file = "LICENSE"}
-version = "2.1.6"
+version = "2.1.6.2"
 authors = [
     {name = "Kiran Scaria", email = "kiran.scaria@raga.ai"},
     {name = "Kedar Gaikwad", email = "kedar.gaikwad@raga.ai"},
@@ -28,8 +28,8 @@ dependencies = [
     "groq>=0.11.0",
     "pypdf>=5.3.1",
     "google-genai>=1.3.0",
-    "Markdown>=3.7",
-    "litellm>=1.51.1",
+    "Markdown>=3.7",
+    "litellm>=1.51.1",
     "tenacity==8.3.0",
     "tqdm>=4.66.5",
     "llama-index>=0.10.0",
@@ -53,11 +53,11 @@ dependencies = [
     "openinference-instrumentation-bedrock",
     "openinference-instrumentation-crewai",
     "openinference-instrumentation-haystack",
-    "openinference-instrumentation-autogen",
+    "openinference-instrumentation-openai-agents",
     "openinference-instrumentation-smolagents",
     "opentelemetry-sdk",
     "opentelemetry-exporter-otlp",
-    "opentelemetry-proto>=1.12.0"
+    "opentelemetry-proto>=1.12.0",
 ]
 
 [project.optional-dependencies]

ragaai_catalyst/guard_executor.py

Lines changed: 1 addition & 0 deletions
@@ -164,6 +164,7 @@ def set_variables(self,prompt,prompt_params):
         return doc
 
     def execute_input_guardrails(self, prompt, prompt_params):
+        self.current_trace_id = None
         doc = self.set_variables(prompt,prompt_params)
         deployment_response = self.execute_deployment(self.input_deployment_id,doc)
         self.current_trace_id = deployment_response['data']['results'][0]['executionId']

ragaai_catalyst/tracers/tracer.py

Lines changed: 21 additions & 1 deletion
@@ -278,6 +278,14 @@ def __init__(
                 logger.info("Instrumenting Smolagents...")
             except (ImportError, ModuleNotFoundError):
                 logger.debug("Smolagents not available in environment")
+
+            # OpenAI Agents
+            try:
+                from openinference.instrumentation.openai_agents import OpenAIAgentsInstrumentor
+                instrumentors.append((OpenAIAgentsInstrumentor, []))
+                logger.info("Instrumenting OpenAI Agents...")
+            except (ImportError, ModuleNotFoundError):
+                logger.debug("OpenAI Agents not available in environment")
 
             if not instrumentors:
                 logger.warning("No agentic packages found in environment to instrument")
@@ -293,7 +301,7 @@ def __init__(
         elif tracer_type == "agentic/llamaindex":
             from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
             instrumentors += [(LlamaIndexInstrumentor, [])]
-
+
         elif tracer_type == "agentic/langchain" or tracer_type == "agentic/langgraph":
             from openinference.instrumentation.langchain import LangChainInstrumentor
             instrumentors += [(LangChainInstrumentor, [])]
@@ -314,6 +322,10 @@ def __init__(
         elif tracer_type == "agentic/smolagents":
             from openinference.instrumentation.smolagents import SmolagentsInstrumentor
             instrumentors += [(SmolagentsInstrumentor, [])]
+
+        elif tracer_type == "agentic/openai_agents":
+            from openinference.instrumentation.openai_agents import OpenAIAgentsInstrumentor
+            instrumentors += [(OpenAIAgentsInstrumentor, [])]
 
         else:
             # Unknown agentic tracer type
@@ -513,6 +525,14 @@ def stop(self):
             combined_metadata.update(user_detail['trace_user_detail']['metadata'])
         if additional_metadata:
             combined_metadata.update(additional_metadata)
+
+        model_cost_latency_metadata = {}
+        if additional_metadata:
+            model_cost_latency_metadata["model"] = additional_metadata["model_name"]
+            model_cost_latency_metadata["total_cost"] = additional_metadata["cost"]
+            model_cost_latency_metadata["total_latency"] = additional_metadata["latency"]
+            model_cost_latency_metadata["recorded_on"] = datetime.datetime.now().astimezone().isoformat()
+        combined_metadata.update(model_cost_latency_metadata)
 
         langchain_traces = langchain_tracer_extraction(data, self.user_context)
         final_result = convert_langchain_callbacks_output(langchain_traces)
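With the `agentic/openai_agents` branch added above, tracing an OpenAI Agents SDK application follows the same pattern as the other integrations. A minimal sketch, mirroring examples/openai_agents_sdk/data_extraction_email.py and assuming the Catalyst credentials are already set in the environment:

```python
# Minimal sketch of opting into the new OpenAI Agents instrumentation; mirrors
# the example script in this commit and assumes credentials are in the environment.
import os

from ragaai_catalyst import RagaAICatalyst, init_tracing
from ragaai_catalyst.tracers import Tracer

catalyst = RagaAICatalyst(
    access_key=os.getenv("CATALYST_ACCESS_KEY"),
    secret_key=os.getenv("CATALYST_SECRET_KEY"),
    base_url=os.getenv("CATALYST_BASE_URL"),
)
tracer = Tracer(
    project_name="email-extraction",
    dataset_name="email-data",
    tracer_type="agentic/openai_agents",  # routes to OpenAIAgentsInstrumentor above
)
init_tracing(catalyst=catalyst, tracer=tracer)
```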
