Commit 8a169ca

Merge pull request #201 from raga-ai-hub/v2.1.6
v2.1.6.2
2 parents adc75a8 + 88cc2b3 commit 8a169ca

File tree: 9 files changed (+305, -16 lines)

examples/crewai/scifi_writer/scifi_writer.py

Lines changed: 3 additions & 7 deletions
@@ -1,15 +1,11 @@
-import sys
-sys.path.append('.')
-
-from ragaai_catalyst import RagaAICatalyst, init_tracing
-from ragaai_catalyst.tracers import Tracer
-
 import os
 from dotenv import load_dotenv
 from crewai import Agent, Task, Crew, Process
 from crewai.tools import tool
 from typing import Any
 
+from ragaai_catalyst import RagaAICatalyst, init_tracing
+from ragaai_catalyst.tracers import Tracer
 
 load_dotenv()
 
@@ -98,4 +94,4 @@ def write_to_file(filename: str, content: str) -> str:
         print("\nGenerated Story Content:")
         print(file.read())
 except FileNotFoundError:
-    print("Story file not found. Check the writer agent's execution.")
+    print("Story file not found. Check the writer agent's execution.")

examples/openai_agents_sdk/README.md

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
# Email Data Extraction with OpenAI Agents SDK

This example demonstrates how to use the OpenAI Agents SDK with RagaAI Catalyst to extract structured information from emails.

## Overview

The application uses OpenAI's Agents SDK to parse unstructured email text and extract key information such as:
- Email subject and sender details
- Main discussion points
- Meeting information (date, time, location)
- Action items and tasks with assignees
- Next steps

The extracted data is structured using Pydantic models for easy manipulation and validation.
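A trimmed sketch of those models gives a sense of the output shape; the full `Person`, `Meeting`, `Task`, and `EmailData` definitions live in `data_extraction_email.py`:

```python
# Trimmed sketch of the structured output models; see data_extraction_email.py
# in this example for the complete definitions (Meeting and Task are omitted here).
from typing import List, Optional
from pydantic import BaseModel

class Person(BaseModel):
    name: str
    role: Optional[str] = None
    contact: Optional[str] = None

class EmailData(BaseModel):
    subject: str
    sender: Person
    main_points: List[str]
    next_steps: Optional[str] = None
```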
## Requirements

- Python 3.8+
- OpenAI API key
- RagaAI Catalyst credentials

## Installation

1. Clone the repository
2. Install the required dependencies:
```bash
pip install -r requirements.txt
```
3. Copy `sample.env` to `.env` and fill in your API keys:
```bash
cp sample.env .env
```

## Environment Variables

Configure the following environment variables in your `.env` file:

- `OPENAI_API_KEY`: Your OpenAI API key
- `CATALYST_ACCESS_KEY`: Your RagaAI Catalyst access key
- `CATALYST_SECRET_KEY`: Your RagaAI Catalyst secret key
- `CATALYST_BASE_URL`: RagaAI Catalyst base URL
- `PROJECT_NAME`: Name for your project in RagaAI Catalyst (default: 'email-extraction')
- `DATASET_NAME`: Name for your dataset in RagaAI Catalyst (default: 'email-data')
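The script reads these values with `python-dotenv`; a minimal sketch of how they are consumed, mirroring `data_extraction_email.py` (no new configuration surface is implied):

```python
# Minimal sketch of how the example consumes the variables above; PROJECT_NAME
# and DATASET_NAME fall back to their documented defaults.
import os
from dotenv import load_dotenv

load_dotenv()  # copies the values from .env into the process environment

openai_api_key = os.getenv('OPENAI_API_KEY')
access_key = os.getenv('CATALYST_ACCESS_KEY')
project_name = os.environ.get('PROJECT_NAME', 'email-extraction')
dataset_name = os.environ.get('DATASET_NAME', 'email-data')
```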
## Usage

Run the example script:

```bash
python data_extraction_email.py
```
The script will:

1. Initialize the RagaAI Catalyst client for tracing
2. Set up an OpenAI Agent with appropriate instructions
3. Process a sample email to extract structured data
4. Display the extracted information
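The four steps above correspond roughly to this condensed sketch, which drives the example from your own code rather than the command line (assuming it is run from the `examples/openai_agents_sdk` directory so the import resolves and the `.env` values are present):

```python
# Condensed sketch of the script's flow, driven from a separate file; assumes
# it runs from the examples/openai_agents_sdk directory with .env configured.
import asyncio
import os

from data_extraction_email import (
    initialize_catalyst,
    extract_email_data,
    display_email_data,
    sample_email,
)

async def main():
    if os.getenv("CATALYST_ACCESS_KEY"):
        initialize_catalyst()                                 # 1. tracing setup
    # 2. the module builds its `email_extractor` agent at import time
    email_data = await extract_email_data(sample_email)      # 3. structured extraction
    display_email_data(email_data)                            # 4. print the results

asyncio.run(main())
```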
## Customization

You can modify the `sample_email` variable in the script to process different emails, or adapt the code to read emails from files or an API.

The Pydantic models (`Person`, `Meeting`, `Task`, `EmailData`) can be extended to capture additional information as needed.
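As one possible adaptation (the file name below is hypothetical, not part of this example), an email stored as plain text can be fed through the existing `process_email` helper:

```python
# Hypothetical adaptation: process an email read from a plain-text file.
# my_email.txt is an assumed file name; process_email comes from the example script.
import asyncio
from pathlib import Path

from data_extraction_email import process_email

email_text = Path("my_email.txt").read_text(encoding="utf-8")
asyncio.run(process_email(email_text))
```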
## Integration with RagaAI Catalyst

This example integrates with RagaAI Catalyst for tracing and monitoring agent interactions. The integration helps with:

- Tracking agent performance
- Debugging complex agent workflows
- Collecting data for future improvements
examples/openai_agents_sdk/data_extraction_email.py

Lines changed: 189 additions & 0 deletions
@@ -0,0 +1,189 @@
import os
import time
from typing import List, Optional, Callable, Any
from pydantic import BaseModel
from dotenv import load_dotenv

from agents import Agent, Runner, ModelSettings, set_tracing_export_api_key

from ragaai_catalyst import RagaAICatalyst, init_tracing
from ragaai_catalyst.tracers import Tracer

load_dotenv()
set_tracing_export_api_key(os.getenv('OPENAI_API_KEY'))

def initialize_catalyst():
    """Initialize RagaAI Catalyst using environment credentials."""
    catalyst = RagaAICatalyst(
        access_key=os.getenv('CATALYST_ACCESS_KEY'),
        secret_key=os.getenv('CATALYST_SECRET_KEY'),
        base_url=os.getenv('CATALYST_BASE_URL')
    )

    tracer = Tracer(
        project_name=os.environ.get('PROJECT_NAME', 'email-extraction'),
        dataset_name=os.environ.get('DATASET_NAME', 'email-data'),
        tracer_type="agentic/openai_agents",
    )

    init_tracing(catalyst=catalyst, tracer=tracer)

class Person(BaseModel):
    """Person data model for email sender and recipients."""
    name: str
    role: Optional[str] = None
    contact: Optional[str] = None

class Meeting(BaseModel):
    """Meeting data model for scheduled meetings in emails."""
    date: str
    time: str
    location: Optional[str] = None
    duration: Optional[str] = None

class Task(BaseModel):
    """Task data model for action items in emails."""
    description: str
    assignee: Optional[str] = None
    deadline: Optional[str] = None
    priority: Optional[str] = None

class EmailData(BaseModel):
    """Complete email data model with structured information."""
    subject: str
    sender: Person
    recipients: List[Person]
    main_points: List[str]
    meetings: List[Meeting]
    tasks: List[Task]
    next_steps: Optional[str] = None

def initialize_agent(agent_name: str, agent_instructions: str|Callable, handoff_description: Optional[str]=None, handoffs: List[Agent]=list(), model_name: str='gpt-4o', temperature: float=0.3, max_tokens: int=1000, output_type: Optional[type[Any]]=None):
    """Initialize the OpenAI agent for email extraction."""
    # Initialize the agent with appropriate configuration
    # This could include model selection, temperature settings, etc.
    model_settings = ModelSettings(
        temperature=temperature,
        max_tokens=max_tokens
    )
    agent = Agent(
        name=agent_name,
        instructions=agent_instructions,
        handoff_description=handoff_description,
        handoffs=handoffs,
        model=model_name,
        model_settings=model_settings,
        output_type=output_type
    )
    return agent

email_extractor = initialize_agent(
    agent_name="Email Extractor",
    agent_instructions="You are an expert at extracting structured information from emails.",
    model_name="gpt-4o",
    temperature=0.2,
    output_type=EmailData
)

async def extract_email_data(email_text: str) -> EmailData:
    """
    Extract structured data from an email using an OpenAI agent.

    Args:
        email_text: The raw email text to process

    Returns:
        EmailData object containing structured information from the email
    """
    runner = Runner()
    extraction_prompt = f"Please extract information from this email:\n\n{email_text}"
    result = await runner.run(
        email_extractor,
        extraction_prompt
    )
    return result.final_output

sample_email = """
From: Alex Johnson <alex.j@techcorp.com>
To: Team Development <team-dev@techcorp.com>
CC: Sarah Wong <sarah.w@techcorp.com>, Miguel Fernandez <miguel.f@techcorp.com>
Subject: Project Phoenix Update and Next Steps

Hi team,

I wanted to follow up on yesterday's discussion about Project Phoenix and outline our next steps.

Key points from our discussion:
- The beta testing phase has shown promising results with 85% positive feedback
- We're still facing some performance issues on mobile devices
- The client has requested additional features for the dashboard

Let's schedule a follow-up meeting this Friday, June 15th at 2:00 PM in Conference Room B. The meeting should last about 1.5 hours, and we'll need to prepare the updated project timeline.

Action items:
1. Sarah to address the mobile performance issues by June 20th (High priority)
2. Miguel to create mock-ups for the new dashboard features by next Monday
3. Everyone to review the beta testing feedback document and add comments by EOD tomorrow

If you have any questions before Friday's meeting, feel free to reach out.

Best regards,
Alex Johnson
Senior Project Manager
(555) 123-4567
"""

def display_email_data(email_data: EmailData):
    """
    Display the extracted email data in a formatted way.

    Args:
        email_data: The structured EmailData object to display
    """
    print(f"Subject: {email_data.subject}")
    print(f"From: {email_data.sender.name} ({email_data.sender.role})")

    print("\nMain points:")
    for point in email_data.main_points:
        print(f"- {point}")

    print("\nMeetings:")
    for meeting in email_data.meetings:
        print(f"- {meeting.date} at {meeting.time}, Location: {meeting.location}")

    print("\nTasks:")
    for task in email_data.tasks:
        print(f"- {task.description}")
        print(
            f" Assignee: {task.assignee}, Deadline: {task.deadline}, Priority: {task.priority}"
        )

    if email_data.next_steps:
        print(f"\nNext Steps: {email_data.next_steps}")

async def process_email(email_text: str):
    """
    Process an email to extract structured data and display the results.

    Args:
        email_text: The raw email text to process

    Returns:
        The structured EmailData object
    """
    if os.getenv('CATALYST_ACCESS_KEY'):
        initialize_catalyst()

    start_time = time.time()
    email_data = await extract_email_data(email_text)
    duration = time.time() - start_time

    print(f"Email processing completed in {duration:.2f} seconds")
    display_email_data(email_data)

    return email_data

if __name__ == "__main__":
    import asyncio

    asyncio.run(process_email(sample_email))
examples/openai_agents_sdk/requirements.txt

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
openai-agents
python-dotenv
ragaai_catalyst

examples/openai_agents_sdk/sample.env

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
OPENAI_API_KEY=your_openai_api_key
CATALYST_ACCESS_KEY=your_catalyst_access_key
CATALYST_SECRET_KEY=your_catalyst_secret_key
CATALYST_BASE_URL=your_catalyst_base_url
PROJECT_NAME=your_project_name
DATASET_NAME=your_dataset_name

pyproject.toml

Lines changed: 6 additions & 6 deletions
@@ -6,9 +6,9 @@ build-backend = "setuptools.build_meta"
 name = "ragaai_catalyst"
 description = "RAGA AI CATALYST"
 readme = "README.md"
-requires-python = ">=3.9,<3.13"
+requires-python = ">=3.10,<3.13"
 # license = {file = "LICENSE"}
-version = "2.1.6"
+version = "2.1.6.2"
 authors = [
     {name = "Kiran Scaria", email = "kiran.scaria@raga.ai"},
     {name = "Kedar Gaikwad", email = "kedar.gaikwad@raga.ai"},
@@ -28,8 +28,8 @@ dependencies = [
     "groq>=0.11.0",
     "pypdf>=5.3.1",
     "google-genai>=1.3.0",
-    "Markdown>=3.7",
-    "litellm>=1.51.1",
+    "Markdown>=3.7",
+    "litellm>=1.51.1",
     "tenacity==8.3.0",
     "tqdm>=4.66.5",
     "llama-index>=0.10.0",
@@ -53,11 +53,11 @@ dependencies = [
     "openinference-instrumentation-bedrock",
     "openinference-instrumentation-crewai",
     "openinference-instrumentation-haystack",
-    "openinference-instrumentation-autogen",
+    "openinference-instrumentation-openai-agents",
     "openinference-instrumentation-smolagents",
     "opentelemetry-sdk",
     "opentelemetry-exporter-otlp",
-    "opentelemetry-proto>=1.12.0"
+    "opentelemetry-proto>=1.12.0",
 ]
 
 [project.optional-dependencies]

ragaai_catalyst/guard_executor.py

Lines changed: 1 addition & 0 deletions
@@ -164,6 +164,7 @@ def set_variables(self,prompt,prompt_params):
         return doc
 
     def execute_input_guardrails(self, prompt, prompt_params):
+        self.current_trace_id = None
         doc = self.set_variables(prompt,prompt_params)
         deployment_response = self.execute_deployment(self.input_deployment_id,doc)
         self.current_trace_id = deployment_response['data']['results'][0]['executionId']

ragaai_catalyst/tracers/tracer.py

Lines changed: 21 additions & 1 deletion
@@ -278,6 +278,14 @@ def __init__(
                 logger.info("Instrumenting Smolagents...")
             except (ImportError, ModuleNotFoundError):
                 logger.debug("Smolagents not available in environment")
+
+            # OpenAI Agents
+            try:
+                from openinference.instrumentation.openai_agents import OpenAIAgentsInstrumentor
+                instrumentors.append((OpenAIAgentsInstrumentor, []))
+                logger.info("Instrumenting OpenAI Agents...")
+            except (ImportError, ModuleNotFoundError):
+                logger.debug("OpenAI Agents not available in environment")
 
             if not instrumentors:
                 logger.warning("No agentic packages found in environment to instrument")
@@ -293,7 +301,7 @@ def __init__(
         elif tracer_type == "agentic/llamaindex":
             from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
             instrumentors += [(LlamaIndexInstrumentor, [])]
-
+
         elif tracer_type == "agentic/langchain" or tracer_type == "agentic/langgraph":
             from openinference.instrumentation.langchain import LangChainInstrumentor
             instrumentors += [(LangChainInstrumentor, [])]
@@ -314,6 +322,10 @@ def __init__(
         elif tracer_type == "agentic/smolagents":
             from openinference.instrumentation.smolagents import SmolagentsInstrumentor
             instrumentors += [(SmolagentsInstrumentor, [])]
+
+        elif tracer_type == "agentic/openai_agents":
+            from openinference.instrumentation.openai_agents import OpenAIAgentsInstrumentor
+            instrumentors += [(OpenAIAgentsInstrumentor, [])]
 
         else:
             # Unknown agentic tracer type
@@ -513,6 +525,14 @@ def stop(self):
             combined_metadata.update(user_detail['trace_user_detail']['metadata'])
         if additional_metadata:
             combined_metadata.update(additional_metadata)
+
+        model_cost_latency_metadata = {}
+        if additional_metadata:
+            model_cost_latency_metadata["model"] = additional_metadata["model_name"]
+            model_cost_latency_metadata["total_cost"] = additional_metadata["cost"]
+            model_cost_latency_metadata["total_latency"] = additional_metadata["latency"]
+            model_cost_latency_metadata["recorded_on"] = datetime.datetime.now().astimezone().isoformat()
+        combined_metadata.update(model_cost_latency_metadata)
 
         langchain_traces = langchain_tracer_extraction(data, self.user_context)
         final_result = convert_langchain_callbacks_output(langchain_traces)
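With the `agentic/openai_agents` branch added above, tracing an OpenAI Agents SDK application follows the same pattern as the other integrations. A minimal sketch, mirroring examples/openai_agents_sdk/data_extraction_email.py and assuming the Catalyst credentials are already set in the environment:

```python
# Minimal sketch of opting into the new OpenAI Agents instrumentation; mirrors
# the example script in this commit and assumes credentials are in the environment.
import os

from ragaai_catalyst import RagaAICatalyst, init_tracing
from ragaai_catalyst.tracers import Tracer

catalyst = RagaAICatalyst(
    access_key=os.getenv("CATALYST_ACCESS_KEY"),
    secret_key=os.getenv("CATALYST_SECRET_KEY"),
    base_url=os.getenv("CATALYST_BASE_URL"),
)
tracer = Tracer(
    project_name="email-extraction",
    dataset_name="email-data",
    tracer_type="agentic/openai_agents",  # routes to OpenAIAgentsInstrumentor above
)
init_tracing(catalyst=catalyst, tracer=tracer)
```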
