Skip to content

Commit fa35b12

Browse files
committed
Add Data Analyst agent with SQL query generation and update gitignore patterns
1 parent cca683a commit fa35b12

File tree

2 files changed

+140
-1
lines changed

2 files changed

+140
-1
lines changed

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@ myenv/
1111
# C extensions
1212
*.so
1313
.env
14-
14+
*.csv
15+
*.db
16+
*.Identifier
1517
# Distribution / packaging
1618
.Python
1719
build/

app/agent/da_agent.py

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
import contextlib # Added for SIM105 fix
2+
import csv
3+
import json
4+
import tempfile
5+
6+
import pandas as pd
7+
import streamlit as st
8+
9+
from agno.models.openai import OpenAIChat # type: ignore
10+
from agno.tools.pandas import PandasTools
11+
from phi.agent.duckdb import DuckDbAgent
12+
13+
14+
# Function to preprocess and save the uploaded file
15+
def preprocess_and_save(file): # type: ignore
16+
try:
17+
# Read the uploaded file into a DataFrame
18+
if file.name.endswith(".csv"):
19+
df = pd.read_csv(file, encoding="utf-8", na_values=["NA", "N/A", "missing"])
20+
elif file.name.endswith(".xlsx"):
21+
df = pd.read_excel(file, na_values=["NA", "N/A", "missing"])
22+
else:
23+
st.error("Unsupported file format. Please upload a CSV or Excel file.")
24+
return None, None, None
25+
26+
# Ensure string columns are properly quoted
27+
for col in df.select_dtypes(include=["object"]):
28+
df[col] = df[col].astype(str).replace({r'"': '""'}, regex=True)
29+
30+
# Parse dates and numeric columns
31+
for col in df.columns:
32+
if "date" in col.lower():
33+
df[col] = pd.to_datetime(df[col], errors="coerce")
34+
elif df[col].dtype == "object":
35+
with contextlib.suppress(ValueError, TypeError): # Fixed SIM105
36+
df[col] = pd.to_numeric(df[col])
37+
38+
# Create a temporary file to save the preprocessed data
39+
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_file:
40+
temp_path = temp_file.name
41+
# Save the DataFrame to the temporary CSV file with quotes around string fields
42+
df.to_csv(temp_path, index=False, quoting=csv.QUOTE_ALL)
43+
44+
return temp_path, df.columns.tolist(), df # Return the DataFrame as well
45+
except Exception as e:
46+
st.error(f"Error processing file: {e}")
47+
return None, None, None
48+
49+
50+
# Streamlit app entry: collects an OpenAI key, accepts a CSV/XLSX upload,
# and lets the user query the data via a DuckDB-backed SQL-generating agent.
st.title("📊 Data Analyst Agent")

# Sidebar for API keys
with st.sidebar:
    st.header("API Keys")
    openai_key = st.text_input("Enter your OpenAI API key:", type="password")
    if openai_key:
        st.session_state.openai_key = openai_key
        st.success("API key saved!")
    else:
        st.warning("Please enter your OpenAI API key to proceed.")

# File upload widget
uploaded_file = st.file_uploader("Upload a CSV or Excel file", type=["csv", "xlsx"])

# Only proceed once both a file and an API key are available.
if uploaded_file is not None and "openai_key" in st.session_state:
    # Preprocess and save the uploaded file to a temporary CSV on disk.
    temp_path, columns, df = preprocess_and_save(uploaded_file)  # type: ignore

    if temp_path and columns and df is not None:
        # Display the uploaded data as an interactive table
        st.write("Uploaded Data:")
        st.dataframe(df)

        # Display the columns of the uploaded data
        st.write("Uploaded columns:", columns)

        # Semantic model pointing the agent's DuckDB layer at the temp CSV.
        semantic_model = {
            "tables": [
                {
                    "name": "uploaded_data",
                    "description": "Contains the uploaded dataset.",
                    "path": temp_path,
                }
            ]
        }

        # Initialize the DuckDbAgent for SQL query generation
        duckdb_agent = DuckDbAgent(
            model=OpenAIChat(model="gpt-4", api_key=st.session_state.openai_key),  # type: ignore
            semantic_model=json.dumps(semantic_model),
            tools=[PandasTools()],  # type: ignore
            markdown=True,
            add_history_to_messages=False,  # Disable chat history
            followups=False,  # Disable follow-up queries
            read_tool_call_history=False,  # Disable reading tool call history
            system_prompt=(
                "You are an expert data analyst. Generate SQL queries to solve the user's query. "
                "Return only the SQL query, enclosed in ```sql ``` and give the final answer."
            ),
        )

        # Initialize code storage in session state
        # NOTE(review): "generated_code" is never read in this file — kept for
        # backward compatibility; confirm no other module depends on it.
        if "generated_code" not in st.session_state:
            st.session_state.generated_code = None

        # Main query input widget
        user_query = st.text_area("Ask a query about the data:")

        # Add info message about terminal output
        st.info("💡 Check your terminal for a clearer output of the agent's response")

        if st.button("Submit Query"):
            if user_query.strip() == "":
                st.warning("Please enter a query.")
            else:
                try:
                    # Show loading spinner while processing
                    with st.spinner("Processing your query..."):
                        # Get the response from DuckDbAgent for the UI.
                        response1 = duckdb_agent.run(user_query)

                        # Extract the content regardless of return shape.
                        response_content = response1.content if hasattr(response1, "content") else str(response1)

                        # Stream a second pass to the terminal (see st.info above).
                        # NOTE: this re-runs the agent (a second model call);
                        # fixed: its return value was previously bound to an
                        # unused variable.
                        duckdb_agent.print_response(  # type: ignore
                            user_query,
                            stream=True,
                        )

                    # Display the response in Streamlit
                    st.markdown(response_content)

                except Exception as e:
                    st.error(f"Error generating response from the DuckDbAgent: {e}")
                    st.error("Please try rephrasing your query or check if the data format is correct.")

0 commit comments

Comments
 (0)