From f61f2987c208aec600ce37e02a635fc9c75d5545 Mon Sep 17 00:00:00 2001
From: fg-nava <189638926+fg-nava@users.noreply.github.com>
Date: Fri, 24 Jan 2025 12:31:11 -0800
Subject: [PATCH 1/3] feat: add dotenv support and improve setup documentation

---
 03-document-classification/README.md       | 55 +++++++++++++++++++++-
 03-document-classification/app.py          |  7 ++-
 03-document-classification/requirements.in |  3 +-
 3 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/03-document-classification/README.md b/03-document-classification/README.md
index 822dbba..8557ded 100644
--- a/03-document-classification/README.md
+++ b/03-document-classification/README.md
@@ -6,4 +6,57 @@ This prototype uses a multimodal LLM (GPT-4o) to automatically:
  - Determine what kind of evidence the document provides (e.g., proof of identity, residence, expenses, etc.)
  - Extract arbitrary key/value pairs from the document
 
-To run it, set OPENAI_API_KEY in your environment and then `streamlit run app.py`.
\ No newline at end of file
+## Prerequisites
+
+- An OpenAI API key with access to GPT-4o
+
+## Setup Instructions
+
+1. After cloning this repository, navigate to the project directory from the repository root:
+```bash
+cd 03-document-classification
+```
+
+2. Create and activate a Python virtual environment:
+```bash
+# Create the virtual environment
+python -m venv venv
+
+# Activate it (macOS/Linux; on Windows run: venv\Scripts\activate)
+source venv/bin/activate
+```
+
+3. Install the required packages:
+```bash
+pip install python-dotenv streamlit openai
+```
+
+4. Create a `.env` file in the project directory and add your OpenAI API key:
+```bash
+echo "OPENAI_API_KEY=your-api-key-here" > .env
+```
+Replace `your-api-key-here` with your actual OpenAI API key. The `.env` file contains a secret, so do not commit it to version control.
+
+5. Run the Streamlit app:
+```bash
+streamlit run app.py
+```
+
+The app should now be running and accessible at http://localhost:8501 (or another port if 8501 is in use).
+
+## Using the App
+
+1. Open the app in your web browser
+2. Use the file uploader to upload one or more document images
+3. Click "Process documents" to analyze them
+
+## Troubleshooting
+
+If you encounter any issues:
+
+1. Make sure your virtual environment is activated (you should see `(venv)` in your terminal)
+2. Verify your OpenAI API key is correct and has access to GPT-4o
+3. Check that all required packages are installed by running:
+   ```bash
+   pip install -r requirements.in
+   ```
\ No newline at end of file
diff --git a/03-document-classification/app.py b/03-document-classification/app.py
index 2a29b7e..f0c453f 100644
--- a/03-document-classification/app.py
+++ b/03-document-classification/app.py
@@ -9,10 +9,15 @@
 import json
 import time
 from functools import wraps
+from dotenv import load_dotenv
+import os
 
 import openai
 import streamlit as st
 
+# Load variables from a local .env file, if present (already-set environment variables take precedence)
+load_dotenv()
+
 if __name__ == "__main__":
     if "__streamlitmagic__" not in locals():
         import streamlit.web.bootstrap
@@ -25,7 +30,7 @@
 
 @st.cache_resource
 def get_client():
-    return openai.OpenAI()
+    return openai.OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
 
 
 PROMPT = """Please review the attached document and respond with a JSON object matching the DocumentAnalysis type definition provided below. Do not respond with anything else besides the DocumentAnalysis JSON object.
diff --git a/03-document-classification/requirements.in b/03-document-classification/requirements.in
index 7f355e2..00872a3 100644
--- a/03-document-classification/requirements.in
+++ b/03-document-classification/requirements.in
@@ -1,2 +1,3 @@
 openai
-streamlit
\ No newline at end of file
+streamlit
+python-dotenv
\ No newline at end of file

From 8fc83ddd52d0ab9b993009a856c0ea8ba10d7e13 Mon Sep 17 00:00:00 2001
From: fg-nava <189638926+fg-nava@users.noreply.github.com>
Date: Fri, 24 Jan 2025 12:35:33 -0800
Subject: [PATCH 2/3] style: fix string quotes to match linting rules

---
 03-document-classification/app.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/03-document-classification/app.py b/03-document-classification/app.py
index f0c453f..44bee53 100644
--- a/03-document-classification/app.py
+++ b/03-document-classification/app.py
@@ -30,7 +30,7 @@
 
 @st.cache_resource
 def get_client():
-    return openai.OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
+    return openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 
 
 PROMPT = """Please review the attached document and respond with a JSON object matching the DocumentAnalysis type definition provided below. Do not respond with anything else besides the DocumentAnalysis JSON object.

From d32c4d0cab07e4e8b23c5065ce91b544886e6b04 Mon Sep 17 00:00:00 2001
From: fg-nava <189638926+fg-nava@users.noreply.github.com>
Date: Fri, 24 Jan 2025 12:37:01 -0800
Subject: [PATCH 3/3] style: fix operator spacing to match linting rules

---
 02-household-queries/dspy_engine.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/02-household-queries/dspy_engine.py b/02-household-queries/dspy_engine.py
index ceab608..57ad12e 100755
--- a/02-household-queries/dspy_engine.py
+++ b/02-household-queries/dspy_engine.py
@@ -104,7 +104,7 @@ def run_retrieval(query, retrieve_k):
 
     print(f"Top {retrieve.k} passages for query: {query} \n", "-" * 30, "\n")
     for i, passage in enumerate(topK_passages):
-        print(f"[{i+1}]", passage, "\n")
+        print(f"[{i + 1}]", passage, "\n")
     return retrieval
 
 
@@ -315,7 +315,7 @@ def print_eval_table(eval_score, results):
         print(score, "|", ex.q_id, "|", ex.answer, "|", pred.get("answer", "")[:20])
     print()
     print("score:", eval_score)
-    print(f"{correct_count} ({int(correct_count/len(results)*100)}%) correct")
+    print(f"{correct_count} ({int(correct_count / len(results) * 100)}%) correct")
     print("--------------------------------------")