
Commit b94a08e

Merge pull request #50 from souvikmajumder26/dev
Updated installation using docker
2 parents c4f6e05 + 7a0c8a8 commit b94a08e

5 files changed: 256 additions, 42 deletions


Dockerfile

Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
# Use Python 3.11 as the base image
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    ffmpeg \
    poppler-utils \
    tesseract-ocr \
    git \
    curl \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Clone the repository
RUN git clone https://github.com/souvikmajumder26/Multi-Agent-Medical-Assistant.git .

# Create necessary directories for data persistence and uploads
RUN mkdir -p data/processed data/qdrantdb uploads/backend uploads/skin_lesion_output

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
RUN pip install --no-cache-dir unstructured[pdf]

# Expose the port that FastAPI will run on
EXPOSE 8000

# Volumes for data persistence
VOLUME ["/app/data/processed", "/app/data/qdrantdb", "/app/uploads"]

# Define healthcheck
HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# Create entry point script
COPY entrypoint.sh /app/entrypoint.sh
RUN chmod +x /app/entrypoint.sh

ENTRYPOINT ["/app/entrypoint.sh"]
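
For reference, the image defined above can also be built and run without Docker Compose. The commands below are a minimal sketch, not part of this commit: the `medical-assistant` tag is an arbitrary name, and a populated `.env` file in the working directory is assumed. The trailing argument is forwarded to the `entrypoint.sh` script added later in this commit.

```bash
# Build the image from the Dockerfile above (the tag name is illustrative)
docker build -t medical-assistant .

# Run only the FastAPI backend; the last argument is passed to entrypoint.sh,
# which chooses what to start (api, app, all, or ingest)
docker run --rm --env-file .env -p 8000:8000 medical-assistant api
```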

README.md

Lines changed: 111 additions & 42 deletions
@@ -27,6 +27,8 @@
 - [Key Features](#key-features)
 - [Tech Stack](#technology-stack)
 - [Installation and Setup](#installation-setup)
+- [Using Docker](#docker-setup)
+- [Manual Installation](#manual-setup)
 - [Usage](#usage)
 - [Contributions](#contributions)
 - [License](#license)
@@ -161,6 +163,85 @@ If you like what you see and would want to support the project's developer, you

 ## 🚀 Installation & Setup <a name="installation-setup"></a>

+## 📌 Option 1: Using Docker <a name="docker-setup"></a>
+
+### 1️⃣ Clone the Repository
+```bash
+git clone https://github.com/souvikmajumder26/Multi-Agent-Medical-Assistant.git
+cd Multi-Agent-Medical-Assistant
+```
+
+### 2️⃣ Set Up API Keys
+- Create a `.env` file and add the following API keys:
+
+> [!NOTE]
+> You may use any LLM and embedding model of your choice:
+> 1. If using Azure OpenAI, no modification is required.
+> 2. If using direct OpenAI, modify the LLM and embedding model definitions in `config.py` and provide the appropriate environment variables.
+> 3. If using local models, code changes may be required throughout the codebase, especially in `agents`.
+
+> [!WARNING]
+> If all the necessary environment variables are not provided, errors will be thrown in the console.
+
+```bash
+# LLM Configuration (Azure OpenAI - gpt-4o used in development)
+# If using any other LLM API key or a local LLM, appropriate code modification is required
+deployment_name =
+model_name = gpt-4o
+azure_endpoint =
+openai_api_key =
+openai_api_version =
+
+# Embedding Model Configuration (Azure OpenAI - text-embedding-ada-002 used in development)
+# If using any other embedding model, appropriate code modification is required
+embedding_deployment_name =
+embedding_model_name = text-embedding-ada-002
+embedding_azure_endpoint =
+embedding_openai_api_key =
+embedding_openai_api_version =
+
+# Speech API Key (free credits available with a new Eleven Labs account)
+ELEVEN_LABS_API_KEY =
+
+# Web Search API Key (free credits available with a new Tavily account)
+TAVILY_API_KEY =
+
+# Hugging Face Token - used for the reranker model "ms-marco-TinyBERT-L-6"
+HUGGINGFACE_TOKEN =
+
+# (OPTIONAL) Only required if using the Qdrant server version; the local version does not need an API key
+QDRANT_URL =
+QDRANT_API_KEY =
+```
+
+### 3️⃣ Run with Docker Compose
+```bash
+docker-compose up -d
+```
+This will start two services:
+
+- fastapi-backend: Runs the FastAPI backend on port 8000
+- main-app: Runs the main application (app.py)
+
+### 4️⃣ Ingest Data into the Vector DB
+```bash
+# To ingest a single file
+docker-compose run --rm fastapi-backend ingest --file ./data/raw/your_file.pdf
+
+# To ingest all files in a directory
+docker-compose run --rm fastapi-backend ingest --dir ./data/raw
+```
+
+### 5️⃣ Access the Application
+The application will be available at: `http://localhost:8000`
+
+### 6️⃣ Stopping the Application
+```bash
+docker-compose down
+```
+
+## 📌 Option 2: Manual Installation <a name="manual-setup"></a>
+
 ### 1️⃣ Clone the Repository
 ```bash
 git clone https://github.com/souvikmajumder26/Multi-Agent-Medical-Assistant.git
@@ -233,53 +314,13 @@ winget install ffmpeg
 ```bash
 pip install -r requirements.txt
 ```
-- Might be required, might not be:
+- Might be required:
 ```bash
 pip install unstructured[pdf]
 ```

 ### 4️⃣ Set Up API Keys
-- Create a `.env` file and add the following API keys:
-
-> [!NOTE]
-> You may use any llm and embedding model of your choice...
-> 1. If using Azure OpenAI, no modification required.
-> 2. If using direct OpenAI, modify the llm and embedding model definitions in the 'config.py' and provide appropriate env variables.
-> 3. If using local models, appropriate code changes might be required throughout the codebase especially in 'agents'.
-
-> [!WARNING]
-> If all necessary env variables are not provided, errors will be thrown in console.
-
-```bash
-# LLM Configuration (Azure Open AI - gpt-4o used in development)
-# If using any other LLM API key or local LLM, appropriate code modification is required
-deployment_name =
-model_name = gpt-4o
-azure_endpoint =
-openai_api_key =
-openai_api_version =
-
-# Embedding Model Configuration (Azure Open AI - text-embedding-ada-002 used in development)
-# If using any other embedding model, appropriate code modification is required
-embedding_deployment_name =
-embedding_model_name = text-embedding-ada-002
-embedding_azure_endpoint =
-embedding_openai_api_key =
-embedding_openai_api_version =
-
-# Speech API Key (Free credits available with new Eleven Labs Account)
-ELEVEN_LABS_API_KEY =
-
-# Web Search API Key (Free credits available with new Tavily Account)
-TAVILY_API_KEY =
-
-# Hugging Face Token - using reranker model "ms-marco-TinyBERT-L-6"
-HUGGINGFACE_TOKEN =
-
-# (OPTIONAL) If using Qdrant server version, local does not require API key
-QDRANT_URL =
-QDRANT_API_KEY =
-```
+- Create a `.env` file and add the required API keys as shown in `Option 1`.

 ### 5️⃣ Run the Application
 - Run the following commands one after another in separate windows, using the same directory and virtual environment. Keep both running simultaneously.
@@ -303,6 +344,34 @@ python ingest_rag_data.py --dir ./data/raw

 ---

+## Docker Related Information
+
+### Data Persistence
+
+The vector database data is stored in Docker volumes:
+
+- `vector-db-processed`: Contains data from the `data/processed` directory
+- `vector-db-qdrant`: Contains data from the `data/qdrantdb` directory
+- `upload-data`: Contains uploaded files in the `uploads` directory
+
+This ensures your data persists even if you remove the containers.
+
+### Troubleshooting Docker Setup
+
+- If the containers aren't starting properly, check the logs:
+```bash
+docker-compose logs fastapi-backend
+docker-compose logs main-app
+```
+- Make sure all required environment variables are set in the `.env` file.
+- To completely clean up and restart:
+```bash
+docker-compose down -v
+docker-compose up -d --build
+```
+
+---
+
 ## 🧠 Usage <a name="usage"></a>

 > [!NOTE]
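
As a quick sanity check of the Compose setup described in the README changes above, the following commands (not part of this commit) show whether both services came up and whether the backend passed its healthcheck:

```bash
# List the compose services and their current state / health
docker-compose ps

# Follow the backend logs while it starts
docker-compose logs -f fastapi-backend
```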

api/fastapi_backend.py

Lines changed: 6 additions & 0 deletions
@@ -26,6 +26,12 @@
 class QueryRequest(BaseModel):
     query: str

+# Add a health check endpoint
+@app.get("/health")
+def health_check():
+    """Health check endpoint for Docker health checks"""
+    return {"status": "healthy"}
+
 @app.post("/chat")
 def chat(request: QueryRequest, response: Response, request_obj: Request):
     """Process user text query through the multi-agent system."""

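The new `/health` route is the target of the healthchecks defined in the Dockerfile and docker-compose.yml. A minimal manual probe from the host, assuming the backend container is running with port 8000 published:

```bash
# Same probe the container HEALTHCHECK runs; -f makes curl exit non-zero
# on an HTTP error, which Docker treats as an unhealthy check
curl -f http://localhost:8000/health
# Expected response body (roughly): {"status":"healthy"}
```
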
docker-compose.yml

Lines changed: 70 additions & 0 deletions
@@ -0,0 +1,70 @@
version: '3.8'

services:
  fastapi-backend:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "8000:8000"
    environment:
      - deployment_name=${deployment_name}
      - model_name=${model_name}
      - azure_endpoint=${azure_endpoint}
      - openai_api_key=${openai_api_key}
      - openai_api_version=${openai_api_version}
      - embedding_deployment_name=${embedding_deployment_name}
      - embedding_model_name=${embedding_model_name}
      - embedding_azure_endpoint=${embedding_azure_endpoint}
      - embedding_openai_api_key=${embedding_openai_api_key}
      - embedding_openai_api_version=${embedding_openai_api_version}
      - ELEVEN_LABS_API_KEY=${ELEVEN_LABS_API_KEY}
      - TAVILY_API_KEY=${TAVILY_API_KEY}
      - HUGGINGFACE_TOKEN=${HUGGINGFACE_TOKEN}
      - QDRANT_URL=${QDRANT_URL}
      - QDRANT_API_KEY=${QDRANT_API_KEY}
    volumes:
      - vector-db-processed:/app/data/processed
      - vector-db-qdrant:/app/data/qdrantdb
      - upload-data:/app/uploads
    command: api
    healthcheck:
      test: curl -f http://localhost:8000/health || exit 1
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 15s

  main-app:
    build:
      context: .
      dockerfile: Dockerfile
    depends_on:
      fastapi-backend:
        condition: service_healthy
    environment:
      - deployment_name=${deployment_name}
      - model_name=${model_name}
      - azure_endpoint=${azure_endpoint}
      - openai_api_key=${openai_api_key}
      - openai_api_version=${openai_api_version}
      - embedding_deployment_name=${embedding_deployment_name}
      - embedding_model_name=${embedding_model_name}
      - embedding_azure_endpoint=${embedding_azure_endpoint}
      - embedding_openai_api_key=${embedding_openai_api_key}
      - embedding_openai_api_version=${embedding_openai_api_version}
      - ELEVEN_LABS_API_KEY=${ELEVEN_LABS_API_KEY}
      - TAVILY_API_KEY=${TAVILY_API_KEY}
      - HUGGINGFACE_TOKEN=${HUGGINGFACE_TOKEN}
      - QDRANT_URL=${QDRANT_URL}
      - QDRANT_API_KEY=${QDRANT_API_KEY}
    volumes:
      - vector-db-processed:/app/data/processed
      - vector-db-qdrant:/app/data/qdrantdb
      - upload-data:/app/uploads
    command: app

volumes:
  vector-db-processed:
  vector-db-qdrant:
  upload-data:
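
A note on the named volumes above: Compose prefixes them with the project name, which by default is the lowercased directory name, so the actual volume names on a given machine may differ from what is written here. A hedged sketch for inspecting them:

```bash
# List the volumes Compose created (the prefix depends on the project name)
docker volume ls

# Inspect where a volume's data lives on the host (volume name is illustrative)
docker volume inspect multi-agent-medical-assistant_vector-db-qdrant

# Named volumes survive `docker-compose down`; `docker-compose down -v` removes them
```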

entrypoint.sh

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
#!/bin/bash

# First argument determines which service to run
if [ "$1" = "api" ]; then
    echo "Starting FastAPI backend..."
    exec uvicorn api.fastapi_backend:app --host 0.0.0.0 --port 8000
elif [ "$1" = "app" ]; then
    echo "Starting main application..."
    exec python app.py
elif [ "$1" = "all" ]; then
    echo "Starting both services..."
    uvicorn api.fastapi_backend:app --host 0.0.0.0 --port 8000 &
    python app.py
    wait
elif [ "$1" = "ingest" ]; then
    echo "Ingesting data..."
    if [ "$2" = "--file" ]; then
        exec python ingest_rag_data.py --file "$3"
    elif [ "$2" = "--dir" ]; then
        exec python ingest_rag_data.py --dir "$3"
    else
        echo "Please provide valid arguments: --file or --dir"
        exit 1
    fi
else
    echo "Please specify which service to run: api, app, all, or ingest"
    exit 1
fi
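
Since Compose passes each service's `command:` value as arguments to this ENTRYPOINT, `command: api` effectively runs `/app/entrypoint.sh api`, and the README's ingestion command lands in the `ingest` branch above. The sketch below illustrates that mapping; the image tag is an arbitrary example, not defined by this commit:

```bash
# `docker-compose run --rm fastapi-backend ingest --dir ./data/raw` ends up executing
#   /app/entrypoint.sh ingest --dir ./data/raw
# which in turn runs
#   python ingest_rag_data.py --dir ./data/raw

# The `all` mode starts the backend and app.py together in one container,
# e.g. with a directly built image (tag is illustrative):
docker run --rm --env-file .env -p 8000:8000 medical-assistant all
```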
