Merge pull request #12 from akashmadisetty/main

adithya-s-k · web-flow · commit 078010c7f083 · 2025-07-23T22:46:12.000+05:30
Changed the Gemini model to gemini-2.5-flash
diff --git a/demo.py b/demo.py
@@ -59,15 +59,15 @@
 # Initialize VLM
 gemini_api_key = os.getenv("GEMINI_API_KEY")
 
-# Initialize LLM and VLM with Groq by default
+# Initialize LLM and VLM with LiteLLM by default
 if gemini_api_key:
-    gemini_model = "gemini/gemini-2.5-flash-preview-04-17"
+    gemini_model = "gemini/gemini-2.5-flash"
     gem_llm = LiteLLM(model=gemini_model, api_key=gemini_api_key, verbose=False)
     gem_vlm = LiteLLMVLM(model=gemini_model, api_key=gemini_api_key, verbose=False)
 
     llm = gem_llm
     vlm = gem_vlm
-    print(f"Using Groq with model: {gemini_model}")
+    print(f"Using LiteLLM with model: {gemini_model}")
 else:
     # For backward compatibility, use the existing initialization
     vlm = OpenAI()
diff --git a/docs/demo.ipynb b/docs/demo.ipynb
@@ -37,7 +37,7 @@
       },
       "outputs": [],
       "source": [
-        "!git clone https://github.com/akashmadisetty/VARAG\n",
+        "!git clone https://github.com/adithya-s-k/VARAG\n",
         "%cd VARAG\n",
         "%pwd"
       ]
@@ -148,6 +148,7 @@
       },
       "outputs": [],
       "source": [
+        "# Run the VARAG demo without interpretability maps\n",
         "!python demo.py --share"
       ]
     },
@@ -159,6 +160,21 @@
       "source": [
         "For exploring colpali interpretation check out codes in examples/inference_colpali"
       ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "X6n3jqsV7aVG",
+        "vscode": {
+          "languageId": "plaintext"
+        }
+      },
+      "outputs": [],
+      "source": [
+        "# Run the demo with the ColPali interpretability-maps feature enabled\n",
+        "!python examples/inference_colpali/demo_with_heatmaps.py --share"
+      ]
     }
   ],
   "metadata": {
diff --git a/examples/inference_colpali/demo_with_heatmaps.py b/examples/inference_colpali/demo_with_heatmaps.py
@@ -89,7 +89,7 @@
 
 # Initialize LLM and VLM with Groq by default
 if gemini_api_key:
-    gemini_model = "gemini/gemini-2.5-flash-preview-04-17"
+    gemini_model = "gemini/gemini-2.5-flash"
     gem_llm = LiteLLM(model=gemini_model, api_key=gemini_api_key, verbose=False)
     gem_vlm = LiteLLMVLM(model=gemini_model, api_key=gemini_api_key, verbose=False)
 
diff --git a/examples/inference_colpali/modal_demo_heatmaps_comparing_colpali_models.py b/examples/inference_colpali/modal_demo_heatmaps_comparing_colpali_models.py
@@ -25,7 +25,7 @@
     modal.Image.from_registry(f"nvidia/cuda:{tag}", add_python="3.11")
     .apt_install("git")
     .run_commands([
-        "git clone https://github.com/akashmadisetty/VARAG",
+        "git clone https://github.com/adithya-s-k/VARAG",
         "cd VARAG && pip install -e ."
     ])
     .pip_install("colpali-engine[interpretability]")
@@ -53,7 +53,7 @@
 @app.function(
     image=inference_image,
     gpu="L4",  # Use powerful GPU for unoptimized version
-    timeout=7200,  # 2 hour timeout
+    timeout=3600,  # 1 hour timeout
     volumes={
         VOLUME_PATH: col_vol,
     },
@@ -695,7 +695,7 @@ def compare_colpali_models(query, colpali_images):
             
             # Generate aggregated comparison heatmaps for each image
             aggregated_comparison_images = []
-            for i, image in enumerate(colpali_images[:3]):  # Limit to first 3 images for aggregated view
+            for i, image in enumerate(colpali_images[:10]):  # Limit to first 10 images for aggregated view
                 print(f"🔄 Generating aggregated heatmaps for image {i+1}...")
                 comparison_result, base_meta, finetuned_meta = create_aggregated_comparison_heatmaps(
                     base_generator, finetuned_generator, image, query