| 
7 | 7 |     "### Hybrid Colpali RAG using [VARAG](https://github.com/adithya-s-k/VARAG)\n",  | 
8 | 8 |     "\n",  | 
9 | 9 |     "\n",  | 
10 |  | -    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/adithya-s-k/CompanionLLadithya-s-k/VARAG/blob/main/docs/hybridColpaliRAG.ipynb)\n",  | 
 | 10 | +    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/adithya-s-k/VARAG/blob/main/docs/hybridColpaliRAG.ipynb)\n",  | 
11 | 11 |     "\n",  | 
12 | 12 |     "Requirement to run this notebook: a T4 GPU at minimum"  | 
13 | 13 |    ]  | 
14 | 14 |   },  | 
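 |  | +  {  | 
 |  | +   "cell_type": "markdown",  | 
 |  | +   "metadata": {},  | 
 |  | +   "source": [  | 
 |  | +    "Optional sanity check (an addition for convenience, not part of VARAG itself): confirm the runtime actually exposes a CUDA GPU before installing anything."  | 
 |  | +   ]  | 
 |  | +  },  | 
 |  | +  {  | 
 |  | +   "cell_type": "code",  | 
 |  | +   "execution_count": null,  | 
 |  | +   "metadata": {},  | 
 |  | +   "outputs": [],  | 
 |  | +   "source": [  | 
 |  | +    "# Show the GPU the runtime exposes; expect at least a T4 here\n",  | 
 |  | +    "!nvidia-smi"  | 
 |  | +   ]  | 
 |  | +  },  | 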
 | 15 | +  {  | 
 | 16 | +   "cell_type": "code",  | 
 | 17 | +   "execution_count": null,  | 
 | 18 | +   "metadata": {},  | 
 | 19 | +   "outputs": [],  | 
 | 20 | +   "source": [  | 
 | 21 | +    "!git clone https://github.com/adithya-s-k/VARAG\n",  | 
 | 22 | +    "%cd VARAG\n",  | 
 | 23 | +    "%pwd"  | 
 | 24 | +   ]  | 
 | 25 | +  },  | 
 | 26 | +  {  | 
 | 27 | +   "cell_type": "code",  | 
 | 28 | +   "execution_count": null,  | 
 | 29 | +   "metadata": {},  | 
 | 30 | +   "outputs": [],  | 
 | 31 | +   "source": [  | 
 |  | +    "# poppler-utils is required to render PDF pages as images\n",  | 
 | 32 | +    "!apt-get update && apt-get install -y poppler-utils"  | 
 | 33 | +   ]  | 
 | 34 | +  },  | 
 | 35 | +  {  | 
 | 36 | +   "cell_type": "code",  | 
 | 37 | +   "execution_count": null,  | 
 | 38 | +   "metadata": {},  | 
 | 39 | +   "outputs": [],  | 
 | 40 | +   "source": [  | 
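 |  | +    "# Install VARAG and its dependencies from the cloned repo (editable mode)\n",  | 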
 | 41 | +    "%pip install -e ."  | 
 | 42 | +   ]  | 
 | 43 | +  },  | 
 | 44 | +  {  | 
 | 45 | +   "cell_type": "code",  | 
 | 46 | +   "execution_count": null,  | 
 | 47 | +   "metadata": {},  | 
 | 48 | +   "outputs": [],  | 
 | 49 | +   "source": [  | 
 | 50 | +    "from sentence_transformers import SentenceTransformer\n",  | 
 | 51 | +    "from varag.rag import HybridColpaliRAG\n",  | 
 | 52 | +    "from varag.llms import OpenAI\n",  | 
 | 53 | +    "from varag.utils import get_model_colpali\n",  | 
 | 54 | +    "import lancedb\n",  | 
 | 55 | +    "import os\n",  | 
 | 56 | +    "from dotenv import load_dotenv\n",  | 
 | 57 | +    "\n",  | 
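 |  | +    "# Set the key here, or define OPENAI_API_KEY in a .env file for load_dotenv() below\n",  | 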
 | 58 | +    "os.environ[\"OPENAI_API_KEY\"] = \"api-key\"\n",  | 
 | 59 | +    "\n",  | 
 | 60 | +    "load_dotenv()"  | 
 | 61 | +   ]  | 
 | 62 | +  },  | 
 | 63 | +  {  | 
 | 64 | +   "cell_type": "code",  | 
 | 65 | +   "execution_count": null,  | 
 | 66 | +   "metadata": {},  | 
 | 67 | +   "outputs": [],  | 
 | 68 | +   "source": [  | 
 | 69 | +    "shared_db = lancedb.connect(\"~/shared_rag_db\")\n",  | 
 | 70 | +    "\n",  | 
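 |  | +    "# ColPali supplies multi-vector (late-interaction) page embeddings; jina-clip-v1\n",  | 
 |  | +    "# supplies single-vector image embeddings for the fast first-stage search\n",  | 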
 | 71 | +    "model, processor = get_model_colpali(\"vidore/colpali-v1.2\")\n",  | 
 | 72 | +    "embedding_model = SentenceTransformer(\"jinaai/jina-clip-v1\", trust_remote_code=True)\n",  | 
 | 73 | +    "\n",  | 
 | 74 | +    "colpali_hybrid_rag = HybridColpaliRAG(\n",  | 
 | 75 | +    "    colpali_model=model,\n",  | 
 | 76 | +    "    colpali_processor=processor,\n",  | 
 | 77 | +    "    db=shared_db,\n",  | 
 | 78 | +    "    image_embedding_model=embedding_model,\n",  | 
 | 79 | +    "    table_name=\"hybridColpaliDemo\",\n",  | 
 | 80 | +    ")\n",  | 
 | 81 | +    "\n",  | 
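 |  | +    "# Vision-language model used to answer over the retrieved page images\n",  | 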
 | 82 | +    "vlm = OpenAI()"  | 
 | 83 | +   ]  | 
 | 84 | +  },  | 
 | 85 | +  {  | 
 | 86 | +   "cell_type": "code",  | 
 | 87 | +   "execution_count": null,  | 
 | 88 | +   "metadata": {},  | 
 | 89 | +   "outputs": [],  | 
 | 90 | +   "source": [  | 
 |  | +    "# Index all PDFs in the folder; embeddings are stored in the shared LanceDB table\n",  | 
 | 91 | +    "colpali_hybrid_rag.index(\n",  | 
 | 92 | +    "    \"./examples/data\",\n",  | 
 | 93 | +    "    overwrite=False,\n",  | 
 | 94 | +    "    recursive=False,\n",  | 
 | 95 | +    "    verbose=True,\n",  | 
 | 96 | +    ")"  | 
 | 97 | +   ]  | 
 | 98 | +  },  | 
 | 99 | +  {  | 
 | 100 | +   "cell_type": "code",  | 
 | 101 | +   "execution_count": null,  | 
 | 102 | +   "metadata": {},  | 
 | 103 | +   "outputs": [],  | 
 | 104 | +   "source": [  | 
 | 105 | +    "query = \"What is Colpali\"\n",  | 
 | 106 | +    "num_results = 5\n",  | 
 | 107 | +    "\n",  | 
 | 108 | +    "results = colpali_hybrid_rag.search(query, k=num_results)\n",  | 
 | 109 | +    "\n",  | 
 | 110 | +    "images = [result[\"image\"] for result in results]\n",  | 
 | 111 | +    "\n",  | 
 | 112 | +    "# Display the images\n",  | 
 | 113 | +    "for i, img in enumerate(images, 1):\n",  | 
 | 114 | +    "    print(f\"Image {i}:\")\n",  | 
 | 115 | +    "    display(img)"  | 
 | 116 | +   ]  | 
 | 117 | +  },  | 
 | 118 | +  {  | 
 | 119 | +   "cell_type": "code",  | 
 | 120 | +   "execution_count": null,  | 
 | 121 | +   "metadata": {},  | 
 | 122 | +   "outputs": [],  | 
 | 123 | +   "source": [  | 
 | 124 | +    "from IPython.display import display, Markdown\n",  | 
 | 125 | +    "\n",  | 
 |  | +    "# Answer the query using the retrieved page images as visual context\n",  | 
 | 127 | +    "response = vlm.query(query, images, max_tokens=1000)\n",  | 
 | 129 | +    "\n",  | 
 | 130 | +    "display(Markdown(response))"  | 
 | 131 | +   ]  | 
 | 132 | +  },  | 
15 | 133 |   {  | 
16 | 134 |    "cell_type": "markdown",  | 
17 | 135 |    "metadata": {},  | 
18 |  | -   "source": []  | 
 | 136 | +   "source": [  | 
 | 137 | +    "### Run Gradio Demo"  | 
 | 138 | +   ]  | 
 | 139 | +  },  | 
 | 140 | +  {  | 
 | 141 | +   "cell_type": "code",  | 
 | 142 | +   "execution_count": null,  | 
 | 143 | +   "metadata": {},  | 
 | 144 | +   "outputs": [],  | 
 | 145 | +   "source": [  | 
 | 146 | +    "%cd examples\n",  | 
 | 147 | +    "!python hybridColpaliDemo.py --share"  | 
 | 148 | +   ]  | 
19 | 149 |   }  | 
20 | 150 |  ],  | 
21 | 151 |  "metadata": {  | 
 | 152 | +  "kernelspec": {  | 
 | 153 | +   "display_name": "base",  | 
 | 154 | +   "language": "python",  | 
 | 155 | +   "name": "python3"  | 
 | 156 | +  },  | 
22 | 157 |   "language_info": {  | 
23 |  | -   "name": "python"  | 
 | 158 | +   "name": "python",  | 
 | 159 | +   "version": "3.11.9"  | 
24 | 160 |   }  | 
25 | 161 |  },  | 
26 | 162 |  "nbformat": 4,  | 
 | 