|
4 | 4 | "cell_type": "markdown",
|
5 | 5 | "metadata": {},
|
6 | 6 | "source": [
|
7 |
| - "# Structured Generation from Documents Using Vision Language Models\n", |
| 7 | + "# Structured Generation from Images or Documents Using Vision Language Models\n", |
8 | 8 | "\n",
|
9 | 9 | "We will be using the SmolVLM-Instruct model from HuggingFaceTB to extract structured information from documents. We will run the VLM using the HuggingFace Transformers library and the [Outlines library](https://github.com/dottxt-ai/outlines), which facilitates structured generation based on limiting token sampling probabilities. \n",
|
10 | 10 | "\n",
|
|
132 | 132 | },
|
133 | 133 | {
|
134 | 134 | "cell_type": "code",
|
135 |
| - "execution_count": 16, |
| 135 | + "execution_count": 19, |
136 | 136 | "metadata": {},
|
137 | 137 | "outputs": [],
|
138 | 138 | "source": [
|
|
188 | 188 | },
|
189 | 189 | {
|
190 | 190 | "cell_type": "code",
|
191 |
| - "execution_count": 17, |
| 191 | + "execution_count": 20, |
192 | 192 | "metadata": {},
|
193 | 193 | "outputs": [
|
| 194 | + { |
| 195 | + "name": "stderr", |
| 196 | + "output_type": "stream", |
| 197 | + "text": [ |
| 198 | + "/Users/davidberenstein/Documents/programming/huggingface/cookbook/.venv/lib/python3.11/site-packages/dill/_dill.py:414: PicklingWarning: Cannot locate reference to <class '__main__.ImageData'>.\n", |
| 199 | + " StockPickler.save(self, obj, save_persistent_id)\n", |
| 200 | + "/Users/davidberenstein/Documents/programming/huggingface/cookbook/.venv/lib/python3.11/site-packages/dill/_dill.py:414: PicklingWarning: Cannot pickle <class '__main__.ImageData'>: __main__.ImageData has recursive self-references that trigger a RecursionError.\n", |
| 201 | + " StockPickler.save(self, obj, save_persistent_id)\n" |
| 202 | + ] |
| 203 | + }, |
194 | 204 | {
|
195 | 205 | "data": {
|
196 | 206 | "application/vnd.jupyter.widget-view+json": {
|
197 |
| - "model_id": "1caa96c32bc7416ea43c192c0cd88c20", |
| 207 | + "model_id": "e1d431b922334b0297195415a11cf68a", |
198 | 208 | "version_major": 2,
|
199 | 209 | "version_minor": 0
|
200 | 210 | },
|
|
214 | 224 | "})"
|
215 | 225 | ]
|
216 | 226 | },
|
217 |
| - "execution_count": 17, |
| 227 | + "execution_count": 20, |
218 | 228 | "metadata": {},
|
219 | 229 | "output_type": "execute_result"
|
220 | 230 | }
|
|
252 | 262 | },
|
253 | 263 | {
|
254 | 264 | "cell_type": "code",
|
255 |
| - "execution_count": 18, |
| 265 | + "execution_count": 21, |
256 | 266 | "metadata": {},
|
257 | 267 | "outputs": [
|
258 | 268 | {
|
259 | 269 | "data": {
|
260 | 270 | "application/vnd.jupyter.widget-view+json": {
|
261 |
| - "model_id": "843b9c88cab54402812f1b936a2dc6e0", |
| 271 | + "model_id": "ab88b1b3bb1441498788bdc2c2b4cf30", |
262 | 272 | "version_major": 2,
|
263 | 273 | "version_minor": 0
|
264 | 274 | },
|
|
272 | 282 | {
|
273 | 283 | "data": {
|
274 | 284 | "application/vnd.jupyter.widget-view+json": {
|
275 |
| - "model_id": "57e5dea4ae504866b2d93863bcfa4408", |
| 285 | + "model_id": "e5e359d02ede43959e92a9e5626f9ffd", |
276 | 286 | "version_major": 2,
|
277 | 287 | "version_minor": 0
|
278 | 288 | },
|
|
286 | 296 | {
|
287 | 297 | "data": {
|
288 | 298 | "application/vnd.jupyter.widget-view+json": {
|
289 |
| - "model_id": "b811febb7c044100bb74bf67016f0d0d", |
| 299 | + "model_id": "9f7f07dad09f47c5a8dfdeba403845f6", |
290 | 300 | "version_major": 2,
|
291 | 301 | "version_minor": 0
|
292 | 302 | },
|
|
300 | 310 | {
|
301 | 311 | "data": {
|
302 | 312 | "application/vnd.jupyter.widget-view+json": {
|
303 |
| - "model_id": "1fa44296ea00459b8cbb22e56739117c", |
| 313 | + "model_id": "e47600c765b64b55aa6f93e9cf5d077e", |
304 | 314 | "version_major": 2,
|
305 | 315 | "version_minor": 0
|
306 | 316 | },
|
|
314 | 324 | {
|
315 | 325 | "data": {
|
316 | 326 | "text/plain": [
|
317 |
| - "CommitInfo(commit_url='https://huggingface.co/datasets/davidberenstein1957/structured-generation-information-extraction-vlms-openbmb-RLAIF-V-Dataset/commit/f72002df2d9aef403afeaf6e27f4407ddd82c89c', commit_message='Upload dataset', commit_description='', oid='f72002df2d9aef403afeaf6e27f4407ddd82c89c', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/davidberenstein1957/structured-generation-information-extraction-vlms-openbmb-RLAIF-V-Dataset', endpoint='https://huggingface.co', repo_type='dataset', repo_id='davidberenstein1957/structured-generation-information-extraction-vlms-openbmb-RLAIF-V-Dataset'), pr_revision=None, pr_num=None)" |
| 327 | + "CommitInfo(commit_url='https://huggingface.co/datasets/davidberenstein1957/structured-generation-information-extraction-vlms-openbmb-RLAIF-V-Dataset/commit/373d6a25e8301077773fc6a37899b1598cf6f8cd', commit_message='Upload dataset', commit_description='', oid='373d6a25e8301077773fc6a37899b1598cf6f8cd', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/davidberenstein1957/structured-generation-information-extraction-vlms-openbmb-RLAIF-V-Dataset', endpoint='https://huggingface.co', repo_type='dataset', repo_id='davidberenstein1957/structured-generation-information-extraction-vlms-openbmb-RLAIF-V-Dataset'), pr_revision=None, pr_num=None)" |
318 | 328 | ]
|
319 | 329 | },
|
320 |
| - "execution_count": 18, |
| 330 | + "execution_count": 21, |
321 | 331 | "metadata": {},
|
322 | 332 | "output_type": "execute_result"
|
323 | 333 | }
|
|
0 commit comments