Skip to content

Commit b834e8c

Browse files
jerryjliuJerry Liu
and
Jerry Liu
authored
refactors: index to query mapping, composability (#328)
Co-authored-by: Jerry Liu <jerry@robustintelligence.com>
1 parent 64c6480 commit b834e8c

File tree

31 files changed

+648
-262
lines changed

31 files changed

+648
-262
lines changed

docs/how_to/composability.md

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,28 @@ list_index = GPTListIndex([index1, index2, index3])
6060
![](/_static/composability/diagram.png)
6161

6262

63+
### Defining a Graph Structure
64+
65+
66+
Finally, we define a `ComposableGraph` to "wrap" the composed index graph.
67+
We can do this by simply feeding in the top-level index.
68+
This wrapper allows us to query, save, and load the graph to/from disk.
69+
70+
```python
71+
72+
from gpt_index.composability import ComposableGraph
73+
74+
graph = ComposableGraph.build_from_index(list_index)
75+
76+
# [Optional] save to disk
77+
graph.save_to_disk("save_path.json")
78+
79+
# [Optional] load from disk
80+
graph = ComposableGraph.load_from_disk("save_path.json")
81+
82+
```
83+
84+
6385
### Querying the Top-Level Index
6486

6587
During a query, we would start with the top-level list index. Each node in the list corresponds to an underlying tree index.
@@ -86,7 +108,7 @@ query_configs = [
86108
},
87109
...
88110
]
89-
response = list_index.query("Where did the author grow up?", mode="recursive", query_configs=query_configs)
111+
response = graph.query("Where did the author grow up?", query_configs=query_configs)
90112
```
91113

92114
![](/_static/composability/diagram_q1.png)

docs/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ At the core of GPT Index is a **data structure**. Instead of relying on world kn
7878

7979
reference/indices.rst
8080
reference/query.rst
81+
reference/composability.rst
8182
reference/readers.rst
8283
reference/prompts.rst
8384
reference/llm_predictor.rst

docs/reference/composability.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
.. _Ref-Composability:
2+
3+
Composability
4+
=============
5+
6+
Below we show the API reference for composable data structures.
7+
8+
.. automodule:: gpt_index.composability
9+
:members:
10+
:inherited-members:

examples/composable_indices/ComposableIndices.ipynb

Lines changed: 104 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,17 @@
1212
},
1313
{
1414
"cell_type": "code",
15-
"execution_count": null,
15+
"execution_count": 1,
1616
"id": "e27b0473-4bda-47f0-b6ed-fd482eac1a13",
1717
"metadata": {},
1818
"outputs": [],
1919
"source": [
20-
"from gpt_index import GPTTreeIndex, GPTSimpleKeywordTableIndex, GPTListIndex, SimpleDirectoryReader"
20+
"from gpt_index import (\n",
21+
" GPTSimpleVectorIndex, \n",
22+
" GPTSimpleKeywordTableIndex, \n",
23+
" GPTListIndex, \n",
24+
" SimpleDirectoryReader\n",
25+
")"
2126
]
2227
},
2328
{
@@ -102,7 +107,7 @@
102107
"outputs": [],
103108
"source": [
104109
"# build NYC index\n",
105-
"nyc_index = GPTTreeIndex(nyc_documents)"
110+
"nyc_index = GPTSimpleVectorIndex(nyc_documents)"
106111
]
107112
},
108113
{
@@ -112,7 +117,7 @@
112117
"metadata": {},
113118
"outputs": [],
114119
"source": [
115-
"nyc_index.save_to_disk('../test_wiki/index.json')"
120+
"nyc_index.save_to_disk('index_nyc.json')"
116121
]
117122
},
118123
{
@@ -123,7 +128,7 @@
123128
"outputs": [],
124129
"source": [
125130
"# build essay index\n",
126-
"essay_index = GPTTreeIndex(essay_documents)"
131+
"essay_index = GPTSimpleVectorIndex(essay_documents)"
127132
]
128133
},
129134
{
@@ -133,7 +138,7 @@
133138
"metadata": {},
134139
"outputs": [],
135140
"source": [
136-
"essay_index.save_to_disk('../paul_graham_essay/index.json')"
141+
"essay_index.save_to_disk('index_pg.json')"
137142
]
138143
},
139144
{
@@ -147,14 +152,14 @@
147152
},
148153
{
149154
"cell_type": "code",
150-
"execution_count": 2,
155+
"execution_count": 8,
151156
"id": "98068ef8-aead-46e7-8dac-0d05b5a86e6a",
152157
"metadata": {},
153158
"outputs": [],
154159
"source": [
155160
"# try loading\n",
156-
"nyc_index = GPTTreeIndex.load_from_disk('../test_wiki/index.json')\n",
157-
"essay_index = GPTTreeIndex.load_from_disk('../paul_graham_essay/index.json')"
161+
"nyc_index = GPTSimpleVectorIndex.load_from_disk('index_nyc.json')\n",
162+
"essay_index = GPTSimpleVectorIndex.load_from_disk('index_pg.json')"
158163
]
159164
},
160165
{
@@ -171,7 +176,7 @@
171176
},
172177
{
173178
"cell_type": "code",
174-
"execution_count": 3,
179+
"execution_count": 9,
175180
"id": "4149cbbd-7d0b-48c4-8c47-7d67ae0c55f0",
176181
"metadata": {},
177182
"outputs": [],
@@ -228,18 +233,18 @@
228233
},
229234
{
230235
"cell_type": "code",
231-
"execution_count": 4,
236+
"execution_count": 10,
232237
"id": "76c251ca-b06b-42e9-ac99-aa0a0a5187d4",
233238
"metadata": {},
234239
"outputs": [],
235240
"source": [
236241
"# set query config\n",
237242
"query_configs = [\n",
238243
" {\n",
239-
" \"index_struct_type\": \"tree\",\n",
244+
" \"index_struct_type\": \"simple_dict\",\n",
240245
" \"query_mode\": \"default\",\n",
241246
" \"query_kwargs\": {\n",
242-
" \"child_branch_factor\": 2\n",
247+
" \"similarity_top_k\": 1\n",
243248
" }\n",
244249
" },\n",
245250
" {\n",
@@ -260,6 +265,56 @@
260265
"keyword_table = GPTSimpleKeywordTableIndex([nyc_index, essay_index], max_keywords_per_chunk=50)"
261266
]
262267
},
268+
{
269+
"cell_type": "markdown",
270+
"id": "eebbc448-1e0b-402c-b37e-f93bfcc0bf4f",
271+
"metadata": {},
272+
"source": [
273+
"### Define Graph"
274+
]
275+
},
276+
{
277+
"cell_type": "code",
278+
"execution_count": 18,
279+
"id": "6d68750c-e5ae-481a-8b03-6173020c9bf3",
280+
"metadata": {},
281+
"outputs": [],
282+
"source": [
283+
"from gpt_index.composability import ComposableGraph"
284+
]
285+
},
286+
{
287+
"cell_type": "code",
288+
"execution_count": 19,
289+
"id": "822ada9f-fb43-472e-95ce-0036d508e528",
290+
"metadata": {},
291+
"outputs": [],
292+
"source": [
293+
"graph = ComposableGraph.build_from_index(keyword_table)"
294+
]
295+
},
296+
{
297+
"cell_type": "code",
298+
"execution_count": 32,
299+
"id": "ae127943-afac-48b4-b22d-84a37e553e4b",
300+
"metadata": {},
301+
"outputs": [],
302+
"source": [
303+
"# [optional] save to disk\n",
304+
"graph.save_to_disk(\"index_graph.json\")"
305+
]
306+
},
307+
{
308+
"cell_type": "code",
309+
"execution_count": 33,
310+
"id": "dca2b64b-9af1-456f-8dab-822bfdc5d0ac",
311+
"metadata": {},
312+
"outputs": [],
313+
"source": [
314+
"# [optional] load from disk\n",
315+
"graph = ComposableGraph.load_from_disk(\"index_graph.json\")"
316+
]
317+
},
263318
{
264319
"cell_type": "code",
265320
"execution_count": null,
@@ -268,24 +323,26 @@
268323
"outputs": [],
269324
"source": [
270325
"# ask it a question about NYC \n",
271-
"response = keyword_table.query(\n",
326+
"response = graph.query(\n",
272327
" \"What is the climate of New York City like? How cold is it during the winter?\", \n",
273-
" mode=\"recursive\", \n",
274-
" query_configs=query_configs\n",
328+
" query_configs=query_configs,\n",
329+
" verbose=True\n",
275330
")"
276331
]
277332
},
278333
{
279334
"cell_type": "code",
280-
"execution_count": 7,
335+
"execution_count": 27,
281336
"id": "c0a43443-3e00-4e48-b3ab-f6369191d53a",
282337
"metadata": {},
283338
"outputs": [
284339
{
285340
"name": "stdout",
286341
"output_type": "stream",
287342
"text": [
288-
"The climate of New York City is generally mild with hot and humid summers and cool to cold winters. The average temperature in the coldest winter month is 16°F (or -9°C). Nighttime temperatures can be especially cold due to the urban heat island effect. Temperatures can reach 0°F (-18°C) on rare occasions, with the coldest recorded wind chill being -37°F (-38°C). The city receives 49.5 inches (1,260 mm) of precipitation annually, which is relatively evenly spread throughout the year. Average winter snowfall between 1991 and 2020 has been 29.8 inches (76 cm); this varies considerably between years. Hurricanes and tropical storms are rare in the New York area, with the coldest month on record being January 1857, with a mean temperature of 19.6 °F (−6.9 °C). The warmest months on record are July 1825 and July 1999, both with a mean temperature of 81.4 °F (27.4 °C). The warmest years on record are 2012 and 2020, both with mean temperatures of 57.1 °F (13.9 °C). The coldest year is 1836, with a mean temperature of 47.\n"
343+
"\n",
344+
"\n",
345+
"New York City has a humid subtropical climate (Cfa) under the Köppen climate classification. Winters are typically chilly and damp, with temperatures usually dropping to 10 °F (−12 °C) several times per winter, yet can also reach 60 °F (16 °C) for several days even in the coldest winter month. The daily mean temperature in January, the area's coldest month, is 33.3 °F (0.7 °C). The city receives an average of 46.9 inches (1,194 mm) of rainfall annually, with the wettest month being August 2011, with 18.95 inches (481 mm) of rainfall. The snowiest month on record is February 2010, with 36.9 inches (94 cm) of snowfall. The snowiest season (Jul–Jun) on record is 1995–1996, with 75.6 inches (192 cm) of snowfall.\n"
289346
]
290347
}
291348
],
@@ -295,25 +352,19 @@
295352
},
296353
{
297354
"cell_type": "code",
298-
"execution_count": 8,
355+
"execution_count": 28,
299356
"id": "c78bc3da-6bad-4998-9a81-90a3fa9200a9",
300357
"metadata": {},
301358
"outputs": [
302359
{
303360
"name": "stdout",
304361
"output_type": "stream",
305362
"text": [
306-
">Source (Doc id: 6eb00cc4-27e9-4ba7-a7b9-bbb2467116a7): \n",
363+
"> Source (Doc id: 4e8c9bbc-b42f-479f-8fb1-83d0b6198f1d): \n",
307364
" New York, often called New York City or NYC, \n",
308365
" is the most populous city in the United St...\n",
309366
"\n",
310-
">Source (Doc id: d4cf39c8-e10b-4552-9f6d-60bb37356769): °F (16 °C) for several days even in the coldest winter month. Spring and autumn are unpredictable...\n",
311-
"\n",
312-
">Source (Doc id: d4cf39c8-e10b-4552-9f6d-60bb37356769): in August.The city receives 49.5 inches (1,260 mm) of precipitation annually, which is relatively...\n",
313-
"\n",
314-
">Source (Doc id: d4cf39c8-e10b-4552-9f6d-60bb37356769): the National Hockey League, and Major League Soccer. The New York metropolitan area hosts the mos...\n",
315-
"\n",
316-
">Source (Doc id: d4cf39c8-e10b-4552-9f6d-60bb37356769): any city in North America. New York City is the host of Climate Week NYC, the largest Climate Wee...\n"
367+
"> Source (Doc id: 77f3b3ea-93ab-49c8-b938-2bd3c870a602): has been altered substantially by human intervention, with considerable land reclamation along th...\n"
317368
]
318369
}
319370
],
@@ -330,28 +381,26 @@
330381
"outputs": [],
331382
"source": [
332383
"# ask it a question about PG's essay\n",
333-
"response = keyword_table.query(\n",
384+
"response = graph.query(\n",
334385
" \"What did the author do growing up, before his time at Y Combinator?\", \n",
335-
" mode=\"recursive\", \n",
336386
" query_configs=query_configs\n",
337387
")"
338388
]
339389
},
340390
{
341391
"cell_type": "code",
342-
"execution_count": 10,
392+
"execution_count": 30,
343393
"id": "06dc71bb-882d-49f5-8566-69b0ea5019dd",
344394
"metadata": {},
345395
"outputs": [
346396
{
347-
"data": {
348-
"text/plain": [
349-
"'The author was a writer and programmer who studied art and worked on building a WYSIWYG site builder.'"
350-
]
351-
},
352-
"execution_count": 10,
353-
"metadata": {},
354-
"output_type": "execute_result"
397+
"name": "stdout",
398+
"output_type": "stream",
399+
"text": [
400+
"\n",
401+
"\n",
402+
"The author grew up in England and attended college in the United States. He studied computer science and art, and worked on a variety of projects, including writing essays, hacking, and working on a Lisp interpreter. He also worked on a startup called Viaweb, which was eventually acquired by Yahoo. He also worked on Interleaf, a high-end, special-purpose hardware and software company, and sought out signature styles at RISD. He also lived in a rent-stabilized apartment in New York, and worked on software projects that could be launched as soon as they were done.\n"
403+
]
355404
}
356405
],
357406
"source": [
@@ -360,10 +409,25 @@
360409
},
361410
{
362411
"cell_type": "code",
363-
"execution_count": null,
412+
"execution_count": 31,
364413
"id": "b0894565-2b2c-4987-a891-17ba44d775b5",
365414
"metadata": {},
366-
"outputs": [],
415+
"outputs": [
416+
{
417+
"name": "stdout",
418+
"output_type": "stream",
419+
"text": [
420+
"> Source (Doc id: ae92ab9a-c6ed-48c3-b333-d459908dec3f): \n",
421+
" Author: Paul Graham. \n",
422+
" The author grew up painting and writing essays. \n",
423+
" He wrote a bo...\n",
424+
"\n",
425+
"> Source (Doc id: a63ad7c4-87f2-42fd-a32b-f682a022af90): get their initial set of customers almost entirely from among their batchmates.\n",
426+
"\n",
427+
"I had not origin...\n"
428+
]
429+
}
430+
],
367431
"source": [
368432
"# Get source of response\n",
369433
"print(response.get_formatted_sources())"

examples/composable_indices/index_graph.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

examples/composable_indices/index_nyc.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

examples/composable_indices/index_pg.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

gpt_index/composability/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
"""This module contains all classes used for composing graphs over indices."""
2+
3+
4+
from gpt_index.composability.graph import ComposableGraph
5+
6+
__all__ = ["ComposableGraph"]

0 commit comments

Comments
 (0)