|
12 | 12 | },
|
13 | 13 | {
|
14 | 14 | "cell_type": "code",
|
15 |
| - "execution_count": null, |
| 15 | + "execution_count": 1, |
16 | 16 | "id": "e27b0473-4bda-47f0-b6ed-fd482eac1a13",
|
17 | 17 | "metadata": {},
|
18 | 18 | "outputs": [],
|
19 | 19 | "source": [
|
20 |
| - "from gpt_index import GPTTreeIndex, GPTSimpleKeywordTableIndex, GPTListIndex, SimpleDirectoryReader" |
| 20 | + "from gpt_index import (\n", |
| 21 | + " GPTSimpleVectorIndex, \n", |
| 22 | + " GPTSimpleKeywordTableIndex, \n", |
| 23 | + " GPTListIndex, \n", |
| 24 | + " SimpleDirectoryReader\n", |
| 25 | + ")" |
21 | 26 | ]
|
22 | 27 | },
|
23 | 28 | {
|
|
102 | 107 | "outputs": [],
|
103 | 108 | "source": [
|
104 | 109 | "# build NYC index\n",
|
105 |
| - "nyc_index = GPTTreeIndex(nyc_documents)" |
| 110 | + "nyc_index = GPTSimpleVectorIndex(nyc_documents)" |
106 | 111 | ]
|
107 | 112 | },
|
108 | 113 | {
|
|
112 | 117 | "metadata": {},
|
113 | 118 | "outputs": [],
|
114 | 119 | "source": [
|
115 |
| - "nyc_index.save_to_disk('../test_wiki/index.json')" |
| 120 | + "nyc_index.save_to_disk('index_nyc.json')" |
116 | 121 | ]
|
117 | 122 | },
|
118 | 123 | {
|
|
123 | 128 | "outputs": [],
|
124 | 129 | "source": [
|
125 | 130 | "# build essay index\n",
|
126 |
| - "essay_index = GPTTreeIndex(essay_documents)" |
| 131 | + "essay_index = GPTSimpleVectorIndex(essay_documents)" |
127 | 132 | ]
|
128 | 133 | },
|
129 | 134 | {
|
|
133 | 138 | "metadata": {},
|
134 | 139 | "outputs": [],
|
135 | 140 | "source": [
|
136 |
| - "essay_index.save_to_disk('../paul_graham_essay/index.json')" |
| 141 | + "essay_index.save_to_disk('index_pg.json')" |
137 | 142 | ]
|
138 | 143 | },
|
139 | 144 | {
|
|
147 | 152 | },
|
148 | 153 | {
|
149 | 154 | "cell_type": "code",
|
150 |
| - "execution_count": 2, |
| 155 | + "execution_count": 8, |
151 | 156 | "id": "98068ef8-aead-46e7-8dac-0d05b5a86e6a",
|
152 | 157 | "metadata": {},
|
153 | 158 | "outputs": [],
|
154 | 159 | "source": [
|
155 | 160 | "# try loading\n",
|
156 |
| - "nyc_index = GPTTreeIndex.load_from_disk('../test_wiki/index.json')\n", |
157 |
| - "essay_index = GPTTreeIndex.load_from_disk('../paul_graham_essay/index.json')" |
| 161 | + "nyc_index = GPTSimpleVectorIndex.load_from_disk('index_nyc.json')\n", |
| 162 | + "essay_index = GPTSimpleVectorIndex.load_from_disk('index_pg.json')" |
158 | 163 | ]
|
159 | 164 | },
|
160 | 165 | {
|
|
171 | 176 | },
|
172 | 177 | {
|
173 | 178 | "cell_type": "code",
|
174 |
| - "execution_count": 3, |
| 179 | + "execution_count": 9, |
175 | 180 | "id": "4149cbbd-7d0b-48c4-8c47-7d67ae0c55f0",
|
176 | 181 | "metadata": {},
|
177 | 182 | "outputs": [],
|
|
228 | 233 | },
|
229 | 234 | {
|
230 | 235 | "cell_type": "code",
|
231 |
| - "execution_count": 4, |
| 236 | + "execution_count": 10, |
232 | 237 | "id": "76c251ca-b06b-42e9-ac99-aa0a0a5187d4",
|
233 | 238 | "metadata": {},
|
234 | 239 | "outputs": [],
|
235 | 240 | "source": [
|
236 | 241 | "# set query config\n",
|
237 | 242 | "query_configs = [\n",
|
238 | 243 | " {\n",
|
239 |
| - " \"index_struct_type\": \"tree\",\n", |
| 244 | + " \"index_struct_type\": \"simple_dict\",\n", |
240 | 245 | " \"query_mode\": \"default\",\n",
|
241 | 246 | " \"query_kwargs\": {\n",
|
242 |
| - " \"child_branch_factor\": 2\n", |
| 247 | + " \"similarity_top_k\": 1\n", |
243 | 248 | " }\n",
|
244 | 249 | " },\n",
|
245 | 250 | " {\n",
|
|
260 | 265 | "keyword_table = GPTSimpleKeywordTableIndex([nyc_index, essay_index], max_keywords_per_chunk=50)"
|
261 | 266 | ]
|
262 | 267 | },
|
| 268 | + { |
| 269 | + "cell_type": "markdown", |
| 270 | + "id": "eebbc448-1e0b-402c-b37e-f93bfcc0bf4f", |
| 271 | + "metadata": {}, |
| 272 | + "source": [ |
| 273 | + "### Define Graph" |
| 274 | + ] |
| 275 | + }, |
| 276 | + { |
| 277 | + "cell_type": "code", |
| 278 | + "execution_count": 18, |
| 279 | + "id": "6d68750c-e5ae-481a-8b03-6173020c9bf3", |
| 280 | + "metadata": {}, |
| 281 | + "outputs": [], |
| 282 | + "source": [ |
| 283 | + "from gpt_index.composability import ComposableGraph" |
| 284 | + ] |
| 285 | + }, |
| 286 | + { |
| 287 | + "cell_type": "code", |
| 288 | + "execution_count": 19, |
| 289 | + "id": "822ada9f-fb43-472e-95ce-0036d508e528", |
| 290 | + "metadata": {}, |
| 291 | + "outputs": [], |
| 292 | + "source": [ |
| 293 | + "graph = ComposableGraph.build_from_index(keyword_table)" |
| 294 | + ] |
| 295 | + }, |
| 296 | + { |
| 297 | + "cell_type": "code", |
| 298 | + "execution_count": 32, |
| 299 | + "id": "ae127943-afac-48b4-b22d-84a37e553e4b", |
| 300 | + "metadata": {}, |
| 301 | + "outputs": [], |
| 302 | + "source": [ |
| 303 | + "# [optional] save to disk\n", |
| 304 | + "graph.save_to_disk(\"index_graph.json\")" |
| 305 | + ] |
| 306 | + }, |
| 307 | + { |
| 308 | + "cell_type": "code", |
| 309 | + "execution_count": 33, |
| 310 | + "id": "dca2b64b-9af1-456f-8dab-822bfdc5d0ac", |
| 311 | + "metadata": {}, |
| 312 | + "outputs": [], |
| 313 | + "source": [ |
| 314 | + "# [optional] load from disk\n", |
| 315 | + "graph = ComposableGraph.load_from_disk(\"index_graph.json\")" |
| 316 | + ] |
| 317 | + }, |
263 | 318 | {
|
264 | 319 | "cell_type": "code",
|
265 | 320 | "execution_count": null,
|
|
268 | 323 | "outputs": [],
|
269 | 324 | "source": [
|
270 | 325 | "# ask it a question about NYC \n",
|
271 |
| - "response = keyword_table.query(\n", |
| 326 | + "response = graph.query(\n", |
272 | 327 | " \"What is the climate of New York City like? How cold is it during the winter?\", \n",
|
273 |
| - " mode=\"recursive\", \n", |
274 |
| - " query_configs=query_configs\n", |
| 328 | + " query_configs=query_configs,\n", |
| 329 | + " verbose=True\n", |
275 | 330 | ")"
|
276 | 331 | ]
|
277 | 332 | },
|
278 | 333 | {
|
279 | 334 | "cell_type": "code",
|
280 |
| - "execution_count": 7, |
| 335 | + "execution_count": 27, |
281 | 336 | "id": "c0a43443-3e00-4e48-b3ab-f6369191d53a",
|
282 | 337 | "metadata": {},
|
283 | 338 | "outputs": [
|
284 | 339 | {
|
285 | 340 | "name": "stdout",
|
286 | 341 | "output_type": "stream",
|
287 | 342 | "text": [
|
288 |
| - "The climate of New York City is generally mild with hot and humid summers and cool to cold winters. The average temperature in the coldest winter month is 16°F (or -9°C). Nighttime temperatures can be especially cold due to the urban heat island effect. Temperatures can reach 0°F (-18°C) on rare occasions, with the coldest recorded wind chill being -37°F (-38°C). The city receives 49.5 inches (1,260 mm) of precipitation annually, which is relatively evenly spread throughout the year. Average winter snowfall between 1991 and 2020 has been 29.8 inches (76 cm); this varies considerably between years. Hurricanes and tropical storms are rare in the New York area, with the coldest month on record being January 1857, with a mean temperature of 19.6 °F (−6.9 °C). The warmest months on record are July 1825 and July 1999, both with a mean temperature of 81.4 °F (27.4 °C). The warmest years on record are 2012 and 2020, both with mean temperatures of 57.1 °F (13.9 °C). The coldest year is 1836, with a mean temperature of 47.\n" |
| 343 | + "\n", |
| 344 | + "\n", |
| 345 | + "New York City has a humid subtropical climate (Cfa) under the Köppen climate classification. Winters are typically chilly and damp, with temperatures usually dropping to 10 °F (−12 °C) several times per winter, yet can also reach 60 °F (16 °C) for several days even in the coldest winter month. The daily mean temperature in January, the area's coldest month, is 33.3 °F (0.7 °C). The city receives an average of 46.9 inches (1,194 mm) of rainfall annually, with the wettest month being August 2011, with 18.95 inches (481 mm) of rainfall. The snowiest month on record is February 2010, with 36.9 inches (94 cm) of snowfall. The snowiest season (Jul–Jun) on record is 1995–1996, with 75.6 inches (192 cm) of snowfall.\n" |
289 | 346 | ]
|
290 | 347 | }
|
291 | 348 | ],
|
|
295 | 352 | },
|
296 | 353 | {
|
297 | 354 | "cell_type": "code",
|
298 |
| - "execution_count": 8, |
| 355 | + "execution_count": 28, |
299 | 356 | "id": "c78bc3da-6bad-4998-9a81-90a3fa9200a9",
|
300 | 357 | "metadata": {},
|
301 | 358 | "outputs": [
|
302 | 359 | {
|
303 | 360 | "name": "stdout",
|
304 | 361 | "output_type": "stream",
|
305 | 362 | "text": [
|
306 |
| - ">Source (Doc id: 6eb00cc4-27e9-4ba7-a7b9-bbb2467116a7): \n", |
| 363 | + "> Source (Doc id: 4e8c9bbc-b42f-479f-8fb1-83d0b6198f1d): \n", |
307 | 364 | " New York, often called New York City or NYC, \n",
|
308 | 365 | " is the most populous city in the United St...\n",
|
309 | 366 | "\n",
|
310 |
| - ">Source (Doc id: d4cf39c8-e10b-4552-9f6d-60bb37356769): °F (16 °C) for several days even in the coldest winter month. Spring and autumn are unpredictable...\n", |
311 |
| - "\n", |
312 |
| - ">Source (Doc id: d4cf39c8-e10b-4552-9f6d-60bb37356769): in August.The city receives 49.5 inches (1,260 mm) of precipitation annually, which is relatively...\n", |
313 |
| - "\n", |
314 |
| - ">Source (Doc id: d4cf39c8-e10b-4552-9f6d-60bb37356769): the National Hockey League, and Major League Soccer. The New York metropolitan area hosts the mos...\n", |
315 |
| - "\n", |
316 |
| - ">Source (Doc id: d4cf39c8-e10b-4552-9f6d-60bb37356769): any city in North America. New York City is the host of Climate Week NYC, the largest Climate Wee...\n" |
| 367 | + "> Source (Doc id: 77f3b3ea-93ab-49c8-b938-2bd3c870a602): has been altered substantially by human intervention, with considerable land reclamation along th...\n" |
317 | 368 | ]
|
318 | 369 | }
|
319 | 370 | ],
|
|
330 | 381 | "outputs": [],
|
331 | 382 | "source": [
|
332 | 383 | "# ask it a question about PG's essay\n",
|
333 |
| - "response = keyword_table.query(\n", |
| 384 | + "response = graph.query(\n", |
334 | 385 | " \"What did the author do growing up, before his time at Y Combinator?\", \n",
|
335 |
| - " mode=\"recursive\", \n", |
336 | 386 | " query_configs=query_configs\n",
|
337 | 387 | ")"
|
338 | 388 | ]
|
339 | 389 | },
|
340 | 390 | {
|
341 | 391 | "cell_type": "code",
|
342 |
| - "execution_count": 10, |
| 392 | + "execution_count": 30, |
343 | 393 | "id": "06dc71bb-882d-49f5-8566-69b0ea5019dd",
|
344 | 394 | "metadata": {},
|
345 | 395 | "outputs": [
|
346 | 396 | {
|
347 |
| - "data": { |
348 |
| - "text/plain": [ |
349 |
| - "'The author was a writer and programmer who studied art and worked on building a WYSIWYG site builder.'" |
350 |
| - ] |
351 |
| - }, |
352 |
| - "execution_count": 10, |
353 |
| - "metadata": {}, |
354 |
| - "output_type": "execute_result" |
| 397 | + "name": "stdout", |
| 398 | + "output_type": "stream", |
| 399 | + "text": [ |
| 400 | + "\n", |
| 401 | + "\n", |
| 402 | + "The author grew up in England and attended college in the United States. He studied computer science and art, and worked on a variety of projects, including writing essays, hacking, and working on a Lisp interpreter. He also worked on a startup called Viaweb, which was eventually acquired by Yahoo. He also worked on Interleaf, a high-end, special-purpose hardware and software company, and sought out signature styles at RISD. He also lived in a rent-stabilized apartment in New York, and worked on software projects that could be launched as soon as they were done.\n" |
| 403 | + ] |
355 | 404 | }
|
356 | 405 | ],
|
357 | 406 | "source": [
|
|
360 | 409 | },
|
361 | 410 | {
|
362 | 411 | "cell_type": "code",
|
363 |
| - "execution_count": null, |
| 412 | + "execution_count": 31, |
364 | 413 | "id": "b0894565-2b2c-4987-a891-17ba44d775b5",
|
365 | 414 | "metadata": {},
|
366 |
| - "outputs": [], |
| 415 | + "outputs": [ |
| 416 | + { |
| 417 | + "name": "stdout", |
| 418 | + "output_type": "stream", |
| 419 | + "text": [ |
| 420 | + "> Source (Doc id: ae92ab9a-c6ed-48c3-b333-d459908dec3f): \n", |
| 421 | + " Author: Paul Graham. \n", |
| 422 | + " The author grew up painting and writing essays. \n", |
| 423 | + " He wrote a bo...\n", |
| 424 | + "\n", |
| 425 | + "> Source (Doc id: a63ad7c4-87f2-42fd-a32b-f682a022af90): get their initial set of customers almost entirely from among their batchmates.\n", |
| 426 | + "\n", |
| 427 | + "I had not origin...\n" |
| 428 | + ] |
| 429 | + } |
| 430 | + ], |
367 | 431 | "source": [
|
368 | 432 | "# Get source of response\n",
|
369 | 433 | "print(response.get_formatted_sources())"
|
|
0 commit comments