|
7 | 7 | https://python.langchain.com/docs/integrations/retrievers/pinecone_hybrid_search
|
8 | 8 | """
|
9 | 9 |
|
| 10 | +# document loading |
10 | 11 | import glob
|
11 | 12 | import os
|
12 | 13 | import textwrap
|
13 |
| -from typing import List # ClassVar |
| 14 | +from typing import List |
14 | 15 |
|
15 | 16 | # pinecone integration
|
16 | 17 | import pinecone
|
17 | 18 | from langchain.cache import InMemoryCache
|
18 |
| - |
19 |
| -# prompting and chat |
20 | 19 | from langchain.chat_models import ChatOpenAI
|
21 |
| - |
22 |
| -# document loading |
23 | 20 | from langchain.document_loaders import PyPDFLoader
|
24 | 21 |
|
25 | 22 | # embedding
|
26 | 23 | from langchain.embeddings import OpenAIEmbeddings
|
27 |
| - |
28 |
| -# vector database |
29 | 24 | from langchain.globals import set_llm_cache
|
| 25 | + |
| 26 | +# prompting and chat |
30 | 27 | from langchain.llms.openai import OpenAI
|
31 | 28 | from langchain.prompts import PromptTemplate
|
| 29 | + |
| 30 | +# hybrid search capability |
32 | 31 | from langchain.retrievers import PineconeHybridSearchRetriever
|
33 | 32 | from langchain.schema import HumanMessage, SystemMessage
|
34 | 33 | from langchain.text_splitter import Document
|
|
39 | 38 | from models.const import Credentials
|
40 | 39 |
|
41 | 40 |
|
42 |
| -# from pydantic import BaseModel, ConfigDict, Field |
43 |
| - |
44 |
| - |
45 | 41 | ###############################################################################
|
46 | 42 | # initializations
|
47 | 43 | ###############################################################################
|
@@ -107,11 +103,6 @@ def prompt_with_template(self, prompt: PromptTemplate, concept: str, model: str
|
107 | 103 | retval = llm(prompt.format(concept=concept))
|
108 | 104 | return retval
|
109 | 105 |
|
110 |
| - def split_text(self, text: str) -> List[Document]: |
111 |
| - """Split text. Leaving this here for now, since it exposes the return type.""" |
112 |
| - retval = self.text_splitter.create_documents([text]) |
113 |
| - return retval |
114 |
| - |
115 | 106 | def fit_tf_idf_values(self, corpus: List[str]):
|
116 | 107 | """Fit TF-IDF values.
|
117 | 108 | 1. Fit the BM25 encoder on the corpus
|
|
0 commit comments