@@ -59,9 +59,15 @@ class SalesSupportModel:
59
59
)
60
60
61
61
# embeddings
62
- texts_splitter_results : List [Document ]
62
+ text_splitter = RecursiveCharacterTextSplitter (
63
+ chunk_size = 100 ,
64
+ chunk_overlap = 0 ,
65
+ )
63
66
openai_embedding = OpenAIEmbeddings ()
64
- query_result : List [float ]
67
+ pinecone_search = Pinecone .from_existing_index (
68
+ Credentials .PINECONE_INDEX_NAME ,
69
+ embedding = openai_embedding ,
70
+ )
65
71
66
72
def cached_chat_request (self , system_message : str , human_message : str ) -> SystemMessage :
67
73
"""Cached chat request."""
@@ -87,23 +93,6 @@ def split_text(self, text: str) -> List[Document]:
87
93
retval = text_splitter .create_documents ([text ])
88
94
return retval
89
95
90
- def embed (self , text : str ) -> List [float ]:
91
- """Embed."""
92
- text_splitter = RecursiveCharacterTextSplitter (
93
- chunk_size = 100 ,
94
- chunk_overlap = 0 ,
95
- )
96
- texts_splitter_results = text_splitter .create_documents ([text ])
97
- embedding = texts_splitter_results [0 ].page_content
98
- # pylint: disable=no-member
99
- self .openai_embedding .embed_query (embedding )
100
-
101
- Pinecone .from_documents (
102
- documents = texts_splitter_results ,
103
- embedding = self .openai_embedding ,
104
- index_name = Credentials .PINECONE_INDEX_NAME ,
105
- )
106
-
107
96
def load (self , filepath : str ):
108
97
"""
109
98
Embed PDF.
@@ -118,14 +107,20 @@ def load(self, filepath: str):
118
107
for pdf_file in pdf_files :
119
108
i += 1
120
109
j = len (pdf_files )
121
- print (f"Loading PDF { i } of { j } : " )
110
+ print (f"Loading PDF { i } of { j } : " , pdf_file )
122
111
loader = PyPDFLoader (file_path = pdf_file )
123
112
docs = loader .load ()
124
113
k = 0
125
114
for doc in docs :
126
115
k += 1
127
116
print (k * "-" , end = "\r " )
128
- self .embed (doc .page_content )
117
+ texts_splitter_results = self .text_splitter .create_documents ([doc .page_content ])
118
+ # from_existing_index() only opens an index and uploads nothing, so use
119
+ # add_documents(), which embeds the chunks and upserts them into Pinecone.
120
+ self.pinecone_search.add_documents(
121
+     documents=texts_splitter_results,
122
+ )
123
+
129
124
print ("Finished loading PDFs" )
130
125
131
126
def rag (self , prompt : str ):
@@ -142,11 +137,7 @@ def format_docs(docs):
142
137
"""Format docs."""
143
138
return "\n \n " .join (doc .page_content for doc in docs )
144
139
145
- pinecone_search = Pinecone .from_existing_index (
146
- Credentials .PINECONE_INDEX_NAME ,
147
- embedding = self .openai_embedding ,
148
- )
149
- retriever = pinecone_search .as_retriever ()
140
+ retriever = self .pinecone_search .as_retriever ()
150
141
151
142
# Use the retriever to get relevant documents
152
143
documents = retriever .get_relevant_documents (query = prompt )
0 commit comments