# initializations
###############################################################################
DEFAULT_MODEL_NAME = Config.OPENAI_PROMPT_MODEL_NAME
- pinecone.init(api_key=Credentials.PINECONE_API_KEY, environment=Credentials.PINECONE_ENVIRONMENT)
+ pinecone.init(api_key=Credentials.PINECONE_API_KEY, environment=Config.PINECONE_ENVIRONMENT)
set_llm_cache(InMemoryCache())
logging.basicConfig(level=logging.DEBUG if Config.DEBUG_MODE else logging.INFO)

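For context on the Credentials-to-Config moves in this commit, here is a minimal sketch of how the two settings classes referenced above and below might be laid out, assuming both are thin wrappers over environment variables; the class bodies and default values are placeholders, not code taken from this diff.

# Hypothetical sketch only; the real Config and Credentials classes are not shown in this diff.
import os


class Config:
    """Non-secret, environment-driven settings (assumed)."""

    DEBUG_MODE = os.environ.get("DEBUG_MODE", "False") == "True"
    OPENAI_PROMPT_MODEL_NAME = os.environ.get("OPENAI_PROMPT_MODEL_NAME", "gpt-3.5-turbo")
    PINECONE_ENVIRONMENT = os.environ.get("PINECONE_ENVIRONMENT", "gcp-starter")
    PINECONE_INDEX_NAME = os.environ.get("PINECONE_INDEX_NAME", "rag")
    PINECONE_VECTORSTORE_TEXT_KEY = os.environ.get("PINECONE_VECTORSTORE_TEXT_KEY", "lc_id")


class Credentials:
    """Secrets only (assumed)."""

    OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
    OPENAI_API_ORGANIZATION = os.environ.get("OPENAI_API_ORGANIZATION", "")
    PINECONE_API_KEY = os.environ["PINECONE_API_KEY"]
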
@@ -92,8 +92,10 @@ class HybridSearchRetriever:
    openai_embeddings = OpenAIEmbeddings(
        api_key=Credentials.OPENAI_API_KEY, organization=Credentials.OPENAI_API_ORGANIZATION
    )
-   pinecone_index = pinecone.Index(index_name=Credentials.PINECONE_INDEX_NAME)
-   vector_store = Pinecone(index=pinecone_index, embedding=openai_embeddings, text_key="lc_id")
+   pinecone_index = pinecone.Index(index_name=Config.PINECONE_INDEX_NAME)
+   vector_store = Pinecone(
+       index=pinecone_index, embedding=openai_embeddings, text_key=Config.PINECONE_VECTORSTORE_TEXT_KEY
+   )

    text_splitter = TextSplitter()
    bm25_encoder = BM25Encoder().default()
@@ -135,17 +137,17 @@ def load(self, filepath: str):
        """
        try:
            logging.debug("Deleting index...")
-           pinecone.delete_index(Credentials.PINECONE_INDEX_NAME)
+           pinecone.delete_index(Config.PINECONE_INDEX_NAME)
        except pinecone.exceptions.PineconeException:
            logging.debug("Index does not exist. Continuing...")

        metadata_config = {
-           "indexed": ["lc_id", "lc_type"],
+           "indexed": [Config.PINECONE_VECTORSTORE_TEXT_KEY, "lc_type"],
            "context": ["lc_text"],
        }
        logging.debug("Creating index. This may take a few minutes...")
        pinecone.create_index(
-           Credentials.PINECONE_INDEX_NAME, dimension=1536, metric="dotproduct", metadata_config=metadata_config
+           Config.PINECONE_INDEX_NAME, dimension=1536, metric="dotproduct", metadata_config=metadata_config
        )

        pdf_files = glob.glob(os.path.join(filepath, "*.pdf"))
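The load() hunk above rebuilds the Pinecone index (dimension 1536 matches the output size of OpenAI's text-embedding-ada-002 embeddings, and dotproduct is the metric Pinecone requires for sparse-dense hybrid vectors) and then gathers every PDF in the given folder for ingestion. A hedged example of how it might be invoked; the no-argument constructor is an assumption, since it is not shown in this diff:

# Hedged usage sketch; HybridSearchRetriever() with no arguments is an assumption.
hsr = HybridSearchRetriever()
hsr.load(filepath="./data/")  # drops and recreates the index, then processes every PDF in ./data/
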
@@ -187,11 +189,13 @@ def rag(self, human_message: Union[str, HumanMessage]):
            logging.debug("Converting human_message to HumanMessage")
            human_message = HumanMessage(content=human_message)

+       # ---------------------------------------------------------------------
+       # 1.) Retrieve relevant documents from Pinecone vector database
+       # ---------------------------------------------------------------------
        retriever = PineconeHybridSearchRetriever(
            embeddings=self.openai_embeddings, sparse_encoder=self.bm25_encoder, index=self.pinecone_index
        )
        documents = retriever.get_relevant_documents(query=human_message.content)
-       logging.debug("Retrieved %i related documents from Pinecone", len(documents))

        # Extract the text from the documents
        document_texts = [doc.page_content for doc in documents]
@@ -202,13 +206,19 @@ def rag(self, human_message: Union[str, HumanMessage]):
            into your responses:\n\n
            """
        )
-       system_message = f"{leader} {'. '.join(document_texts)}"
+       system_message_content = f"{leader} {'. '.join(document_texts)}"
+       system_message = SystemMessage(content=system_message_content)
+       # ---------------------------------------------------------------------
+       # finished with hybrid search setup
+       # ---------------------------------------------------------------------

-       logging.debug("System messages contains %i words", len(system_message.split()))
-       logging.debug("Prompt: %s", system_message)
-       system_message = SystemMessage(content=system_message)
+       # 2.) get a response from the chat model
        response = self.cached_chat_request(system_message=system_message, human_message=human_message)

+       logging.debug("------------------------------------------------------")
+       logging.debug("Retrieved %i related documents from Pinecone", len(documents))
+       logging.debug("System messages contains %i words", len(system_message.content.split()))
+       logging.debug("Prompt: %s", system_message.content)
        logging.debug("Response:")
        logging.debug("------------------------------------------------------")
        return response.content
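Taken together, the rag() changes split the method into two labeled phases, hybrid retrieval from Pinecone followed by a cached chat completion, with the debug logging consolidated after the model call and now reading from system_message.content. A minimal end-to-end sketch of how the reworked method might be exercised; the constructor and the question text are assumptions:

# Hedged usage sketch of the reworked rag() flow.
hsr = HybridSearchRetriever()
answer = hsr.rag(human_message="What do the loaded PDFs say about hybrid search?")
print(answer)  # rag() returns response.content, i.e. the model's text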