18
18
19
19
# document loading
20
20
import glob
21
+ import logging
21
22
import os
22
23
import textwrap
23
24
52
53
# Default OpenAI chat model, taken from project configuration.
DEFAULT_MODEL_NAME = Config.OPENAI_PROMPT_MODEL_NAME

# Initialize the Pinecone client once at import time so all index
# operations below share the same authenticated session.
pinecone.init(api_key=Credentials.PINECONE_API_KEY, environment=Credentials.PINECONE_ENVIRONMENT)

# Cache LLM responses in memory to avoid repeat billing for identical prompts.
set_llm_cache(InMemoryCache())

# Verbosity is driven by the project-level debug flag.
logging.basicConfig(level=logging.DEBUG if Config.DEBUG_MODE else logging.INFO)
55
57
56
58
57
59
class TextSplitter :
@@ -123,16 +125,16 @@ def load(self, filepath: str):
123
125
https://docs.pinecone.io/docs/manage-indexes#selective-metadata-indexing
124
126
"""
125
127
try:
    # Drop any existing index so the load starts from a clean slate.
    logging.debug("Deleting index...")
    pinecone.delete_index(Credentials.PINECONE_INDEX_NAME)
except pinecone.exceptions.PineconeException:
    # First run: nothing to delete. Best-effort by design, so we only log.
    logging.debug("Index does not exist. Continuing...")

# Restrict which metadata fields Pinecone indexes; see
# https://docs.pinecone.io/docs/manage-indexes#selective-metadata-indexing
metadata_config = {
    "indexed": ["lc_id", "lc_type"],
    "context": ["lc_text"],
}
logging.debug("Creating index. This may take a few minutes...")
# dimension=1536 matches OpenAI's text-embedding-ada-002 output size;
# dotproduct metric is required for hybrid (dense + sparse) search.
pinecone.create_index(
    Credentials.PINECONE_INDEX_NAME, dimension=1536, metric="dotproduct", metadata_config=metadata_config
)
@@ -142,19 +144,19 @@ def load(self, filepath: str):
142
144
# Total PDF count is loop-invariant; compute it once instead of per iteration.
j = len(pdf_files)
for pdf_file in pdf_files:
    i += 1
    logging.debug("Loading PDF %s of %s: %s", i, j, pdf_file)
    loader = PyPDFLoader(file_path=pdf_file)
    docs = loader.load()
    k = 0
    for doc in docs:
        k += 1
        # Per-page progress marker. NOTE: logging.debug() accepts no `end`
        # keyword (that is a print() parameter) — passing end="\r" raises
        # TypeError, so the carriage-return trick is dropped here.
        logging.debug(k * "-")
        # Split the page into chunks, embed them, and upsert into Pinecone.
        documents = self.text_splitter.create_documents([doc.page_content])
        document_texts = [doc.page_content for doc in documents]
        embeddings = self.openai_embeddings.embed_documents(document_texts)
        self.vector_store.add_documents(documents=documents, embeddings=embeddings)

logging.debug("Finished loading PDFs")
158
160
159
161
def rag (self , prompt : str ):
160
162
"""
@@ -176,7 +178,7 @@ def rag(self, prompt: str):
176
178
embeddings = self .openai_embeddings , sparse_encoder = self .bm25_encoder , index = self .pinecone_index
177
179
)
178
180
documents = retriever .get_relevant_documents (query = prompt )
179
- print ( f "Retrieved { len ( documents ) } related documents from Pinecone" )
181
+ logging . debug ( "Retrieved %i related documents from Pinecone" , len ( documents ) )
180
182
181
183
# Extract the text from the documents
182
184
document_texts = [doc .page_content for doc in documents ]
@@ -191,14 +193,14 @@ def rag(self, prompt: str):
191
193
# Create a prompt that includes the document texts
192
194
prompt_with_relevant_documents = f"{ prompt + leader } { '. ' .join (document_texts )} "
193
195
194
- print ( f "Prompt contains { len (prompt_with_relevant_documents .split ())} words" )
195
- print ("Prompt:" , prompt_with_relevant_documents )
196
+ logging . debug ( "Prompt contains %i words" , len (prompt_with_relevant_documents .split ()))
197
+ logging . debug ("Prompt: %s " , prompt_with_relevant_documents )
196
198
197
199
# Get a response from the GPT-3.5-turbo model
198
200
response = self .cached_chat_request (
199
201
system_message = "You are a helpful assistant." , human_message = prompt_with_relevant_documents
200
202
)
201
203
202
- print ("Response:" )
203
- print ("------------------------------------------------------" )
204
+ logging . debug ("Response:" )
205
+ logging . debug ("------------------------------------------------------" )
204
206
return response
0 commit comments