
Commit 2eaeba3

refactor: load() logging should be info instead of debug
1 parent fbc4b20 commit 2eaeba3

2 files changed: +16 −16 lines changed
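For context on the commit itself: Python's stdlib logger only emits records at or above the configured level, and applications are typically run at INFO, so `load()`'s progress messages were invisible as `logging.debug(...)` calls. A minimal standalone sketch (not this repo's actual configuration) of the difference:

```python
import logging

# Typical application setup: emit INFO and above, suppress DEBUG chatter.
logging.basicConfig(level=logging.INFO)

logging.debug("Deleting index...")  # filtered out at INFO level
logging.info("Deleting index...")   # emitted as "INFO:root:Deleting index..."
```

Note that with no configuration at all the root logger defaults to WARNING, so even the new `info` calls need a `basicConfig(...)` or an explicit handler to show up.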

CHANGELOG.md

Lines changed: 3 additions & 4 deletions
```diff
@@ -1,11 +1,10 @@
 ## [1.1.3](https://github.com/lpm0073/hybrid-search-retriever/compare/v1.1.2...v1.1.3) (2023-12-02)
 
-
 ### Bug Fixes
 
-* add langchain-experimental for yt example ([f9d6d6d](https://github.com/lpm0073/hybrid-search-retriever/commit/f9d6d6d0b11ff9c1f06faf7eb69511bc5702066d))
-* correct type error with DEBUG_MODE ([a96bdfd](https://github.com/lpm0073/hybrid-search-retriever/commit/a96bdfdb5a0b015740110e02f9f9b06917cd31c7))
-* move retriever results to system_message ([203c8b3](https://github.com/lpm0073/hybrid-search-retriever/commit/203c8b300cda156ac44a0c6e02510c2ab6a2b074))
+- add langchain-experimental for yt example ([f9d6d6d](https://github.com/lpm0073/hybrid-search-retriever/commit/f9d6d6d0b11ff9c1f06faf7eb69511bc5702066d))
+- correct type error with DEBUG_MODE ([a96bdfd](https://github.com/lpm0073/hybrid-search-retriever/commit/a96bdfdb5a0b015740110e02f9f9b06917cd31c7))
+- move retriever results to system_message ([203c8b3](https://github.com/lpm0073/hybrid-search-retriever/commit/203c8b300cda156ac44a0c6e02510c2ab6a2b074))
 
 ## [1.1.2](https://github.com/lpm0073/hybrid-search-retriever/compare/v1.1.1...v1.1.2) (2023-12-01)
```

models/hybrid_search_retreiver.py

Lines changed: 13 additions & 12 deletions
```diff
@@ -73,7 +73,7 @@ def create_documents(self, texts):
 
 
 class HybridSearchRetriever:
-    """Hybrid Search Retriever (OpenAI + Pinecone)"""
+    """Hybrid Search Retriever"""
 
     _chat: ChatOpenAI = None
     _openai_embeddings: OpenAIEmbeddings = None
@@ -182,16 +182,16 @@ def load(self, filepath: str):
         https://docs.pinecone.io/docs/manage-indexes#selective-metadata-indexing
         """
         try:
-            logging.debug("Deleting index...")
+            logging.info("Deleting index...")
             pinecone.delete_index(Config.PINECONE_INDEX_NAME)
         except pinecone.exceptions.PineconeException:
-            logging.debug("Index does not exist. Continuing...")
+            logging.info("Index does not exist. Continuing...")
 
         metadata_config = {
             "indexed": [Config.PINECONE_VECTORSTORE_TEXT_KEY, "lc_type"],
             "context": ["lc_text"],
         }
-        logging.debug("Creating index. This may take a few minutes...")
+        logging.info("Creating index. This may take a few minutes...")
         pinecone.create_index(
             Config.PINECONE_INDEX_NAME,
             dimension=Config.PINECONE_DIMENSIONS,
```
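The try/except around the delete is what makes `load()` rerunnable: an existing index is dropped, a missing one is tolerated, and a fresh index is created either way. A condensed sketch of that pattern using the classic `pinecone-client` calls from the diff, with a hypothetical index name and OpenAI's 1536-dimension embedding size standing in for the `Config` values:

```python
import pinecone

INDEX_NAME = "hsr-demo"  # hypothetical stand-in for Config.PINECONE_INDEX_NAME

try:
    pinecone.delete_index(INDEX_NAME)  # drop any existing index first
except pinecone.exceptions.PineconeException:
    pass  # index did not exist; nothing to delete

# Recreate from scratch; 1536 matches OpenAI text-embedding-ada-002 vectors.
pinecone.create_index(INDEX_NAME, dimension=1536)
```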
```diff
@@ -204,23 +204,23 @@ def load(self, filepath: str):
         for pdf_file in pdf_files:
             i += 1
             j = len(pdf_files)
-            logging.debug("Loading PDF %s of %s: %s", i, j, pdf_file)
+            logging.info("Loading PDF %s of %s: %s", i, j, pdf_file)
             loader = PyPDFLoader(file_path=pdf_file)
             docs = loader.load()
             k = 0
             for doc in docs:
                 k += 1
-                logging.debug(k * "-", end="\r")
+                logging.info(k * "-", end="\r")
                 documents = self.text_splitter.create_documents([doc.page_content])
                 document_texts = [doc.page_content for doc in documents]
                 embeddings = self.openai_embeddings.embed_documents(document_texts)
                 self.vector_store.add_documents(documents=documents, embeddings=embeddings)
 
-        logging.debug("Finished loading PDFs")
+        logging.info("Finished loading PDFs")
 
     def rag(self, human_message: Union[str, HumanMessage]):
         """
-        Embedded prompt.
+        Retrieval Augmented Generation prompt.
         1. Retrieve human message prompt: Given a user input, relevant splits are retrieved
            from storage using a Retriever.
         2. Generate: A ChatModel / LLM produces an answer using a prompt that includes
```
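One caveat the level change surfaces: `logging.info(k * "-", end="\r")` carries a `print()`-style `end` keyword, which stdlib `Logger` methods do not accept; once the record actually passes the level filter, the call raises `TypeError` (the old `debug` version dodged this only because the record was filtered before the keyword was ever forwarded). A carriage-return progress line has to bypass logging; a sketch, assuming direct stderr output is acceptable:

```python
import sys

docs = ["page 1", "page 2", "page 3"]  # stand-in for loader.load() output

for k, _doc in enumerate(docs, start=1):
    # Growing dash bar redrawn in place via "\r", print()-style.
    sys.stderr.write(k * "-" + "\r")
    sys.stderr.flush()
sys.stderr.write("\n")  # leave the final bar on screen
```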
```diff
@@ -265,9 +265,10 @@ def rag(self, human_message: Union[str, HumanMessage]):
         response = self.cached_chat_request(system_message=system_message, human_message=human_message)
 
         logging.debug("------------------------------------------------------")
-        logging.debug("Retrieved %i related documents from Pinecone", len(documents))
-        logging.debug("System messages contains %i words", len(system_message.content.split()))
-        logging.debug("Prompt: %s", system_message.content)
-        logging.debug("Response:")
+        logging.debug("rag() Retrieval Augmented Generation prompt")
+        logging.debug("Diagnostic information:")
+        logging.debug(" Retrieved %i related documents from Pinecone", len(documents))
+        logging.debug(" System messages contains %i words", len(system_message.content.split()))
+        logging.debug(" Prompt: %s", system_message.content)
         logging.debug("------------------------------------------------------")
         return response.content
```
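Worth noting why these diagnostic lines keep `%i`/`%s` placeholders with separate arguments rather than f-strings: logging defers string interpolation until a handler actually emits the record, so the potentially long prompt text is never built while DEBUG records are filtered out. A minimal illustration:

```python
import logging

logging.basicConfig(level=logging.INFO)

prompt = "lorem ipsum " * 10_000  # stands in for a long system prompt

# The "%s" substitution never runs: the DEBUG record is filtered before
# formatting. An f-string here would build the long text regardless.
logging.debug("Prompt: %s", prompt)
```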
