18
18
19
19
# document loading
20
20
import glob
21
+ import logging
21
22
import os
22
23
import textwrap
23
24
52
53
# Default OpenAI chat model, taken from project configuration.
DEFAULT_MODEL_NAME = Config.OPENAI_PROMPT_MODEL_NAME

# Initialize the Pinecone client once at import time so all index
# operations below share the same authenticated session.
pinecone.init(api_key=Credentials.PINECONE_API_KEY, environment=Credentials.PINECONE_ENVIRONMENT)

# Cache LLM responses in memory to avoid repeat billing for identical prompts.
set_llm_cache(InMemoryCache())

# Verbosity is driven by the project-level debug flag.
logging.basicConfig(level=logging.DEBUG if Config.DEBUG_MODE else logging.INFO)
55
57
56
58
57
59
class TextSplitter :
@@ -123,16 +125,16 @@ def load(self, filepath: str):
123
125
https://docs.pinecone.io/docs/manage-indexes#selective-metadata-indexing
124
126
"""
125
127
try:
    # Drop any existing index so the load starts from a clean slate.
    logging.debug("Deleting index...")
    pinecone.delete_index(Credentials.PINECONE_INDEX_NAME)
except pinecone.exceptions.PineconeException:
    # First run: nothing to delete. Best-effort by design, so we only log.
    logging.debug("Index does not exist. Continuing...")

# Restrict which metadata fields Pinecone indexes; see
# https://docs.pinecone.io/docs/manage-indexes#selective-metadata-indexing
metadata_config = {
    "indexed": ["lc_id", "lc_type"],
    "context": ["lc_text"],
}
logging.debug("Creating index. This may take a few minutes...")
# dimension=1536 matches OpenAI's text-embedding-ada-002 output size;
# dotproduct metric is required for hybrid (dense + sparse) search.
pinecone.create_index(
    Credentials.PINECONE_INDEX_NAME, dimension=1536, metric="dotproduct", metadata_config=metadata_config
)
@@ -142,19 +144,19 @@ def load(self, filepath: str):
142
144
# Total PDF count is loop-invariant; compute it once instead of per iteration.
j = len(pdf_files)
for pdf_file in pdf_files:
    i += 1
    logging.debug("Loading PDF %s of %s: %s", i, j, pdf_file)
    loader = PyPDFLoader(file_path=pdf_file)
    docs = loader.load()
    k = 0
    for doc in docs:
        k += 1
        # Per-page progress marker. NOTE: logging.debug() accepts no `end`
        # keyword (that is a print() parameter) — passing end="\r" raises
        # TypeError, so the carriage-return trick is dropped here.
        logging.debug(k * "-")
        # Split the page into chunks, embed them, and upsert into Pinecone.
        documents = self.text_splitter.create_documents([doc.page_content])
        document_texts = [doc.page_content for doc in documents]
        embeddings = self.openai_embeddings.embed_documents(document_texts)
        self.vector_store.add_documents(documents=documents, embeddings=embeddings)

logging.debug("Finished loading PDFs")
158
160
159
161
def rag (self , prompt : str ):
160
162
"""
@@ -176,7 +178,7 @@ def rag(self, prompt: str):
176
178
embeddings = self .openai_embeddings , sparse_encoder = self .bm25_encoder , index = self .pinecone_index
177
179
)
178
180
documents = retriever .get_relevant_documents (query = prompt )
179
- print ( f "Retrieved { len ( documents ) } related documents from Pinecone" )
181
+ logging . debug ( "Retrieved %i related documents from Pinecone" , len ( documents ) )
180
182
181
183
# Extract the text from the documents
182
184
document_texts = [doc .page_content for doc in documents ]
@@ -191,14 +193,14 @@ def rag(self, prompt: str):
191
193
# Create a prompt that includes the document texts
192
194
prompt_with_relevant_documents = f"{ prompt + leader } { '. ' .join (document_texts )} "
193
195
194
- print ( f "Prompt contains { len (prompt_with_relevant_documents .split ())} words" )
195
- print ("Prompt:" , prompt_with_relevant_documents )
196
+ logging . debug ( "Prompt contains %i words" , len (prompt_with_relevant_documents .split ()))
197
+ logging . debug ("Prompt: %s " , prompt_with_relevant_documents )
196
198
197
199
# Get a response from the GPT-3.5-turbo model
198
200
response = self .cached_chat_request (
199
201
system_message = "You are a helpful assistant." , human_message = prompt_with_relevant_documents
200
202
)
201
203
202
- print ("Response:" )
203
- print ("------------------------------------------------------" )
204
+ logging . debug ("Response:" )
205
+ logging . debug ("------------------------------------------------------" )
204
206
return response
0 commit comments