Skip to content

Commit 5b85e82

Browse files
committed
refactor: parameterize PINECONE_VECTORSTORE_TEXT_KEY
1 parent e0f8fa5 commit 5b85e82

File tree

4 files changed: 31 additions and 21 deletions.

4 files changed: 31 additions and 21 deletions.

Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ SHELL := /bin/bash
33
ifneq ("$(wildcard .env)","")
44
include .env
55
else
6-
$(shell echo -e "OPENAI_API_ORGANIZATION=PLEASE-ADD-ME\nOPENAI_API_KEY=PLEASE-ADD-ME\nPINECONE_API_KEY=PLEASE-ADD-ME\nPINECONE_ENVIRONMENT=gcp-starter\nPINECONE_INDEX_NAME=hsr\nOPENAI_CHAT_MODEL_NAME=gpt-3.5-turbo\nOPENAI_PROMPT_MODEL_NAME=text-davinci-003\nOPENAI_CHAT_TEMPERATURE=0.0\nOPENAI_CHAT_MAX_RETRIES=3\nDEBUG_MODE=True\n" >> .env)
6+
$(shell echo -e "OPENAI_API_ORGANIZATION=PLEASE-ADD-ME\nOPENAI_API_KEY=PLEASE-ADD-ME\nPINECONE_API_KEY=PLEASE-ADD-ME\nPINECONE_ENVIRONMENT=gcp-starter\nPINECONE_INDEX_NAME=hsr\nPINECONE_VECTORSTORE_TEXT_KEY=lc_id\nOPENAI_CHAT_MODEL_NAME=gpt-3.5-turbo\nOPENAI_PROMPT_MODEL_NAME=text-davinci-003\nOPENAI_CHAT_TEMPERATURE=0.0\nOPENAI_CHAT_MAX_RETRIES=3\nDEBUG_MODE=True\n" >> .env)
77
endif
88

99
.PHONY: analyze init activate test lint clean

models/const.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
PINECONE_API_KEY = os.environ["PINECONE_API_KEY"]
1717
PINECONE_ENVIRONMENT = os.environ["PINECONE_ENVIRONMENT"]
1818
PINECONE_INDEX_NAME = os.environ.get("PINECONE_INDEX_NAME", "hsr")
19+
PINECONE_VECTORSTORE_TEXT_KEY = os.environ.get("PINECONE_VECTORSTORE_TEXT_KEY", "lc_id")
1920
OPENAI_CHAT_MODEL_NAME = os.environ.get("OPENAI_CHAT_MODEL_NAME", "gpt-3.5-turbo")
2021
OPENAI_PROMPT_MODEL_NAME = os.environ.get("OPENAI_PROMPT_MODEL_NAME", "text-davinci-003")
2122
OPENAI_CHAT_TEMPERATURE = float(os.environ.get("OPENAI_CHAT_TEMPERATURE", 0.0))
@@ -35,6 +36,9 @@ class Config:
3536
OPENAI_CHAT_TEMPERATURE: float = OPENAI_CHAT_TEMPERATURE
3637
OPENAI_CHAT_MAX_RETRIES: int = OPENAI_CHAT_MAX_RETRIES
3738
OPENAI_CHAT_CACHE: bool = OPENAI_CHAT_CACHE
39+
PINECONE_ENVIRONMENT = PINECONE_ENVIRONMENT
40+
PINECONE_INDEX_NAME = PINECONE_INDEX_NAME
41+
PINECONE_VECTORSTORE_TEXT_KEY: str = PINECONE_VECTORSTORE_TEXT_KEY
3842

3943

4044
class Credentials:
@@ -43,5 +47,3 @@ class Credentials:
4347
OPENAI_API_KEY = OPENAI_API_KEY
4448
OPENAI_API_ORGANIZATION = OPENAI_API_ORGANIZATION
4549
PINECONE_API_KEY = PINECONE_API_KEY
46-
PINECONE_ENVIRONMENT = PINECONE_ENVIRONMENT
47-
PINECONE_INDEX_NAME = PINECONE_INDEX_NAME

models/hybrid_search_retreiver.py

Lines changed: 21 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,7 @@
5454
# initializations
5555
###############################################################################
5656
DEFAULT_MODEL_NAME = Config.OPENAI_PROMPT_MODEL_NAME
57-
pinecone.init(api_key=Credentials.PINECONE_API_KEY, environment=Credentials.PINECONE_ENVIRONMENT)
57+
pinecone.init(api_key=Credentials.PINECONE_API_KEY, environment=Config.PINECONE_ENVIRONMENT)
5858
set_llm_cache(InMemoryCache())
5959
logging.basicConfig(level=logging.DEBUG if Config.DEBUG_MODE else logging.INFO)
6060

@@ -92,8 +92,10 @@ class HybridSearchRetriever:
9292
openai_embeddings = OpenAIEmbeddings(
9393
api_key=Credentials.OPENAI_API_KEY, organization=Credentials.OPENAI_API_ORGANIZATION
9494
)
95-
pinecone_index = pinecone.Index(index_name=Credentials.PINECONE_INDEX_NAME)
96-
vector_store = Pinecone(index=pinecone_index, embedding=openai_embeddings, text_key="lc_id")
95+
pinecone_index = pinecone.Index(index_name=Config.PINECONE_INDEX_NAME)
96+
vector_store = Pinecone(
97+
index=pinecone_index, embedding=openai_embeddings, text_key=Config.PINECONE_VECTORSTORE_TEXT_KEY
98+
)
9799

98100
text_splitter = TextSplitter()
99101
bm25_encoder = BM25Encoder().default()
@@ -135,17 +137,17 @@ def load(self, filepath: str):
135137
"""
136138
try:
137139
logging.debug("Deleting index...")
138-
pinecone.delete_index(Credentials.PINECONE_INDEX_NAME)
140+
pinecone.delete_index(Config.PINECONE_INDEX_NAME)
139141
except pinecone.exceptions.PineconeException:
140142
logging.debug("Index does not exist. Continuing...")
141143

142144
metadata_config = {
143-
"indexed": ["lc_id", "lc_type"],
145+
"indexed": [Config.PINECONE_VECTORSTORE_TEXT_KEY, "lc_type"],
144146
"context": ["lc_text"],
145147
}
146148
logging.debug("Creating index. This may take a few minutes...")
147149
pinecone.create_index(
148-
Credentials.PINECONE_INDEX_NAME, dimension=1536, metric="dotproduct", metadata_config=metadata_config
150+
Config.PINECONE_INDEX_NAME, dimension=1536, metric="dotproduct", metadata_config=metadata_config
149151
)
150152

151153
pdf_files = glob.glob(os.path.join(filepath, "*.pdf"))
@@ -187,11 +189,13 @@ def rag(self, human_message: Union[str, HumanMessage]):
187189
logging.debug("Converting human_message to HumanMessage")
188190
human_message = HumanMessage(content=human_message)
189191

192+
# ---------------------------------------------------------------------
193+
# 1.) Retrieve relevant documents from Pinecone vector database
194+
# ---------------------------------------------------------------------
190195
retriever = PineconeHybridSearchRetriever(
191196
embeddings=self.openai_embeddings, sparse_encoder=self.bm25_encoder, index=self.pinecone_index
192197
)
193198
documents = retriever.get_relevant_documents(query=human_message.content)
194-
logging.debug("Retrieved %i related documents from Pinecone", len(documents))
195199

196200
# Extract the text from the documents
197201
document_texts = [doc.page_content for doc in documents]
@@ -202,13 +206,19 @@ def rag(self, human_message: Union[str, HumanMessage]):
202206
into your responses:\n\n
203207
"""
204208
)
205-
system_message = f"{leader} {'. '.join(document_texts)}"
209+
system_message_content = f"{leader} {'. '.join(document_texts)}"
210+
system_message = SystemMessage(content=system_message_content)
211+
# ---------------------------------------------------------------------
212+
# finished with hybrid search setup
213+
# ---------------------------------------------------------------------
206214

207-
logging.debug("System messages contains %i words", len(system_message.split()))
208-
logging.debug("Prompt: %s", system_message)
209-
system_message = SystemMessage(content=system_message)
215+
# 2.) get a response from the chat model
210216
response = self.cached_chat_request(system_message=system_message, human_message=human_message)
211217

218+
logging.debug("------------------------------------------------------")
219+
logging.debug("Retrieved %i related documents from Pinecone", len(documents))
220+
logging.debug("System messages contains %i words", len(system_message.content.split()))
221+
logging.debug("Prompt: %s", system_message.content)
212222
logging.debug("Response:")
213223
logging.debug("------------------------------------------------------")
214224
return response.content

models/tests/test_pinecone.py

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
from langchain.embeddings import OpenAIEmbeddings
1010
from langchain.vectorstores.pinecone import Pinecone
1111

12-
from ..const import Credentials
12+
from ..const import Config, Credentials
1313

1414

1515
class TestPinecone:
@@ -19,22 +19,20 @@ def test_01_test_pinecone_connectivity(self):
1919
"""Ensure that we have connectivity to Pinecone."""
2020
# pylint: disable=broad-except
2121
try:
22-
pinecone.init(api_key=Credentials.PINECONE_API_KEY, environment=Credentials.PINECONE_ENVIRONMENT)
22+
pinecone.init(api_key=Credentials.PINECONE_API_KEY, environment=Config.PINECONE_ENVIRONMENT)
2323
except Exception as e:
2424
assert False, f"pinecone.init() failed with exception: {e}"
2525

2626
def test_02_test_pinecone_index(self):
2727
"""Ensure that the Pinecone index exists and that we can connect to it."""
28-
pinecone.init(api_key=Credentials.PINECONE_API_KEY, environment=Credentials.PINECONE_ENVIRONMENT)
28+
pinecone.init(api_key=Credentials.PINECONE_API_KEY, environment=Config.PINECONE_ENVIRONMENT)
2929
openai_embedding = OpenAIEmbeddings()
3030

3131
# pylint: disable=broad-except
3232
try:
3333
Pinecone.from_existing_index(
34-
Credentials.PINECONE_INDEX_NAME,
34+
Config.PINECONE_INDEX_NAME,
3535
embedding=openai_embedding,
3636
)
3737
except Exception as e:
38-
assert (
39-
False
40-
), f"Pinecone initialization of index {Credentials.PINECONE_INDEX_NAME,} failed with exception: {e}"
38+
assert False, f"Pinecone initialization of index {Config.PINECONE_INDEX_NAME,} failed with exception: {e}"

0 commit comments

Comments
 (0)