Skip to content

Commit 62cd18f

Browse files
committed
fix: fix load problem with existing index
1 parent 8de793d commit 62cd18f

File tree

1 file changed

+17
-26
lines changed

1 file changed

+17
-26
lines changed

models/ssm.py

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,15 @@ class SalesSupportModel:
59 59
)
60 60

61 61
# embeddings
62-
texts_splitter_results: List[Document]
62+
text_splitter = RecursiveCharacterTextSplitter(
63+
chunk_size=100,
64+
chunk_overlap=0,
65+
)
63 66
openai_embedding = OpenAIEmbeddings()
64-
query_result: List[float]
67+
pinecone_search = Pinecone.from_existing_index(
68+
Credentials.PINECONE_INDEX_NAME,
69+
embedding=openai_embedding,
70+
)
65 71

66 72
def cached_chat_request(self, system_message: str, human_message: str) -> SystemMessage:
67 73
"""Cached chat request."""
@@ -87,23 +93,6 @@ def split_text(self, text: str) -> List[Document]:
87 93
retval = text_splitter.create_documents([text])
88 94
return retval
89 95

90-
def embed(self, text: str) -> List[float]:
91-
"""Embed."""
92-
text_splitter = RecursiveCharacterTextSplitter(
93-
chunk_size=100,
94-
chunk_overlap=0,
95-
)
96-
texts_splitter_results = text_splitter.create_documents([text])
97-
embedding = texts_splitter_results[0].page_content
98-
# pylint: disable=no-member
99-
self.openai_embedding.embed_query(embedding)
100-
101-
Pinecone.from_documents(
102-
documents=texts_splitter_results,
103-
embedding=self.openai_embedding,
104-
index_name=Credentials.PINECONE_INDEX_NAME,
105-
)
106-
107 96
def load(self, filepath: str):
108 97
"""
109 98
Embed PDF.
@@ -118,14 +107,20 @@ def load(self, filepath: str):
118 107
for pdf_file in pdf_files:
119 108
i += 1
120 109
j = len(pdf_files)
121-
print(f"Loading PDF {i} of {j}: ")
110+
print(f"Loading PDF {i} of {j}: ", pdf_file)
122 111
loader = PyPDFLoader(file_path=pdf_file)
123 112
docs = loader.load()
124 113
k = 0
125 114
for doc in docs:
126 115
k += 1
127 116
print(k * "-", end="\r")
128-
self.embed(doc.page_content)
117+
texts_splitter_results = self.text_splitter.create_documents([doc.page_content])
118+
self.pinecone_search.from_existing_index(
119+
index_name=Credentials.PINECONE_INDEX_NAME,
120+
embedding=self.openai_embedding,
121+
text_key=texts_splitter_results,
122+
)
123+
129 124
print("Finished loading PDFs")
130 125

131 126
def rag(self, prompt: str):
@@ -142,11 +137,7 @@ def format_docs(docs):
142 137
"""Format docs."""
143 138
return "\n\n".join(doc.page_content for doc in docs)
144 139

145-
pinecone_search = Pinecone.from_existing_index(
146-
Credentials.PINECONE_INDEX_NAME,
147-
embedding=self.openai_embedding,
148-
)
149-
retriever = pinecone_search.as_retriever()
140+
retriever = self.pinecone_search.as_retriever()
150 141

151 142
# Use the retriever to get relevant documents
152 143
documents = retriever.get_relevant_documents(query=prompt)

0 commit comments

Comments
 (0)