Skip to content

Commit 8795715

Browse files
committed
refactor: fix breaking changes for langchain==0.1.14 and pinecone-client==3.2.2
1 parent 8c2f8db commit 8795715

File tree

8 files changed

+82
-56
lines changed

8 files changed

+82
-56
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ PINECONE_VECTORSTORE_TEXT_KEY=lc_id\n\
2020
PINECONE_METRIC=dotproduct\n\
2121
PINECONE_DIMENSIONS=1536\n\
2222
OPENAI_CHAT_MODEL_NAME=gpt-3.5-turbo\n\
23-
OPENAI_PROMPT_MODEL_NAME=text-davinci-003\n\
23+
OPENAI_PROMPT_MODEL_NAME=gpt-3.5-turbo-instruct\n\
2424
OPENAI_CHAT_TEMPERATURE=0.0\n\
2525
OPENAI_CHAT_MAX_RETRIES=3\n\
2626
DEBUG_MODE=True\n" >> .env)

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ OPENAI_API_KEY=PLEASE-ADD-ME
136136
OPENAI_CHAT_MAX_RETRIES=3
137137
OPENAI_CHAT_MODEL_NAME=gpt-3.5-turbo
138138
OPENAI_CHAT_TEMPERATURE=0.0
139-
OPENAI_PROMPT_MODEL_NAME=text-davinci-003
139+
OPENAI_PROMPT_MODEL_NAME=gpt-3.5-turbo-instruct
140140

141141
# Pinecone API
142142
PINECONE_API_KEY=PLEASE-ADD-ME

models/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ class SettingsDefaults:
8787
OPENAI_ENDPOINT_IMAGE_SIZE = "1024x768"
8888
OPENAI_CHAT_CACHE = True
8989
OPENAI_CHAT_MODEL_NAME = "gpt-3.5-turbo"
90-
OPENAI_PROMPT_MODEL_NAME = "text-davinci-003"
90+
OPENAI_PROMPT_MODEL_NAME = "gpt-3.5-turbo"
9191
OPENAI_CHAT_TEMPERATURE = 0.0
9292
OPENAI_CHAT_MAX_RETRIES = 3
9393

models/hybrid_search_retreiver.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# -*- coding: utf-8 -*-
2+
# pylint: disable=E0611,E1101
23
"""
34
Hybrid Search Retriever. A class that combines the following:
45
- OpenAI prompting and ChatModel
@@ -22,18 +23,20 @@
2223

2324
# pinecone integration
2425
from langchain.cache import InMemoryCache
25-
from langchain.chat_models import ChatOpenAI
2626

2727
# embedding
2828
from langchain.globals import set_llm_cache
2929

3030
# prompting and chat
3131
from langchain.llms.openai import OpenAI
3232
from langchain.prompts import PromptTemplate
33+
from langchain.schema import BaseMessage, HumanMessage, SystemMessage
34+
from langchain_community.chat_models import ChatOpenAI
3335

3436
# hybrid search capability
35-
from langchain.retrievers import PineconeHybridSearchRetriever
36-
from langchain.schema import BaseMessage, HumanMessage, SystemMessage
37+
from langchain_community.retrievers.pinecone_hybrid_search import (
38+
PineconeHybridSearchRetriever,
39+
)
3740
from pinecone_text.sparse import BM25Encoder # pylint: disable=import-error
3841

3942
# this project

models/pinecone.py

Lines changed: 62 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# -*- coding: utf-8 -*-
2+
# pylint: disable=E0611,E1101
23
"""A class to manage the lifecycle of Pinecone vector database indexes."""
34

45
# document loading
@@ -9,12 +10,16 @@
910
import logging
1011
import os
1112

12-
# pinecone integration
13-
import pinecone
14-
from langchain.document_loaders import PyPDFLoader
15-
from langchain.embeddings import OpenAIEmbeddings
16-
from langchain.text_splitter import Document
13+
# from langchain.text_splitter import Document
14+
from langchain.text_splitter import RecursiveCharacterTextSplitter
1715
from langchain.vectorstores.pinecone import Pinecone as LCPinecone
16+
from langchain_community.document_loaders import PyPDFLoader
17+
from langchain_community.embeddings import OpenAIEmbeddings
18+
19+
# pinecone integration
20+
# import pinecone
21+
from pinecone import Pinecone, ServerlessSpec
22+
from pinecone.core.client.exceptions import PineconeApiException
1823

1924
# this project
2025
from models.conf import settings
@@ -24,28 +29,29 @@
2429

2530

2631
# pylint: disable=too-few-public-methods
27-
class TextSplitter:
28-
"""
29-
Custom text splitter that adds metadata to the Document object
30-
which is required by PineconeHybridSearchRetriever.
31-
"""
32-
33-
def create_documents(self, texts):
34-
"""Create documents"""
35-
documents = []
36-
for text in texts:
37-
# Create a Document object with the text and metadata
38-
document = Document(page_content=text, metadata={"context": text})
39-
documents.append(document)
40-
return documents
32+
# class TextSplitter:
33+
# """
34+
# Custom text splitter that adds metadata to the Document object
35+
# which is required by PineconeHybridSearchRetriever.
36+
# """
37+
38+
# def create_documents(self, texts):
39+
# """Create documents"""
40+
# documents = []
41+
# for text in texts:
42+
# # Create a Document object with the text and metadata
43+
# document = Document(page_content=text, metadata={"context": text})
44+
# documents.append(document)
45+
# return documents
4146

4247

4348
class PineconeIndex:
4449
"""Pinecone helper class."""
4550

46-
_index: pinecone.Index = None
51+
_pinecone = None
52+
_index: Pinecone.Index = None
4753
_index_name: str = None
48-
_text_splitter: TextSplitter = None
54+
_text_splitter: RecursiveCharacterTextSplitter = None
4955
_openai_embeddings: OpenAIEmbeddings = None
5056
_vector_store: LCPinecone = None
5157

@@ -69,11 +75,11 @@ def index_name(self, value: str) -> None:
6975
self.init_index()
7076

7177
@property
72-
def index(self) -> pinecone.Index:
78+
def index(self) -> Pinecone.Index:
7379
"""Pinecone.Index lazy read-only property."""
7480
if self._index is None:
7581
self.init_index()
76-
self._index = pinecone.Index(index_name=self.index_name)
82+
self._index = self.pinecone.Index(name=self.index_name)
7783
return self._index
7884

7985
@property
@@ -85,7 +91,7 @@ def index_stats(self) -> dict:
8591
@property
8692
def initialized(self) -> bool:
8793
"""initialized read-only property."""
88-
indexes = pinecone.manage.list_indexes()
94+
indexes = self.pinecone.list_indexes()
8995
return self.index_name in indexes
9096

9197
@property
@@ -113,23 +119,31 @@ def openai_embeddings(self) -> OpenAIEmbeddings:
113119
return self._openai_embeddings
114120

115121
@property
116-
def text_splitter(self) -> TextSplitter:
117-
"""TextSplitter lazy read-only property."""
122+
def pinecone(self):
123+
"""Pinecone lazy read-only property."""
124+
if self._pinecone is None:
125+
self._pinecone = Pinecone(api_key=settings.pinecone_api_key.get_secret_value())
126+
return self._pinecone
127+
128+
@property
129+
def text_splitter(self) -> RecursiveCharacterTextSplitter:
130+
"""lazy read-only property."""
118131
if self._text_splitter is None:
119-
self._text_splitter = TextSplitter()
132+
self._text_splitter = RecursiveCharacterTextSplitter()
120133
return self._text_splitter
121134

122135
def init_index(self):
123136
"""Verify that an index named self.index_name exists in Pinecone. If not, create it."""
124-
indexes = pinecone.manage.list_indexes()
137+
indexes = self.pinecone.list_indexes()
125138
if self.index_name not in indexes:
126139
logging.debug("Index does not exist.")
127140
self.create()
128141

129142
def init(self):
130143
"""Initialize Pinecone."""
131144
# pylint: disable=no-member
132-
pinecone.init(api_key=settings.pinecone_api_key.get_secret_value(), environment=settings.pinecone_environment)
145+
146+
# pinecone.init(api_key=settings.pinecone_api_key.get_secret_value(), environment=settings.pinecone_environment)
133147
self._index = None
134148
self._index_name = None
135149
self._text_splitter = None
@@ -142,23 +156,30 @@ def delete(self):
142156
logging.debug("Index does not exist. Nothing to delete.")
143157
return
144158
print("Deleting index...")
145-
pinecone.delete_index(self.index_name)
159+
self.pinecone.delete_index(self.index_name)
146160

147161
def create(self):
148162
"""Create index."""
149-
metadata_config = {
150-
"indexed": [settings.pinecone_vectorstore_text_key, "lc_type"],
151-
"context": ["lc_text"],
152-
}
163+
# deprecated?
164+
# metadata_config = {
165+
# "indexed": [settings.pinecone_vectorstore_text_key, "lc_type"],
166+
# "context": ["lc_text"],
167+
# }
153168
print("Creating index. This may take a few minutes...")
154-
155-
pinecone.create_index(
156-
name=self.index_name,
157-
dimension=settings.pinecone_dimensions,
158-
metric=settings.pinecone_metric,
159-
metadata_config=metadata_config,
169+
serverless_spec = ServerlessSpec(
170+
cloud="aws",
171+
region="us-west-2",
160172
)
161-
print("Index created.")
173+
try:
174+
self.pinecone.create_index(
175+
name=self.index_name,
176+
dimension=settings.pinecone_dimensions,
177+
metric=settings.pinecone_metric,
178+
spec=serverless_spec,
179+
)
180+
print("Index created.")
181+
except PineconeApiException:
182+
pass
162183

163184
def initialize(self):
164185
"""Initialize index."""

models/tests/mock_data/.env.test_01

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,6 @@
1010
OPENAI_ENDPOINT_IMAGE_SIZE = "TEST_1024x768"
1111
OPENAI_CHAT_CACHE = False
1212
OPENAI_CHAT_MODEL_NAME = "TEST_gpt-3.5-turbo"
13-
OPENAI_PROMPT_MODEL_NAME = "TEST_text-davinci-003"
13+
OPENAI_PROMPT_MODEL_NAME = "TEST_gpt-3.5-turbo-instruct"
1414
OPENAI_CHAT_TEMPERATURE = 1.0
1515
OPENAI_CHAT_MAX_RETRIES = 5

models/tests/test_hsr.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
# -*- coding: utf-8 -*-
2+
# pylint: disable=E0611,E1101
23
# flake8: noqa: F401
34
"""
45
Test integrity of base class.
56
"""
67
import pytest # pylint: disable=unused-import
7-
from langchain.chat_models import ChatOpenAI
8+
from langchain_community.chat_models import ChatOpenAI
89

910
from models.hybrid_search_retreiver import HybridSearchRetriever
1011
from models.pinecone import PineconeIndex

models/yt.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# -*- coding: utf-8 -*-
2+
# pylint: disable=E0611
23
"""
34
LangChain Quickstart
45
~~~~~~~~~~~~~~~~~~~~
@@ -15,8 +16,6 @@
1516
# 5.) sequential chains
1617
# 4.) chains
1718
from langchain.chains import LLMChain, SimpleSequentialChain
18-
from langchain.chat_models import ChatOpenAI
19-
from langchain.embeddings import OpenAIEmbeddings
2019

2120
# 1.) wrappers
2221
from langchain.llms.openai import OpenAI
@@ -33,6 +32,8 @@
3332

3433
# 7.) pinecone client
3534
from langchain.vectorstores.pinecone import Pinecone  # 7.) pinecone client
35+
from langchain_community.chat_models import ChatOpenAI
36+
from langchain_community.embeddings import OpenAIEmbeddings
3637

3738
# 8.) LangChain agents
3839
from langchain_experimental.agents.agent_toolkits.python.base import create_python_agent
@@ -75,14 +76,14 @@ class LangChainDev:
7576
def test_01_basic(self):
7677
"""Test a basic request"""
7778

78-
llm = OpenAI(model_name="text-davinci-003")
79+
llm = OpenAI(model_name="gpt-3.5-turbo-instruct")
7980
retval = llm("explain large language models in one sentence")
8081
print(retval)
8182

8283
# 2.) models and messages. minute 6:08
8384
def test_02_chat_model(self):
8485
"""Test a chat model"""
85-
chat = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.3)
86+
chat = ChatOpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0.3)
8687
messages = [
8788
SystemMessage(content="You are an expert data scientist"),
8889
HumanMessage(content="Write a Python script that trains a neural network on simulated data"),
@@ -102,7 +103,7 @@ def get_prompt(self):
102103

103104
def test_03_prompt_templates(self):
104105
"""Test prompt templates"""
105-
llm = OpenAI(model_name="text-davinci-003")
106+
llm = OpenAI(model_name="gpt-3.5-turbo-instruct")
106107
prompt = self.get_prompt()
107108
retval = llm(prompt.format(concept="regularization"))
108109
print(retval)
@@ -115,7 +116,7 @@ def get_chain(self, llm, prompt):
115116

116117
def test_04_chain(self):
117118
"""Test a chain"""
118-
llm = OpenAI(model_name="text-davinci-003")
119+
llm = OpenAI(model_name="gpt-3.5-turbo-instruct")
119120
prompt = self.get_prompt()
120121
chain = self.get_chain(llm=llm, prompt=prompt)
121122
print(chain.run("autoencoder"))
@@ -137,7 +138,7 @@ def get_prompt_two(self):
137138

138139
def get_explanation(self):
139140
"""Get an explanation"""
140-
llm = OpenAI(model_name="text-davinci-003")
141+
llm = OpenAI(model_name="gpt-3.5-turbo-instruct")
141142
prompt = self.get_prompt()
142143
chain_one = self.get_chain(llm=llm, prompt=prompt)
143144

0 commit comments

Comments
 (0)