1
1
# -*- coding: utf-8 -*-
2
+ # pylint: disable=E0611,E1101
2
3
"""A class to manage the lifecycle of Pinecone vector database indexes."""
3
4
4
5
# document loading
9
10
import logging
10
11
import os
11
12
12
- # pinecone integration
13
- import pinecone
14
- from langchain .document_loaders import PyPDFLoader
15
- from langchain .embeddings import OpenAIEmbeddings
16
- from langchain .text_splitter import Document
13
+ # from langchain.text_splitter import Document
14
+ from langchain .text_splitter import RecursiveCharacterTextSplitter
17
15
from langchain .vectorstores .pinecone import Pinecone as LCPinecone
16
+ from langchain_community .document_loaders import PyPDFLoader
17
+ from langchain_community .embeddings import OpenAIEmbeddings
18
+
19
+ # pinecone integration
20
+ # import pinecone
21
+ from pinecone import Pinecone , ServerlessSpec
22
+ from pinecone .core .client .exceptions import PineconeApiException
18
23
19
24
# this project
20
25
from models .conf import settings
24
29
25
30
26
31
# pylint: disable=too-few-public-methods
27
- class TextSplitter :
28
- """
29
- Custom text splitter that adds metadata to the Document object
30
- which is required by PineconeHybridSearchRetriever.
31
- """
32
-
33
- def create_documents (self , texts ):
34
- """Create documents"""
35
- documents = []
36
- for text in texts :
37
- # Create a Document object with the text and metadata
38
- document = Document (page_content = text , metadata = {"context" : text })
39
- documents .append (document )
40
- return documents
32
+ # class TextSplitter:
33
+ # """
34
+ # Custom text splitter that adds metadata to the Document object
35
+ # which is required by PineconeHybridSearchRetriever.
36
+ # """
37
+
38
+ # def create_documents(self, texts):
39
+ # """Create documents"""
40
+ # documents = []
41
+ # for text in texts:
42
+ # # Create a Document object with the text and metadata
43
+ # document = Document(page_content=text, metadata={"context": text})
44
+ # documents.append(document)
45
+ # return documents
41
46
42
47
43
48
class PineconeIndex :
44
49
"""Pinecone helper class."""
45
50
46
- _index : pinecone .Index = None
51
+ _pinecone = None
52
+ _index : Pinecone .Index = None
47
53
_index_name : str = None
48
- _text_splitter : TextSplitter = None
54
+ _text_splitter : RecursiveCharacterTextSplitter = None
49
55
_openai_embeddings : OpenAIEmbeddings = None
50
56
_vector_store : LCPinecone = None
51
57
@@ -69,11 +75,11 @@ def index_name(self, value: str) -> None:
69
75
self .init_index ()
70
76
71
77
@property
def index(self) -> Pinecone.Index:
    """Return the Pinecone index handle, creating it on first access.

    The handle is cached in ``self._index``. On the first access
    ``init_index()`` is called before the handle is requested from the
    Pinecone client under ``self.index_name``.
    """
    if self._index is not None:
        # Already resolved on an earlier access.
        return self._index
    self.init_index()
    self._index = self.pinecone.Index(name=self.index_name)
    return self._index
78
84
79
85
@property
@@ -85,7 +91,7 @@ def index_stats(self) -> dict:
85
91
@property
def initialized(self) -> bool:
    """Return True when the Pinecone client lists an index named ``self.index_name``.

    The membership test runs against ``list_indexes().names()``. In the
    class-based Pinecone client, ``list_indexes()`` returns an index-list
    object whose iteration yields index descriptions, so testing a plain
    string against it directly never matches.
    """
    index_names = self.pinecone.list_indexes().names()
    return self.index_name in index_names
90
96
91
97
@property
@@ -113,23 +119,31 @@ def openai_embeddings(self) -> OpenAIEmbeddings:
113
119
return self ._openai_embeddings
114
120
115
121
@property
def pinecone(self):
    """Return the Pinecone client, constructing it on first access.

    The client is built with the API key read from
    ``settings.pinecone_api_key`` and cached in ``self._pinecone``.
    """
    if self._pinecone is not None:
        return self._pinecone
    api_key = settings.pinecone_api_key.get_secret_value()
    self._pinecone = Pinecone(api_key=api_key)
    return self._pinecone
127
+
128
@property
def text_splitter(self) -> RecursiveCharacterTextSplitter:
    """Return the text splitter, constructing it on first access.

    A ``RecursiveCharacterTextSplitter`` with default arguments is created
    once and cached in ``self._text_splitter``.
    """
    splitter = self._text_splitter
    if splitter is None:
        splitter = RecursiveCharacterTextSplitter()
        self._text_splitter = splitter
    return splitter
121
134
122
135
def init_index(self):
    """Verify that an index named self.index_name exists in Pinecone. If not, create it.

    The existence check compares against ``list_indexes().names()``. The
    object returned by ``list_indexes()`` iterates index descriptions, so a
    direct ``in`` test with a string would report every index as missing
    and call ``create()`` each time.
    """
    existing_names = self.pinecone.list_indexes().names()
    if self.index_name not in existing_names:
        logging.debug("Index does not exist.")
        self.create()
128
141
129
142
def init (self ):
130
143
"""Initialize Pinecone."""
131
144
# pylint: disable=no-member
132
- pinecone .init (api_key = settings .pinecone_api_key .get_secret_value (), environment = settings .pinecone_environment )
145
+
146
+ # pinecone.init(api_key=settings.pinecone_api_key.get_secret_value(), environment=settings.pinecone_environment)
133
147
self ._index = None
134
148
self ._index_name = None
135
149
self ._text_splitter = None
@@ -142,23 +156,30 @@ def delete(self):
142
156
logging .debug ("Index does not exist. Nothing to delete." )
143
157
return
144
158
print ("Deleting index..." )
145
- pinecone .delete_index (self .index_name )
159
+ self . pinecone .delete_index (self .index_name )
146
160
147
161
def create(self):
    """Create a serverless Pinecone index named ``self.index_name``.

    Dimension and metric come from ``settings``. The serverless spec is
    fixed to cloud "aws", region "us-west-2".

    This is best-effort: a ``PineconeApiException`` raised by the client is
    logged as a warning and not re-raised, so callers never see it.
    """
    # NOTE: the ``metadata_config`` argument passed by the previous
    # implementation is no longer sent to ``create_index``.
    print("Creating index. This may take a few minutes...")
    serverless_spec = ServerlessSpec(
        cloud="aws",
        region="us-west-2",
    )
    try:
        self.pinecone.create_index(
            name=self.index_name,
            dimension=settings.pinecone_dimensions,
            metric=settings.pinecone_metric,
            spec=serverless_spec,
        )
    except PineconeApiException as exc:
        # Keep the non-raising behaviour, but leave a trace of the failure
        # instead of discarding it silently.
        logging.warning("Pinecone rejected create_index for %s: %s", self.index_name, exc)
    else:
        print("Index created.")
162
183
163
184
def initialize (self ):
164
185
"""Initialize index."""
0 commit comments