Python: updating pinecone client #6021

Merged · 7 commits · May 8, 2024
2 changes: 0 additions & 2 deletions .github/workflows/python-integration-tests.yml
@@ -91,7 +91,6 @@ jobs:
       Bing__ApiKey: ${{ secrets.BING__APIKEY }}
       OpenAI__ApiKey: ${{ secrets.OPENAI__APIKEY }}
       Pinecone__ApiKey: ${{ secrets.PINECONE__APIKEY }}
-      Pinecone__Environment: ${{ secrets.PINECONE__ENVIRONMENT }}
       Postgres__Connectionstr: ${{secrets.POSTGRES__CONNECTIONSTR}}
       AZURE_COGNITIVE_SEARCH_ADMIN_KEY: ${{secrets.AZURE_COGNITIVE_SEARCH_ADMIN_KEY}}
       AZURE_COGNITIVE_SEARCH_ENDPOINT: ${{secrets.AZURE_COGNITIVE_SEARCH_ENDPOINT}}
@@ -157,7 +156,6 @@ jobs:
       Bing__ApiKey: ${{ secrets.BING__APIKEY }}
       OpenAI__ApiKey: ${{ secrets.OPENAI__APIKEY }}
       Pinecone__ApiKey: ${{ secrets.PINECONE__APIKEY }}
-      Pinecone__Environment: ${{ secrets.PINECONE__ENVIRONMENT }}
       Postgres__Connectionstr: ${{secrets.POSTGRES__CONNECTIONSTR}}
       AZURE_COGNITIVE_SEARCH_ADMIN_KEY: ${{secrets.AZURE_COGNITIVE_SEARCH_ADMIN_KEY}}
       AZURE_COGNITIVE_SEARCH_ENDPOINT: ${{secrets.AZURE_COGNITIVE_SEARCH_ENDPOINT}}
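Note: the PINECONE__ENVIRONMENT secret is dropped from both integration-test jobs because the v3 pinecone-client this PR moves to no longer takes an environment at initialization (see the connector changes below).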
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -18,12 +18,12 @@ repos:
       - id: mixed-line-ending
         files: \.py$
   - repo: https://github.com/psf/black
-    rev: 24.4.0
+    rev: 24.4.2
     hooks:
       - id: black
         files: \.py$
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.4.1
+    rev: v0.4.2
     hooks:
       - id: ruff
         args: [ --fix, --exit-non-zero-on-fix ]
832 changes: 404 additions & 428 deletions python/poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions python/pyproject.toml
@@ -57,7 +57,7 @@ milvus = [
     { version = ">=2.3,<2.3.8", markers = 'python_version > "3.8" and sys_platform != "win32"', optional = true}
 ]
 weaviate-client = { version = ">=3.18,<5.0", optional = true}
-pinecone-client = { version = "^2.2.2", optional = true}
+pinecone-client = { version = ">=3.0.0", optional = true}
 psycopg = { version="^3.1.9", extras=["binary","pool"], optional = true}
 redis = { version = "^4.6.0", optional = true}
 azure-search-documents = {version = "11.6.0b1", allow-prereleases = true, optional = true}
@@ -110,7 +110,7 @@ milvus = [
     { version = ">=2.3,<2.3.8", markers = 'python_version > "3.8" and sys_platform != "win32"'}
 ]
 weaviate-client = ">=3.18,<5.0"
-pinecone-client = "^2.2.2"
+pinecone-client = ">=3.0.0"
 psycopg = { version="^3.1.9", extras=["binary","pool"]}
 redis = "^4.6.0"
 azure-search-documents = {version = "11.6.0b1", allow-prereleases = true}
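Bumping pinecone-client from ^2.2.2 to >=3.0.0 crosses the v2→v3 API break that the rest of this PR adapts to. As a rough sketch of that break (illustrative only, not code from this PR; the index name and dimension are made up), v3 replaces the module-level pinecone.init with a client object, and serverless indexes take a ServerlessSpec instead of pod settings:

from pinecone import Pinecone, ServerlessSpec

# v2 (removed): pinecone.init(api_key=..., environment=...)
# v3: every operation hangs off a Pinecone client instance.
pc = Pinecone(api_key="YOUR_API_KEY")

# v3 serverless indexes take a cloud/region spec instead of the v2
# pod settings (pods, replicas, pod_type, metadata_config).
if "example-index" not in pc.list_indexes().names():
    pc.create_index(
        name="example-index",
        dimension=1536,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

index = pc.Index("example-index")  # handle used for upsert/fetch/query/delete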
python/semantic_kernel/connectors/memory/pinecone/pinecone_memory_store.py
@@ -1,11 +1,10 @@
 # Copyright (c) Microsoft. All rights reserved.
 
 import logging
-from typing import List, Optional, Tuple
+from typing import List, NamedTuple, Optional, Tuple
 
-import pinecone
 from numpy import ndarray
-from pinecone import FetchResponse, IndexDescription
+from pinecone import FetchResponse, IndexDescription, IndexList, Pinecone, ServerlessSpec
 
 from semantic_kernel.connectors.memory.pinecone.utils import (
     build_payload,
@@ -20,7 +19,7 @@
 from semantic_kernel.memory.memory_record import MemoryRecord
 from semantic_kernel.memory.memory_store_base import MemoryStoreBase
 
-# Limitations set by Pinecone at https://docs.pinecone.io/docs/limits
+# Limitations set by Pinecone at https://docs.pinecone.io/reference/known-limitations
 MAX_DIMENSIONALITY = 20000
 MAX_UPSERT_BATCH_SIZE = 100
 MAX_QUERY_WITHOUT_METADATA_BATCH_SIZE = 10000
@@ -35,21 +34,23 @@ class PineconeMemoryStore(MemoryStoreBase):
     """A memory store that uses Pinecone as the backend."""
 
     _pinecone_api_key: str
-    _pinecone_environment: str
     _default_dimensionality: int
 
+    DEFAULT_INDEX_SPEC: ServerlessSpec = ServerlessSpec(
+        cloud="aws",
+        region="us-east-1",
+    )
+
     def __init__(
         self,
         api_key: str,
-        environment: str,
         default_dimensionality: int,
         **kwargs,
     ) -> None:
         """Initializes a new instance of the PineconeMemoryStore class.
 
         Arguments:
             pinecone_api_key {str} -- The Pinecone API key.
-            pinecone_environment {str} -- The Pinecone environment.
             default_dimensionality {int} -- The default dimensionality to use for new collections.
         """
         if kwargs.get("logger"):
@@ -60,25 +61,21 @@ def __init__(
                 + f"the maximum allowed value of {MAX_DIMENSIONALITY}."
             )
         self._pinecone_api_key = api_key
-        self._pinecone_environment = environment
         self._default_dimensionality = default_dimensionality
 
-        pinecone.init(api_key=self._pinecone_api_key, environment=self._pinecone_environment)
+        self.pinecone = Pinecone(api_key=self._pinecone_api_key)
+        self.collection_names_cache = set()
 
     async def create_collection(
         self,
         collection_name: str,
         dimension_num: Optional[int] = None,
         distance_type: Optional[str] = "cosine",
-        num_of_pods: Optional[int] = 1,
-        replica_num: Optional[int] = 0,
-        type_of_pod: Optional[str] = "p1.x1",
-        metadata_config: Optional[dict] = None,
+        index_spec: NamedTuple = DEFAULT_INDEX_SPEC,
     ) -> None:
         """Creates a new collection in Pinecone if it does not exist.
             This function creates an index, by default the following index
-            settings are used: metric = cosine, pods = 1, replicas = 0,
-            pod_type = p1.x1, metadata_config = None.
+            settings are used: metric = cosine, cloud = aws, region = us-east-1.
 
         Arguments:
             collection_name {str} -- The name of the collection to create.
@@ -95,16 +92,11 @@ async def create_collection(
                 f"Dimensionality of {dimension_num} exceeds " + f"the maximum allowed value of {MAX_DIMENSIONALITY}."
             )
 
-        if collection_name not in pinecone.list_indexes():
-            pinecone.create_index(
-                name=collection_name,
-                dimension=dimension_num,
-                metric=distance_type,
-                pods=num_of_pods,
-                replicas=replica_num,
-                pod_type=type_of_pod,
-                metadata_config=metadata_config,
+        if not await self.does_collection_exist(collection_name):
+            self.pinecone.create_index(
+                name=collection_name, dimension=dimension_num, metric=distance_type, spec=index_spec
             )
+            self.collection_names_cache.add(collection_name)
 
     async def describe_collection(self, collection_name: str) -> Optional[IndexDescription]:
         """Gets the description of the index.
@@ -113,19 +105,19 @@ async def describe_collection(self, collection_name: str) -> Optional[IndexDescription]:
         Returns:
             Optional[dict] -- The index.
         """
-        if collection_name in pinecone.list_indexes():
-            return pinecone.describe_index(collection_name)
+        if await self.does_collection_exist(collection_name):
+            return self.pinecone.describe_index(collection_name)
         return None
 
     async def get_collections(
         self,
-    ) -> List[str]:
+    ) -> IndexList:
         """Gets the list of collections.
 
         Returns:
-            List[str] -- The list of collections.
+            IndexList -- The list of collections.
         """
-        return list(pinecone.list_indexes())
+        return self.pinecone.list_indexes()
 
     async def delete_collection(self, collection_name: str) -> None:
         """Deletes a collection.
@@ -136,8 +128,9 @@ async def delete_collection(self, collection_name: str) -> None:
         Returns:
             None
         """
-        if collection_name in pinecone.list_indexes():
-            pinecone.delete_index(collection_name)
+        if await self.does_collection_exist(collection_name):
+            self.pinecone.delete_index(collection_name)
+            self.collection_names_cache.discard(collection_name)
 
     async def does_collection_exist(self, collection_name: str) -> bool:
         """Checks if a collection exists.
@@ -148,7 +141,13 @@ async def does_collection_exist(self, collection_name: str) -> bool:
         Returns:
             bool -- True if the collection exists; otherwise, False.
         """
-        return collection_name in pinecone.list_indexes()
+        if collection_name in self.collection_names_cache:
+            return True
+
+        index_collection_names = self.pinecone.list_indexes().names()
+        self.collection_names_cache |= set(index_collection_names)
+
+        return collection_name in index_collection_names
 
     async def upsert(self, collection_name: str, record: MemoryRecord) -> str:
         """Upserts a record.
@@ -160,10 +159,10 @@ async def upsert(self, collection_name: str, record: MemoryRecord) -> str:
         Returns:
             str -- The unique database key of the record. In Pinecone, this is the record ID.
         """
-        if collection_name not in pinecone.list_indexes():
+        if not await self.does_collection_exist(collection_name):
             raise ServiceResourceNotFoundError(f"Collection '{collection_name}' does not exist")
 
-        collection = pinecone.Index(collection_name)
+        collection = self.pinecone.Index(collection_name)
 
         upsert_response = collection.upsert(
             vectors=[(record._id, record.embedding.tolist(), build_payload(record))],
@@ -185,10 +184,10 @@ async def upsert_batch(self, collection_name: str, records: List[MemoryRecord]) -> List[str]:
         Returns:
             List[str] -- The unique database keys of the records.
         """
-        if collection_name not in pinecone.list_indexes():
+        if not await self.does_collection_exist(collection_name):
             raise ServiceResourceNotFoundError(f"Collection '{collection_name}' does not exist")
 
-        collection = pinecone.Index(collection_name)
+        collection = self.pinecone.Index(collection_name)
 
         vectors = [
             (
@@ -217,10 +216,10 @@ async def get(self, collection_name: str, key: str, with_embedding: bool = False) -> MemoryRecord:
         Returns:
             MemoryRecord -- The record.
         """
-        if collection_name not in pinecone.list_indexes():
+        if not await self.does_collection_exist(collection_name):
             raise ServiceResourceNotFoundError(f"Collection '{collection_name}' does not exist")
 
-        collection = pinecone.Index(collection_name)
+        collection = self.pinecone.Index(collection_name)
         fetch_response = collection.fetch([key])
 
         if len(fetch_response.vectors) == 0:
@@ -241,7 +240,7 @@ async def get_batch(
         Returns:
             List[MemoryRecord] -- The records.
         """
-        if collection_name not in pinecone.list_indexes():
+        if not await self.does_collection_exist(collection_name):
             raise ServiceResourceNotFoundError(f"Collection '{collection_name}' does not exist")
 
         fetch_response = await self.__get_batch(collection_name, keys, with_embeddings)
@@ -257,10 +256,10 @@ async def remove(self, collection_name: str, key: str) -> None:
         Returns:
             None
         """
-        if collection_name not in pinecone.list_indexes():
+        if not await self.does_collection_exist(collection_name):
             raise ServiceResourceNotFoundError(f"Collection '{collection_name}' does not exist")
 
-        collection = pinecone.Index(collection_name)
+        collection = self.pinecone.Index(collection_name)
         collection.delete([key])
 
     async def remove_batch(self, collection_name: str, keys: List[str]) -> None:
@@ -273,10 +272,10 @@ async def remove_batch(self, collection_name: str, keys: List[str]) -> None:
         Returns:
             None
         """
-        if collection_name not in pinecone.list_indexes():
+        if not await self.does_collection_exist(collection_name):
             raise ServiceResourceNotFoundError(f"Collection '{collection_name}' does not exist")
 
-        collection = pinecone.Index(collection_name)
+        collection = self.pinecone.Index(collection_name)
         for i in range(0, len(keys), MAX_DELETE_BATCH_SIZE):
             collection.delete(keys[i : i + MAX_DELETE_BATCH_SIZE])
         collection.delete(keys)
@@ -328,10 +327,10 @@ async def get_nearest_matches(
         Returns:
             List[Tuple[MemoryRecord, float]] -- The records and their relevance scores.
         """
-        if collection_name not in pinecone.list_indexes():
+        if not await self.does_collection_exist(collection_name):
             raise ServiceResourceNotFoundError(f"Collection '{collection_name}' does not exist")
 
-        collection = pinecone.Index(collection_name)
+        collection = self.pinecone.Index(collection_name)
 
         if limit > MAX_QUERY_WITHOUT_METADATA_BATCH_SIZE:
             raise ServiceInvalidRequestError(
@@ -375,7 +374,7 @@ async def get_nearest_matches(
     async def __get_batch(
         self, collection_name: str, keys: List[str], with_embeddings: bool = False
     ) -> "FetchResponse":
-        index = pinecone.Index(collection_name)
+        index = self.pinecone.Index(collection_name)
         if len(keys) > MAX_FETCH_BATCH_SIZE:
             fetch_response = index.fetch(keys[0:MAX_FETCH_BATCH_SIZE])
             for i in range(MAX_FETCH_BATCH_SIZE, len(keys), MAX_FETCH_BATCH_SIZE):
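Putting the connector changes together, a minimal usage sketch of the updated store (the module path is assumed from this diff; the collection name, key, and dimension are invented):

import asyncio

from semantic_kernel.connectors.memory.pinecone.pinecone_memory_store import PineconeMemoryStore


async def main() -> None:
    # No `environment` argument anymore; new indexes use the
    # DEFAULT_INDEX_SPEC ServerlessSpec (aws / us-east-1) unless
    # create_collection is given an explicit index_spec.
    store = PineconeMemoryStore(api_key="YOUR_API_KEY", default_dimensionality=1536)

    # Creates a serverless index only if it does not already exist.
    await store.create_collection("example-collection", dimension_num=1536)

    # Served from the collection-name cache, without another list_indexes call.
    assert await store.does_collection_exist("example-collection")


asyncio.run(main())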
25 changes: 6 additions & 19 deletions python/semantic_kernel/utils/settings.py
@@ -102,32 +102,19 @@ def postgres_settings_from_dot_env() -> str:
     return connection_string
 
 
-def pinecone_settings_from_dot_env() -> Tuple[str, Optional[str]]:
+def pinecone_settings_from_dot_env() -> str:
     """
-    Reads the Pinecone API key and Environment from the .env file.
+    Reads the Pinecone API key from the .env file.
     Returns:
-        Tuple[str, str]: The Pinecone API key, the Pinecone Environment
+        str: The Pinecone API key
     """
-
-    api_key, environment = None, None
-    with open(".env", "r") as f:
-        lines = f.readlines()
-
-    for line in lines:
-        if line.startswith("PINECONE_API_KEY"):
-            parts = line.split("=")[1:]
-            api_key = "=".join(parts).strip().strip('"')
-            continue
-
-        if line.startswith("PINECONE_ENVIRONMENT"):
-            parts = line.split("=")[1:]
-            environment = "=".join(parts).strip().strip('"')
-            continue
+    config = dotenv_values(".env")
+    api_key = config.get("PINECONE_API_KEY", None)
 
     assert api_key, "Pinecone API key not found in .env file"
-    assert environment, "Pinecone environment not found in .env file"
 
-    return api_key, environment
+    return api_key
 
 
 def astradb_settings_from_dot_env() -> Tuple[str, Optional[str]]:
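One consequence for callers of this helper (call sites are not part of this diff; this is a hedged sketch of the signature change, with an invented key value):

from semantic_kernel.utils.settings import pinecone_settings_from_dot_env

# Before this PR the helper returned an (api_key, environment) tuple:
#     api_key, environment = pinecone_settings_from_dot_env()
# After it, only the API key comes back:
api_key = pinecone_settings_from_dot_env()

# dotenv_values(".env") also takes over the quote stripping the removed
# hand-rolled parser did, so PINECONE_API_KEY="abc" and PINECONE_API_KEY=abc
# both yield the bare value "abc".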