Python: Feature new memory stores and collections #7614

Merged: 9 commits, Aug 6, 2024
2 changes: 2 additions & 0 deletions .github/workflows/python-integration-tests.yml
@@ -131,6 +131,7 @@ jobs:
VERTEX_AI_PROJECT_ID: ${{ vars.VERTEX_AI_PROJECT_ID }}
VERTEX_AI_GEMINI_MODEL_ID: ${{ vars.VERTEX_AI_GEMINI_MODEL_ID }}
VERTEX_AI_EMBEDDING_MODEL_ID: ${{ vars.VERTEX_AI_EMBEDDING_MODEL_ID }}
REDIS_CONNECTION_STRING: ${{ vars.REDIS_CONNECTION_STRING }}
run: |
cd python
poetry run pytest ./tests/integration ./tests/samples -v --junitxml=pytest.xml
@@ -242,6 +243,7 @@ jobs:
VERTEX_AI_PROJECT_ID: ${{ vars.VERTEX_AI_PROJECT_ID }}
VERTEX_AI_GEMINI_MODEL_ID: ${{ vars.VERTEX_AI_GEMINI_MODEL_ID }}
VERTEX_AI_EMBEDDING_MODEL_ID: ${{ vars.VERTEX_AI_EMBEDDING_MODEL_ID }}
REDIS_CONNECTION_STRING: ${{ vars.REDIS_CONNECTION_STRING }}
run: |
if ${{ matrix.os == 'ubuntu-latest' }}; then
docker run -d --name redis-stack-server -p 6379:6379 redis/redis-stack-server:latest
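
The REDIS_CONNECTION_STRING variable added above is how the integration tests reach the redis-stack-server container started in the run step. As a rough illustration (not code from this PR; the fallback value and fixture shape are assumptions), a test could connect like this with redis-py:

# Hypothetical sketch: connect to the Redis instance the workflow starts.
import os

import redis

# Assumed fallback matching the docker run above (default port 6379).
connection_string = os.environ.get("REDIS_CONNECTION_STRING", "redis://localhost:6379")
client = redis.Redis.from_url(connection_string)
client.ping()  # raises redis.ConnectionError if the server is unreachable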
6 changes: 3 additions & 3 deletions python/.coveragerc
@@ -10,8 +10,8 @@ omit =
semantic_kernel/connectors/memory/mongodb_atlas/*
semantic_kernel/connectors/memory/pinecone/*
semantic_kernel/connectors/memory/postgres/*
semantic_kernel/connectors/memory/qdrant/*
semantic_kernel/connectors/memory/redis/*
semantic_kernel/connectors/memory/qdrant/qdrant_memory_store.py
semantic_kernel/connectors/memory/redis/redis_memory_store.py
semantic_kernel/connectors/memory/usearch/*
semantic_kernel/connectors/memory/weaviate/*
semantic_kernel/reliability/*
@@ -33,4 +33,4 @@ exclude_lines =
# TYPE_CHECKING and @overload blocks are never executed during pytest run
if TYPE_CHECKING:
@overload
@abstractmethod
@abstractmethod
6 changes: 5 additions & 1 deletion python/.cspell.json
@@ -47,6 +47,10 @@
"protos",
"endregion",
"vertexai",
"aiplatform"
"aiplatform",
"serde",
"datamodel",
"vectorstoremodel",
"qdrant"
]
}
6 changes: 6 additions & 0 deletions python/mypy.ini
@@ -26,6 +26,8 @@ ignore_errors = true
[mypy-semantic_kernel.connectors.memory.astradb.*]
ignore_errors = true

[mypy-semantic_kernel.connectors.memory.azure_ai_search.*]
ignore_errors = false
[mypy-semantic_kernel.connectors.memory.azure_cognitive_search.*]
ignore_errors = true

@@ -50,9 +52,13 @@ ignore_errors = true
[mypy-semantic_kernel.connectors.memory.postgres.*]
ignore_errors = true

[mypy-semantic_kernel.connectors.memory.qdrant.qdrant_vector_record_store.*]
ignore_errors = true
[mypy-semantic_kernel.connectors.memory.qdrant.*]
ignore_errors = true

[mypy-semantic_kernel.connectors.memory.redis.redis_vector_record_store.*]
ignore_errors = true
[mypy-semantic_kernel.connectors.memory.redis.*]
ignore_errors = true

327 changes: 229 additions & 98 deletions python/poetry.lock

Large diffs are not rendered by default.

37 changes: 24 additions & 13 deletions python/pyproject.toml
@@ -57,8 +57,9 @@ chromadb = { version = ">=0.4.13,<0.6.0", optional = true}
google-cloud-aiplatform = { version = "^1.60.0", optional = true}
google-generativeai = { version = "^0.7.2", optional = true}
# hugging face
transformers = { version = "^4.28.1", extras=["torch"], optional = true}
transformers = { version = "^4.28.1", extras=['torch'], optional = true}
sentence-transformers = { version = "^2.2.2", optional = true}
torch = {version = "2.2.2", optional = true}
# mongo
motor = { version = "^3.3.2", optional = true }
# notebooks
@@ -73,20 +74,20 @@ ollama = { version = "^0.2.1", optional = true}
# pinecone
pinecone-client = { version = ">=3.0.0", optional = true}
# postgres
psycopg = { version="^3.1.9", extras=["binary","pool"], optional = true}
psycopg = { version="^3.2.1", extras=["binary","pool"], optional = true}
# qdrant
qdrant-client = { version = '^1.9', optional = true}
# redis
redis = { version = "^4.6.0", optional = true}
redis = { version = "^5.0.7", extras=['hiredis'], optional = true}
types-redis = { version="^4.6.0.20240425", optional = true }
# usearch
usearch = { version = "^2.9", optional = true}
pyarrow = { version = ">=12.0.1,<18.0.0", optional = true}
weaviate-client = { version = ">=3.18,<5.0", optional = true}
ruff = "0.5.2"
pandas = {version = "^2.2.2", optional = true}

[tool.poetry.group.dev.dependencies]
pre-commit = ">=3.7.1"
ruff = ">=0.5"
ipykernel = "^6.29.4"
nbconvert = "^7.16.4"
pytest = "^8.2.1"
@@ -96,6 +97,7 @@ pytest-asyncio = "^0.23.7"
snoop = "^0.4.3"
mypy = ">=1.10.0"
types-PyYAML = "^6.0.12.20240311"
ruff = "^0.5.2"

[tool.poetry.group.unit-tests]
optional = true
@@ -109,8 +111,14 @@ mistralai = "^0.4.1"
ollama = "^0.2.1"
google-cloud-aiplatform = "^1.60.0"
google-generativeai = "^0.7.2"
transformers = { version = "^4.28.1", extras=["torch"]}
sentence-transformers = "^2.2.2"
transformers = { version = "^4.28.1", extras=['torch']}
sentence-transformers = { version = "^2.2.2"}
torch = {version = "2.2.2"}
# qdrant
qdrant-client = '^1.9'
# redis
redis = { version = "^5.0.7", extras=['hiredis']}
pandas = {version = "^2.2.2"}

[tool.poetry.group.tests]
optional = true
@@ -129,8 +137,9 @@ chromadb = ">=0.4.13,<0.6.0"
google-cloud-aiplatform = "^1.60.0"
google-generativeai = "^0.7.2"
# hugging face
transformers = { version = "^4.28.1", extras=["torch"]}
sentence-transformers = "^2.2.2"
transformers = { version = "^4.28.1", extras=['torch']}
sentence-transformers = { version = "^2.2.2"}
torch = {version = "2.2.2"}
# milvus
pymilvus = ">=2.3,<2.4.4"
milvus = { version = ">=2.3,<2.3.8", markers = 'sys_platform != "win32"'}
@@ -147,21 +156,23 @@ psycopg = { version="^3.1.9", extras=["binary","pool"]}
# qdrant
qdrant-client = '^1.9'
# redis
redis = "^4.6.0"
redis = { version="^5.0.7", extras=['hiredis']}
types-redis = { version="^4.6.0.20240425" }
# usearch
usearch = "^2.9"
pyarrow = ">=12.0.1,<18.0.0"
# weaviate
weaviate-client = ">=3.18,<5.0"
pandas = {version = "^2.2.2"}

# Extras are exposed to pip, this allows a user to easily add the right dependencies to their environment
[tool.poetry.extras]
all = ["transformers", "sentence-transformers", "qdrant-client", "chromadb", "pymilvus", "milvus", "mistralai", "ollama", "google", "weaviate-client", "pinecone-client", "psycopg", "redis", "azure-ai-inference", "azure-search-documents", "azure-core", "azure-identity", "azure-cosmos", "usearch", "pyarrow", "ipykernel", "motor"]
all = ["transformers", "sentence-transformers", "torch", "qdrant-client", "chromadb", "pymilvus", "milvus", "mistralai", "ollama", "google", "weaviate-client", "pinecone-client", "psycopg", "redis", "azure-ai-inference", "azure-search-documents", "azure-core", "azure-identity", "azure-cosmos", "usearch", "pyarrow", "ipykernel", "motor"]

azure = ["azure-ai-inference", "azure-search-documents", "azure-core", "azure-identity", "azure-cosmos", "msgraph-sdk"]
chromadb = ["chromadb"]
google = ["google-cloud-aiplatform", "google-generativeai"]
hugging_face = ["transformers", "sentence-transformers"]
hugging_face = ["transformers", "sentence-transformers", "torch"]
milvus = ["pymilvus", "milvus"]
mistralai = ["mistralai"]
ollama = ["ollama"]
@@ -170,7 +181,7 @@ notebooks = ["ipykernel"]
pinecone = ["pinecone-client"]
postgres = ["psycopg"]
qdrant = ["qdrant-client"]
redis = ["redis"]
redis = ["redis", "types-redis"]
usearch = ["usearch", "pyarrow"]
weaviate = ["weaviate-client"]

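
Because the extras above are exposed to pip, a user can pull in a store's dependencies directly; for example, the updated redis extra (which now includes types-redis and the hiredis flavor of redis) installs with:

pip install semantic-kernel[redis]

The same pattern applies to the other extras listed, such as qdrant or hugging_face.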
160 changes: 160 additions & 0 deletions python/samples/concepts/memory/data_models.py
@@ -0,0 +1,160 @@
# Copyright (c) Microsoft. All rights reserved.

from dataclasses import dataclass, field
from typing import Annotated, Any
from uuid import uuid4

from pandas import DataFrame
from pydantic import Field

from semantic_kernel.data.vector_store_model_decorator import vectorstoremodel
from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition
from semantic_kernel.data.vector_store_record_fields import (
VectorStoreRecordDataField,
VectorStoreRecordKeyField,
VectorStoreRecordVectorField,
)
from semantic_kernel.kernel_pydantic import KernelBaseModel

# This concept shows the different ways you can create a vector store data model:
# using dataclasses, Pydantic, and plain Python classes,
# as well as container types like Pandas DataFrames.

# A few rules apply to all of these data models:
# each field must specify its type through the annotation (or the definition),
# there must be at least one field of type VectorStoreRecordKeyField,
# if you set embedding_property_name on a VectorStoreRecordDataField, that field must exist and be a vector field,
# and an unannotated field is allowed but must have a default value.

# The purpose of these models is to be what you pass to and get back from a vector store.
# There may be limitations on the data types a given vector store can handle,
# so not every store will be able to handle exactly the same model.
# For instance, some stores only allow a string as the key field, while others allow both str and int,
# so defining the key as an int might make some stores unusable.

# The decorator takes the class and pulls out the fields and annotations to create a definition
# of type VectorStoreRecordDefinition.
# The vector store uses this definition to know how to handle the data model.

# You can also create the definition yourself and pass it to the vector store together with a standard type,
# such as a dict or list,
# or use the definition in container mode with something like a Pandas DataFrame.


# Data model using built-in Python dataclasses
@vectorstoremodel
@dataclass
class DataModelDataclass:
vector: Annotated[list[float], VectorStoreRecordVectorField]
key: Annotated[str, VectorStoreRecordKeyField()] = field(default_factory=lambda: str(uuid4()))
content: Annotated[str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector")] = (
"content1"
)
other: str | None = None


# Data model using Pydantic BaseModels
@vectorstoremodel
class DataModelPydantic(KernelBaseModel):
vector: Annotated[list[float], VectorStoreRecordVectorField]
key: Annotated[str, VectorStoreRecordKeyField()] = Field(default_factory=lambda: str(uuid4()))
content: Annotated[str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector")] = (
"content1"
)
other: str | None = None


# Data model using Pydantic BaseModels with mixed annotations (from pydantic and SK)
@vectorstoremodel
class DataModelPydanticComplex(KernelBaseModel):
vector: Annotated[list[float], VectorStoreRecordVectorField]
key: Annotated[str, Field(default_factory=lambda: str(uuid4())), VectorStoreRecordKeyField()]
content: Annotated[str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector")] = (
"content1"
)
other: str | None = None


# Data model using Python classes
# This one includes a custom serialize and deserialize method
@vectorstoremodel
class DataModelPython:
def __init__(
self,
vector: Annotated[list[float], VectorStoreRecordVectorField],
key: Annotated[str, VectorStoreRecordKeyField] = None,
content: Annotated[
str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector")
] = "content1",
other: str | None = None,
):
self.vector = vector
self.other = other
self.key = key or str(uuid4())
self.content = content

def __str__(self) -> str:
return f"DataModelPython(vector={self.vector}, key={self.key}, content={self.content}, other={self.other})"

def serialize(self) -> dict[str, Any]:
return {
"vector": self.vector,
"key": self.key,
"content": self.content,
}

@classmethod
def deserialize(cls, obj: dict[str, Any]) -> "DataModelPython":
return cls(
vector=obj["vector"],
key=obj["key"],
content=obj["content"],
)


# Data model definition for use with Pandas.
# Note the container_mode flag, which ensures that returned records are wrapped in a container
# even when a batch of records is requested.
# There are also to_dict and from_dict methods, used to convert the data model to and from a dict;
# these should be specific to the type used. When dict itself is the type, they can be left off.
data_model_definition_pandas = VectorStoreRecordDefinition(
fields={
"vector": VectorStoreRecordVectorField(property_type="list[float]"),
"key": VectorStoreRecordKeyField(property_type="str"),
"content": VectorStoreRecordDataField(
property_type="str", has_embedding=True, embedding_property_name="vector"
),
},
container_mode=True,
to_dict=lambda record, **_: record.to_dict(orient="records"),
from_dict=lambda records, **_: DataFrame(records),
)


if __name__ == "__main__":
data_item1 = DataModelDataclass(content="Hello, world!", vector=[1.0, 2.0, 3.0], other=None)
data_item2 = DataModelPydantic(content="Hello, world!", vector=[1.0, 2.0, 3.0], other=None)
data_item3 = DataModelPydanticComplex(content="Hello, world!", vector=[1.0, 2.0, 3.0], other=None)
data_item4 = DataModelPython(content="Hello, world!", vector=[1.0, 2.0, 3.0], other=None)
print("Example records:")
print(f"DataClass:\n {data_item1}", end="\n\n")
print(f"Pydantic:\n {data_item2}", end="\n\n")
print(f"Pydantic with annotations:\n {data_item3}", end="\n\n")
print(f"Python:\n {data_item4}", end="\n\n")

print("Item definitions:")
print(f"DataClass:\n {data_item1.__kernel_vectorstoremodel_definition__}", end="\n\n")
print(f"Pydantic:\n {data_item2.__kernel_vectorstoremodel_definition__}", end="\n\n")
print(f"Pydantic with annotations:\n {data_item3.__kernel_vectorstoremodel_definition__}", end="\n\n")
print(f"Python:\n {data_item4.__kernel_vectorstoremodel_definition__}", end="\n\n")
print(f"Definition for use with Pandas:\n {data_model_definition_pandas}", end="\n\n")
if (
data_item1.__kernel_vectorstoremodel_definition__.fields
== data_item2.__kernel_vectorstoremodel_definition__.fields
== data_item3.__kernel_vectorstoremodel_definition__.fields
== data_item4.__kernel_vectorstoremodel_definition__.fields
== data_model_definition_pandas.fields
):
print("All data models are the same")
else:
print("Data models are not the same")