Python: moved vector field to new setup #12256

Merged
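In short: the separate VectorStoreKeyField, VectorStoreDataField, and VectorStoreVectorField annotations are replaced by a single VectorStoreField whose first argument names the field role ("key", "data", or "vector"), and search options such as the vector property name and result count move from a VectorSearchOptions object to keyword arguments on collection.search(). A minimal before/after sketch of the new declaration style, distilled from the updated samples below (the record class, field names, and collection name here are illustrative, not part of this PR):

# Old style (removed in this PR):
#   id:     Annotated[str, VectorStoreKeyField]
#   text:   Annotated[str, VectorStoreDataField(is_full_text_indexed=True)]
#   vector: Annotated[str | None, VectorStoreVectorField(dimensions=1536)] = None
#
# New style (single VectorStoreField with a role argument):
from dataclasses import dataclass, field
from typing import Annotated
from uuid import uuid4

from semantic_kernel.data import VectorStoreField, vectorstoremodel


@vectorstoremodel(collection_name="example-collection")  # illustrative name
@dataclass
class ExampleRecord:
    text: Annotated[str, VectorStoreField("data", is_full_text_indexed=True)]
    vector: Annotated[str | None, VectorStoreField("vector", dimensions=1536)] = None
    id: Annotated[str, VectorStoreField("key")] = field(default_factory=lambda: str(uuid4()))


# Search options become keyword arguments:
#   old: await collection.search(query, options=VectorSearchOptions(vector_property_name="vector", top=1))
#   new: await collection.search(query, vector_property_name="vector", top=1)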
20 changes: 5 additions & 15 deletions python/samples/concepts/caching/semantic_caching.py
@@ -10,15 +10,7 @@
from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion, OpenAITextEmbedding
from semantic_kernel.connectors.memory.in_memory import InMemoryStore
from semantic_kernel.data import (
VectorSearchOptions,
VectorStore,
VectorStoreDataField,
VectorStoreKeyField,
VectorStoreRecordCollection,
VectorStoreVectorField,
vectorstoremodel,
)
from semantic_kernel.data import VectorStore, VectorStoreField, VectorStoreRecordCollection, vectorstoremodel
from semantic_kernel.filters import FilterTypes, FunctionInvocationContext, PromptRenderContext
from semantic_kernel.functions import FunctionResult

@@ -32,9 +24,9 @@
@vectorstoremodel(collection_name=COLLECTION_NAME)
@dataclass
class CacheRecord:
result: Annotated[str, VectorStoreDataField(is_full_text_indexed=True)]
prompt: Annotated[str | None, VectorStoreVectorField(dimensions=1536)] = None
id: Annotated[str, VectorStoreKeyField] = field(default_factory=lambda: str(uuid4()))
result: Annotated[str, VectorStoreField("data", is_full_text_indexed=True)]
prompt: Annotated[str | None, VectorStoreField("vector", dimensions=1536)] = None
id: Annotated[str, VectorStoreField("key")] = field(default_factory=lambda: str(uuid4()))


# Define the filters, one for caching the results and one for using the cache.
@@ -66,9 +58,7 @@ async def on_prompt_render(
"""
await next(context)
await self.collection.ensure_collection_exists()
results = await self.collection.search(
context.rendered_prompt, options=VectorSearchOptions(vector_property_name="prompt", top=1)
)
results = await self.collection.search(context.rendered_prompt, vector_property_name="prompt", top=1)
async for result in results.results:
if result.score and result.score < self.score_threshold:
context.function_result = FunctionResult(
@@ -11,13 +11,7 @@
from semantic_kernel.contents import ChatHistory, ChatMessageContent
from semantic_kernel.core_plugins.math_plugin import MathPlugin
from semantic_kernel.core_plugins.time_plugin import TimePlugin
from semantic_kernel.data import (
VectorStore,
VectorStoreDataField,
VectorStoreKeyField,
VectorStoreRecordCollection,
vectorstoremodel,
)
from semantic_kernel.data import VectorStore, VectorStoreField, VectorStoreRecordCollection, vectorstoremodel

"""
This sample demonstrates how to build a conversational chatbot
@@ -39,9 +33,9 @@
@vectorstoremodel
@dataclass
class ChatHistoryModel:
session_id: Annotated[str, VectorStoreKeyField]
user_id: Annotated[str, VectorStoreDataField(is_indexed=True)]
messages: Annotated[list[dict[str, str]], VectorStoreDataField(is_indexed=True)]
session_id: Annotated[str, VectorStoreField("key")]
user_id: Annotated[str, VectorStoreField("data", is_indexed=True)]
messages: Annotated[list[dict[str, str]], VectorStoreField("data", is_indexed=True)]


# 2. We then create a class that extends the ChatHistory class
@@ -15,7 +15,7 @@
)
from pydantic import BaseModel, ConfigDict

from semantic_kernel.data import VectorStoreDataField, VectorStoreKeyField, VectorStoreVectorField, vectorstoremodel
from semantic_kernel.data import VectorStoreField, vectorstoremodel

"""
The data model used for this sample is based on the hotel data model from the Azure AI Search samples.
@@ -55,29 +55,20 @@ class Address(BaseModel):

@vectorstoremodel(collection_name="hotel-index")
class HotelSampleClass(BaseModel):
HotelId: Annotated[str, VectorStoreKeyField]
HotelName: Annotated[str | None, VectorStoreDataField()] = None
Description: Annotated[
str,
VectorStoreDataField(is_full_text_indexed=True),
]
DescriptionVector: Annotated[
list[float] | str | None,
VectorStoreVectorField(dimensions=1536),
] = None
Description_fr: Annotated[str, VectorStoreDataField(is_full_text_indexed=True)]
DescriptionFrVector: Annotated[
list[float] | str | None,
VectorStoreVectorField(dimensions=1536),
] = None
Category: Annotated[str, VectorStoreDataField()]
Tags: Annotated[list[str], VectorStoreDataField(is_indexed=True)]
ParkingIncluded: Annotated[bool | None, VectorStoreDataField()] = None
LastRenovationDate: Annotated[str | None, VectorStoreDataField(type=SearchFieldDataType.DateTimeOffset)] = None
Rating: Annotated[float, VectorStoreDataField()]
Location: Annotated[dict[str, Any], VectorStoreDataField(type=SearchFieldDataType.GeographyPoint)]
Address: Annotated[Address, VectorStoreDataField()]
Rooms: Annotated[list[Rooms], VectorStoreDataField()]
HotelId: Annotated[str, VectorStoreField("key")]
HotelName: Annotated[str | None, VectorStoreField("data")] = None
Description: Annotated[str, VectorStoreField("data", is_full_text_indexed=True)]
DescriptionVector: Annotated[list[float] | str | None, VectorStoreField("vector", dimensions=1536)] = None
Description_fr: Annotated[str, VectorStoreField("data", is_full_text_indexed=True)]
DescriptionFrVector: Annotated[list[float] | str | None, VectorStoreField("vector", dimensions=1536)] = None
Category: Annotated[str, VectorStoreField("data")]
Tags: Annotated[list[str], VectorStoreField("data", is_indexed=True)]
ParkingIncluded: Annotated[bool | None, VectorStoreField("data")] = None
LastRenovationDate: Annotated[str | None, VectorStoreField("data", type=SearchFieldDataType.DateTimeOffset)] = None
Rating: Annotated[float, VectorStoreField("data")]
Location: Annotated[dict[str, Any], VectorStoreField("data", type=SearchFieldDataType.GeographyPoint)]
Address: Annotated[Address, VectorStoreField("data")]
Rooms: Annotated[list[Rooms], VectorStoreField("data")]

model_config = ConfigDict(extra="ignore")

24 changes: 12 additions & 12 deletions python/samples/concepts/memory/complex_memory.py
@@ -26,13 +26,8 @@
SqlServerCollection,
WeaviateCollection,
)
from semantic_kernel.data import (
VectorStoreDataField,
VectorStoreKeyField,
VectorStoreRecordCollection,
VectorStoreVectorField,
vectorstoremodel,
)
from semantic_kernel.data import VectorStoreRecordCollection, vectorstoremodel
from semantic_kernel.data.definitions import VectorStoreField
from semantic_kernel.data.vectors import SearchType, VectorSearch

# This is a rather complex sample, showing how to use the vector store
@@ -48,14 +43,19 @@
@vectorstoremodel(collection_name="test")
@dataclass
class DataModel:
title: Annotated[str, VectorStoreDataField(is_full_text_indexed=True)]
content: Annotated[str, VectorStoreDataField(is_full_text_indexed=True)]
title: Annotated[str, VectorStoreField("data", is_full_text_indexed=True)]
content: Annotated[str, VectorStoreField("data", is_full_text_indexed=True)]
embedding: Annotated[
str | None,
VectorStoreVectorField(dimensions=1536, type_="float"),
VectorStoreField("vector", dimensions=1536, type_="float"),
] = None
id: Annotated[str, VectorStoreKeyField()] = field(default_factory=lambda: str(uuid4()))
tag: Annotated[str | None, VectorStoreDataField(type_="str", is_indexed=True)] = None
id: Annotated[
str,
VectorStoreField(
"key",
),
] = field(default_factory=lambda: str(uuid4()))
tag: Annotated[str | None, VectorStoreField("data", type_="str", is_indexed=True)] = None

def __post_init__(self, **kwargs):
if self.embedding is None:
45 changes: 18 additions & 27 deletions python/samples/concepts/memory/data_models.py
@@ -7,22 +7,15 @@
from pandas import DataFrame
from pydantic import BaseModel, Field

from semantic_kernel.data import (
VectorStoreCollectionDefinition,
VectorStoreDataField,
VectorStoreKeyField,
VectorStoreVectorField,
vectorstoremodel,
)
from semantic_kernel.data import VectorStoreCollectionDefinition, VectorStoreField, vectorstoremodel

# This concept shows the different ways you can create a vector store data model
# using dataclasses, Pydantic, and Python classes.
# As well as using types like Pandas Dataframes.

# There are a number of universal things about these data models:
# they must specify the type of field through the annotation (or the definition).
# there must be at least one field of type VectorStoreRecordKeyField.
# If you set the embedding_property_name in the VectorStoreRecordDataField, that field must exist and be a vector field.
# there must be at least one field of type `key`.
# An unannotated field is allowed but must have a default value.

# The purpose of these models is to be what you pass to and get back from a vector store.
@@ -32,7 +25,7 @@
# so defining the key with an int might make some stores unusable.

# The decorator takes the class and pulls out the fields and annotations to create a definition,
# of type VectorStoreRecordDefinition.
# of type VectorStoreCollectionDefinition.
# This definition is used for the vector store to know how to handle the data model.

# You can also create the definition yourself, and pass it to the vector stores together with a standard type,
@@ -44,18 +37,18 @@
@vectorstoremodel
@dataclass
class DataModelDataclass:
vector: Annotated[list[float], VectorStoreVectorField]
key: Annotated[str, VectorStoreKeyField()] = field(default_factory=lambda: str(uuid4()))
content: Annotated[str, VectorStoreDataField(has_embedding=True, embedding_property_name="vector")] = "content1"
vector: Annotated[list[float] | None, VectorStoreField("vector", dimensions=3)] = None
key: Annotated[str, VectorStoreField("key")] = field(default_factory=lambda: str(uuid4()))
content: Annotated[str, VectorStoreField("data")] = "content1"
other: str | None = None


# Data model using Pydantic BaseModels
@vectorstoremodel
class DataModelPydantic(BaseModel):
id: Annotated[str, VectorStoreKeyField()] = Field(default_factory=lambda: str(uuid4()))
content: Annotated[str, VectorStoreDataField(has_embedding=True, embedding_property_name="vector")] = "content1"
vector: Annotated[list[float], VectorStoreVectorField]
id: Annotated[str, VectorStoreField("key")] = Field(default_factory=lambda: str(uuid4()))
content: Annotated[str, VectorStoreField("data")] = "content1"
vector: Annotated[list[float] | None, VectorStoreField("vector", dimensions=3)] = None
other: str | None = None


@@ -65,11 +58,9 @@ class DataModelPydantic(BaseModel):
class DataModelPython:
def __init__(
self,
vector: Annotated[list[float], VectorStoreVectorField],
key: Annotated[str, VectorStoreKeyField] = None,
content: Annotated[
str, VectorStoreDataField(has_embedding=True, embedding_property_name="vector")
] = "content1",
key: Annotated[str | None, VectorStoreField("key")] = None,
vector: Annotated[list[float] | None, VectorStoreField("vector", dimensions=3)] = None,
content: Annotated[str, VectorStoreField("data")] = "content1",
other: str | None = None,
):
self.vector = vector
Expand All @@ -88,7 +79,7 @@ def serialize(self) -> dict[str, Any]:
}

@classmethod
def deserialize(cls, obj: dict[str, Any]) -> "DataModelDataclass":
def deserialize(cls, obj: dict[str, Any]) -> "DataModelPython":
return cls(
vector=obj["vector"],
key=obj["key"],
Expand All @@ -102,11 +93,11 @@ def deserialize(cls, obj: dict[str, Any]) -> "DataModelDataclass":
# There is also a to_dict and from_dict method, which are used to convert the data model to and from a dict,
# these should be specific to the type used, if using dict as type then these can be left off.
definition_pandas = VectorStoreCollectionDefinition(
fields={
"vector": VectorStoreVectorField(type_="list[float]"),
"key": VectorStoreKeyField(type_="str"),
"content": VectorStoreDataField(type_="str", has_embedding=True, embedding_property_name="vector"),
},
fields=[
VectorStoreField("vector", name="vector", type="float", dimensions=3),
VectorStoreField("key", name="key", type="str"),
VectorStoreField("data", name="content", type="str"),
],
container_mode=True,
to_dict=lambda record, **_: record.to_dict(orient="records"),
from_dict=lambda records, **_: DataFrame(records),
79 changes: 42 additions & 37 deletions python/samples/concepts/memory/memory_with_pandas.py
@@ -5,62 +5,67 @@

import pandas as pd

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.open_ai import OpenAIEmbeddingPromptExecutionSettings, OpenAITextEmbedding
from semantic_kernel.connectors.ai.open_ai import OpenAITextEmbedding
from semantic_kernel.connectors.memory.azure_ai_search import AzureAISearchCollection
from semantic_kernel.data import (
VectorStoreCollectionDefinition,
VectorStoreDataField,
VectorStoreKeyField,
VectorStoreVectorField,
)
from semantic_kernel.data.vectors import add_vector_to_records
from semantic_kernel.data import VectorStoreCollectionDefinition, VectorStoreField

model_fields = VectorStoreCollectionDefinition(
container_mode=True,
fields={
"content": VectorStoreDataField(has_embedding=True, embedding_property_name="vector"),
"id": VectorStoreKeyField(),
"vector": VectorStoreVectorField(
embedding_settings={"embedding": OpenAIEmbeddingPromptExecutionSettings(dimensions=1536)}
definition = VectorStoreCollectionDefinition(
collection_name="pandas_test_index",
fields=[
VectorStoreField("key", name="id", type="str"),
VectorStoreField("data", name="title", type="str"),
VectorStoreField("data", name="content", type="str", is_full_text_indexed=True),
VectorStoreField(
"vector",
name="vector",
type="float",
dimensions=1536,
embedding_generator=OpenAITextEmbedding(ai_model_id="text-embedding-3-small"),
),
},
],
to_dict=lambda record, **_: record.to_dict(orient="records"),
from_dict=lambda records, **_: pd.DataFrame(records),
container_mode=True,
)


async def main():
# setup the kernel
kernel = Kernel()
kernel.add_service(OpenAITextEmbedding(service_id="embedding", ai_model_id="text-embedding-3-small"))

# create the record collection
async with AzureAISearchCollection[pd.DataFrame](
async with AzureAISearchCollection[str, pd.DataFrame](
record_type=pd.DataFrame,
definition=model_fields,
) as record_collection:
definition=definition,
) as collection:
await collection.ensure_collection_exists()
# create some records
records = [
{"id": str(uuid4()), "content": "my dict text", "vector": None},
{"id": str(uuid4()), "content": "my second text", "vector": None},
{
"id": str(uuid4()),
"title": "Document about Semantic Kernel.",
"content": "Semantic Kernel is a framework for building AI applications.",
},
{
"id": str(uuid4()),
"title": "Document about Python",
"content": "Python is a programming language that lets you work quickly.",
},
]

# create the dataframe and add the embeddings
# create the dataframe and add the content you want to embed to a new column
df = pd.DataFrame(records)
df = await add_vector_to_records(kernel, df, None, definition=model_fields)
print("Records with embeddings:")
print(df.shape)
print(df.head(5))

df["vector"] = df.apply(lambda row: f"title: {row['title']}, content: {row['content']}", axis=1)
print(df.head(1))
# upsert the records (for a container, upsert and upsert_batch are equivalent)
await record_collection.upsert_batch(df)
await collection.upsert(df)

# retrieve a record
result = await record_collection.get(records[0]["id"])
print("Retrieved records:")
print(result.shape)
print(result.head(5))
result = await collection.get(top=2)
if result is None:
print("No records found, this is sometimes because the get is too fast and the index is not ready yet.")
else:
print("Retrieved records:")
print(result.to_string())

await collection.ensure_collection_deleted()


if __name__ == "__main__":