diff --git a/python/samples/concepts/caching/semantic_caching.py b/python/samples/concepts/caching/semantic_caching.py
index 2a175dd4ca8a..c2353cde3a7c 100644
--- a/python/samples/concepts/caching/semantic_caching.py
+++ b/python/samples/concepts/caching/semantic_caching.py
@@ -10,7 +10,7 @@
 from semantic_kernel import Kernel
 from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion, OpenAITextEmbedding
 from semantic_kernel.connectors.memory.in_memory import InMemoryStore
-from semantic_kernel.data import VectorStore, VectorStoreField, VectorStoreRecordCollection, vectorstoremodel
+from semantic_kernel.data.vectors import VectorStore, VectorStoreCollection, VectorStoreField, vectorstoremodel
 from semantic_kernel.filters import FilterTypes, FunctionInvocationContext, PromptRenderContext
 from semantic_kernel.functions import FunctionResult
 
@@ -41,9 +41,7 @@ def __init__(
         if vector_store.embedding_generator is None:
             raise ValueError("The vector store must have an embedding generator.")
         self.vector_store = vector_store
-        self.collection: VectorStoreRecordCollection[str, CacheRecord] = vector_store.get_collection(
-            record_type=CacheRecord
-        )
+        self.collection: VectorStoreCollection[str, CacheRecord] = vector_store.get_collection(record_type=CacheRecord)
         self.score_threshold = score_threshold
 
     async def on_prompt_render(
diff --git a/python/samples/concepts/chat_history/store_chat_history_in_cosmosdb.py b/python/samples/concepts/chat_history/store_chat_history_in_cosmosdb.py
index c20ea1841e28..3dd30e6c74ac 100644
--- a/python/samples/concepts/chat_history/store_chat_history_in_cosmosdb.py
+++ b/python/samples/concepts/chat_history/store_chat_history_in_cosmosdb.py
@@ -11,7 +11,7 @@
 from semantic_kernel.contents import ChatHistory, ChatMessageContent
 from semantic_kernel.core_plugins.math_plugin import MathPlugin
 from semantic_kernel.core_plugins.time_plugin import TimePlugin
-from semantic_kernel.data import VectorStore, VectorStoreField, VectorStoreRecordCollection, vectorstoremodel
+from semantic_kernel.data.vectors import VectorStore, VectorStoreCollection, VectorStoreField, vectorstoremodel
 
 """
 This sample demonstrates how to build a conversational chatbot
@@ -49,7 +49,7 @@ class ChatHistoryInCosmosDB(ChatHistory):
     session_id: str
     user_id: str
     store: VectorStore
-    collection: VectorStoreRecordCollection[str, ChatHistoryModel] | None = None
+    collection: VectorStoreCollection[str, ChatHistoryModel] | None = None
 
     async def create_collection(self, collection_name: str) -> None:
         """Create a collection with the inbuild data model using the vector store.
diff --git a/python/samples/concepts/memory/azure_ai_search_hotel_samples/data_model.py b/python/samples/concepts/memory/azure_ai_search_hotel_samples/data_model.py
index 6c290b4b4257..578c3661fb1a 100644
--- a/python/samples/concepts/memory/azure_ai_search_hotel_samples/data_model.py
+++ b/python/samples/concepts/memory/azure_ai_search_hotel_samples/data_model.py
@@ -15,7 +15,7 @@
 )
 from pydantic import BaseModel, ConfigDict
 
-from semantic_kernel.data import VectorStoreField, vectorstoremodel
+from semantic_kernel.data.vectors import VectorStoreField, vectorstoremodel
 
 """
 The data model used for this sample is based on the hotel data model from the Azure AI Search samples.
diff --git a/python/samples/concepts/memory/complex_memory.py b/python/samples/concepts/memory/complex_memory.py
index 16ebc3cf92af..12cdd49d6966 100644
--- a/python/samples/concepts/memory/complex_memory.py
+++ b/python/samples/concepts/memory/complex_memory.py
@@ -26,9 +26,13 @@
     SqlServerCollection,
     WeaviateCollection,
 )
-from semantic_kernel.data import VectorStoreRecordCollection, vectorstoremodel
-from semantic_kernel.data.definitions import VectorStoreField
-from semantic_kernel.data.vectors import SearchType, VectorSearch
+from semantic_kernel.data.vectors import (
+    SearchType,
+    VectorSearchProtocol,
+    VectorStoreCollection,
+    VectorStoreField,
+    vectorstoremodel,
+)
 
 # This is a rather complex sample, showing how to use the vector store
 # with a number of different collections.
@@ -46,8 +50,8 @@ class DataModel:
     title: Annotated[str, VectorStoreField("data", is_full_text_indexed=True)]
     content: Annotated[str, VectorStoreField("data", is_full_text_indexed=True)]
     embedding: Annotated[
-        str | None,
-        VectorStoreField("vector", dimensions=1536, type_="float"),
+        list[float] | str | None,
+        VectorStoreField("vector", dimensions=1536, type="float"),
     ] = None
     id: Annotated[
         str,
@@ -55,7 +59,7 @@ class DataModel:
             "key",
         ),
     ] = field(default_factory=lambda: str(uuid4()))
-    tag: Annotated[str | None, VectorStoreField("data", type_="str", is_indexed=True)] = None
+    tag: Annotated[str | None, VectorStoreField("data", type="str", is_indexed=True)] = None
 
     def __post_init__(self, **kwargs):
         if self.embedding is None:
@@ -94,7 +98,7 @@ def __post_init__(self, **kwargs):
 # function which returns the collection.
 # Using a function allows for lazy initialization of the collection,
 # so that settings for unused collections do not cause validation errors.
-collections: dict[str, Callable[[], VectorStoreRecordCollection]] = {
+collections: dict[str, Callable[[], VectorStoreCollection]] = {
     "ai_search": lambda: AzureAISearchCollection[str, DataModel](record_type=DataModel),
     "postgres": lambda: PostgresCollection[str, DataModel](record_type=DataModel),
     "redis_json": lambda: RedisJsonCollection[str, DataModel](
@@ -143,6 +147,7 @@ async def main(collection: str, use_azure_openai: bool):
     )
     kernel.add_service(embedder)
     async with collections[collection]() as record_collection:
+        assert isinstance(record_collection, VectorSearchProtocol)  # nosec
         record_collection.embedding_generator = embedder
         print_with_color(f"Creating {collection} collection!", Colors.CGREY)
         # cleanup any existing collection
@@ -172,7 +177,7 @@ async def main(collection: str, use_azure_openai: bool):
         keys = await record_collection.upsert(records)
         print(f"    Upserted {keys=}")
         print_with_color("Getting records!", Colors.CBLUE)
-        results = await record_collection.get(top=10, order_by={"field": "content"})
+        results = await record_collection.get(top=10, order_by="content")
         if results:
             [print_record(record=result) for result in results]
         else:
@@ -187,7 +192,6 @@ async def main(collection: str, use_azure_openai: bool):
         print_with_color("Now we can start searching.", Colors.CBLUE)
         print_with_color("  For each type of search, enter a search term, for instance `python`.", Colors.CBLUE)
         print_with_color("  Enter exit to exit, and skip or nothing to skip this search.", Colors.CBLUE)
-        assert isinstance(record_collection, VectorSearch)  # nosec
         print("-" * 30)
         print_with_color(
             "This collection supports the following search types: "
diff --git a/python/samples/concepts/memory/data_models.py b/python/samples/concepts/memory/data_models.py
index 4aaf8dfc74c4..e2f539a8bb23 100644
--- a/python/samples/concepts/memory/data_models.py
+++ b/python/samples/concepts/memory/data_models.py
@@ -7,7 +7,7 @@
 from pandas import DataFrame
 from pydantic import BaseModel, Field
 
-from semantic_kernel.data import VectorStoreCollectionDefinition, VectorStoreField, vectorstoremodel
+from semantic_kernel.data.vectors import VectorStoreCollectionDefinition, VectorStoreField, vectorstoremodel
 
 # This concept shows the different ways you can create a vector store data model
 # using dataclasses, Pydantic, and Python classes.
diff --git a/python/samples/concepts/memory/memory_with_pandas.py b/python/samples/concepts/memory/memory_with_pandas.py
index 956b643cd23c..82a42ae46aa9 100644
--- a/python/samples/concepts/memory/memory_with_pandas.py
+++ b/python/samples/concepts/memory/memory_with_pandas.py
@@ -7,7 +7,7 @@
 
 from semantic_kernel.connectors.ai.open_ai import OpenAITextEmbedding
 from semantic_kernel.connectors.memory.azure_ai_search import AzureAISearchCollection
-from semantic_kernel.data import VectorStoreCollectionDefinition, VectorStoreField
+from semantic_kernel.data.vectors import VectorStoreCollectionDefinition, VectorStoreField
 
 definition = VectorStoreCollectionDefinition(
     collection_name="pandas_test_index",
diff --git a/python/samples/concepts/memory/simple_memory.py b/python/samples/concepts/memory/simple_memory.py
index 85ee6d7cb2a6..117e411761db 100644
--- a/python/samples/concepts/memory/simple_memory.py
+++ b/python/samples/concepts/memory/simple_memory.py
@@ -10,7 +10,7 @@
 from samples.concepts.resources.utils import Colors, print_with_color
 from semantic_kernel.connectors.ai.open_ai import OpenAITextEmbedding
 from semantic_kernel.connectors.memory import InMemoryCollection
-from semantic_kernel.data import VectorStoreField, vectorstoremodel
+from semantic_kernel.data.vectors import VectorStoreField, vectorstoremodel
 
 # This is the most basic example of a vector store and collection
 # For a more complex example, using different collection types, see "complex_memory.py"
diff --git a/python/samples/concepts/on_your_data/azure_chat_gpt_with_data_api.py b/python/samples/concepts/on_your_data/azure_chat_gpt_with_data_api.py
index f2a382511988..3f3e9ce58863 100644
--- a/python/samples/concepts/on_your_data/azure_chat_gpt_with_data_api.py
+++ b/python/samples/concepts/on_your_data/azure_chat_gpt_with_data_api.py
@@ -10,7 +10,7 @@
     AzureChatPromptExecutionSettings,
     ExtraBody,
 )
-from semantic_kernel.connectors.memory.azure_cognitive_search.azure_ai_search_settings import AzureAISearchSettings
+from semantic_kernel.connectors.memory.azure_ai_search import AzureAISearchSettings
 from semantic_kernel.contents import ChatHistory
 from semantic_kernel.functions import KernelArguments
 from semantic_kernel.prompt_template import InputVariable, PromptTemplateConfig
diff --git a/python/samples/concepts/rag/rag_with_vector_collection.py b/python/samples/concepts/rag/rag_with_vector_collection.py
index 9c95d67789bf..297a20cd6be6 100644
--- a/python/samples/concepts/rag/rag_with_vector_collection.py
+++ b/python/samples/concepts/rag/rag_with_vector_collection.py
@@ -11,7 +11,7 @@
     OpenAITextEmbedding,
 )
 from semantic_kernel.connectors.memory import InMemoryCollection
-from semantic_kernel.data import VectorStoreField, vectorstoremodel
+from semantic_kernel.data.vectors import VectorStoreField, vectorstoremodel
 from semantic_kernel.functions import KernelArguments
 
 """
diff --git a/python/samples/concepts/rag/self_critique_rag.py b/python/samples/concepts/rag/self_critique_rag.py
index 7e131ab79747..32b90bd1adc8 100644
--- a/python/samples/concepts/rag/self_critique_rag.py
+++ b/python/samples/concepts/rag/self_critique_rag.py
@@ -9,7 +9,7 @@
 from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion, OpenAITextEmbedding
 from semantic_kernel.connectors.memory import AzureAISearchCollection
 from semantic_kernel.contents import ChatHistory
-from semantic_kernel.data import VectorStoreField, vectorstoremodel
+from semantic_kernel.data.vectors import VectorStoreField, vectorstoremodel
 from semantic_kernel.functions.kernel_function import KernelFunction
 
 """
diff --git a/python/samples/concepts/search/bing_text_search_as_plugin.py b/python/samples/concepts/search/bing_text_search_as_plugin.py
deleted file mode 100644
index 53968f10ec21..000000000000
--- a/python/samples/concepts/search/bing_text_search_as_plugin.py
+++ /dev/null
@@ -1,137 +0,0 @@
-# Copyright (c) Microsoft. All rights reserved.
-
-from collections.abc import Awaitable, Callable
-
-from semantic_kernel import Kernel
-from semantic_kernel.connectors.ai import FunctionChoiceBehavior
-from semantic_kernel.connectors.ai.open_ai import (
-    OpenAIChatCompletion,
-    OpenAIChatPromptExecutionSettings,
-)
-from semantic_kernel.connectors.search.bing import BingSearch
-from semantic_kernel.contents import ChatHistory
-from semantic_kernel.filters import FilterTypes, FunctionInvocationContext
-from semantic_kernel.functions import KernelArguments, KernelParameterMetadata, KernelPlugin
-
-kernel = Kernel()
-kernel.add_service(OpenAIChatCompletion(service_id="chat"))
-kernel.add_plugin(
-    KernelPlugin.from_text_search_with_search(
-        BingSearch(),
-        plugin_name="bing",
-        description="Get details about Semantic Kernel concepts.",
-        parameters=[
-            KernelParameterMetadata(
-                name="query",
-                description="The search query.",
-                type="str",
-                is_required=True,
-                type_object=str,
-            ),
-            KernelParameterMetadata(
-                name="top",
-                description="The number of results to return.",
-                type="int",
-                is_required=False,
-                default_value=2,
-                type_object=int,
-            ),
-            KernelParameterMetadata(
-                name="skip",
-                description="The number of results to skip.",
-                type="int",
-                is_required=False,
-                default_value=0,
-                type_object=int,
-            ),
-            KernelParameterMetadata(
-                name="site",
-                description="The site to search.",
-                default_value="https://github.com/microsoft/semantic-kernel/tree/main/python",
-                type="str",
-                is_required=False,
-                type_object=str,
-            ),
-        ],
-    )
-)
-chat_function = kernel.add_function(
-    prompt="{{$chat_history}}{{$user_input}}",
-    plugin_name="ChatBot",
-    function_name="Chat",
-)
-execution_settings = OpenAIChatPromptExecutionSettings(
-    service_id="chat",
-    max_tokens=2000,
-    temperature=0.7,
-    top_p=0.8,
-    function_choice_behavior=FunctionChoiceBehavior.Auto(auto_invoke=True),
-)
-
-history = ChatHistory()
-system_message = """
-You are a chat bot, specialized in Semantic Kernel, Microsoft LLM orchestration SDK.
-Assume questions are related to that, and use the Bing search plugin to find answers.
-"""
-history.add_system_message(system_message)
-history.add_user_message("Hi there, who are you?")
-history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.")
-
-arguments = KernelArguments(settings=execution_settings)
-
-
-@kernel.filter(filter_type=FilterTypes.FUNCTION_INVOCATION)
-async def log_bing_filter(
-    context: FunctionInvocationContext, next: Callable[[FunctionInvocationContext], Awaitable[None]]
-):
-    if context.function.plugin_name == "bing":
-        print("Calling Bing search with arguments:")
-        if "query" in context.arguments:
-            print(f'  Query: "{context.arguments["query"]}"')
-        if "count" in context.arguments:
-            print(f'  Count: "{context.arguments["count"]}"')
-        if "skip" in context.arguments:
-            print(f'  Skip: "{context.arguments["skip"]}"')
-        await next(context)
-        print("Bing search completed.")
-    else:
-        await next(context)
-
-
-async def chat() -> bool:
-    try:
-        user_input = input("User:> ")
-    except KeyboardInterrupt:
-        print("\n\nExiting chat...")
-        return False
-    except EOFError:
-        print("\n\nExiting chat...")
-        return False
-
-    if user_input == "exit":
-        print("\n\nExiting chat...")
-        return False
-    arguments["user_input"] = user_input
-    arguments["chat_history"] = history
-    result = await kernel.invoke(chat_function, arguments=arguments)
-    print(f"Mosscap:> {result}")
-    history.add_user_message(user_input)
-    history.add_assistant_message(str(result))
-    return True
-
-
-async def main():
-    chatting = True
-    print(
-        "Welcome to the chat bot!\
-        \n  Type 'exit' to exit.\
-        \n  Try to find out more about the inner workings of Semantic Kernel."
-    )
-    while chatting:
-        chatting = await chat()
-
-
-if __name__ == "__main__":
-    import asyncio
-
-    asyncio.run(main())
diff --git a/python/samples/concepts/search/brave_text_search_as_plugin.py b/python/samples/concepts/search/brave_text_search_as_plugin.py
index 326ee853283e..ca18dd924030 100644
--- a/python/samples/concepts/search/brave_text_search_as_plugin.py
+++ b/python/samples/concepts/search/brave_text_search_as_plugin.py
@@ -8,7 +8,7 @@
 from semantic_kernel.connectors.search.brave import BraveSearch
 from semantic_kernel.contents import ChatHistory
 from semantic_kernel.filters import FilterTypes, FunctionInvocationContext
-from semantic_kernel.functions import KernelArguments, KernelParameterMetadata, KernelPlugin
+from semantic_kernel.functions import KernelArguments, KernelParameterMetadata
 
 """
 This project demonstrates how to integrate the Brave Search API as a plugin into the Semantic Kernel 
@@ -21,10 +21,10 @@
 
 kernel = Kernel()
 kernel.add_service(OpenAIChatCompletion(service_id="chat"))
-kernel.add_plugin(
-    KernelPlugin.from_text_search_with_search(
-        BraveSearch(),
-        plugin_name="brave",
+kernel.add_function(
+    plugin_name="brave",
+    function=BraveSearch().create_search_function(
+        function_name="brave_search",
         description="Get details about Semantic Kernel concepts.",
         parameters=[
             KernelParameterMetadata(
@@ -51,7 +51,7 @@
                 type_object=int,
             ),
         ],
-    )
+    ),
 )
 chat_function = kernel.add_function(
     prompt="{{$chat_history}}{{$user_input}}",
diff --git a/python/samples/getting_started/third_party/postgres-memory.ipynb b/python/samples/getting_started/third_party/postgres-memory.ipynb
index 4b0f6fbef9ce..a45f807242f5 100644
--- a/python/samples/getting_started/third_party/postgres-memory.ipynb
+++ b/python/samples/getting_started/third_party/postgres-memory.ipynb
@@ -1,710 +1,710 @@
 {
-    "cells": [
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "# Using Postgres as memory\n",
-                "\n",
-                "This notebook shows how to use Postgres as a memory store in Semantic Kernel.\n",
-                "\n",
-                "The code below pulls the most recent papers from [ArviX](https://arxiv.org/), creates embeddings from the paper abstracts, and stores them in a Postgres database.\n",
-                "\n",
-                "In the future, we can use the Postgres vector store to search the database for similar papers based on the embeddings - stay tuned!"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "import textwrap\n",
-                "import xml.etree.ElementTree as ET\n",
-                "from dataclasses import dataclass\n",
-                "from datetime import datetime\n",
-                "from typing import Annotated, Any\n",
-                "\n",
-                "import requests\n",
-                "\n",
-                "from semantic_kernel import Kernel\n",
-                "from semantic_kernel.connectors.ai import FunctionChoiceBehavior\n",
-                "from semantic_kernel.connectors.ai.open_ai import (\n",
-                "    AzureChatCompletion,\n",
-                "    AzureChatPromptExecutionSettings,\n",
-                "    AzureTextEmbedding,\n",
-                "    OpenAITextEmbedding,\n",
-                ")\n",
-                "from semantic_kernel.connectors.memory.postgres import PostgresCollection\n",
-                "from semantic_kernel.contents import ChatHistory\n",
-                "from semantic_kernel.data import (\n",
-                "    DistanceFunction,\n",
-                "    IndexKind,\n",
-                "    VectorStoreField,\n",
-                "    vectorstoremodel,\n",
-                ")\n",
-                "from semantic_kernel.functions import KernelParameterMetadata\n",
-                "from semantic_kernel.functions.kernel_arguments import KernelArguments"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "## Set up your environment\n",
-                "\n",
-                "You'll need to set up your environment to provide connection information to Postgres, as well as OpenAI or Azure OpenAI.\n",
-                "\n",
-                "To do this, copy the `.env.example` file to `.env` and fill in the necessary information.\n",
-                "\n",
-                "__Note__: If you're using VSCode to execute the notebook, the settings in `.env` in the root of the repository will be picked up automatically.\n",
-                "\n",
-                "### Postgres configuration\n",
-                "\n",
-                "You'll need to provide a connection string to a Postgres database. You can use a local Postgres instance, or a cloud-hosted one.\n",
-                "You can provide a connection string, or provide environment variables with the connection information. See the .env.example file for `POSTGRES_` settings.\n",
-                "\n",
-                "#### Using Docker\n",
-                "\n",
-                "You can also use docker to bring up a Postgres instance by following the steps below:\n",
-                "\n",
-                "Create an `init.sql` that has the following:\n",
-                "\n",
-                "```sql\n",
-                "CREATE EXTENSION IF NOT EXISTS vector;\n",
-                "```\n",
-                "\n",
-                "Now you can start a postgres instance with the following:\n",
-                "\n",
-                "```\n",
-                "docker pull pgvector/pgvector:pg16\n",
-                "docker run --rm -it --name pgvector -p 5432:5432 -v ./init.sql:/docker-entrypoint-initdb.d/init.sql -e POSTGRES_PASSWORD=example pgvector/pgvector:pg16\n",
-                "```\n",
-                "\n",
-                "_Note_: Use `.\\init.sql` on Windows and `./init.sql` on WSL or Linux/Mac.\n",
-                "\n",
-                "Then you could use the connection string:\n",
-                "\n",
-                "```\n",
-                "POSTGRES_CONNECTION_STRING=\"host=localhost port=5432 dbname=postgres user=postgres password=example\"\n",
-                "```\n",
-                "\n",
-                "### OpenAI configuration\n",
-                "\n",
-                "You can either use OpenAI or Azure OpenAI APIs. You provide the API key and other configuration in the `.env` file. Set either the `OPENAI_` or `AZURE_OPENAI_` settings.\n"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 2,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "# Path to the environment file\n",
-                "env_file_path = \".env\""
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "Here we set some additional configuration."
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 3,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "# -- ArXiv settings --\n",
-                "\n",
-                "# The search term to use when searching for papers on arXiv. All metadata fields for the papers are searched.\n",
-                "SEARCH_TERM = \"RAG\"\n",
-                "\n",
-                "# The category of papers to search for on arXiv. See https://arxiv.org/category_taxonomy for a list of categories.\n",
-                "ARVIX_CATEGORY = \"cs.AI\"\n",
-                "\n",
-                "# The maximum number of papers to search for on arXiv.\n",
-                "MAX_RESULTS = 300\n",
-                "\n",
-                "# -- OpenAI settings --\n",
-                "\n",
-                "# Set this flag to False to use the OpenAI API instead of Azure OpenAI\n",
-                "USE_AZURE_OPENAI = True"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "Here we define a vector store model. This model defines the table and column names for storing the embeddings. We use the `@vectorstoremodel` decorator to tell Semantic Kernel to create a vector store definition from the model. The VectorStoreRecordField annotations define the fields that will be stored in the database, including key and vector fields."
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "@vectorstoremodel\n",
-                "@dataclass\n",
-                "class ArxivPaper:\n",
-                "    id: Annotated[str, VectorStoreField(\"key\")]\n",
-                "    title: Annotated[str, VectorStoreField(\"data\")]\n",
-                "    abstract: Annotated[str, VectorStoreField(\"data\")]\n",
-                "    published: Annotated[datetime, VectorStoreField(\"data\")]\n",
-                "    authors: Annotated[list[str], VectorStoreField(\"data\")]\n",
-                "    link: Annotated[str | None, VectorStoreField(\"data\")]\n",
-                "    abstract_vector: Annotated[\n",
-                "        list[float] | str | None,\n",
-                "        VectorStoreField(\n",
-                "            \"vector\",\n",
-                "            index_kind=IndexKind.HNSW,\n",
-                "            dimensions=1536,\n",
-                "            distance_function=DistanceFunction.COSINE_DISTANCE,\n",
-                "        ),\n",
-                "    ] = None\n",
-                "\n",
-                "    def __post_init__(self):\n",
-                "        if self.abstract_vector is None:\n",
-                "            self.abstract_vector = self.abstract\n",
-                "\n",
-                "    @classmethod\n",
-                "    def from_arxiv_info(cls, arxiv_info: dict[str, Any]) -> \"ArxivPaper\":\n",
-                "        return cls(\n",
-                "            id=arxiv_info[\"id\"],\n",
-                "            title=arxiv_info[\"title\"].replace(\"\\n  \", \" \"),\n",
-                "            abstract=arxiv_info[\"abstract\"].replace(\"\\n  \", \" \"),\n",
-                "            published=arxiv_info[\"published\"],\n",
-                "            authors=arxiv_info[\"authors\"],\n",
-                "            link=arxiv_info[\"link\"],\n",
-                "        )"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "Below is a function that queries the ArviX API for the most recent papers based on our search query and category."
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 5,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "def query_arxiv(search_query: str, category: str = \"cs.AI\", max_results: int = 10) -> list[dict[str, Any]]:\n",
-                "    \"\"\"\n",
-                "    Query the ArXiv API and return a list of dictionaries with relevant metadata for each paper.\n",
-                "\n",
-                "    Args:\n",
-                "        search_query: The search term or topic to query for.\n",
-                "        category: The category to restrict the search to (default is \"cs.AI\").\n",
-                "        See https://arxiv.org/category_taxonomy for a list of categories.\n",
-                "        max_results: Maximum number of results to retrieve (default is 10).\n",
-                "    \"\"\"\n",
-                "    response = requests.get(\n",
-                "        \"http://export.arxiv.org/api/query?\"\n",
-                "        f\"search_query=all:%22{search_query.replace(' ', '+')}%22\"\n",
-                "        f\"+AND+cat:{category}&start=0&max_results={max_results}&sortBy=lastUpdatedDate&sortOrder=descending\"\n",
-                "    )\n",
-                "\n",
-                "    root = ET.fromstring(response.content)\n",
-                "    ns = {\"atom\": \"http://www.w3.org/2005/Atom\"}\n",
-                "\n",
-                "    return [\n",
-                "        {\n",
-                "            \"id\": entry.find(\"atom:id\", ns).text.split(\"/\")[-1],\n",
-                "            \"title\": entry.find(\"atom:title\", ns).text,\n",
-                "            \"abstract\": entry.find(\"atom:summary\", ns).text,\n",
-                "            \"published\": entry.find(\"atom:published\", ns).text,\n",
-                "            \"link\": entry.find(\"atom:id\", ns).text,\n",
-                "            \"authors\": [author.find(\"atom:name\", ns).text for author in entry.findall(\"atom:author\", ns)],\n",
-                "            \"categories\": [category.get(\"term\") for category in entry.findall(\"atom:category\", ns)],\n",
-                "            \"pdf_link\": next(\n",
-                "                (link_tag.get(\"href\") for link_tag in entry.findall(\"atom:link\", ns) if link_tag.get(\"title\") == \"pdf\"),\n",
-                "                None,\n",
-                "            ),\n",
-                "        }\n",
-                "        for entry in root.findall(\"atom:entry\", ns)\n",
-                "    ]"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "We use this function to query papers and store them in memory as our model types."
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 6,
-            "metadata": {},
-            "outputs": [
-                {
-                    "name": "stdout",
-                    "output_type": "stream",
-                    "text": [
-                        "Found 300 papers on 'RAG'\n"
-                    ]
-                }
-            ],
-            "source": [
-                "arxiv_papers: list[ArxivPaper] = [\n",
-                "    ArxivPaper.from_arxiv_info(paper)\n",
-                "    for paper in query_arxiv(SEARCH_TERM, category=ARVIX_CATEGORY, max_results=MAX_RESULTS)\n",
-                "]\n",
-                "\n",
-                "print(f\"Found {len(arxiv_papers)} papers on '{SEARCH_TERM}'\")"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "Create a `PostgresCollection`, which represents the table in Postgres where we will store the paper information and embeddings."
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "if USE_AZURE_OPENAI:\n",
-                "    text_embedding = AzureTextEmbedding(service_id=\"embedding\", env_file_path=env_file_path)\n",
-                "else:\n",
-                "    text_embedding = OpenAITextEmbedding(service_id=\"embedding\", env_file_path=env_file_path)\n",
-                "collection = PostgresCollection[str, ArxivPaper](\n",
-                "    collection_name=\"arxiv_records\",\n",
-                "    record_type=ArxivPaper,\n",
-                "    env_file_path=env_file_path,\n",
-                "    embedding_generator=text_embedding,\n",
-                ")"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "Now that the models have embeddings, we can write them into the Postgres database."
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "async with collection:\n",
-                "    await collection.ensure_collection_exists()\n",
-                "    keys = await collection.upsert(arxiv_papers)"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "Here we retrieve the first few models from the database and print out their information."
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "metadata": {},
-            "outputs": [
-                {
-                    "name": "stdout",
-                    "output_type": "stream",
-                    "text": [
-                        "# Engineering LLM Powered Multi-agent Framework for Autonomous CloudOps\n",
-                        "\n",
-                        "Abstract:   Cloud Operations (CloudOps) is a rapidly growing field focused on the\n",
-                        "automated management and optimization of cloud infrastructure which is essential\n",
-                        "for organizations navigating increasingly complex cloud environments. MontyCloud\n",
-                        "Inc. is one of the major companies in the CloudOps domain that leverages\n",
-                        "autonomous bots to manage cloud compliance, security, and continuous operations.\n",
-                        "To make the platform more accessible and effective to the customers, we\n",
-                        "leveraged the use of GenAI. Developing a GenAI-based solution for autonomous\n",
-                        "CloudOps for the existing MontyCloud system presented us with various challenges\n",
-                        "such as i) diverse data sources; ii) orchestration of multiple processes; and\n",
-                        "iii) handling complex workflows to automate routine tasks. To this end, we\n",
-                        "developed MOYA, a multi-agent framework that leverages GenAI and balances\n",
-                        "autonomy with the necessary human control. This framework integrates various\n",
-                        "internal and external systems and is optimized for factors like task\n",
-                        "orchestration, security, and error mitigation while producing accurate,\n",
-                        "reliable, and relevant insights by utilizing Retrieval Augmented Generation\n",
-                        "(RAG). Evaluations of our multi-agent system with the help of practitioners as\n",
-                        "well as using automated checks demonstrate enhanced accuracy, responsiveness,\n",
-                        "and effectiveness over non-agentic approaches across complex workflows.\n",
-                        "Published: 2025-01-14 16:30:10\n",
-                        "Link: http://arxiv.org/abs/2501.08243v1\n",
-                        "PDF Link: http://arxiv.org/abs/2501.08243v1\n",
-                        "Authors: Kannan Parthasarathy, Karthik Vaidhyanathan, Rudra Dhar, Venkat Krishnamachari, Basil Muhammed, Adyansh Kakran, Sreemaee Akshathala, Shrikara Arun, Sumant Dubey, Mohan Veerubhotla, Amey Karan\n",
-                        "Embedding: [ 0.01063822  0.02977918  0.04532182 ... -0.00264323  0.00081101\n",
-                        "  0.01491571]\n",
-                        "\n",
-                        "\n",
-                        "# Eliciting In-context Retrieval and Reasoning for Long-context Large Language Models\n",
-                        "\n",
-                        "Abstract:   Recent advancements in long-context language models (LCLMs) promise to\n",
-                        "transform Retrieval-Augmented Generation (RAG) by simplifying pipelines. With\n",
-                        "their expanded context windows, LCLMs can process entire knowledge bases and\n",
-                        "perform retrieval and reasoning directly -- a capability we define as In-Context\n",
-                        "Retrieval and Reasoning (ICR^2). However, existing benchmarks like LOFT often\n",
-                        "overestimate LCLM performance by providing overly simplified contexts. To\n",
-                        "address this, we introduce ICR^2, a benchmark that evaluates LCLMs in more\n",
-                        "realistic scenarios by including confounding passages retrieved with strong\n",
-                        "retrievers. We then propose three methods to enhance LCLM performance: (1)\n",
-                        "retrieve-then-generate fine-tuning, (2) retrieval-attention-probing, which uses\n",
-                        "attention heads to filter and de-noise long contexts during decoding, and (3)\n",
-                        "joint retrieval head training alongside the generation head. Our evaluation of\n",
-                        "five well-known LCLMs on LOFT and ICR^2 demonstrates significant gains with our\n",
-                        "best approach applied to Mistral-7B: +17 and +15 points by Exact Match on LOFT,\n",
-                        "and +13 and +2 points on ICR^2, compared to vanilla RAG and supervised fine-\n",
-                        "tuning, respectively. It even outperforms GPT-4-Turbo on most tasks despite\n",
-                        "being a much smaller model.\n",
-                        "Published: 2025-01-14 16:38:33\n",
-                        "Link: http://arxiv.org/abs/2501.08248v1\n",
-                        "PDF Link: http://arxiv.org/abs/2501.08248v1\n",
-                        "Authors: Yifu Qiu, Varun Embar, Yizhe Zhang, Navdeep Jaitly, Shay B. Cohen, Benjamin Han\n",
-                        "Embedding: [-0.01305697  0.01166064  0.06267344 ... -0.01627254  0.00974741\n",
-                        " -0.00573298]\n",
-                        "\n",
-                        "\n",
-                        "# ADAM-1: AI and Bioinformatics for Alzheimer's Detection and Microbiome-Clinical Data Integrations\n",
-                        "\n",
-                        "Abstract:   The Alzheimer's Disease Analysis Model Generation 1 (ADAM) is a multi-agent\n",
-                        "large language model (LLM) framework designed to integrate and analyze multi-\n",
-                        "modal data, including microbiome profiles, clinical datasets, and external\n",
-                        "knowledge bases, to enhance the understanding and detection of Alzheimer's\n",
-                        "disease (AD). By leveraging retrieval-augmented generation (RAG) techniques\n",
-                        "along with its multi-agent architecture, ADAM-1 synthesizes insights from\n",
-                        "diverse data sources and contextualizes findings using literature-driven\n",
-                        "evidence. Comparative evaluation against XGBoost revealed similar mean F1 scores\n",
-                        "but significantly reduced variance for ADAM-1, highlighting its robustness and\n",
-                        "consistency, particularly in small laboratory datasets. While currently tailored\n",
-                        "for binary classification tasks, future iterations aim to incorporate additional\n",
-                        "data modalities, such as neuroimaging and biomarkers, to broaden the scalability\n",
-                        "and applicability for Alzheimer's research and diagnostics.\n",
-                        "Published: 2025-01-14 18:56:33\n",
-                        "Link: http://arxiv.org/abs/2501.08324v1\n",
-                        "PDF Link: http://arxiv.org/abs/2501.08324v1\n",
-                        "Authors: Ziyuan Huang, Vishaldeep Kaur Sekhon, Ouyang Guo, Mark Newman, Roozbeh Sadeghian, Maria L. Vaida, Cynthia Jo, Doyle Ward, Vanni Bucci, John P. Haran\n",
-                        "Embedding: [ 0.03896349  0.00422515  0.05525447 ...  0.03374933 -0.01468264\n",
-                        "  0.01850895]\n",
-                        "\n",
-                        "\n"
-                    ]
-                }
-            ],
-            "source": [
-                "async with collection:\n",
-                "    results = await collection.get(keys[:3])\n",
-                "    if results:\n",
-                "        for result in results:\n",
-                "            print(f\"# {result.title}\")\n",
-                "            print()\n",
-                "            wrapped_abstract = textwrap.fill(result.abstract, width=80)\n",
-                "            print(f\"Abstract: {wrapped_abstract}\")\n",
-                "            print(f\"Published: {result.published}\")\n",
-                "            print(f\"Link: {result.link}\")\n",
-                "            print(f\"PDF Link: {result.link}\")\n",
-                "            print(f\"Authors: {', '.join(result.authors)}\")\n",
-                "            print(f\"Embedding: {result.abstract_vector}\")\n",
-                "            print()\n",
-                "            print()"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "The `VectorStoreTextSearch` object gives us the ability to retrieve semantically similar documents directly from a prompt.\n",
-                "Here we search for the top 5 ArXiV abstracts in our database similar to the query about chunking strategies in RAG applications:"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "metadata": {},
-            "outputs": [
-                {
-                    "name": "stdout",
-                    "output_type": "stream",
-                    "text": [
-                        "Found 5 results for query.\n",
-                        "Advanced ingestion process powered by LLM parsing for RAG system: 0.38676463602221456\n",
-                        "StructRAG: Boosting Knowledge Intensive Reasoning of LLMs via Inference-time Hybrid Information Structurization: 0.39733734194342085\n",
-                        "UDA: A Benchmark Suite for Retrieval Augmented Generation in Real-world Document Analysis: 0.3981809737466562\n",
-                        "R^2AG: Incorporating Retrieval Information into Retrieval Augmented Generation: 0.4134050114864055\n",
-                        "Enhancing Retrieval-Augmented Generation: A Study of Best Practices: 0.4144733752075731\n"
-                    ]
-                }
-            ],
-            "source": [
-                "query = \"What are good chunking strategies to use for unstructured text in Retrieval-Augmented Generation applications?\"\n",
-                "\n",
-                "async with collection:\n",
-                "    search_results = await collection.search(query, top=5, include_total_count=True)\n",
-                "    print(f\"Found {search_results.total_count} results for query.\")\n",
-                "    async for search_result in search_results.results:\n",
-                "        title = search_result.record.title\n",
-                "        score = search_result.score\n",
-                "        print(f\"{title}: {score}\")"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "We can enable chat completion to utilize the text search by creating a kernel function for searching the database..."
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "kernel = Kernel()\n",
-                "plugin = kernel.add_functions(\n",
-                "    plugin_name=\"arxiv_plugin\",\n",
-                "    functions=[\n",
-                "        collection.create_search_function(\n",
-                "            # The default parameters match the parameters of the VectorSearchOptions class.\n",
-                "            description=\"Searches for ArXiv papers that are related to the query.\",\n",
-                "            parameters=[\n",
-                "                KernelParameterMetadata(\n",
-                "                    name=\"query\", description=\"What to search for.\", type=\"str\", is_required=True, type_object=str\n",
-                "                ),\n",
-                "                KernelParameterMetadata(\n",
-                "                    name=\"top\",\n",
-                "                    description=\"Number of results to return.\",\n",
-                "                    type=\"int\",\n",
-                "                    default_value=2,\n",
-                "                    type_object=int,\n",
-                "                ),\n",
-                "            ],\n",
-                "        ),\n",
-                "    ],\n",
-                ")"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "...and then setting up a chat completions service that uses `FunctionChoiceBehavior.Auto` to automatically call the search function when appropriate to the users query. We also create the chat function that will be invoked by the kernel."
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 15,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "# Create the chat completion service. This requires an Azure OpenAI completions model deployment and configuration.\n",
-                "chat_completion = AzureChatCompletion(service_id=\"completions\")\n",
-                "kernel.add_service(chat_completion)\n",
-                "\n",
-                "# Now we create the chat function that will use the chat service.\n",
-                "chat_function = kernel.add_function(\n",
-                "    prompt=\"{{$chat_history}}{{$user_input}}\",\n",
-                "    plugin_name=\"ChatBot\",\n",
-                "    function_name=\"Chat\",\n",
-                ")\n",
-                "\n",
-                "# we set the function choice to Auto, so that the LLM can choose the correct function to call.\n",
-                "# and we exclude the ChatBot plugin, so that it does not call itself.\n",
-                "execution_settings = AzureChatPromptExecutionSettings(\n",
-                "    function_choice_behavior=FunctionChoiceBehavior.Auto(filters={\"excluded_plugins\": [\"ChatBot\"]}),\n",
-                "    service_id=\"chat\",\n",
-                "    max_tokens=7000,\n",
-                "    temperature=0.7,\n",
-                "    top_p=0.8,\n",
-                ")"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "Here we create a chat history with a system message and some initial context:"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 16,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "history = ChatHistory()\n",
-                "system_message = \"\"\"\n",
-                "You are a chat bot. Your name is Archie and\n",
-                "you have one goal: help people find answers\n",
-                "to technical questions by relying on the latest\n",
-                "research papers published on ArXiv.\n",
-                "You communicate effectively in the style of a helpful librarian. \n",
-                "You always make sure to include the\n",
-                "ArXiV paper references in your responses.\n",
-                "If you cannot find the answer in the papers,\n",
-                "you will let the user know, but also provide the papers\n",
-                "you did find to be most relevant. If the abstract of the \n",
-                "paper does not specifically reference the user's inquiry,\n",
-                "but you believe it might be relevant, you can still include it\n",
-                "BUT you must make sure to mention that the paper might not directly\n",
-                "address the user's inquiry. Make certain that the papers you link are\n",
-                "from a specific search result.\n",
-                "\"\"\"\n",
-                "history.add_system_message(system_message)\n",
-                "history.add_user_message(\"Hi there, who are you?\")\n",
-                "history.add_assistant_message(\n",
-                "    \"I am Archie, the ArXiV chat bot. I'm here to help you find the latest research papers from ArXiv that relate to your inquiries.\"\n",
-                ")"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "We can now invoke the chat function via the Kernel to get chat completions:"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 17,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "arguments = KernelArguments(\n",
-                "    user_input=query,\n",
-                "    chat_history=history,\n",
-                "    settings=execution_settings,\n",
-                ")\n",
-                "\n",
-                "result = await kernel.invoke(chat_function, arguments=arguments)"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "Printing the result shows that the chat completion service used our text search to locate relevant ArXiV papers based on the query:"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 18,
-            "metadata": {},
-            "outputs": [
-                {
-                    "name": "stdout",
-                    "output_type": "stream",
-                    "text": [
-                        "Archie:>\n",
-                        "What an excellent and timely question! Chunking strategies for unstructured text are\n",
-                        "critical for optimizing Retrieval-Augmented Generation (RAG) systems since they\n",
-                        "significantly affect how effectively a RAG model can retrieve and generate contextually\n",
-                        "relevant information. Let me consult the latest papers on this topic from ArXiv and\n",
-                        "provide you with relevant insights.\n",
-                        "---\n",
-                        "Here are some recent papers that dive into chunking strategies or similar concepts for\n",
-                        "retrieval-augmented frameworks:\n",
-                        "1. **\"Post-training optimization of retrieval-augmented generation models\"**\n",
-                        "   *Authors*: Vibhor Agarwal et al.\n",
-                        "   *Abstract*: While the paper discusses optimization strategies for retrieval-augmented\n",
-                        "generation models, there is a discussion on handling unstructured text that could apply to\n",
-                        "chunking methodologies. Chunking isn't always explicitly mentioned as \"chunking\" but may\n",
-                        "be referred to in contexts like splitting data for retrieval.\n",
-                        "   *ArXiv link*: [arXiv:2308.10701](https://arxiv.org/abs/2308.10701)\n",
-                        "   *Note*: This paper may not focus entirely on chunking strategies but might discuss\n",
-                        "relevant downstream considerations. It could still provide a foundation for you to explore\n",
-                        "how chunking integrates with retrievers.\n",
-                        "2. **\"Beyond Text: Retrieval-Augmented Reranking for Open-Domain Tasks\"**\n",
-                        "   *Authors*: Younggyo Seo et al.\n",
-                        "   *Abstract*: Although primarily focused on retrieval augmentation for reranking, there\n",
-                        "are reflections on how document structure impacts task performance. Chunking unstructured\n",
-                        "text to improve retrievability for such tasks could indirectly relate to this work.\n",
-                        "   *ArXiv link*: [arXiv:2310.03714](https://arxiv.org/abs/2310.03714)\n",
-                        "3. **\"ALMA: Alignment of Generative and Retrieval Models for Long Documents\"**\n",
-                        "   *Authors*: Yao Fu et al.\n",
-                        "   *Abstract excerpt*: \"Our approach is designed to handle retrieval and generation for\n",
-                        "long documents by aligning the retrieval and generation models more effectively.\"\n",
-                        "Strategies to divide and process long documents into smaller chunks for efficient\n",
-                        "alignment are explicitly discussed. A focus on handling unstructured long-form content\n",
-                        "makes this paper highly relevant.\n",
-                        "   *ArXiv link*: [arXiv:2308.05467](https://arxiv.org/abs/2308.05467)\n",
-                        "4. **\"Enhancing Context-aware Question Generation with Multi-modal Knowledge\"**\n",
-                        "   *Authors*: Jialong Han et al.\n",
-                        "   *Abstract excerpt*: \"Proposed techniques focus on improving retrievals through better\n",
-                        "division of available knowledge.\" It doesn’t focus solely on text chunking in the RAG\n",
-                        "framework but might be interesting since contextual awareness often relates to\n",
-                        "preprocessing unstructured input into structured chunks.\n",
-                        "   *ArXiv link*: [arXiv:2307.12345](https://arxiv.org/abs/2307.12345)\n",
-                        "---\n",
-                        "### Practical Approaches Discussed in Literature:\n",
-                        "From my broad understanding of RAG systems and some of the details in these papers, here\n",
-                        "are common chunking strategies discussed in the research community:\n",
-                        "1. **Sliding Window Approach**: Divide the text into overlapping chunks of fixed lengths\n",
-                        "(e.g., 512 tokens with an overlap of 128 tokens). This helps ensure no important context\n",
-                        "is left behind when chunks are created.\n",
-                        "\n",
-                        "2. **Semantic Chunking**: Use sentence embeddings or clustering techniques (e.g., via Bi-\n",
-                        "Encoders or Sentence Transformers) to ensure chunks align semantically rather than naively\n",
-                        "by token count.\n",
-                        "3. **Dynamic Partitioning**: Implement chunking based on higher-order structure in the\n",
-                        "text, such as splitting at sentence boundaries, paragraph breaks, or logical sections.\n",
-                        "4. **Content-aware Chunking**: Experiment with LLMs to pre-identify contextual relevance\n",
-                        "of different parts of the text and chunk accordingly.\n",
-                        "---\n",
-                        "If you'd like, I can search more specifically on a sub-part of chunking strategies or\n",
-                        "related RAG optimizations. Let me know!\n"
-                    ]
-                }
-            ],
-            "source": [
-                "def wrap_text(text, width=90):\n",
-                "    paragraphs = text.split(\"\\n\\n\")  # Split the text into paragraphs\n",
-                "    wrapped_paragraphs = [\n",
-                "        \"\\n\".join(textwrap.fill(part, width=width) for paragraph in paragraphs for part in paragraph.split(\"\\n\"))\n",
-                "    ]  # Wrap each paragraph, split by newlines\n",
-                "    return \"\\n\\n\".join(wrapped_paragraphs)  # Join the wrapped paragraphs back together\n",
-                "\n",
-                "\n",
-                "print(f\"Archie:>\\n{wrap_text(str(result))}\")"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "metadata": {},
-            "outputs": [],
-            "source": []
-        }
-    ],
-    "metadata": {
-        "kernelspec": {
-            "display_name": ".venv",
-            "language": "python",
-            "name": "python3"
-        },
-        "language_info": {
-            "codemirror_mode": {
-                "name": "ipython",
-                "version": 3
-            },
-            "file_extension": ".py",
-            "mimetype": "text/x-python",
-            "name": "python",
-            "nbconvert_exporter": "python",
-            "pygments_lexer": "ipython3",
-            "version": "3.10.15"
-        }
-    },
-    "nbformat": 4,
-    "nbformat_minor": 2
-}
\ No newline at end of file
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Using Postgres as memory\n",
+    "\n",
+    "This notebook shows how to use Postgres as a memory store in Semantic Kernel.\n",
+    "\n",
+    "The code below pulls the most recent papers from [arXiv](https://arxiv.org/), creates embeddings from the paper abstracts, and stores them in a Postgres database.\n",
+    "\n",
+    "In the future, we can use the Postgres vector store to search the database for similar papers based on the embeddings - stay tuned!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import textwrap\n",
+    "import xml.etree.ElementTree as ET\n",
+    "from dataclasses import dataclass\n",
+    "from datetime import datetime\n",
+    "from typing import Annotated, Any\n",
+    "\n",
+    "import requests\n",
+    "\n",
+    "from semantic_kernel import Kernel\n",
+    "from semantic_kernel.connectors.ai import FunctionChoiceBehavior\n",
+    "from semantic_kernel.connectors.ai.open_ai import (\n",
+    "    AzureChatCompletion,\n",
+    "    AzureChatPromptExecutionSettings,\n",
+    "    AzureTextEmbedding,\n",
+    "    OpenAITextEmbedding,\n",
+    ")\n",
+    "from semantic_kernel.connectors.memory.postgres import PostgresCollection\n",
+    "from semantic_kernel.contents import ChatHistory\n",
+    "from semantic_kernel.data.vectors import (\n",
+    "    DistanceFunction,\n",
+    "    IndexKind,\n",
+    "    VectorStoreField,\n",
+    "    vectorstoremodel,\n",
+    ")\n",
+    "from semantic_kernel.functions import KernelParameterMetadata\n",
+    "from semantic_kernel.functions.kernel_arguments import KernelArguments"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Set up your environment\n",
+    "\n",
+    "You'll need to set up your environment to provide connection information to Postgres, as well as OpenAI or Azure OpenAI.\n",
+    "\n",
+    "To do this, copy the `.env.example` file to `.env` and fill in the necessary information.\n",
+    "\n",
+    "__Note__: If you're using VSCode to execute the notebook, the settings in `.env` in the root of the repository will be picked up automatically.\n",
+    "\n",
+    "### Postgres configuration\n",
+    "\n",
+    "You'll need to provide a connection string to a Postgres database. You can use a local Postgres instance, or a cloud-hosted one.\n",
+    "You can provide a connection string, or provide environment variables with the connection information. See the .env.example file for `POSTGRES_` settings.\n",
+    "\n",
+    "#### Using Docker\n",
+    "\n",
+    "You can also use Docker to bring up a Postgres instance by following the steps below:\n",
+    "\n",
+    "Create an `init.sql` that has the following:\n",
+    "\n",
+    "```sql\n",
+    "CREATE EXTENSION IF NOT EXISTS vector;\n",
+    "```\n",
+    "\n",
+    "Now you can start a postgres instance with the following:\n",
+    "\n",
+    "```\n",
+    "docker pull pgvector/pgvector:pg16\n",
+    "docker run --rm -it --name pgvector -p 5432:5432 -v ./init.sql:/docker-entrypoint-initdb.d/init.sql -e POSTGRES_PASSWORD=example pgvector/pgvector:pg16\n",
+    "```\n",
+    "\n",
+    "_Note_: Use `.\\init.sql` on Windows and `./init.sql` on WSL or Linux/Mac.\n",
+    "\n",
+    "Then you could use the connection string:\n",
+    "\n",
+    "```\n",
+    "POSTGRES_CONNECTION_STRING=\"host=localhost port=5432 dbname=postgres user=postgres password=example\"\n",
+    "```\n",
+    "\n",
+    "### OpenAI configuration\n",
+    "\n",
+    "You can either use OpenAI or Azure OpenAI APIs. You provide the API key and other configuration in the `.env` file. Set either the `OPENAI_` or `AZURE_OPENAI_` settings.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Path to the environment file\n",
+    "env_file_path = \".env\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here we set some additional configuration."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# -- ArXiv settings --\n",
+    "\n",
+    "# The search term to use when searching for papers on arXiv. All metadata fields for the papers are searched.\n",
+    "SEARCH_TERM = \"RAG\"\n",
+    "\n",
+    "# The category of papers to search for on arXiv. See https://arxiv.org/category_taxonomy for a list of categories.\n",
+    "ARVIX_CATEGORY = \"cs.AI\"\n",
+    "\n",
+    "# The maximum number of papers to search for on arXiv.\n",
+    "MAX_RESULTS = 300\n",
+    "\n",
+    "# -- OpenAI settings --\n",
+    "\n",
+    "# Set this flag to False to use the OpenAI API instead of Azure OpenAI\n",
+    "USE_AZURE_OPENAI = True"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here we define a vector store model. This model defines the table and column names for storing the embeddings. We use the `@vectorstoremodel` decorator to tell Semantic Kernel to create a vector store definition from the model. The `VectorStoreField` annotations define the fields that will be stored in the database, including key and vector fields."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@vectorstoremodel\n",
+    "@dataclass\n",
+    "class ArxivPaper:\n",
+    "    id: Annotated[str, VectorStoreField(\"key\")]\n",
+    "    title: Annotated[str, VectorStoreField(\"data\")]\n",
+    "    abstract: Annotated[str, VectorStoreField(\"data\")]\n",
+    "    published: Annotated[datetime, VectorStoreField(\"data\")]\n",
+    "    authors: Annotated[list[str], VectorStoreField(\"data\")]\n",
+    "    link: Annotated[str | None, VectorStoreField(\"data\")]\n",
+    "    abstract_vector: Annotated[\n",
+    "        list[float] | str | None,\n",
+    "        VectorStoreField(\n",
+    "            \"vector\",\n",
+    "            index_kind=IndexKind.HNSW,\n",
+    "            dimensions=1536,\n",
+    "            distance_function=DistanceFunction.COSINE_DISTANCE,\n",
+    "        ),\n",
+    "    ] = None\n",
+    "\n",
+    "    def __post_init__(self):\n",
+    "        if self.abstract_vector is None:\n",
+    "            self.abstract_vector = self.abstract\n",
+    "\n",
+    "    @classmethod\n",
+    "    def from_arxiv_info(cls, arxiv_info: dict[str, Any]) -> \"ArxivPaper\":\n",
+    "        return cls(\n",
+    "            id=arxiv_info[\"id\"],\n",
+    "            title=arxiv_info[\"title\"].replace(\"\\n  \", \" \"),\n",
+    "            abstract=arxiv_info[\"abstract\"].replace(\"\\n  \", \" \"),\n",
+    "            published=arxiv_info[\"published\"],\n",
+    "            authors=arxiv_info[\"authors\"],\n",
+    "            link=arxiv_info[\"link\"],\n",
+    "        )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Below is a function that queries the arXiv API for the most recently updated papers matching our search query and category."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def query_arxiv(search_query: str, category: str = \"cs.AI\", max_results: int = 10) -> list[dict[str, Any]]:\n",
+    "    \"\"\"\n",
+    "    Query the ArXiv API and return a list of dictionaries with relevant metadata for each paper.\n",
+    "\n",
+    "    Args:\n",
+    "        search_query: The search term or topic to query for.\n",
+    "        category: The category to restrict the search to (default is \"cs.AI\").\n",
+    "        See https://arxiv.org/category_taxonomy for a list of categories.\n",
+    "        max_results: Maximum number of results to retrieve (default is 10).\n",
+    "    \"\"\"\n",
+    "    response = requests.get(\n",
+    "        \"http://export.arxiv.org/api/query?\"\n",
+    "        f\"search_query=all:%22{search_query.replace(' ', '+')}%22\"\n",
+    "        f\"+AND+cat:{category}&start=0&max_results={max_results}&sortBy=lastUpdatedDate&sortOrder=descending\"\n",
+    "    )\n",
+    "\n",
+    "    root = ET.fromstring(response.content)\n",
+    "    ns = {\"atom\": \"http://www.w3.org/2005/Atom\"}\n",
+    "\n",
+    "    return [\n",
+    "        {\n",
+    "            \"id\": entry.find(\"atom:id\", ns).text.split(\"/\")[-1],\n",
+    "            \"title\": entry.find(\"atom:title\", ns).text,\n",
+    "            \"abstract\": entry.find(\"atom:summary\", ns).text,\n",
+    "            \"published\": entry.find(\"atom:published\", ns).text,\n",
+    "            \"link\": entry.find(\"atom:id\", ns).text,\n",
+    "            \"authors\": [author.find(\"atom:name\", ns).text for author in entry.findall(\"atom:author\", ns)],\n",
+    "            \"categories\": [category.get(\"term\") for category in entry.findall(\"atom:category\", ns)],\n",
+    "            \"pdf_link\": next(\n",
+    "                (link_tag.get(\"href\") for link_tag in entry.findall(\"atom:link\", ns) if link_tag.get(\"title\") == \"pdf\"),\n",
+    "                None,\n",
+    "            ),\n",
+    "        }\n",
+    "        for entry in root.findall(\"atom:entry\", ns)\n",
+    "    ]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We use this function to query papers and store them in memory as our model types."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found 300 papers on 'RAG'\n"
+     ]
+    }
+   ],
+   "source": [
+    "arxiv_papers: list[ArxivPaper] = [\n",
+    "    ArxivPaper.from_arxiv_info(paper)\n",
+    "    for paper in query_arxiv(SEARCH_TERM, category=ARVIX_CATEGORY, max_results=MAX_RESULTS)\n",
+    "]\n",
+    "\n",
+    "print(f\"Found {len(arxiv_papers)} papers on '{SEARCH_TERM}'\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Create a `PostgresCollection`, which represents the table in Postgres where we will store the paper information and embeddings."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if USE_AZURE_OPENAI:\n",
+    "    text_embedding = AzureTextEmbedding(service_id=\"embedding\", env_file_path=env_file_path)\n",
+    "else:\n",
+    "    text_embedding = OpenAITextEmbedding(service_id=\"embedding\", env_file_path=env_file_path)\n",
+    "collection = PostgresCollection[str, ArxivPaper](\n",
+    "    collection_name=\"arxiv_records\",\n",
+    "    record_type=ArxivPaper,\n",
+    "    env_file_path=env_file_path,\n",
+    "    embedding_generator=text_embedding,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we can write the models into the Postgres database. The collection was created with an embedding generator, which is used to produce the embeddings for the abstracts."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "async with collection:\n",
+    "    await collection.ensure_collection_exists()\n",
+    "    keys = await collection.upsert(arxiv_papers)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here we retrieve the first few models from the database and print out their information."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "# Engineering LLM Powered Multi-agent Framework for Autonomous CloudOps\n",
+      "\n",
+      "Abstract:   Cloud Operations (CloudOps) is a rapidly growing field focused on the\n",
+      "automated management and optimization of cloud infrastructure which is essential\n",
+      "for organizations navigating increasingly complex cloud environments. MontyCloud\n",
+      "Inc. is one of the major companies in the CloudOps domain that leverages\n",
+      "autonomous bots to manage cloud compliance, security, and continuous operations.\n",
+      "To make the platform more accessible and effective to the customers, we\n",
+      "leveraged the use of GenAI. Developing a GenAI-based solution for autonomous\n",
+      "CloudOps for the existing MontyCloud system presented us with various challenges\n",
+      "such as i) diverse data sources; ii) orchestration of multiple processes; and\n",
+      "iii) handling complex workflows to automate routine tasks. To this end, we\n",
+      "developed MOYA, a multi-agent framework that leverages GenAI and balances\n",
+      "autonomy with the necessary human control. This framework integrates various\n",
+      "internal and external systems and is optimized for factors like task\n",
+      "orchestration, security, and error mitigation while producing accurate,\n",
+      "reliable, and relevant insights by utilizing Retrieval Augmented Generation\n",
+      "(RAG). Evaluations of our multi-agent system with the help of practitioners as\n",
+      "well as using automated checks demonstrate enhanced accuracy, responsiveness,\n",
+      "and effectiveness over non-agentic approaches across complex workflows.\n",
+      "Published: 2025-01-14 16:30:10\n",
+      "Link: http://arxiv.org/abs/2501.08243v1\n",
+      "PDF Link: http://arxiv.org/abs/2501.08243v1\n",
+      "Authors: Kannan Parthasarathy, Karthik Vaidhyanathan, Rudra Dhar, Venkat Krishnamachari, Basil Muhammed, Adyansh Kakran, Sreemaee Akshathala, Shrikara Arun, Sumant Dubey, Mohan Veerubhotla, Amey Karan\n",
+      "Embedding: [ 0.01063822  0.02977918  0.04532182 ... -0.00264323  0.00081101\n",
+      "  0.01491571]\n",
+      "\n",
+      "\n",
+      "# Eliciting In-context Retrieval and Reasoning for Long-context Large Language Models\n",
+      "\n",
+      "Abstract:   Recent advancements in long-context language models (LCLMs) promise to\n",
+      "transform Retrieval-Augmented Generation (RAG) by simplifying pipelines. With\n",
+      "their expanded context windows, LCLMs can process entire knowledge bases and\n",
+      "perform retrieval and reasoning directly -- a capability we define as In-Context\n",
+      "Retrieval and Reasoning (ICR^2). However, existing benchmarks like LOFT often\n",
+      "overestimate LCLM performance by providing overly simplified contexts. To\n",
+      "address this, we introduce ICR^2, a benchmark that evaluates LCLMs in more\n",
+      "realistic scenarios by including confounding passages retrieved with strong\n",
+      "retrievers. We then propose three methods to enhance LCLM performance: (1)\n",
+      "retrieve-then-generate fine-tuning, (2) retrieval-attention-probing, which uses\n",
+      "attention heads to filter and de-noise long contexts during decoding, and (3)\n",
+      "joint retrieval head training alongside the generation head. Our evaluation of\n",
+      "five well-known LCLMs on LOFT and ICR^2 demonstrates significant gains with our\n",
+      "best approach applied to Mistral-7B: +17 and +15 points by Exact Match on LOFT,\n",
+      "and +13 and +2 points on ICR^2, compared to vanilla RAG and supervised fine-\n",
+      "tuning, respectively. It even outperforms GPT-4-Turbo on most tasks despite\n",
+      "being a much smaller model.\n",
+      "Published: 2025-01-14 16:38:33\n",
+      "Link: http://arxiv.org/abs/2501.08248v1\n",
+      "PDF Link: http://arxiv.org/abs/2501.08248v1\n",
+      "Authors: Yifu Qiu, Varun Embar, Yizhe Zhang, Navdeep Jaitly, Shay B. Cohen, Benjamin Han\n",
+      "Embedding: [-0.01305697  0.01166064  0.06267344 ... -0.01627254  0.00974741\n",
+      " -0.00573298]\n",
+      "\n",
+      "\n",
+      "# ADAM-1: AI and Bioinformatics for Alzheimer's Detection and Microbiome-Clinical Data Integrations\n",
+      "\n",
+      "Abstract:   The Alzheimer's Disease Analysis Model Generation 1 (ADAM) is a multi-agent\n",
+      "large language model (LLM) framework designed to integrate and analyze multi-\n",
+      "modal data, including microbiome profiles, clinical datasets, and external\n",
+      "knowledge bases, to enhance the understanding and detection of Alzheimer's\n",
+      "disease (AD). By leveraging retrieval-augmented generation (RAG) techniques\n",
+      "along with its multi-agent architecture, ADAM-1 synthesizes insights from\n",
+      "diverse data sources and contextualizes findings using literature-driven\n",
+      "evidence. Comparative evaluation against XGBoost revealed similar mean F1 scores\n",
+      "but significantly reduced variance for ADAM-1, highlighting its robustness and\n",
+      "consistency, particularly in small laboratory datasets. While currently tailored\n",
+      "for binary classification tasks, future iterations aim to incorporate additional\n",
+      "data modalities, such as neuroimaging and biomarkers, to broaden the scalability\n",
+      "and applicability for Alzheimer's research and diagnostics.\n",
+      "Published: 2025-01-14 18:56:33\n",
+      "Link: http://arxiv.org/abs/2501.08324v1\n",
+      "PDF Link: http://arxiv.org/abs/2501.08324v1\n",
+      "Authors: Ziyuan Huang, Vishaldeep Kaur Sekhon, Ouyang Guo, Mark Newman, Roozbeh Sadeghian, Maria L. Vaida, Cynthia Jo, Doyle Ward, Vanni Bucci, John P. Haran\n",
+      "Embedding: [ 0.03896349  0.00422515  0.05525447 ...  0.03374933 -0.01468264\n",
+      "  0.01850895]\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "async with collection:\n",
+    "    results = await collection.get(keys[:3])\n",
+    "    if results:\n",
+    "        for result in results:\n",
+    "            print(f\"# {result.title}\")\n",
+    "            print()\n",
+    "            wrapped_abstract = textwrap.fill(result.abstract, width=80)\n",
+    "            print(f\"Abstract: {wrapped_abstract}\")\n",
+    "            print(f\"Published: {result.published}\")\n",
+    "            print(f\"Link: {result.link}\")\n",
+    "            print(f\"PDF Link: {result.link}\")\n",
+    "            print(f\"Authors: {', '.join(result.authors)}\")\n",
+    "            print(f\"Embedding: {result.abstract_vector}\")\n",
+    "            print()\n",
+    "            print()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The collection's `search` method gives us the ability to retrieve semantically similar documents directly from a prompt.\n",
+    "Here we search for the top 5 arXiv abstracts in our database that are similar to a query about chunking strategies in RAG applications:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found 5 results for query.\n",
+      "Advanced ingestion process powered by LLM parsing for RAG system: 0.38676463602221456\n",
+      "StructRAG: Boosting Knowledge Intensive Reasoning of LLMs via Inference-time Hybrid Information Structurization: 0.39733734194342085\n",
+      "UDA: A Benchmark Suite for Retrieval Augmented Generation in Real-world Document Analysis: 0.3981809737466562\n",
+      "R^2AG: Incorporating Retrieval Information into Retrieval Augmented Generation: 0.4134050114864055\n",
+      "Enhancing Retrieval-Augmented Generation: A Study of Best Practices: 0.4144733752075731\n"
+     ]
+    }
+   ],
+   "source": [
+    "query = \"What are good chunking strategies to use for unstructured text in Retrieval-Augmented Generation applications?\"\n",
+    "\n",
+    "async with collection:\n",
+    "    search_results = await collection.search(query, top=5, include_total_count=True)\n",
+    "    print(f\"Found {search_results.total_count} results for query.\")\n",
+    "    async for search_result in search_results.results:\n",
+    "        title = search_result.record.title\n",
+    "        score = search_result.score\n",
+    "        print(f\"{title}: {score}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can enable chat completion to utilize the text search by creating a kernel function for searching the database..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "kernel = Kernel()\n",
+    "plugin = kernel.add_functions(\n",
+    "    plugin_name=\"arxiv_plugin\",\n",
+    "    functions=[\n",
+    "        collection.create_search_function(\n",
+    "            # The default parameters match the parameters of the VectorSearchOptions class.\n",
+    "            description=\"Searches for ArXiv papers that are related to the query.\",\n",
+    "            parameters=[\n",
+    "                KernelParameterMetadata(\n",
+    "                    name=\"query\", description=\"What to search for.\", type=\"str\", is_required=True, type_object=str\n",
+    "                ),\n",
+    "                KernelParameterMetadata(\n",
+    "                    name=\"top\",\n",
+    "                    description=\"Number of results to return.\",\n",
+    "                    type=\"int\",\n",
+    "                    default_value=2,\n",
+    "                    type_object=int,\n",
+    "                ),\n",
+    "            ],\n",
+    "        ),\n",
+    "    ],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "...and then setting up a chat completion service that uses `FunctionChoiceBehavior.Auto` to automatically call the search function when appropriate for the user's query. We also create the chat function that will be invoked by the kernel."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create the chat completion service. This requires an Azure OpenAI completions model deployment and configuration.\n",
+    "chat_completion = AzureChatCompletion(service_id=\"completions\")\n",
+    "kernel.add_service(chat_completion)\n",
+    "\n",
+    "# Now we create the chat function that will use the chat service.\n",
+    "chat_function = kernel.add_function(\n",
+    "    prompt=\"{{$chat_history}}{{$user_input}}\",\n",
+    "    plugin_name=\"ChatBot\",\n",
+    "    function_name=\"Chat\",\n",
+    ")\n",
+    "\n",
+    "# we set the function choice to Auto, so that the LLM can choose the correct function to call.\n",
+    "# and we exclude the ChatBot plugin, so that it does not call itself.\n",
+    "execution_settings = AzureChatPromptExecutionSettings(\n",
+    "    function_choice_behavior=FunctionChoiceBehavior.Auto(filters={\"excluded_plugins\": [\"ChatBot\"]}),\n",
+    "    service_id=\"chat\",\n",
+    "    max_tokens=7000,\n",
+    "    temperature=0.7,\n",
+    "    top_p=0.8,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here we create a chat history with a system message and some initial context:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "history = ChatHistory()\n",
+    "system_message = \"\"\"\n",
+    "You are a chat bot. Your name is Archie and\n",
+    "you have one goal: help people find answers\n",
+    "to technical questions by relying on the latest\n",
+    "research papers published on ArXiv.\n",
+    "You communicate effectively in the style of a helpful librarian. \n",
+    "You always make sure to include the\n",
+    "ArXiV paper references in your responses.\n",
+    "If you cannot find the answer in the papers,\n",
+    "you will let the user know, but also provide the papers\n",
+    "you did find to be most relevant. If the abstract of the \n",
+    "paper does not specifically reference the user's inquiry,\n",
+    "but you believe it might be relevant, you can still include it\n",
+    "BUT you must make sure to mention that the paper might not directly\n",
+    "address the user's inquiry. Make certain that the papers you link are\n",
+    "from a specific search result.\n",
+    "\"\"\"\n",
+    "history.add_system_message(system_message)\n",
+    "history.add_user_message(\"Hi there, who are you?\")\n",
+    "history.add_assistant_message(\n",
+    "    \"I am Archie, the ArXiV chat bot. I'm here to help you find the latest research papers from ArXiv that relate to your inquiries.\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can now invoke the chat function via the Kernel to get chat completions:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "arguments = KernelArguments(\n",
+    "    user_input=query,\n",
+    "    chat_history=history,\n",
+    "    settings=execution_settings,\n",
+    ")\n",
+    "\n",
+    "result = await kernel.invoke(chat_function, arguments=arguments)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Printing the result shows that the chat completion service used our text search to locate relevant arXiv papers based on the query:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Archie:>\n",
+      "What an excellent and timely question! Chunking strategies for unstructured text are\n",
+      "critical for optimizing Retrieval-Augmented Generation (RAG) systems since they\n",
+      "significantly affect how effectively a RAG model can retrieve and generate contextually\n",
+      "relevant information. Let me consult the latest papers on this topic from ArXiv and\n",
+      "provide you with relevant insights.\n",
+      "---\n",
+      "Here are some recent papers that dive into chunking strategies or similar concepts for\n",
+      "retrieval-augmented frameworks:\n",
+      "1. **\"Post-training optimization of retrieval-augmented generation models\"**\n",
+      "   *Authors*: Vibhor Agarwal et al.\n",
+      "   *Abstract*: While the paper discusses optimization strategies for retrieval-augmented\n",
+      "generation models, there is a discussion on handling unstructured text that could apply to\n",
+      "chunking methodologies. Chunking isn't always explicitly mentioned as \"chunking\" but may\n",
+      "be referred to in contexts like splitting data for retrieval.\n",
+      "   *ArXiv link*: [arXiv:2308.10701](https://arxiv.org/abs/2308.10701)\n",
+      "   *Note*: This paper may not focus entirely on chunking strategies but might discuss\n",
+      "relevant downstream considerations. It could still provide a foundation for you to explore\n",
+      "how chunking integrates with retrievers.\n",
+      "2. **\"Beyond Text: Retrieval-Augmented Reranking for Open-Domain Tasks\"**\n",
+      "   *Authors*: Younggyo Seo et al.\n",
+      "   *Abstract*: Although primarily focused on retrieval augmentation for reranking, there\n",
+      "are reflections on how document structure impacts task performance. Chunking unstructured\n",
+      "text to improve retrievability for such tasks could indirectly relate to this work.\n",
+      "   *ArXiv link*: [arXiv:2310.03714](https://arxiv.org/abs/2310.03714)\n",
+      "3. **\"ALMA: Alignment of Generative and Retrieval Models for Long Documents\"**\n",
+      "   *Authors*: Yao Fu et al.\n",
+      "   *Abstract excerpt*: \"Our approach is designed to handle retrieval and generation for\n",
+      "long documents by aligning the retrieval and generation models more effectively.\"\n",
+      "Strategies to divide and process long documents into smaller chunks for efficient\n",
+      "alignment are explicitly discussed. A focus on handling unstructured long-form content\n",
+      "makes this paper highly relevant.\n",
+      "   *ArXiv link*: [arXiv:2308.05467](https://arxiv.org/abs/2308.05467)\n",
+      "4. **\"Enhancing Context-aware Question Generation with Multi-modal Knowledge\"**\n",
+      "   *Authors*: Jialong Han et al.\n",
+      "   *Abstract excerpt*: \"Proposed techniques focus on improving retrievals through better\n",
+      "division of available knowledge.\" It doesn’t focus solely on text chunking in the RAG\n",
+      "framework but might be interesting since contextual awareness often relates to\n",
+      "preprocessing unstructured input into structured chunks.\n",
+      "   *ArXiv link*: [arXiv:2307.12345](https://arxiv.org/abs/2307.12345)\n",
+      "---\n",
+      "### Practical Approaches Discussed in Literature:\n",
+      "From my broad understanding of RAG systems and some of the details in these papers, here\n",
+      "are common chunking strategies discussed in the research community:\n",
+      "1. **Sliding Window Approach**: Divide the text into overlapping chunks of fixed lengths\n",
+      "(e.g., 512 tokens with an overlap of 128 tokens). This helps ensure no important context\n",
+      "is left behind when chunks are created.\n",
+      "\n",
+      "2. **Semantic Chunking**: Use sentence embeddings or clustering techniques (e.g., via Bi-\n",
+      "Encoders or Sentence Transformers) to ensure chunks align semantically rather than naively\n",
+      "by token count.\n",
+      "3. **Dynamic Partitioning**: Implement chunking based on higher-order structure in the\n",
+      "text, such as splitting at sentence boundaries, paragraph breaks, or logical sections.\n",
+      "4. **Content-aware Chunking**: Experiment with LLMs to pre-identify contextual relevance\n",
+      "of different parts of the text and chunk accordingly.\n",
+      "---\n",
+      "If you'd like, I can search more specifically on a sub-part of chunking strategies or\n",
+      "related RAG optimizations. Let me know!\n"
+     ]
+    }
+   ],
+   "source": [
+    "def wrap_text(text, width=90):\n",
+    "    paragraphs = text.split(\"\\n\\n\")  # Split the text into paragraphs\n",
+    "    wrapped_paragraphs = [\n",
+    "        \"\\n\".join(textwrap.fill(part, width=width) for paragraph in paragraphs for part in paragraph.split(\"\\n\"))\n",
+    "    ]  # Wrap each paragraph, split by newlines\n",
+    "    return \"\\n\\n\".join(wrapped_paragraphs)  # Join the wrapped paragraphs back together\n",
+    "\n",
+    "\n",
+    "print(f\"Archie:>\\n{wrap_text(str(result))}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.15"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/python/semantic_kernel/connectors/memory/azure_ai_search.py b/python/semantic_kernel/connectors/memory/azure_ai_search.py
index de041b5c502c..a21c6d3d759d 100644
--- a/python/semantic_kernel/connectors/memory/azure_ai_search.py
+++ b/python/semantic_kernel/connectors/memory/azure_ai_search.py
@@ -29,18 +29,20 @@
 from pydantic import SecretStr, ValidationError
 
 from semantic_kernel.connectors.ai.embedding_generator_base import EmbeddingGeneratorBase
-from semantic_kernel.data.const import DistanceFunction, IndexKind
-from semantic_kernel.data.definitions import FieldTypes, VectorStoreCollectionDefinition
-from semantic_kernel.data.search import KernelSearchResults
+from semantic_kernel.data._search import KernelSearchResults
 from semantic_kernel.data.vectors import (
+    DistanceFunction,
+    FieldTypes,
     GetFilteredRecordOptions,
+    IndexKind,
     SearchType,
     TModel,
     VectorSearch,
     VectorSearchOptions,
     VectorSearchResult,
     VectorStore,
-    VectorStoreRecordCollection,
+    VectorStoreCollection,
+    VectorStoreCollectionDefinition,
     _get_collection_name_from_model,
 )
 from semantic_kernel.exceptions import (
@@ -276,7 +278,7 @@ def _definition_to_azure_ai_search_index(
 
 @release_candidate
 class AzureAISearchCollection(
-    VectorStoreRecordCollection[TKey, TModel],
+    VectorStoreCollection[TKey, TModel],
     VectorSearch[TKey, TModel],
     Generic[TKey, TModel],
 ):
@@ -432,12 +434,11 @@ async def _inner_get(
         if options is not None:
             ordering = []
             if options.order_by:
-                order_by = options.order_by if isinstance(options.order_by, Sequence) else [options.order_by]
-                for order in order_by:
-                    if order.field not in self.definition.storage_names:
-                        logger.warning(f"Field {order.field} not in data model, skipping.")
+                for field, asc_flag in options.order_by.items():
+                    if field not in self.definition.storage_names:
+                        logger.warning(f"Field {field} not in data model, skipping.")
                         continue
-                    ordering.append(order.field if order.ascending else f"{order.field} desc")
+                    ordering.append(field if asc_flag else f"{field} desc")
 
             result = await client.search(
                 search_text="*",
diff --git a/python/semantic_kernel/connectors/memory/azure_cosmos_db.py b/python/semantic_kernel/connectors/memory/azure_cosmos_db.py
index 6ce200dc3b4f..15168819e6be 100644
--- a/python/semantic_kernel/connectors/memory/azure_cosmos_db.py
+++ b/python/semantic_kernel/connectors/memory/azure_cosmos_db.py
@@ -23,18 +23,20 @@
     MongoDBAtlasCollection,
     MongoDBAtlasStore,
 )
-from semantic_kernel.data.const import DistanceFunction, IndexKind
-from semantic_kernel.data.definitions import FieldTypes, VectorStoreCollectionDefinition
-from semantic_kernel.data.search import KernelSearchResults
+from semantic_kernel.data._search import KernelSearchResults
 from semantic_kernel.data.vectors import (
+    DistanceFunction,
+    FieldTypes,
     GetFilteredRecordOptions,
+    IndexKind,
     SearchType,
     TModel,
     VectorSearch,
     VectorSearchOptions,
     VectorSearchResult,
     VectorStore,
-    VectorStoreRecordCollection,
+    VectorStoreCollection,
+    VectorStoreCollectionDefinition,
     _get_collection_name_from_model,
 )
 from semantic_kernel.exceptions import (
@@ -654,7 +656,7 @@ async def _get_container_proxy(self, container_name: str, **kwargs) -> Container
 @release_candidate
 class CosmosNoSqlCollection(
     CosmosNoSqlBase,
-    VectorStoreRecordCollection[TNoSQLKey, TModel],
+    VectorStoreCollection[TNoSQLKey, TModel],
     VectorSearch[TNoSQLKey, TModel],
     Generic[TNoSQLKey, TModel],
 ):
diff --git a/python/semantic_kernel/connectors/memory/chroma.py b/python/semantic_kernel/connectors/memory/chroma.py
index cbe86665a87b..3c1d451edf6b 100644
--- a/python/semantic_kernel/connectors/memory/chroma.py
+++ b/python/semantic_kernel/connectors/memory/chroma.py
@@ -13,18 +13,19 @@
 from chromadb.config import Settings
 
 from semantic_kernel.connectors.ai.embedding_generator_base import EmbeddingGeneratorBase
-from semantic_kernel.data.const import DistanceFunction, IndexKind
-from semantic_kernel.data.definitions import VectorStoreCollectionDefinition
-from semantic_kernel.data.search import KernelSearchResults
+from semantic_kernel.data._search import KernelSearchResults
 from semantic_kernel.data.vectors import (
+    DistanceFunction,
     GetFilteredRecordOptions,
+    IndexKind,
     SearchType,
     TModel,
     VectorSearch,
     VectorSearchOptions,
     VectorSearchResult,
     VectorStore,
-    VectorStoreRecordCollection,
+    VectorStoreCollection,
+    VectorStoreCollectionDefinition,
     _get_collection_name_from_model,
 )
 from semantic_kernel.exceptions.vector_store_exceptions import (
@@ -60,7 +61,7 @@
 
 @release_candidate
 class ChromaCollection(
-    VectorStoreRecordCollection[TKey, TModel],
+    VectorStoreCollection[TKey, TModel],
     VectorSearch[TKey, TModel],
     Generic[TKey, TModel],
 ):
diff --git a/python/semantic_kernel/connectors/memory/faiss.py b/python/semantic_kernel/connectors/memory/faiss.py
index d9aaa7038efa..ecc1a106c941 100644
--- a/python/semantic_kernel/connectors/memory/faiss.py
+++ b/python/semantic_kernel/connectors/memory/faiss.py
@@ -10,10 +10,17 @@
 
 from semantic_kernel.connectors.ai.embedding_generator_base import EmbeddingGeneratorBase
 from semantic_kernel.connectors.memory.in_memory import IN_MEMORY_SCORE_KEY, InMemoryCollection, InMemoryStore, TKey
-from semantic_kernel.data.const import DistanceFunction, IndexKind
-from semantic_kernel.data.definitions import VectorStoreCollectionDefinition, VectorStoreField
-from semantic_kernel.data.search import KernelSearchResults
-from semantic_kernel.data.vectors import SearchType, TModel, VectorSearchOptions, VectorSearchResult
+from semantic_kernel.data._search import KernelSearchResults
+from semantic_kernel.data.vectors import (
+    DistanceFunction,
+    IndexKind,
+    SearchType,
+    TModel,
+    VectorSearchOptions,
+    VectorSearchResult,
+    VectorStoreCollectionDefinition,
+    VectorStoreField,
+)
 from semantic_kernel.exceptions import VectorStoreInitializationException, VectorStoreOperationException
 from semantic_kernel.exceptions.vector_store_exceptions import VectorStoreModelException
 
diff --git a/python/semantic_kernel/connectors/memory/in_memory.py b/python/semantic_kernel/connectors/memory/in_memory.py
index 636f0526ed53..909599c71eb5 100644
--- a/python/semantic_kernel/connectors/memory/in_memory.py
+++ b/python/semantic_kernel/connectors/memory/in_memory.py
@@ -11,10 +11,10 @@
 from typing_extensions import override
 
 from semantic_kernel.connectors.ai.embedding_generator_base import EmbeddingGeneratorBase
-from semantic_kernel.data.const import DISTANCE_FUNCTION_DIRECTION_HELPER, DistanceFunction
-from semantic_kernel.data.definitions import VectorStoreCollectionDefinition
-from semantic_kernel.data.search import KernelSearchResults
+from semantic_kernel.data._search import KernelSearchResults
 from semantic_kernel.data.vectors import (
+    DISTANCE_FUNCTION_DIRECTION_HELPER,
+    DistanceFunction,
     GetFilteredRecordOptions,
     SearchType,
     TModel,
@@ -22,7 +22,8 @@
     VectorSearchOptions,
     VectorSearchResult,
     VectorStore,
-    VectorStoreRecordCollection,
+    VectorStoreCollection,
+    VectorStoreCollectionDefinition,
 )
 from semantic_kernel.exceptions import VectorSearchExecutionException, VectorStoreModelValidationError
 from semantic_kernel.exceptions.vector_store_exceptions import VectorStoreModelException, VectorStoreOperationException
@@ -81,7 +82,7 @@ def __delattr__(self, name) -> None:
 
 
 class InMemoryCollection(
-    VectorStoreRecordCollection[TKey, TModel],
+    VectorStoreCollection[TKey, TModel],
     VectorSearch[TKey, TModel],
     Generic[TKey, TModel],
 ):
diff --git a/python/semantic_kernel/connectors/memory/mongodb.py b/python/semantic_kernel/connectors/memory/mongodb.py
index 102e96008282..b4318b5f81ad 100644
--- a/python/semantic_kernel/connectors/memory/mongodb.py
+++ b/python/semantic_kernel/connectors/memory/mongodb.py
@@ -15,10 +15,9 @@
 from pymongo.operations import SearchIndexModel
 
 from semantic_kernel.connectors.ai.embedding_generator_base import EmbeddingGeneratorBase
-from semantic_kernel.data.const import DistanceFunction
-from semantic_kernel.data.definitions import VectorStoreCollectionDefinition, VectorStoreField
-from semantic_kernel.data.search import KernelSearchResults
+from semantic_kernel.data._search import KernelSearchResults
 from semantic_kernel.data.vectors import (
+    DistanceFunction,
     GetFilteredRecordOptions,
     SearchType,
     TModel,
@@ -26,7 +25,9 @@
     VectorSearchOptions,
     VectorSearchResult,
     VectorStore,
-    VectorStoreRecordCollection,
+    VectorStoreCollection,
+    VectorStoreCollectionDefinition,
+    VectorStoreField,
     _get_collection_name_from_model,
 )
 from semantic_kernel.exceptions import (
@@ -150,7 +151,7 @@ def _create_index_definitions(
 
 @release_candidate
 class MongoDBAtlasCollection(
-    VectorStoreRecordCollection[TKey, TModel],
+    VectorStoreCollection[TKey, TModel],
     VectorSearch[TKey, TModel],
     Generic[TKey, TModel],
 ):
diff --git a/python/semantic_kernel/connectors/memory/pinecone.py b/python/semantic_kernel/connectors/memory/pinecone.py
index d0387b65bf02..c813dd33e741 100644
--- a/python/semantic_kernel/connectors/memory/pinecone.py
+++ b/python/semantic_kernel/connectors/memory/pinecone.py
@@ -13,10 +13,9 @@
 from pydantic import SecretStr, ValidationError
 
 from semantic_kernel.connectors.ai.embedding_generator_base import EmbeddingGeneratorBase
-from semantic_kernel.data.const import DistanceFunction
-from semantic_kernel.data.definitions import VectorStoreCollectionDefinition, VectorStoreField
-from semantic_kernel.data.search import KernelSearchResults
+from semantic_kernel.data._search import KernelSearchResults
 from semantic_kernel.data.vectors import (
+    DistanceFunction,
     GetFilteredRecordOptions,
     SearchType,
     TModel,
@@ -24,7 +23,9 @@
     VectorSearchOptions,
     VectorSearchResult,
     VectorStore,
-    VectorStoreRecordCollection,
+    VectorStoreCollection,
+    VectorStoreCollectionDefinition,
+    VectorStoreField,
     _get_collection_name_from_model,
 )
 from semantic_kernel.exceptions.vector_store_exceptions import (
@@ -73,7 +74,7 @@ class PineconeSettings(KernelBaseSettings):
 
 @release_candidate
 class PineconeCollection(
-    VectorStoreRecordCollection[TKey, TModel],
+    VectorStoreCollection[TKey, TModel],
     VectorSearch[TKey, TModel],
     Generic[TKey, TModel],
 ):
diff --git a/python/semantic_kernel/connectors/memory/postgres.py b/python/semantic_kernel/connectors/memory/postgres.py
index 2e2edb9187be..f4ac2584b1dc 100644
--- a/python/semantic_kernel/connectors/memory/postgres.py
+++ b/python/semantic_kernel/connectors/memory/postgres.py
@@ -17,18 +17,21 @@
 from pydantic_settings import SettingsConfigDict
 
 from semantic_kernel.connectors.ai.embedding_generator_base import EmbeddingGeneratorBase
-from semantic_kernel.data.const import DistanceFunction, IndexKind
-from semantic_kernel.data.definitions import FieldTypes, VectorStoreCollectionDefinition, VectorStoreField
-from semantic_kernel.data.search import KernelSearchResults
+from semantic_kernel.data._search import KernelSearchResults
 from semantic_kernel.data.vectors import (
+    DistanceFunction,
+    FieldTypes,
     GetFilteredRecordOptions,
+    IndexKind,
     SearchType,
     TModel,
     VectorSearch,
     VectorSearchOptions,
     VectorSearchResult,
     VectorStore,
-    VectorStoreRecordCollection,
+    VectorStoreCollection,
+    VectorStoreCollectionDefinition,
+    VectorStoreField,
 )
 from semantic_kernel.exceptions import VectorStoreModelValidationError, VectorStoreOperationException
 from semantic_kernel.exceptions.memory_connector_exceptions import MemoryConnectorConnectionException
@@ -301,7 +304,7 @@ async def create_connection_pool(
 
 @release_candidate
 class PostgresCollection(
-    VectorStoreRecordCollection[TKey, TModel],
+    VectorStoreCollection[TKey, TModel],
     VectorSearch[TKey, TModel],
     Generic[TKey, TModel],
 ):
diff --git a/python/semantic_kernel/connectors/memory/qdrant.py b/python/semantic_kernel/connectors/memory/qdrant.py
index bd49427331de..124401ebd5cb 100644
--- a/python/semantic_kernel/connectors/memory/qdrant.py
+++ b/python/semantic_kernel/connectors/memory/qdrant.py
@@ -28,18 +28,19 @@
 from typing_extensions import override
 
 from semantic_kernel.connectors.ai.embedding_generator_base import EmbeddingGeneratorBase
-from semantic_kernel.data.const import DistanceFunction, IndexKind
-from semantic_kernel.data.definitions import VectorStoreCollectionDefinition
-from semantic_kernel.data.search import KernelSearchResults
+from semantic_kernel.data._search import KernelSearchResults
 from semantic_kernel.data.vectors import (
+    DistanceFunction,
     GetFilteredRecordOptions,
+    IndexKind,
     SearchType,
     TModel,
     VectorSearch,
     VectorSearchOptions,
     VectorSearchResult,
     VectorStore,
-    VectorStoreRecordCollection,
+    VectorStoreCollection,
+    VectorStoreCollectionDefinition,
 )
 from semantic_kernel.exceptions import (
     VectorSearchExecutionException,
@@ -120,7 +121,7 @@ def model_dump(self, **kwargs):
 
 @release_candidate
 class QdrantCollection(
-    VectorStoreRecordCollection[TKey, TModel],
+    VectorStoreCollection[TKey, TModel],
     VectorSearch[TKey, TModel],
     Generic[TKey, TModel],
 ):
diff --git a/python/semantic_kernel/connectors/memory/redis.py b/python/semantic_kernel/connectors/memory/redis.py
index 61a7472529ff..ae9394275bfe 100644
--- a/python/semantic_kernel/connectors/memory/redis.py
+++ b/python/semantic_kernel/connectors/memory/redis.py
@@ -24,18 +24,21 @@
 from redisvl.schema import StorageType
 
 from semantic_kernel.connectors.ai.embedding_generator_base import EmbeddingGeneratorBase
-from semantic_kernel.data.const import DistanceFunction, IndexKind
-from semantic_kernel.data.definitions import FieldTypes, VectorStoreCollectionDefinition, VectorStoreField
-from semantic_kernel.data.search import KernelSearchResults
+from semantic_kernel.data._search import KernelSearchResults
 from semantic_kernel.data.vectors import (
+    DistanceFunction,
+    FieldTypes,
     GetFilteredRecordOptions,
+    IndexKind,
     SearchType,
     TModel,
     VectorSearch,
     VectorSearchOptions,
     VectorSearchResult,
     VectorStore,
-    VectorStoreRecordCollection,
+    VectorStoreCollection,
+    VectorStoreCollectionDefinition,
+    VectorStoreField,
 )
 from semantic_kernel.exceptions import (
     VectorSearchExecutionException,
@@ -179,7 +182,7 @@ class RedisSettings(KernelBaseSettings):
 
 @release_candidate
 class RedisCollection(
-    VectorStoreRecordCollection[TKey, TModel],
+    VectorStoreCollection[TKey, TModel],
     VectorSearch[TKey, TModel],
     Generic[TKey, TModel],
 ):
diff --git a/python/semantic_kernel/connectors/memory/sql_server.py b/python/semantic_kernel/connectors/memory/sql_server.py
index 79de3cd3d6a4..b579d228eae2 100644
--- a/python/semantic_kernel/connectors/memory/sql_server.py
+++ b/python/semantic_kernel/connectors/memory/sql_server.py
@@ -17,17 +17,20 @@
 from pydantic import SecretStr, ValidationError, field_validator
 
 from semantic_kernel.connectors.ai.embedding_generator_base import EmbeddingGeneratorBase
-from semantic_kernel.data.const import DISTANCE_FUNCTION_DIRECTION_HELPER, DistanceFunction, IndexKind
-from semantic_kernel.data.definitions import VectorStoreCollectionDefinition, VectorStoreField
-from semantic_kernel.data.search import KernelSearchResults
+from semantic_kernel.data._search import KernelSearchResults
 from semantic_kernel.data.vectors import (
+    DISTANCE_FUNCTION_DIRECTION_HELPER,
+    DistanceFunction,
     GetFilteredRecordOptions,
+    IndexKind,
     SearchType,
     VectorSearch,
     VectorSearchOptions,
     VectorSearchResult,
     VectorStore,
-    VectorStoreRecordCollection,
+    VectorStoreCollection,
+    VectorStoreCollectionDefinition,
+    VectorStoreField,
 )
 from semantic_kernel.exceptions import (
     VectorSearchExecutionException,
@@ -268,7 +271,7 @@ async def _get_mssql_connection(settings: SqlSettings) -> "Connection":
 
 @release_candidate
 class SqlServerCollection(
-    VectorStoreRecordCollection[TKey, TModel],
+    VectorStoreCollection[TKey, TModel],
     VectorSearch[TKey, TModel],
     Generic[TKey, TModel],
 ):
diff --git a/python/semantic_kernel/connectors/memory/weaviate.py b/python/semantic_kernel/connectors/memory/weaviate.py
index e86bdfa75437..6fb843b69bbb 100644
--- a/python/semantic_kernel/connectors/memory/weaviate.py
+++ b/python/semantic_kernel/connectors/memory/weaviate.py
@@ -20,18 +20,20 @@
 from weaviate.exceptions import WeaviateClosedClientError, WeaviateConnectionError
 
 from semantic_kernel.connectors.ai.embedding_generator_base import EmbeddingGeneratorBase
-from semantic_kernel.data.const import DistanceFunction, IndexKind
-from semantic_kernel.data.definitions import VectorStoreCollectionDefinition, VectorStoreField
-from semantic_kernel.data.search import KernelSearchResults
+from semantic_kernel.data._search import KernelSearchResults
 from semantic_kernel.data.vectors import (
+    DistanceFunction,
     GetFilteredRecordOptions,
+    IndexKind,
     SearchType,
     TModel,
     VectorSearch,
     VectorSearchOptions,
     VectorSearchResult,
     VectorStore,
-    VectorStoreRecordCollection,
+    VectorStoreCollection,
+    VectorStoreCollectionDefinition,
+    VectorStoreField,
 )
 from semantic_kernel.exceptions import (
     ServiceInvalidExecutionSettingsError,
@@ -194,7 +196,7 @@ def is_using_client_embedding(cls, data: dict[str, Any]) -> bool:
 
 @release_candidate
 class WeaviateCollection(
-    VectorStoreRecordCollection[TKey, TModel],
+    VectorStoreCollection[TKey, TModel],
     VectorSearch[TKey, TModel],
     Generic[TKey, TModel],
 ):
diff --git a/python/semantic_kernel/connectors/search/brave.py b/python/semantic_kernel/connectors/search/brave.py
index bf8bb2b8d157..f38c5631e100 100644
--- a/python/semantic_kernel/connectors/search/brave.py
+++ b/python/semantic_kernel/connectors/search/brave.py
@@ -11,13 +11,8 @@
 from pydantic import Field, SecretStr, ValidationError
 
 from semantic_kernel.connectors.search.utils import SearchLambdaVisitor
-from semantic_kernel.data.search import (
-    KernelSearchResults,
-    SearchOptions,
-    TextSearch,
-    TextSearchResult,
-    TSearchResult,
-)
+from semantic_kernel.data._search import KernelSearchResults, SearchOptions
+from semantic_kernel.data.text_search import TextSearch, TextSearchResult, TSearchResult
 from semantic_kernel.exceptions import ServiceInitializationError, ServiceInvalidRequestError
 from semantic_kernel.kernel_pydantic import KernelBaseModel, KernelBaseSettings
 from semantic_kernel.kernel_types import OptionalOneOrList
diff --git a/python/semantic_kernel/connectors/search/google.py b/python/semantic_kernel/connectors/search/google.py
index 57c42085fd70..3c5d84ff8e9b 100644
--- a/python/semantic_kernel/connectors/search/google.py
+++ b/python/semantic_kernel/connectors/search/google.py
@@ -12,13 +12,8 @@
 from pydantic import Field, SecretStr, ValidationError
 
 from semantic_kernel.connectors.search.utils import SearchLambdaVisitor
-from semantic_kernel.data.search import (
-    KernelSearchResults,
-    SearchOptions,
-    TextSearch,
-    TextSearchResult,
-    TSearchResult,
-)
+from semantic_kernel.data._search import KernelSearchResults, SearchOptions
+from semantic_kernel.data.text_search import TextSearch, TextSearchResult, TSearchResult
 from semantic_kernel.exceptions import ServiceInitializationError, ServiceInvalidRequestError
 from semantic_kernel.kernel_pydantic import KernelBaseModel, KernelBaseSettings
 from semantic_kernel.kernel_types import OptionalOneOrList
diff --git a/python/semantic_kernel/data/__init__.py b/python/semantic_kernel/data/__init__.py
index f7c5854f66e0..e69de29bb2d1 100644
--- a/python/semantic_kernel/data/__init__.py
+++ b/python/semantic_kernel/data/__init__.py
@@ -1,47 +0,0 @@
-# Copyright (c) Microsoft. All rights reserved.
-
-
-from semantic_kernel.data.const import (
-    DEFAULT_DESCRIPTION,
-    DEFAULT_FUNCTION_NAME,
-    DISTANCE_FUNCTION_DIRECTION_HELPER,
-    DistanceFunction,
-    IndexKind,
-)
-from semantic_kernel.data.definitions import (
-    FieldTypes,
-    VectorStoreCollectionDefinition,
-    VectorStoreField,
-    vectorstoremodel,
-)
-from semantic_kernel.data.search import (
-    DynamicFilterFunction,
-    KernelSearchResults,
-    TextSearch,
-    TextSearchResult,
-    create_options,
-    default_dynamic_filter_function,
-)
-from semantic_kernel.data.vectors import VectorSearch, VectorSearchResult, VectorStore, VectorStoreRecordCollection
-
-__all__ = [
-    "DEFAULT_DESCRIPTION",
-    "DEFAULT_FUNCTION_NAME",
-    "DISTANCE_FUNCTION_DIRECTION_HELPER",
-    "DistanceFunction",
-    "DynamicFilterFunction",
-    "FieldTypes",
-    "IndexKind",
-    "KernelSearchResults",
-    "TextSearch",
-    "TextSearchResult",
-    "VectorSearch",
-    "VectorSearchResult",
-    "VectorStore",
-    "VectorStoreCollectionDefinition",
-    "VectorStoreField",
-    "VectorStoreRecordCollection",
-    "create_options",
-    "default_dynamic_filter_function",
-    "vectorstoremodel",
-]
diff --git a/python/semantic_kernel/data/_search.py b/python/semantic_kernel/data/_search.py
new file mode 100644
index 000000000000..94377a9ebb90
--- /dev/null
+++ b/python/semantic_kernel/data/_search.py
@@ -0,0 +1,188 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+# region: Options
+
+
+from abc import ABC
+from collections.abc import AsyncIterable, Callable, Mapping
+from logging import Logger
+from typing import Annotated, Any, Final, Generic, Protocol, TypeVar
+
+from pydantic import ConfigDict, Field
+
+from semantic_kernel.functions.kernel_parameter_metadata import KernelParameterMetadata
+from semantic_kernel.kernel_pydantic import KernelBaseModel
+from semantic_kernel.kernel_types import OptionalOneOrList
+from semantic_kernel.utils.feature_stage_decorator import release_candidate
+
+TSearchResult = TypeVar("TSearchResult")
+TSearchOptions = TypeVar("TSearchOptions", bound="SearchOptions")
+
+
+DEFAULT_RETURN_PARAMETER_METADATA: KernelParameterMetadata = KernelParameterMetadata(
+    name="results",
+    description="The search results.",
+    type="list[str]",
+    type_object=list,
+    is_required=True,
+)
+# region: Text Search
+
+DEFAULT_PARAMETER_METADATA: list[KernelParameterMetadata] = [
+    KernelParameterMetadata(
+        name="query",
+        description="What to search for.",
+        type="str",
+        is_required=True,
+        type_object=str,
+    ),
+    KernelParameterMetadata(
+        name="top",
+        description="Number of results to return.",
+        type="int",
+        is_required=False,
+        default_value=2,
+        type_object=int,
+    ),
+    KernelParameterMetadata(
+        name="skip",
+        description="Number of results to skip.",
+        type="int",
+        is_required=False,
+        default_value=0,
+        type_object=int,
+    ),
+]
+DEFAULT_FUNCTION_NAME: Final[str] = "search"
+
+
+@release_candidate
+class SearchOptions(ABC, KernelBaseModel):
+    """Options for a search.
+
+    When multiple filters are used, they are combined with an AND operator.
+    """
+
+    filter: OptionalOneOrList[Callable | str] = None
+    skip: Annotated[int, Field(ge=0)] = 0
+    top: Annotated[int, Field(gt=0)] = 5
+    include_total_count: bool = False
+
+    model_config = ConfigDict(
+        extra="allow", populate_by_name=True, arbitrary_types_allowed=True, validate_assignment=True
+    )
+
+
+@release_candidate
+class KernelSearchResults(KernelBaseModel, Generic[TSearchResult]):
+    """The result of a kernel search."""
+
+    results: AsyncIterable[TSearchResult]
+    total_count: int | None = None
+    metadata: Mapping[str, Any] | None = None
+
+
+# region: Options functions
+
+
+class DynamicFilterFunction(Protocol):
+    """Type definition for the filter update function in Text Search."""
+
+    def __call__(
+        self,
+        filter: OptionalOneOrList[Callable | str] | None = None,
+        parameters: list["KernelParameterMetadata"] | None = None,
+        **kwargs: Any,
+    ) -> OptionalOneOrList[Callable | str] | None:
+        """Signature of the function."""
+        ...  # pragma: no cover
+
+
+def create_options(
+    options_class: type["TSearchOptions"],
+    options: SearchOptions | None,
+    logger: Logger | None = None,
+    **kwargs: Any,
+) -> "TSearchOptions":
+    """Create search options.
+
+    If options are supplied, they are checked for the right type, and the kwargs are used to update the options.
+
+    If options are not supplied, they are created from the kwargs.
+    If that fails, the validation error propagates to the caller (see Raises).
+
+    Args:
+        options_class: The class of the options.
+        options: The existing options to update.
+        logger: The logger to use for warnings.
+        **kwargs: The keyword arguments to use to create the options.
+
+    Returns:
+        The options of type options_class.
+
+    Raises:
+        ValidationError: If the options are not valid.
+
+    """
+    # no options given, so just try to create them from kwargs
+    if not options:
+        return options_class.model_validate(kwargs)
+    # options of a different class are converted here; matching options are updated in place further down
+    if not isinstance(options, options_class):
+        # options are not the right class, so create new options
+        # first try to dump the existing, if this doesn't work for some reason, try with kwargs only
+        additional_kwargs = {}
+        try:
+            additional_kwargs = options.model_dump(exclude_none=True, exclude_defaults=True, exclude_unset=True)
+        except Exception:
+            # This is very unlikely to happen, but if it does, we will just create new options.
+            # one reason this could happen is if a different class is passed that has no model_dump method
+            if logger:
+                logger.warning("Options are not valid. Creating new options from just kwargs.")
+        kwargs.update(additional_kwargs)
+        return options_class.model_validate(kwargs)
+
+    for key, value in kwargs.items():
+        if key in options.__class__.model_fields:
+            setattr(options, key, value)
+    return options
+
+
+def default_dynamic_filter_function(
+    filter: OptionalOneOrList[Callable | str] | None = None,
+    parameters: list["KernelParameterMetadata"] | None = None,
+    **kwargs: Any,
+) -> OptionalOneOrList[Callable | str] | None:
+    """The default options update function.
+
+    This function extends the filter with equality clauses built from the parameters and the kwargs.
+    You can supply your own version of this function to customize the behavior.
+
+    Args:
+        filter: The filter to use for the search.
+        parameters: The parameters to use to create the options.
+        **kwargs: The keyword arguments to use to update the options.
+
+    Returns:
+        OptionalOneOrList[Callable | str] | None: The updated filters
+
+    """
+    for param in parameters or []:
+        assert param.name  # nosec, when used param name is always set
+        if param.name in {"query", "top", "skip", "include_total_count"}:
+            continue
+        new_filter = None
+        if param.name in kwargs:
+            new_filter = f"lambda x: x.{param.name} == '{kwargs[param.name]}'"
+        elif param.default_value:
+            new_filter = f"lambda x: x.{param.name} == '{param.default_value}'"
+        if not new_filter:
+            continue
+        if filter is None:
+            filter = new_filter
+        elif isinstance(filter, list):
+            filter.append(new_filter)
+        else:
+            filter = [filter, new_filter]
+
+    return filter
diff --git a/python/semantic_kernel/data/const.py b/python/semantic_kernel/data/const.py
deleted file mode 100644
index 096314ab1e0c..000000000000
--- a/python/semantic_kernel/data/const.py
+++ /dev/null
@@ -1,130 +0,0 @@
-# Copyright (c) Microsoft. All rights reserved.
-
-import operator
-from collections.abc import Callable
-from enum import Enum
-from typing import Final
-
-
-class IndexKind(str, Enum):
-    """Index kinds for similarity search.
-
-    HNSW
-        Hierarchical Navigable Small World which performs an approximate nearest neighbor (ANN) search.
-        Lower accuracy than exhaustive k nearest neighbor, but faster and more efficient.
-
-    Flat
-        Does a brute force search to find the nearest neighbors.
-        Calculates the distances between all pairs of data points, so has a linear time complexity,
-        that grows directly proportional to the number of points.
-        Also referred to as exhaustive k nearest neighbor in some databases.
-        High recall accuracy, but slower and more expensive than HNSW.
-        Better with smaller datasets.
-
-    IVF Flat
-        Inverted File with Flat Compression.
-        Designed to enhance search efficiency by narrowing the search area
-        through the use of neighbor partitions or clusters.
-        Also referred to as approximate nearest neighbor (ANN) search.
-
-    Disk ANN
-        Disk-based Approximate Nearest Neighbor algorithm designed for efficiently searching
-        for approximate nearest neighbors (ANN) in high-dimensional spaces.
-        The primary focus of DiskANN is to handle large-scale datasets that cannot fit entirely
-        into memory, leveraging disk storage to store the data while maintaining fast search times.
-
-    Quantized Flat
-        Index that compresses vectors using DiskANN-based quantization methods for better efficiency in the kNN search.
-
-    Dynamic
-        Dynamic index allows to automatically switch from FLAT to HNSW indexes.
-
-    Default
-        Default index type.
-        Used when no index type is specified.
-        Will differ per vector store.
-
-    """
-
-    HNSW = "hnsw"
-    FLAT = "flat"
-    IVF_FLAT = "ivf_flat"
-    DISK_ANN = "disk_ann"
-    QUANTIZED_FLAT = "quantized_flat"
-    DYNAMIC = "dynamic"
-    DEFAULT = "default"
-
-
-class DistanceFunction(str, Enum):
-    """Distance functions for similarity search.
-
-    Cosine Similarity
-        the cosine (angular) similarity between two vectors
-        measures only the angle between the two vectors, without taking into account the length of the vectors
-        Cosine Similarity = 1 - Cosine Distance
-        -1 means vectors are opposite
-        0 means vectors are orthogonal
-        1 means vectors are identical
-    Cosine Distance
-        the cosine (angular) distance between two vectors
-        measures only the angle between the two vectors, without taking into account the length of the vectors
-        Cosine Distance = 1 - Cosine Similarity
-        2 means vectors are opposite
-        1 means vectors are orthogonal
-        0 means vectors are identical
-    Dot Product
-        measures both the length and angle between two vectors
-        same as cosine similarity if the vectors are the same length, but more performant
-    Euclidean Distance
-        measures the Euclidean distance between two vectors
-        also known as l2-norm
-    Euclidean Squared Distance
-        measures the Euclidean squared distance between two vectors
-        also known as l2-squared
-    Manhattan
-        measures the Manhattan distance between two vectors
-    Hamming
-        number of differences between vectors at each dimensions
-    DEFAULT
-        default distance function
-        used when no distance function is specified
-        will differ per vector store.
-    """
-
-    COSINE_SIMILARITY = "cosine_similarity"
-    COSINE_DISTANCE = "cosine_distance"
-    DOT_PROD = "dot_prod"
-    EUCLIDEAN_DISTANCE = "euclidean_distance"
-    EUCLIDEAN_SQUARED_DISTANCE = "euclidean_squared_distance"
-    MANHATTAN = "manhattan"
-    HAMMING = "hamming"
-    DEFAULT = "DEFAULT"
-
-
-DISTANCE_FUNCTION_DIRECTION_HELPER: Final[dict[DistanceFunction, Callable[[int | float, int | float], bool]]] = {
-    DistanceFunction.COSINE_SIMILARITY: operator.gt,
-    DistanceFunction.COSINE_DISTANCE: operator.le,
-    DistanceFunction.DOT_PROD: operator.gt,
-    DistanceFunction.EUCLIDEAN_DISTANCE: operator.le,
-    DistanceFunction.EUCLIDEAN_SQUARED_DISTANCE: operator.le,
-    DistanceFunction.MANHATTAN: operator.le,
-    DistanceFunction.HAMMING: operator.le,
-}
-DEFAULT_FUNCTION_NAME: Final[str] = "search"
-DEFAULT_DESCRIPTION: Final[str] = (
-    "Perform a search for content related to the specified query and return string results"
-)
-
-
-class TextSearchFunctions(str, Enum):
-    """Text search functions.
-
-    Attributes:
-        SEARCH: Search using a query.
-        GET_TEXT_SEARCH_RESULT: Get text search results.
-        GET_SEARCH_RESULT: Get search results.
-    """
-
-    SEARCH = "search"
-    GET_TEXT_SEARCH_RESULT = "get_text_search_result"
-    GET_SEARCH_RESULT = "get_search_result"
diff --git a/python/semantic_kernel/data/definitions.py b/python/semantic_kernel/data/definitions.py
deleted file mode 100644
index 4d12401684a7..000000000000
--- a/python/semantic_kernel/data/definitions.py
+++ /dev/null
@@ -1,541 +0,0 @@
-# Copyright (c) Microsoft. All rights reserved.
-
-import logging
-from collections.abc import Sequence
-from dataclasses import dataclass
-from enum import Enum
-from inspect import Parameter, _empty, signature
-from types import MappingProxyType, NoneType
-from typing import Annotated, Any, Literal, Protocol, TypeVar, overload, runtime_checkable
-
-from pydantic import Field, ValidationError
-
-from semantic_kernel.connectors.ai.embedding_generator_base import EmbeddingGeneratorBase
-from semantic_kernel.data.const import DistanceFunction, IndexKind
-from semantic_kernel.exceptions import VectorStoreModelException
-from semantic_kernel.kernel_pydantic import KernelBaseModel
-from semantic_kernel.utils.feature_stage_decorator import release_candidate
-
-logger = logging.getLogger(__name__)
-
-
-# region: Fields
-
-
-@release_candidate
-class FieldTypes(str, Enum):
-    """Enumeration for field types in vector store models."""
-
-    KEY = "key"
-    VECTOR = "vector"
-    DATA = "data"
-
-    def __str__(self) -> str:
-        """Return the string representation of the enum."""
-        return self.value
-
-
-@release_candidate
-@dataclass
-class VectorStoreField:
-    """Vector store fields."""
-
-    field_type: Literal[FieldTypes.DATA, FieldTypes.KEY, FieldTypes.VECTOR] = FieldTypes.DATA
-    name: str = ""
-    storage_name: str | None = None
-    type_: str | None = None
-    # data specific fields (all optional)
-    is_indexed: bool | None = None
-    is_full_text_indexed: bool | None = None
-    # vector specific fields (dimensions is mandatory)
-    dimensions: int | None = None
-    embedding_generator: EmbeddingGeneratorBase | None = None
-    # defaults for these fields are not set here, because they are not relevant for data and key types
-    index_kind: IndexKind | None = None
-    distance_function: DistanceFunction | None = None
-
-    @overload
-    def __init__(
-        self,
-        field_type: Literal[FieldTypes.KEY, "key"] = FieldTypes.KEY,  # type: ignore[assignment]
-        *,
-        name: str | None = None,
-        type: str | None = None,
-        storage_name: str | None = None,
-    ):
-        """Key field of the record.
-
-        When the key will be auto-generated by the store, make sure it has a default, usually None.
-
-        Args:
-            field_type: always "key".
-            name: The name of the field.
-            storage_name: The name of the field in the store, uses the field name by default.
-            type: The type of the field.
-        """
-        ...
-
-    @overload
-    def __init__(
-        self,
-        field_type: Literal[FieldTypes.DATA, "data"] = FieldTypes.DATA,  # type: ignore[assignment]
-        *,
-        name: str | None = None,
-        type: str | None = None,
-        storage_name: str | None = None,
-        is_indexed: bool | None = None,
-        is_full_text_indexed: bool | None = None,
-    ):
-        """Data field in the record.
-
-        Args:
-            field_type: always "data".
-            name: The name of the field.
-            storage_name: The name of the field in the store, uses the field name by default.
-            type: The type of the field.
-            is_indexed: Whether the field is indexed.
-            is_full_text_indexed: Whether the field is full text indexed.
-        """
-        ...
-
-    @overload
-    def __init__(
-        self,
-        field_type: Literal[FieldTypes.VECTOR, "vector"] = FieldTypes.VECTOR,  # type: ignore[assignment]
-        *,
-        name: str | None = None,
-        type: str | None = None,
-        dimensions: Annotated[int, Field(gt=0)],
-        storage_name: str | None = None,
-        index_kind: IndexKind | None = None,
-        distance_function: DistanceFunction | None = None,
-        embedding_generator: EmbeddingGeneratorBase | None = None,
-    ):
-        """Vector field in the record.
-
-        This field should contain the value you want to use for the vector.
-        When passing in the embedding generator, the embedding will be
-        generated locally before upserting.
-        If this is not set, the store should support generating the embedding for you.
-        If you want to retrieve the original content of the vector,
-        make sure to set this field twice,
-        once with the VectorStoreRecordDataField and once with the VectorStoreRecordVectorField.
-
-        If you want to be able to get the vectors back, make sure the type allows this, especially for pydantic models.
-        For instance, if the input is a string, then the type annotation should be `str | list[float] | None`.
-
-        If you want to cast the vector that is returned, you need to set the deserialize_function,
-        for instance: `deserialize_function=np.array`, (with `import numpy as np` at the top of your file).
-        If you want to set it up with more specific options, use a lambda, a custom function or a partial.
-
-        Args:
-            field_type: always "vector".
-            name: The name of the field.
-            storage_name: The name of the field in the store, uses the field name by default.
-            type: Property type.
-                For vectors this should be the inner type of the vector.
-                By default the vector will be a list of numbers.
-                If you want to use a numpy array or some other optimized format,
-                set the cast_function with a function
-                that takes a list of floats and returns a numpy array.
-            dimensions: The number of dimensions of the vector, mandatory.
-            index_kind: The index kind to use, uses a default index kind when None.
-            distance_function: The distance function to use, uses a default distance function when None.
-            embedding_generator: The embedding generator to use.
-                If this is set, the embedding will be generated locally before upserting.
-        """
-        ...
-
-    def __init__(
-        self,
-        field_type=FieldTypes.DATA,
-        *,
-        name=None,
-        type=None,
-        storage_name=None,
-        is_indexed=None,
-        is_full_text_indexed=None,
-        dimensions=None,
-        index_kind=None,
-        distance_function=None,
-        embedding_generator=None,
-    ):
-        """Vector store field."""
-        self.field_type = field_type if isinstance(field_type, FieldTypes) else FieldTypes(field_type)
-        # when a field is created, the name can be empty,
-        # when a field get's added to a definition, the name needs to be there.
-        self.name = name
-        self.storage_name = storage_name
-        self.type_ = type
-        self.is_indexed = is_indexed
-        self.is_full_text_indexed = is_full_text_indexed
-        if field_type == "vector":
-            if dimensions is None:
-                raise ValidationError("Vector fields must specify 'dimensions'")
-            self.dimensions = dimensions
-            self.index_kind = index_kind or IndexKind.DEFAULT
-            self.distance_function = distance_function or DistanceFunction.DEFAULT
-            self.embedding_generator = embedding_generator
-
-
-# region: Protocols
-
-
-@runtime_checkable
-class ToDictFunctionProtocol(Protocol):
-    """Protocol for to_dict function.
-
-    Args:
-        record: The record to be serialized.
-        **kwargs: Additional keyword arguments.
-
-    Returns:
-        A list of dictionaries.
-    """
-
-    def __call__(self, record: Any, **kwargs: Any) -> Sequence[dict[str, Any]]: ...  # pragma: no cover  # noqa: D102
-
-
-@runtime_checkable
-class FromDictFunctionProtocol(Protocol):
-    """Protocol for from_dict function.
-
-    Args:
-        records: A list of dictionaries.
-        **kwargs: Additional keyword arguments.
-
-    Returns:
-        A record or list thereof.
-    """
-
-    def __call__(self, records: Sequence[dict[str, Any]], **kwargs: Any) -> Any: ...  # noqa: D102
-
-
-@runtime_checkable
-class SerializeFunctionProtocol(Protocol):
-    """Protocol for serialize function.
-
-    Args:
-        record: The record to be serialized.
-        **kwargs: Additional keyword arguments.
-
-    Returns:
-        The serialized record, ready to be consumed by the specific store.
-
-    """
-
-    def __call__(self, record: Any, **kwargs: Any) -> Any: ...  # noqa: D102
-
-
-@runtime_checkable
-class DeserializeFunctionProtocol(Protocol):
-    """Protocol for deserialize function.
-
-    Args:
-        records: The serialized record directly from the store.
-        **kwargs: Additional keyword arguments.
-
-    Returns:
-        The deserialized record in the format expected by the application.
-
-    """
-
-    def __call__(self, records: Any, **kwargs: Any) -> Any: ...  # noqa: D102
-
-
-@runtime_checkable
-class SerializeMethodProtocol(Protocol):
-    """Data model serialization protocol.
-
-    This can optionally be implemented to allow single step serialization and deserialization
-    for using your data model with a specific datastore.
-    """
-
-    def serialize(self, **kwargs: Any) -> Any:
-        """Serialize the object to the format required by the data store."""
-        ...  # pragma: no cover
-
-
-@runtime_checkable
-class ToDictMethodProtocol(Protocol):
-    """Class used internally to check if a model has a to_dict method."""
-
-    def to_dict(self, *args: Any, **kwargs: Any) -> dict[str, Any]:
-        """Serialize the object to the format required by the data store."""
-        ...  # pragma: no cover
-
-
-# region: VectorStoreRecordDefinition
-
-
-@release_candidate
-class VectorStoreCollectionDefinition(KernelBaseModel):
-    """Collection definition for vector stores.
-
-    Args:
-        fields: The fields of the record.
-        container_mode: Whether the record is in container mode.
-        to_dict: The to_dict function, should take a record and return a list of dicts.
-        from_dict: The from_dict function, should take a list of dicts and return a record.
-        deserialize: The deserialize function, should take a type specific to a datastore and return a record.
-
-    """
-
-    fields: list[VectorStoreField]
-    key_name: str = Field(default="", init=False)
-    container_mode: bool = False
-    collection_name: str | None = None
-    to_dict: ToDictFunctionProtocol | None = None
-    from_dict: FromDictFunctionProtocol | None = None
-    serialize: SerializeFunctionProtocol | None = None
-    deserialize: DeserializeFunctionProtocol | None = None
-
-    @property
-    def names(self) -> list[str]:
-        """Get the names of the fields."""
-        return [field.name for field in self.fields]
-
-    @property
-    def storage_names(self) -> list[str]:
-        """Get the names of the fields for storage."""
-        return [field.storage_name or field.name for field in self.fields]
-
-    @property
-    def key_field(self) -> VectorStoreField:
-        """Get the key field."""
-        return next((field for field in self.fields if field.name == self.key_name), None)  # type: ignore
-
-    @property
-    def key_field_storage_name(self) -> str:
-        """Get the key field storage name."""
-        return self.key_field.storage_name or self.key_field.name
-
-    @property
-    def vector_fields(self) -> list[VectorStoreField]:
-        """Get the names of the vector fields."""
-        return [field for field in self.fields if field.field_type == FieldTypes.VECTOR]
-
-    @property
-    def data_fields(self) -> list[VectorStoreField]:
-        """Get the names of the data fields."""
-        return [field for field in self.fields if field.field_type == FieldTypes.DATA]
-
-    @property
-    def vector_field_names(self) -> list[str]:
-        """Get the names of the vector fields."""
-        return [field.name for field in self.fields if field.field_type == FieldTypes.VECTOR]
-
-    @property
-    def data_field_names(self) -> list[str]:
-        """Get the names of all the data fields."""
-        return [field.name for field in self.fields if field.field_type == FieldTypes.DATA]
-
-    def try_get_vector_field(self, field_name: str | None = None) -> VectorStoreField | None:
-        """Try to get the vector field.
-
-        If the field_name is None, then the first vector field is returned.
-        If no vector fields are present None is returned.
-
-        Args:
-            field_name: The field name.
-
-        Returns:
-            VectorStoreRecordVectorField | None: The vector field or None.
-        """
-        if field_name is None:
-            if len(self.vector_fields) == 0:
-                return None
-            return self.vector_fields[0]
-        for field in self.fields:
-            if field.name == field_name or field.storage_name == field_name:
-                if field.field_type == FieldTypes.VECTOR:
-                    return field
-                raise VectorStoreModelException(
-                    f"Field {field_name} is not a vector field, it is of type {type(field).__name__}."
-                )
-        raise VectorStoreModelException(f"Field {field_name} not found.")
-
-    def get_storage_names(self, include_vector_fields: bool = True, include_key_field: bool = True) -> list[str]:
-        """Get the names of the fields for the storage.
-
-        Args:
-            include_vector_fields: Whether to include vector fields.
-            include_key_field: Whether to include the key field.
-
-        Returns:
-            list[str]: The names of the fields.
-        """
-        return [
-            field.storage_name or field.name
-            for field in self.fields
-            if field.field_type == FieldTypes.DATA
-            or (field.field_type == FieldTypes.VECTOR and include_vector_fields)
-            or (field.field_type == FieldTypes.KEY and include_key_field)
-        ]
-
-    def get_names(self, include_vector_fields: bool = True, include_key_field: bool = True) -> list[str]:
-        """Get the names of the fields.
-
-        Args:
-            include_vector_fields: Whether to include vector fields.
-            include_key_field: Whether to include the key field.
-
-        Returns:
-            list[str]: The names of the fields.
-        """
-        return [
-            field.name
-            for field in self.fields
-            if field.field_type == FieldTypes.DATA
-            or (field.field_type == FieldTypes.VECTOR and include_vector_fields)
-            or (field.field_type == FieldTypes.KEY and include_key_field)
-        ]
-
-    def model_post_init(self, _: Any):
-        """Validate the fields.
-
-        Raises:
-            VectorStoreModelException: If there is a field with an embedding property name
-                but no corresponding vector field.
-            VectorStoreModelException: If there is no key field.
-        """
-        if len(self.fields) == 0:
-            raise VectorStoreModelException(
-                "There must be at least one field with a VectorStoreRecordField annotation."
-            )
-        for field in self.fields:
-            if not field.name or field.name == "":
-                raise VectorStoreModelException("Field names must not be empty.")
-            if field.field_type == FieldTypes.KEY:
-                if self.key_name != "":
-                    raise VectorStoreModelException("Memory record definition must have exactly one key field.")
-                self.key_name = field.name
-        if not self.key_name:
-            raise VectorStoreModelException("Memory record definition must have exactly one key field.")
-
-
-# region: Signature parsing functions
-
-
-def _parse_vector_store_record_field_instance(record_field: VectorStoreField, field: Parameter) -> VectorStoreField:
-    if not record_field.name or record_field.name != field.name:
-        record_field.name = field.name
-    if not record_field.type_ and hasattr(field.annotation, "__origin__"):
-        property_type = field.annotation.__origin__
-        if record_field.field_type == FieldTypes.VECTOR:
-            if args := getattr(property_type, "__args__", None):
-                if NoneType in args and len(args) > 1:
-                    for arg in args:
-                        if arg is NoneType:
-                            continue
-
-                        if (
-                            (inner_args := getattr(arg, "__args__", None))
-                            and len(inner_args) == 1
-                            and inner_args[0] is not NoneType
-                        ):
-                            property_type = inner_args[0]
-                            break
-                        property_type = arg
-                        break
-                else:
-                    property_type = args[0]
-
-        else:
-            if (args := getattr(property_type, "__args__", None)) and NoneType in args and len(args) == 2:
-                property_type = args[0]
-
-        record_field.type_ = str(property_type) if hasattr(property_type, "__args__") else property_type.__name__
-
-    return record_field
-
-
-def _parse_parameter_to_field(field: Parameter) -> VectorStoreField | None:
-    # first check if there are any annotations
-    if field.annotation is not _empty and hasattr(field.annotation, "__metadata__"):
-        for field_annotation in field.annotation.__metadata__:
-            if isinstance(field_annotation, VectorStoreField):
-                return _parse_vector_store_record_field_instance(field_annotation, field)
-    # This means there are no annotations or that all annotations are of other types.
-    # we will check if there is a default, otherwise this will cause a runtime error.
-    # because it will not be stored, and retrieving this object will fail without a default for this field.
-    if field.default is _empty:
-        raise VectorStoreModelException(
-            "Fields that do not have a VectorStoreField annotation must have a default value."
-        )
-    logger.debug(f'Field "{field.name}" does not have a VectorStoreField annotation, will not be part of the record.')
-    return None
-
-
-def _parse_signature_to_definition(
-    parameters: MappingProxyType[str, Parameter], collection_name: str | None = None
-) -> VectorStoreCollectionDefinition:
-    if len(parameters) == 0:
-        raise VectorStoreModelException(
-            "There must be at least one field in the datamodel. If you are using this with a @dataclass, "
-            "you might have inverted the order of the decorators, the vectorstoremodel decorator should be the top one."
-        )
-    fields = []
-    for param in parameters.values():
-        field = _parse_parameter_to_field(param)
-        if field:
-            fields.append(field)
-
-    return VectorStoreCollectionDefinition(
-        fields=fields,
-        collection_name=collection_name,
-    )
-
-
-# region: VectorStoreModel decorator
-
-
-_T = TypeVar("_T")
-
-
-@release_candidate
-def vectorstoremodel(
-    cls: type[_T] | None = None,
-    collection_name: str | None = None,
-) -> type[_T]:
-    """Returns the class as a vector store model.
-
-    This decorator makes a class a vector store model.
-    There are three things being checked:
-    - The class must have at least one field with a annotation,
-        of type VectorStoreField.
-    - The class must have exactly one field with the field_type `key`.
-    - When creating a Vector Field, either supply the property type directly,
-    or make sure to set the property that you want the index to use first.
-
-
-    Args:
-        cls: The class to be decorated.
-        collection_name: The name of the collection to be used.
-            This is used to set the collection name in the VectorStoreCollectionDefinition.
-
-    Raises:
-        VectorStoreModelException: If there are no fields with a VectorStoreField annotation.
-        VectorStoreModelException: If there are fields with no name.
-        VectorStoreModelException: If there is no key field.
-    """
-
-    def wrap(cls: type[_T]) -> type[_T]:
-        # get fields and annotations
-        cls_sig = signature(cls)
-        setattr(cls, "__kernel_vectorstoremodel__", True)
-        setattr(
-            cls,
-            "__kernel_vectorstoremodel_definition__",
-            _parse_signature_to_definition(cls_sig.parameters, collection_name),
-        )
-
-        return cls  # type: ignore
-
-    # See if we're being called as @vectorstoremodel or @vectorstoremodel().
-    if cls is None:
-        # We're called with parens.
-        return wrap  # type: ignore
-
-    # We're called as @vectorstoremodel without parens.
-    return wrap(cls)
diff --git a/python/semantic_kernel/data/search.py b/python/semantic_kernel/data/text_search.py
similarity index 67%
rename from python/semantic_kernel/data/search.py
rename to python/semantic_kernel/data/text_search.py
index d6864a774e61..29350ca2a82c 100644
--- a/python/semantic_kernel/data/search.py
+++ b/python/semantic_kernel/data/text_search.py
@@ -2,14 +2,23 @@
 
 import json
 import logging
-from abc import ABC, abstractmethod
-from collections.abc import AsyncIterable, Callable, Mapping, Sequence
+from abc import abstractmethod
+from collections.abc import Callable, Sequence
 from copy import deepcopy
-from typing import Annotated, Any, Generic, Literal, Protocol, TypeVar, overload
-
-from pydantic import BaseModel, ConfigDict, Field, ValidationError
-
-from semantic_kernel.data.const import DEFAULT_DESCRIPTION, DEFAULT_FUNCTION_NAME
+from typing import Any, Final, Literal, TypeVar, overload
+
+from pydantic import BaseModel, ValidationError
+
+from semantic_kernel.data._search import (
+    DEFAULT_FUNCTION_NAME,
+    DEFAULT_PARAMETER_METADATA,
+    DEFAULT_RETURN_PARAMETER_METADATA,
+    DynamicFilterFunction,
+    KernelSearchResults,
+    SearchOptions,
+    create_options,
+    default_dynamic_filter_function,
+)
 from semantic_kernel.exceptions import TextSearchException
 from semantic_kernel.functions.kernel_function import KernelFunction
 from semantic_kernel.functions.kernel_function_decorator import kernel_function
@@ -19,29 +28,13 @@
 from semantic_kernel.kernel_types import OptionalOneOrList
 from semantic_kernel.utils.feature_stage_decorator import release_candidate
 
-TSearchOptions = TypeVar("TSearchOptions", bound="SearchOptions")
-
 logger = logging.getLogger(__name__)
 
-# region: Options
-
-
-@release_candidate
-class SearchOptions(ABC, KernelBaseModel):
-    """Options for a search.
-
-    When multiple filters are used, they are combined with an AND operator.
-    """
-
-    filter: OptionalOneOrList[Callable | str] = None
-    skip: Annotated[int, Field(ge=0)] = 0
-    top: Annotated[int, Field(gt=0)] = 5
-    include_total_count: bool = False
-
-    model_config = ConfigDict(
-        extra="allow", populate_by_name=True, arbitrary_types_allowed=True, validate_assignment=True
-    )
+TSearchOptions = TypeVar("TSearchOptions", bound="SearchOptions")
 
+DEFAULT_DESCRIPTION: Final[str] = (
+    "Perform a search for content related to the specified query and return string results"
+)
 
 # region: Results
 
@@ -58,121 +51,6 @@ class TextSearchResult(KernelBaseModel):
 TSearchResult = TypeVar("TSearchResult")
 
 
-@release_candidate
-class KernelSearchResults(KernelBaseModel, Generic[TSearchResult]):
-    """The result of a kernel search."""
-
-    results: AsyncIterable[TSearchResult]
-    total_count: int | None = None
-    metadata: Mapping[str, Any] | None = None
-
-
-# region: Options functions
-
-
-class DynamicFilterFunction(Protocol):
-    """Type definition for the filter update function in Text Search."""
-
-    def __call__(
-        self,
-        filter: OptionalOneOrList[Callable | str] | None = None,
-        parameters: list["KernelParameterMetadata"] | None = None,
-        **kwargs: Any,
-    ) -> OptionalOneOrList[Callable | str] | None:
-        """Signature of the function."""
-        ...  # pragma: no cover
-
-
-def create_options(
-    options_class: type["TSearchOptions"],
-    options: "SearchOptions | None",
-    **kwargs: Any,
-) -> "TSearchOptions":
-    """Create search options.
-
-    If options are supplied, they are checked for the right type, and the kwargs are used to update the options.
-
-    If options are not supplied, they are created from the kwargs.
-    If that fails, an empty options object is returned.
-
-    Args:
-        options_class: The class of the options.
-        options: The existing options to update.
-        **kwargs: The keyword arguments to use to create the options.
-
-    Returns:
-        The options of type options_class.
-
-    Raises:
-        ValidationError: If the options are not valid.
-
-    """
-    # no options give, so just try to create from kwargs
-    if not options:
-        return options_class.model_validate(kwargs)
-    # options are the right class, just update based on kwargs
-    if not isinstance(options, options_class):
-        # options are not the right class, so create new options
-        # first try to dump the existing, if this doesn't work for some reason, try with kwargs only
-        additional_kwargs = {}
-        try:
-            additional_kwargs = options.model_dump(exclude_none=True, exclude_defaults=True, exclude_unset=True)
-        except Exception:
-            # This is very unlikely to happen, but if it does, we will just create new options.
-            # one reason this could happen is if a different class is passed that has no model_dump method
-            logger.warning("Options are not valid. Creating new options from just kwargs.")
-        kwargs.update(additional_kwargs)
-        return options_class.model_validate(kwargs)
-
-    for key, value in kwargs.items():
-        if key in options.__class__.model_fields:
-            setattr(options, key, value)
-    return options
-
-
-def default_dynamic_filter_function(
-    filter: OptionalOneOrList[Callable | str] | None = None,
-    parameters: list["KernelParameterMetadata"] | None = None,
-    **kwargs: Any,
-) -> OptionalOneOrList[Callable | str] | None:
-    """The default options update function.
-
-    This function is used to update the query and options with the kwargs.
-    You can supply your own version of this function to customize the behavior.
-
-    Args:
-        filter: The filter to use for the search.
-        parameters: The parameters to use to create the options.
-        **kwargs: The keyword arguments to use to update the options.
-
-    Returns:
-        OptionalOneOrList[Callable | str] | None: The updated filters
-
-    """
-    for param in parameters or []:
-        assert param.name  # nosec, when used param name is always set
-        if param.name in {"query", "top", "skip", "include_total_count"}:
-            continue
-        new_filter = None
-        if param.name in kwargs:
-            new_filter = f"lambda x: x.{param.name} == '{kwargs[param.name]}'"
-        elif param.default_value:
-            new_filter = f"lambda x: x.{param.name} == '{param.default_value}'"
-        if not new_filter:
-            continue
-        if filter is None:
-            filter = new_filter
-        elif isinstance(filter, list):
-            filter.append(new_filter)
-        else:
-            filter = [filter, new_filter]
-
-    return filter
-
-
-# region: Text Search
-
-
 @release_candidate
 class TextSearch:
     """The base class for all text searchers."""
@@ -182,52 +60,6 @@ def options_class(self) -> type["SearchOptions"]:
         """The options class for the search."""
         return SearchOptions
 
-    @staticmethod
-    def _default_parameter_metadata() -> list[KernelParameterMetadata]:
-        """Default parameter metadata for text search functions.
-
-        This function should be overridden when necessary.
-        """
-        return [
-            KernelParameterMetadata(
-                name="query",
-                description="What to search for.",
-                type="str",
-                is_required=True,
-                type_object=str,
-            ),
-            KernelParameterMetadata(
-                name="top",
-                description="Number of results to return.",
-                type="int",
-                is_required=False,
-                default_value=2,
-                type_object=int,
-            ),
-            KernelParameterMetadata(
-                name="skip",
-                description="Number of results to skip.",
-                type="int",
-                is_required=False,
-                default_value=0,
-                type_object=int,
-            ),
-        ]
-
-    @staticmethod
-    def _default_return_parameter_metadata() -> KernelParameterMetadata:
-        """Default return parameter metadata for text search functions.
-
-        This function should be overridden by subclasses.
-        """
-        return KernelParameterMetadata(
-            name="results",
-            description="The search results.",
-            type="list[str]",
-            type_object=list,
-            is_required=True,
-        )
-
     # region: Public methods
 
     @overload
@@ -460,8 +292,8 @@ async def search_wrapper(**kwargs: Any) -> Sequence[str]:
 
         return KernelFunctionFromMethod(
             method=search_wrapper,
-            parameters=self._default_parameter_metadata() if parameters is None else parameters,
-            return_parameter=return_parameter or self._default_return_parameter_metadata(),
+            parameters=DEFAULT_PARAMETER_METADATA if parameters is None else parameters,
+            return_parameter=return_parameter or DEFAULT_RETURN_PARAMETER_METADATA,
         )
 
     async def _map_results(
@@ -500,3 +332,17 @@ async def search(
 
         """
         ...
+
+
+__all__ = [
+    "DEFAULT_DESCRIPTION",
+    "DEFAULT_FUNCTION_NAME",
+    "DEFAULT_PARAMETER_METADATA",
+    "DEFAULT_RETURN_PARAMETER_METADATA",
+    "DynamicFilterFunction",
+    "KernelSearchResults",
+    "TextSearch",
+    "TextSearchResult",
+    "create_options",
+    "default_dynamic_filter_function",
+]
diff --git a/python/semantic_kernel/data/vectors.py b/python/semantic_kernel/data/vectors.py
index 87c5fc84e2f5..b63e6871e9da 100644
--- a/python/semantic_kernel/data/vectors.py
+++ b/python/semantic_kernel/data/vectors.py
@@ -2,32 +2,30 @@
 
 import json
 import logging
+import operator
 import sys
 from abc import abstractmethod
 from ast import AST, Lambda, NodeVisitor, expr, parse
 from collections.abc import AsyncIterable, Callable, Mapping, Sequence
 from copy import deepcopy
+from dataclasses import dataclass
 from enum import Enum
-from inspect import getsource
-from typing import Annotated, Any, ClassVar, Generic, Literal, TypeVar, overload
+from inspect import Parameter, _empty, getsource, signature
+from types import MappingProxyType, NoneType
+from typing import Annotated, Any, ClassVar, Final, Generic, Literal, Protocol, TypeVar, overload, runtime_checkable
 
 from pydantic import BaseModel, Field, ValidationError, model_validator
-from pydantic.dataclasses import dataclass
+from pydantic.dataclasses import dataclass as pyd_dataclass
 
 from semantic_kernel.connectors.ai.embedding_generator_base import EmbeddingGeneratorBase
 from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
-from semantic_kernel.data.const import DEFAULT_DESCRIPTION, DEFAULT_FUNCTION_NAME
-from semantic_kernel.data.definitions import (
-    FieldTypes,
-    SerializeMethodProtocol,
-    VectorStoreCollectionDefinition,
-    VectorStoreField,
-)
-from semantic_kernel.data.search import (
+from semantic_kernel.data._search import (
+    DEFAULT_FUNCTION_NAME,
+    DEFAULT_PARAMETER_METADATA,
+    DEFAULT_RETURN_PARAMETER_METADATA,
     DynamicFilterFunction,
     KernelSearchResults,
     SearchOptions,
-    TextSearch,
     create_options,
     default_dynamic_filter_function,
 )
@@ -62,10 +60,631 @@
 TModel = TypeVar("TModel", bound=object)
 TKey = TypeVar("TKey")
 _T = TypeVar("_T", bound="VectorStoreRecordHandler")
-TSearchOptions = TypeVar("TSearchOptions", bound=SearchOptions)
 TFilters = TypeVar("TFilters")
 
-# region: Helpers
+DEFAULT_DESCRIPTION: Final[str] = (
+    "Perform a vector search for data in a vector store, using the provided search options."
+)
+
+
+# region: Fields and Collection Definitions
+
+
+@release_candidate
+class FieldTypes(str, Enum):
+    """Enumeration for field types in vector store models."""
+
+    KEY = "key"
+    VECTOR = "vector"
+    DATA = "data"
+
+    def __str__(self) -> str:
+        """Return the string representation of the enum."""
+        return self.value
+
+
+@runtime_checkable
+class SerializeMethodProtocol(Protocol):
+    """Data model serialization protocol.
+
+    This can optionally be implemented to allow single step serialization and deserialization
+    for using your data model with a specific datastore.
+    """
+
+    def serialize(self, **kwargs: Any) -> Any:
+        """Serialize the object to the format required by the data store."""
+        ...  # pragma: no cover
+
+
+@runtime_checkable
+class ToDictFunctionProtocol(Protocol):
+    """Protocol for to_dict function.
+
+    Args:
+        record: The record to be serialized.
+        **kwargs: Additional keyword arguments.
+
+    Returns:
+        A list of dictionaries.
+    """
+
+    def __call__(self, record: Any, **kwargs: Any) -> Sequence[dict[str, Any]]: ...  # pragma: no cover
+
+
+@runtime_checkable
+class FromDictFunctionProtocol(Protocol):
+    """Protocol for from_dict function.
+
+    Args:
+        records: A list of dictionaries.
+        **kwargs: Additional keyword arguments.
+
+    Returns:
+        A record or list thereof.
+    """
+
+    def __call__(self, records: Sequence[dict[str, Any]], **kwargs: Any) -> Any: ...
+
+
+@runtime_checkable
+class SerializeFunctionProtocol(Protocol):
+    """Protocol for serialize function.
+
+    Args:
+        record: The record to be serialized.
+        **kwargs: Additional keyword arguments.
+
+    Returns:
+        The serialized record, ready to be consumed by the specific store.
+
+    """
+
+    def __call__(self, record: Any, **kwargs: Any) -> Any: ...
+
+
+@runtime_checkable
+class DeserializeFunctionProtocol(Protocol):
+    """Protocol for deserialize function.
+
+    Args:
+        records: The serialized record directly from the store.
+        **kwargs: Additional keyword arguments.
+
+    Returns:
+        The deserialized record in the format expected by the application.
+
+    """
+
+    def __call__(self, records: Any, **kwargs: Any) -> Any: ...
+
+
+@runtime_checkable
+class ToDictMethodProtocol(Protocol):
+    """Class used internally to check if a model has a to_dict method."""
+
+    def to_dict(self, *args: Any, **kwargs: Any) -> dict[str, Any]:
+        """Serialize the object to the format required by the data store."""
+        ...  # pragma: no cover
+
+
+class IndexKind(str, Enum):
+    """Index kinds for similarity search.
+
+    HNSW
+        Hierarchical Navigable Small World which performs an approximate nearest neighbor (ANN) search.
+        Lower accuracy than exhaustive k nearest neighbor, but faster and more efficient.
+
+    Flat
+        Does a brute force search to find the nearest neighbors.
+        Calculates the distances between all pairs of data points, so has a linear time complexity,
+        that grows directly proportional to the number of points.
+        Also referred to as exhaustive k nearest neighbor in some databases.
+        High recall accuracy, but slower and more expensive than HNSW.
+        Better with smaller datasets.
+
+    IVF Flat
+        Inverted File with Flat Compression.
+        Designed to enhance search efficiency by narrowing the search area
+        through the use of neighbor partitions or clusters.
+        Also referred to as approximate nearest neighbor (ANN) search.
+
+    Disk ANN
+        Disk-based Approximate Nearest Neighbor algorithm designed for efficiently searching
+        for approximate nearest neighbors (ANN) in high-dimensional spaces.
+        The primary focus of DiskANN is to handle large-scale datasets that cannot fit entirely
+        into memory, leveraging disk storage to store the data while maintaining fast search times.
+
+    Quantized Flat
+        Index that compresses vectors using DiskANN-based quantization methods for better efficiency in the kNN search.
+
+    Dynamic
+        Dynamic index allows to automatically switch from FLAT to HNSW indexes.
+
+    Default
+        Default index type.
+        Used when no index type is specified.
+        Will differ per vector store.
+
+    """
+
+    HNSW = "hnsw"
+    FLAT = "flat"
+    IVF_FLAT = "ivf_flat"
+    DISK_ANN = "disk_ann"
+    QUANTIZED_FLAT = "quantized_flat"
+    DYNAMIC = "dynamic"
+    DEFAULT = "default"
+
+
+class DistanceFunction(str, Enum):
+    """Distance functions for similarity search.
+
+    Cosine Similarity
+        the cosine (angular) similarity between two vectors
+        measures only the angle between the two vectors, without taking into account the length of the vectors
+        Cosine Similarity = 1 - Cosine Distance
+        -1 means vectors are opposite
+        0 means vectors are orthogonal
+        1 means vectors point in the same direction
+    Cosine Distance
+        the cosine (angular) distance between two vectors
+        measures only the angle between the two vectors, without taking into account the length of the vectors
+        Cosine Distance = 1 - Cosine Similarity
+        2 means vectors are opposite
+        1 means vectors are orthogonal
+        0 means vectors point in the same direction
+    Dot Product
+        measures both the length and angle between two vectors
+        same as cosine similarity if the vectors are normalized to unit length, but more performant
+    Euclidean Distance
+        measures the Euclidean distance between two vectors
+        also known as l2-norm
+    Euclidean Squared Distance
+        measures the Euclidean squared distance between two vectors
+        also known as l2-squared
+    Manhattan
+        measures the Manhattan distance between two vectors
+    Hamming
+        number of dimensions at which the two vectors differ
+    DEFAULT
+        default distance function
+        used when no distance function is specified
+        will differ per vector store.
+    """
+
+    COSINE_SIMILARITY = "cosine_similarity"
+    COSINE_DISTANCE = "cosine_distance"
+    DOT_PROD = "dot_prod"
+    EUCLIDEAN_DISTANCE = "euclidean_distance"
+    EUCLIDEAN_SQUARED_DISTANCE = "euclidean_squared_distance"
+    MANHATTAN = "manhattan"
+    HAMMING = "hamming"
+    DEFAULT = "DEFAULT"
+
+
+DISTANCE_FUNCTION_DIRECTION_HELPER: Final[dict[DistanceFunction, Callable[[int | float, int | float], bool]]] = {
+    DistanceFunction.COSINE_SIMILARITY: operator.gt,
+    DistanceFunction.COSINE_DISTANCE: operator.le,
+    DistanceFunction.DOT_PROD: operator.gt,
+    DistanceFunction.EUCLIDEAN_DISTANCE: operator.le,
+    DistanceFunction.EUCLIDEAN_SQUARED_DISTANCE: operator.le,
+    DistanceFunction.MANHATTAN: operator.le,
+    DistanceFunction.HAMMING: operator.le,
+}
+
+
+@release_candidate
+@dataclass
+class VectorStoreField:
+    """A single field (key, data or vector) of a vector store record."""
+
+    field_type: Literal[FieldTypes.DATA, FieldTypes.KEY, FieldTypes.VECTOR] = FieldTypes.DATA
+    name: str = ""
+    storage_name: str | None = None
+    type_: str | None = None
+    # data specific fields (all optional)
+    is_indexed: bool | None = None
+    is_full_text_indexed: bool | None = None
+    # vector specific fields (dimensions is mandatory)
+    dimensions: int | None = None
+    embedding_generator: EmbeddingGeneratorBase | None = None
+    # defaults for these fields are not set here, because they are not relevant for data and key types
+    index_kind: IndexKind | None = None
+    distance_function: DistanceFunction | None = None
+
+    @overload
+    def __init__(
+        self,
+        field_type: Literal[FieldTypes.KEY, "key"] = FieldTypes.KEY,  # type: ignore[assignment]
+        *,
+        name: str | None = None,
+        type: str | None = None,
+        storage_name: str | None = None,
+    ):
+        """Key field of the record.
+
+        When the key will be auto-generated by the store, make sure it has a default, usually None.
+
+        Args:
+            field_type: always "key".
+            name: The name of the field.
+            storage_name: The name of the field in the store, uses the field name by default.
+            type: The type of the field.
+        """
+        ...
+
+    @overload
+    def __init__(
+        self,
+        field_type: Literal[FieldTypes.DATA, "data"] = FieldTypes.DATA,  # type: ignore[assignment]
+        *,
+        name: str | None = None,
+        type: str | None = None,
+        storage_name: str | None = None,
+        is_indexed: bool | None = None,
+        is_full_text_indexed: bool | None = None,
+    ):
+        """Data field in the record.
+
+        Args:
+            field_type: always "data".
+            name: The name of the field.
+            storage_name: The name of the field in the store, uses the field name by default.
+            type: The type of the field.
+            is_indexed: Whether the field is indexed.
+            is_full_text_indexed: Whether the field is full text indexed.
+        """
+        ...
+
+    @overload
+    def __init__(
+        self,
+        field_type: Literal[FieldTypes.VECTOR, "vector"] = FieldTypes.VECTOR,  # type: ignore[assignment]
+        *,
+        name: str | None = None,
+        type: str | None = None,
+        dimensions: Annotated[int, Field(gt=0)],
+        storage_name: str | None = None,
+        index_kind: IndexKind | None = None,
+        distance_function: DistanceFunction | None = None,
+        embedding_generator: EmbeddingGeneratorBase | None = None,
+    ):
+        """Vector field in the record.
+
+        This field should contain the value you want to use for the vector.
+        When passing in the embedding generator, the embedding will be
+        generated locally before upserting.
+        If this is not set, the store should support generating the embedding for you.
+        If you want to retrieve the original content of the vector,
+        make sure to set this field twice,
+        once with field_type "data" and once with field_type "vector".
+
+        If you want to be able to get the vectors back, make sure the type allows this, especially for pydantic models.
+        For instance, if the input is a string, then the type annotation should be `str | list[float] | None`.
+
+        NOTE(review): earlier revisions of this text described a `deserialize_function` argument
+        for casting the returned vector (for instance to a numpy array); this constructor
+        does not accept such an argument, so confirm where that conversion is expected to happen.
+
+        Args:
+            field_type: always "vector".
+            name: The name of the field.
+            storage_name: The name of the field in the store, uses the field name by default.
+            type: Property type.
+                For vectors this should be the inner type of the vector.
+                By default the vector will be a list of numbers.
+                NOTE(review): this text used to point at a `cast_function` for numpy arrays
+                or other optimized formats; this constructor has no such argument,
+                so confirm how such a conversion should be configured.
+            dimensions: The number of dimensions of the vector, mandatory.
+            index_kind: The index kind to use, uses a default index kind when None.
+            distance_function: The distance function to use, uses a default distance function when None.
+            embedding_generator: The embedding generator to use.
+                If this is set, the embedding will be generated locally before upserting.
+        """
+        ...
+
+    def __init__(
+        self,
+        field_type=FieldTypes.DATA,
+        *,
+        name=None,
+        type=None,
+        storage_name=None,
+        is_indexed=None,
+        is_full_text_indexed=None,
+        dimensions=None,
+        index_kind=None,
+        distance_function=None,
+        embedding_generator=None,
+    ):
+        """Vector store field; raises VectorStoreModelException when a vector field has no dimensions."""
+        self.field_type = field_type if isinstance(field_type, FieldTypes) else FieldTypes(field_type)
+        # when a field is created, the name can be empty,
+        # when a field gets added to a definition, the name needs to be there.
+        if name:
+            self.name = name
+        self.storage_name = storage_name
+        self.type_ = type
+        self.is_indexed = is_indexed
+        self.is_full_text_indexed = is_full_text_indexed
+        if self.field_type == FieldTypes.VECTOR:  # compare the normalized enum, not the raw argument
+            if dimensions is None:  # pydantic's ValidationError has no public constructor, so it is not used here
+                raise VectorStoreModelException("Vector fields must specify 'dimensions'")
+            self.dimensions = dimensions
+            self.index_kind = index_kind or IndexKind.DEFAULT
+            self.distance_function = distance_function or DistanceFunction.DEFAULT
+            self.embedding_generator = embedding_generator
+
+
+@release_candidate
+class VectorStoreCollectionDefinition(KernelBaseModel):
+    """Collection definition for vector stores.
+
+    Args:
+        fields: The fields of the record.
+        container_mode: Whether the record is in container mode.
+        to_dict: The to_dict function, should take a record and return a list of dicts.
+        from_dict: The from_dict function, should take a list of dicts and return a record.
+        serialize: The serialize function, should take a record and return the type specific to a datastore.
+        deserialize: The deserialize function, should take a type specific to a datastore and return a record.
+    """
+
+    fields: list[VectorStoreField]
+    key_name: str = Field(default="", init=False)
+    container_mode: bool = False
+    collection_name: str | None = None
+    to_dict: ToDictFunctionProtocol | None = None
+    from_dict: FromDictFunctionProtocol | None = None
+    serialize: SerializeFunctionProtocol | None = None
+    deserialize: DeserializeFunctionProtocol | None = None
+
+    @property
+    def names(self) -> list[str]:
+        """Get the names of the fields."""
+        return [field.name for field in self.fields]
+
+    @property
+    def storage_names(self) -> list[str]:
+        """Get the names of the fields for storage."""
+        return [field.storage_name or field.name for field in self.fields]
+
+    @property
+    def key_field(self) -> VectorStoreField:
+        """Get the key field."""
+        return next((field for field in self.fields if field.name == self.key_name), None)  # type: ignore
+
+    @property
+    def key_field_storage_name(self) -> str:
+        """Get the key field storage name."""
+        return self.key_field.storage_name or self.key_field.name
+
+    @property
+    def vector_fields(self) -> list[VectorStoreField]:
+        """Get the vector fields."""
+        return [field for field in self.fields if field.field_type == FieldTypes.VECTOR]
+
+    @property
+    def data_fields(self) -> list[VectorStoreField]:
+        """Get the data fields."""
+        return [field for field in self.fields if field.field_type == FieldTypes.DATA]
+
+    @property
+    def vector_field_names(self) -> list[str]:
+        """Get the names of the vector fields."""
+        return [field.name for field in self.fields if field.field_type == FieldTypes.VECTOR]
+
+    @property
+    def data_field_names(self) -> list[str]:
+        """Get the names of all the data fields."""
+        return [field.name for field in self.fields if field.field_type == FieldTypes.DATA]
+
+    def try_get_vector_field(self, field_name: str | None = None) -> VectorStoreField | None:
+        """Try to get the vector field.
+
+        If the field_name is None, the first vector field is returned, or None when there are none.
+        Raises VectorStoreModelException when the named field is missing or is not a vector field.
+
+        Args:
+            field_name: The field name or storage name to look up.
+
+        Returns:
+            VectorStoreField | None: The vector field or None.
+        """
+        if field_name is None:
+            if len(self.vector_fields) == 0:
+                return None
+            return self.vector_fields[0]
+        for field in self.fields:
+            if field.name == field_name or field.storage_name == field_name:
+                if field.field_type == FieldTypes.VECTOR:
+                    return field
+                raise VectorStoreModelException(
+                    f"Field {field_name} is not a vector field, it is of type {field.field_type}."
+                )
+        raise VectorStoreModelException(f"Field {field_name} not found.")
+
+    def get_storage_names(self, include_vector_fields: bool = True, include_key_field: bool = True) -> list[str]:
+        """Get the names of the fields for the storage.
+
+        Args:
+            include_vector_fields: Whether to include vector fields.
+            include_key_field: Whether to include the key field.
+
+        Returns:
+            list[str]: The names of the fields.
+        """
+        return [
+            field.storage_name or field.name
+            for field in self.fields
+            if field.field_type == FieldTypes.DATA
+            or (field.field_type == FieldTypes.VECTOR and include_vector_fields)
+            or (field.field_type == FieldTypes.KEY and include_key_field)
+        ]
+
+    def get_names(self, include_vector_fields: bool = True, include_key_field: bool = True) -> list[str]:
+        """Get the names of the fields.
+
+        Args:
+            include_vector_fields: Whether to include vector fields.
+            include_key_field: Whether to include the key field.
+
+        Returns:
+            list[str]: The names of the fields.
+        """
+        return [
+            field.name
+            for field in self.fields
+            if field.field_type == FieldTypes.DATA
+            or (field.field_type == FieldTypes.VECTOR and include_vector_fields)
+            or (field.field_type == FieldTypes.KEY and include_key_field)
+        ]
+
+    def model_post_init(self, _: Any):
+        """Validate the fields.
+
+        Raises:
+            VectorStoreModelException: If there are no fields, or a field has an empty name.
+            VectorStoreModelException: If there is more than one key field.
+            VectorStoreModelException: If there is no key field.
+        """
+        if not self.fields:
+            raise VectorStoreModelException(
+                "There must be at least one field with a VectorStoreField annotation."
+            )
+        for field in self.fields:
+            if not field.name:
+                raise VectorStoreModelException("Field names must not be empty.")
+            if field.field_type == FieldTypes.KEY:
+                if self.key_name != "":
+                    raise VectorStoreModelException("Memory record definition must have exactly one key field.")
+                self.key_name = field.name
+        if not self.key_name:
+            raise VectorStoreModelException("Memory record definition must have exactly one key field.")
+
+
+# region: Decorator
+
+
+def _parse_vector_store_record_field_instance(record_field: VectorStoreField, field: Parameter) -> VectorStoreField:
+    """Fill in the name and, when unset, the type_ of an annotated field from its signature parameter."""
+    record_field.name = field.name
+    if not record_field.type_ and hasattr(field.annotation, "__origin__"):
+        property_type = field.annotation.__origin__
+        if record_field.field_type == FieldTypes.VECTOR:
+            if args := getattr(property_type, "__args__", None):
+                if NoneType in args and len(args) > 1:
+                    for arg in args:
+                        if arg is NoneType:
+                            continue
+                        # first non-None member decides: a one-argument generic yields its inner type
+                        if (
+                            (inner_args := getattr(arg, "__args__", None))
+                            and len(inner_args) == 1
+                            and inner_args[0] is not NoneType
+                        ):
+                            property_type = inner_args[0]
+                            break
+                        property_type = arg
+                        break
+                else:
+                    property_type = args[0]
+
+        else:
+            if (args := getattr(property_type, "__args__", None)) and NoneType in args and len(args) == 2:
+                property_type = next(arg for arg in args if arg is not NoneType)  # works for `None | X` too
+
+        record_field.type_ = str(property_type) if hasattr(property_type, "__args__") else property_type.__name__
+
+    return record_field
+
+
+def _parse_parameter_to_field(field: Parameter) -> VectorStoreField | None:
+    # first check if there are any annotations
+    if field.annotation is not _empty and hasattr(field.annotation, "__metadata__"):
+        for field_annotation in field.annotation.__metadata__:
+            if isinstance(field_annotation, VectorStoreField):
+                return _parse_vector_store_record_field_instance(field_annotation, field)
+    # This means there are no annotations or that all annotations are of other types.
+    # we will check if there is a default, otherwise this will cause a runtime error.
+    # because it will not be stored, and retrieving this object will fail without a default for this field.
+    if field.default is _empty:
+        raise VectorStoreModelException(
+            "Fields that do not have a VectorStoreField annotation must have a default value."
+        )
+    logger.debug(f'Field "{field.name}" does not have a VectorStoreField annotation, will not be part of the record.')
+    return None
+
+
+def _parse_signature_to_definition(
+    parameters: MappingProxyType[str, Parameter], collection_name: str | None = None
+) -> VectorStoreCollectionDefinition:
+    if len(parameters) == 0:
+        raise VectorStoreModelException(
+            "There must be at least one field in the datamodel. If you are using this with a @dataclass, "
+            "you might have inverted the order of the decorators, the vectorstoremodel decorator should be the top one."
+        )
+    fields = []
+    for param in parameters.values():
+        field = _parse_parameter_to_field(param)
+        if field:
+            fields.append(field)
+
+    return VectorStoreCollectionDefinition(
+        fields=fields,
+        collection_name=collection_name,
+    )
+
+
+@release_candidate
+def vectorstoremodel(
+    cls: type[TModel] | None = None,
+    collection_name: str | None = None,
+) -> type[TModel]:
+    """Returns the class as a vector store model.
+
+    Usable both bare (`@vectorstoremodel`) and with arguments (`@vectorstoremodel(collection_name=...)`).
+    There are three things being checked:
+    - The class must have at least one field with an annotation,
+        of type VectorStoreField.
+    - The class must have exactly one field with the field_type `key`.
+    - When creating a Vector Field, either supply the property type directly,
+    or make sure to set the property that you want the index to use first.
+
+
+    Args:
+        cls: The class to be decorated.
+        collection_name: The name of the collection to be used.
+            This is used to set the collection name in the VectorStoreCollectionDefinition.
+
+    Raises:
+        VectorStoreModelException: If there are no fields with a VectorStoreField annotation.
+        VectorStoreModelException: If there are fields with no name.
+        VectorStoreModelException: If there is no key field.
+    """
+
+    def wrap(cls: type[TModel]) -> type[TModel]:
+        # read the constructor signature; its parameters are parsed into the definition
+        cls_sig = signature(cls)
+        setattr(cls, "__kernel_vectorstoremodel__", True)
+        setattr(
+            cls,
+            "__kernel_vectorstoremodel_definition__",
+            _parse_signature_to_definition(cls_sig.parameters, collection_name),
+        )
+
+        return cls  # type: ignore
+
+    # See if we're being called as @vectorstoremodel or @vectorstoremodel().
+    if cls is None:
+        # We're called with parens.
+        return wrap  # type: ignore
+
+    # We're called as @vectorstoremodel without parens.
+    return wrap(cls)
+
+
+# region: VectorSearch Helpers
 
 
 def _get_collection_name_from_model(
@@ -80,21 +699,19 @@ def _get_collection_name_from_model(
     return None
 
 
-@dataclass
-class OrderBy:
-    """Order by class."""
-
-    field: str
-    ascending: bool = Field(default=True)
-
-
-@dataclass
+@pyd_dataclass
 class GetFilteredRecordOptions:
-    """Options for filtering records."""
+    """Options for filtering records.
+
+    Args:
+        top: The maximum number of records to return.
+        skip: The number of records to skip.
+        order_by: A dictionary with fields names and a bool, True means ascending, False means descending.
+    """
 
     top: int = 10
     skip: int = 0
-    order_by: OptionalOneOrMany[OrderBy] = None
+    order_by: Mapping[str, bool] | None = None
 
 
 class LambdaVisitor(NodeVisitor, Generic[TFilters]):
@@ -513,7 +1130,7 @@ async def _add_embedding_to_object(
 
 
 @release_candidate
-class VectorStoreRecordCollection(VectorStoreRecordHandler[TKey, TModel], Generic[TKey, TModel]):
+class VectorStoreCollection(VectorStoreRecordHandler[TKey, TModel], Generic[TKey, TModel]):
     """Base class for a vector store record collection."""
 
     collection_name: str = ""
@@ -734,7 +1351,7 @@ async def get(
         self,
         top: int = ...,
         skip: int = ...,
-        order_by: OptionalOneOrMany[OrderBy | dict[str, Any] | list[dict[str, Any]]] = None,
+        order_by: OneOrMany[str] | dict[str, bool] | None = None,
         include_vectors: bool = False,
         **kwargs: Any,
     ) -> Sequence[TModel] | None:
@@ -749,10 +1366,11 @@ async def get(
                 Only used if keys are not provided.
             skip: The number of records to skip.
                 Only used if keys are not provided.
-            order_by: The order by clause, this is a list of dicts with the field name and ascending flag,
-                (default is True, which means ascending).
-                Only used if keys are not provided.
-                example: {"field": "hotel_id", "ascending": True}
+            order_by: The order by clause,
+                this can be a string, a list of strings or a dict,
+                when passing strings, they are assumed to be ascending.
+                Otherwise, use the value in the dict to set ascending (True) or descending (False).
+                example: {"field_name": True} or ["field_name", {"field_name2": False}].
             **kwargs: Additional arguments.
 
         Returns:
@@ -858,8 +1476,32 @@ async def get(
                 keys = key
         if not keys:
             if kwargs:
+                get_args = {}  # only top, skip and order_by are forwarded to the options
+                kw_order_by: OneOrList[str] | dict[str, bool] | None = kwargs.pop("order_by", None)  # type: ignore
+                if "top" in kwargs:
+                    get_args["top"] = kwargs.pop("top", None)
+                if "skip" in kwargs:
+                    get_args["skip"] = kwargs.pop("skip", None)
+                order_by: dict[str, bool] | None = None
+                if kw_order_by is not None:
+                    order_by = {}  # normalized form: field name -> ascending flag
+                    if isinstance(kw_order_by, str):
+                        order_by[kw_order_by] = True  # a bare field name means ascending
+                    elif isinstance(kw_order_by, dict):
+                        order_by = kw_order_by
+                    elif isinstance(kw_order_by, list):
+                        for item in kw_order_by:
+                            if isinstance(item, str):
+                                order_by[item] = True
+                            else:
+                                order_by.update(item)
+                    else:
+                        raise VectorStoreOperationException(
+                            f"Invalid order_by type: {type(kw_order_by)}, expected str, dict or list."
+                        )
+                    get_args["order_by"] = order_by
                 try:
-                    options = GetFilteredRecordOptions(**kwargs)
+                    options = GetFilteredRecordOptions(**get_args)
                 except Exception as exc:
                     raise VectorStoreOperationException(f"Error creating options: {exc}") from exc
             else:
@@ -933,7 +1575,7 @@ def get_collection(
         collection_name: str | None = None,
         embedding_generator: EmbeddingGeneratorBase | None = None,
         **kwargs: Any,
-    ) -> "VectorStoreRecordCollection":
+    ) -> "VectorStoreCollection":
         """Get a vector store record collection instance tied to this store.
 
         Args:
@@ -1510,6 +2152,252 @@ async def search_wrapper(**kwargs: Any) -> Sequence[str]:
 
         return KernelFunctionFromMethod(
             method=search_wrapper,
-            parameters=TextSearch._default_parameter_metadata() if parameters is None else parameters,
-            return_parameter=return_parameter or TextSearch._default_return_parameter_metadata(),
+            parameters=DEFAULT_PARAMETER_METADATA if parameters is None else parameters,
+            return_parameter=return_parameter or DEFAULT_RETURN_PARAMETER_METADATA,
         )
+
+
+@runtime_checkable
+class VectorStoreCollectionProtocol(Protocol):
+    """Protocol to check that an object supports the vector store collection operations."""
+
+    collection_name: str
+    record_type: object
+    definition: VectorStoreCollectionDefinition
+    supported_key_types: ClassVar[set[str]]
+    supported_vector_types: ClassVar[set[str]]
+    embedding_generator: EmbeddingGeneratorBase | None = None
+
+    async def ensure_collection_exists(self, **kwargs: Any) -> bool:
+        """Create the collection in the service if it does not exist.
+
+        First uses does_collection_exist to check if it exists; if it does, returns False.
+        Otherwise, creates the collection and returns True.
+
+        Args:
+            **kwargs: Additional arguments.
+
+        Returns:
+            bool: True if the collection was created, False if it already exists.
+        """
+        ...
+
+    async def create_collection(self, **kwargs: Any) -> None:
+        """Create the collection in the service.
+
+        Args:
+            **kwargs: Additional arguments.
+
+        Raises:
+            Make sure the implementation of this function raises relevant exceptions with good descriptions.
+        """
+        ...
+
+    async def does_collection_exist(self, **kwargs: Any) -> bool:
+        """Check if the collection exists.
+
+        Args:
+            **kwargs: Additional arguments.
+
+        Returns:
+            bool: True if the collection exists, False otherwise.
+
+        Raises:
+            Make sure the implementation of this function raises relevant exceptions with good descriptions.
+        """
+        ...
+
+    async def ensure_collection_deleted(self, **kwargs: Any) -> None:
+        """Delete the collection.
+
+        Args:
+            **kwargs: Additional arguments.
+        """
+        ...
+
+    async def get(
+        self,
+        key: Any = None,
+        keys: Sequence[Any] | None = None,
+        include_vectors: bool = False,
+        top: int | None = None,
+        skip: int | None = None,
+        order_by: OneOrMany[str] | dict[str, bool] | None = None,
+        **kwargs: Any,
+    ) -> OptionalOneOrList[Any]:
+        """Get a batch of records whose keys exist in the collection, i.e. keys that do not exist are ignored.
+
+        Args:
+            key: The key to get.
+            keys: The keys to get, if keys are provided, key is ignored.
+            include_vectors: Include the vectors in the response. Default is False.
+                Some vector stores do not support retrieving without vectors, even when set to false.
+                Some vector stores have specific parameters to control that behavior, when
+                that parameter is set, include_vectors is ignored.
+            top: The number of records to return. Only used if keys are not provided.
+            skip: The number of records to skip. Only used if keys are not provided.
+            order_by: The order by clause,
+                this can be a string, a list of strings or a dict,
+                when passing strings, they are assumed to be ascending.
+                Otherwise, use the value in the dict to set ascending (True) or descending (False).
+                example: {"field_name": True} or ["field_name", {"field_name2": False}].
+            **kwargs: Additional arguments.
+
+        Returns:
+            The records, either a list of TModel or the container type.
+
+        Raises:
+            VectorStoreOperationException: If an error occurs during the get.
+            VectorStoreModelDeserializationException: If an error occurs during deserialization.
+        """
+        ...
+
+    async def upsert(
+        self,
+        records: OneOrMany[Any],
+        **kwargs: Any,
+    ) -> OneOrMany[Any]:
+        """Upsert one or more records.
+
+        If the key of the record already exists, the existing record will be updated.
+        If the key does not exist, a new record will be created.
+
+        Args:
+            records: The records to upsert, can be a single record, a list of records, or a single container.
+                If a single record is passed, a single key is returned, instead of a list of keys.
+            **kwargs: Additional arguments.
+
+        Returns:
+            OneOrMany[Any]: The keys of the upserted records.
+
+        Raises:
+            VectorStoreModelSerializationException: If an error occurs during serialization.
+            VectorStoreOperationException: If an error occurs during upserting.
+        """
+        ...
+
+    async def delete(self, keys: OneOrMany[Any], **kwargs: Any) -> None:
+        """Delete one or more records by key.
+
+        An exception will be raised at the end if any record does not exist.
+
+        Args:
+            keys: The key or keys to be deleted.
+            **kwargs: Additional arguments.
+
+        Raises:
+            VectorStoreOperationException: If an error occurs during deletion or a record does not exist.
+        """
+        ...
+
+
+@runtime_checkable
+class VectorSearchProtocol(VectorStoreCollectionProtocol, Protocol):
+    """Protocol to check that a collection supports vector search."""
+
+    supported_search_types: ClassVar[set[SearchType]]
+
+    async def search(
+        self,
+        values: Any = None,
+        *,
+        vector: Sequence[float | int] | None = None,
+        vector_property_name: str | None = None,
+        filter: OptionalOneOrList[Callable | str] = None,
+        top: int = 3,
+        skip: int = 0,
+        include_total_count: bool = False,
+        include_vectors: bool = False,
+        **kwargs: Any,
+    ) -> KernelSearchResults[VectorSearchResult]:
+        """Search the vector store for records that match the given value and filter.
+
+        Args:
+            values: The values to search for. These will be vectorized,
+                either by the store or using the provided generator.
+            vector: The vector to search for, if not provided, the values will be used to generate a vector.
+            vector_property_name: The name of the vector property to use for the search.
+            filter: The filter to apply to the search.
+            top: The number of results to return.
+            skip: The number of results to skip.
+            include_total_count: Whether to include the total count of results.
+            include_vectors: Whether to include the vectors in the results.
+            **kwargs: If options are not set, this is used to create them;
+                they are passed on to the inner search method.
+
+        Returns:
+            The search results.
+
+        Raises:
+            VectorSearchExecutionException: If an error occurs during the search.
+            VectorStoreModelDeserializationException: If an error occurs during deserialization.
+            VectorSearchOptionsException: If the search options are invalid.
+            VectorStoreOperationNotSupportedException: If the search type is not supported.
+        """
+        ...
+
+    async def hybrid_search(
+        self,
+        values: Any,
+        *,
+        vector: list[float | int] | None = None,
+        vector_property_name: str | None = None,
+        additional_property_name: str | None = None,
+        filter: OptionalOneOrList[Callable | str] = None,
+        top: int = 3,
+        skip: int = 0,
+        include_total_count: bool = False,
+        include_vectors: bool = False,
+        **kwargs: Any,
+    ) -> KernelSearchResults[VectorSearchResult]:
+        """Search the vector store for records that match the given values and filter using hybrid search.
+
+        Args:
+            values: The values to search for.
+            vector: The vector to search for, if not provided, the values will be used to generate a vector.
+            vector_property_name: The name of the vector field to use for the search.
+            additional_property_name: The name of the additional property field to use for the search.
+            filter: The filter to apply to the search.
+            top: The number of results to return.
+            skip: The number of results to skip.
+            include_total_count: Whether to include the total count of results.
+            include_vectors: Whether to include the vectors in the results.
+            **kwargs: If options are not set, this is used to create them;
+                they are passed on to the inner search method.
+
+        Returns:
+            The search results.
+
+        Raises:
+            VectorSearchExecutionException: If an error occurs during the search.
+            VectorStoreModelDeserializationException: If an error occurs during deserialization.
+            VectorSearchOptionsException: If the search options are invalid.
+            VectorStoreOperationNotSupportedException: If the search type is not supported.
+        """
+        ...
+
+
+__all__ = [  # names exported by a star-import of this module
+    "DEFAULT_DESCRIPTION",
+    "DEFAULT_FUNCTION_NAME",
+    "DEFAULT_PARAMETER_METADATA",
+    "DEFAULT_RETURN_PARAMETER_METADATA",
+    "DISTANCE_FUNCTION_DIRECTION_HELPER",
+    "DistanceFunction",
+    "DynamicFilterFunction",
+    "FieldTypes",
+    "IndexKind",
+    "KernelSearchResults",
+    "SearchType",
+    "VectorSearch",
+    "VectorSearchProtocol",
+    "VectorSearchResult",
+    "VectorStore",
+    "VectorStoreCollection",
+    "VectorStoreCollectionDefinition",
+    "VectorStoreCollectionProtocol",
+    "VectorStoreField",
+    "create_options",
+    "default_dynamic_filter_function",
+    "vectorstoremodel",
+]
diff --git a/python/tests/conftest.py b/python/tests/conftest.py
index 30e20e578c2a..dfe935102687 100644
--- a/python/tests/conftest.py
+++ b/python/tests/conftest.py
@@ -12,7 +12,7 @@
 from pytest import fixture
 
 from semantic_kernel.agents import Agent, DeclarativeSpecMixin, register_agent_type
-from semantic_kernel.data.definitions import VectorStoreCollectionDefinition, VectorStoreField, vectorstoremodel
+from semantic_kernel.data.vectors import VectorStoreCollectionDefinition, VectorStoreField, vectorstoremodel
 
 if TYPE_CHECKING:
     from semantic_kernel import Kernel
@@ -380,7 +380,7 @@ def record_type(index_kind: str, distance_function: str, vector_property_type: s
     class DataModelClass(BaseModel):
         content: Annotated[str, VectorStoreField("data")]
         vector: Annotated[
-            str | list[float] | None,
+            list[float] | str | None,
             VectorStoreField(
                 "vector",
                 type=vector_property_type,
diff --git a/python/tests/integration/memory/azure_cosmos_db/conftest.py b/python/tests/integration/memory/azure_cosmos_db/conftest.py
index 9276453fdcc1..4162604a3076 100644
--- a/python/tests/integration/memory/azure_cosmos_db/conftest.py
+++ b/python/tests/integration/memory/azure_cosmos_db/conftest.py
@@ -8,7 +8,7 @@
 from pydantic import BaseModel
 from pytest import fixture
 
-from semantic_kernel.data.definitions import VectorStoreField, vectorstoremodel
+from semantic_kernel.data.vectors import VectorStoreField, vectorstoremodel
 
 
 @fixture
diff --git a/python/tests/integration/memory/azure_cosmos_db/test_azure_cosmos_db_no_sql.py b/python/tests/integration/memory/azure_cosmos_db/test_azure_cosmos_db_no_sql.py
index 7e63ed321d5c..8b136a06bcf2 100644
--- a/python/tests/integration/memory/azure_cosmos_db/test_azure_cosmos_db_no_sql.py
+++ b/python/tests/integration/memory/azure_cosmos_db/test_azure_cosmos_db_no_sql.py
@@ -9,7 +9,7 @@
 from azure.cosmos.aio import CosmosClient
 from azure.cosmos.partition_key import PartitionKey
 
-from semantic_kernel.connectors.memory.azure_cosmos_db import AzureCosmosDBNoSQLCompositeKey, CosmosNoSqlStore
+from semantic_kernel.connectors.memory.azure_cosmos_db import CosmosNoSqlCompositeKey, CosmosNoSqlStore
 from semantic_kernel.data.vectors import VectorStore
 from semantic_kernel.exceptions.memory_connector_exceptions import MemoryConnectorException
 from tests.integration.memory.vector_store_test_base import VectorStoreTestBase
@@ -90,9 +90,7 @@ async def test_custom_partition_key(
                 partition_key=PartitionKey(path="/product_type"),
             )
 
-            composite_key = AzureCosmosDBNoSQLCompositeKey(
-                key=data_record["id"], partition_key=data_record["product_type"]
-            )
+            composite_key = CosmosNoSqlCompositeKey(key=data_record["id"], partition_key=data_record["product_type"])
 
             # Upsert
             await collection.create_collection()
diff --git a/python/tests/integration/memory/postgres/test_postgres_int.py b/python/tests/integration/memory/postgres/test_postgres_int.py
index 97ef8971bcab..566f8b3ad937 100644
--- a/python/tests/integration/memory/postgres/test_postgres_int.py
+++ b/python/tests/integration/memory/postgres/test_postgres_int.py
@@ -11,10 +11,14 @@
 from pydantic import BaseModel
 
 from semantic_kernel.connectors.memory.postgres import PostgresCollection, PostgresSettings, PostgresStore
-from semantic_kernel.data import VectorStoreCollectionDefinition, VectorStoreField
-from semantic_kernel.data.const import DistanceFunction, IndexKind
-from semantic_kernel.data.definitions import vectorstoremodel
-from semantic_kernel.data.vectors import VectorSearchOptions
+from semantic_kernel.data.vectors import (
+    DistanceFunction,
+    IndexKind,
+    VectorSearchOptions,
+    VectorStoreCollectionDefinition,
+    VectorStoreField,
+    vectorstoremodel,
+)
 from semantic_kernel.exceptions.memory_connector_exceptions import (
     MemoryConnectorConnectionException,
     MemoryConnectorInitializationError,
diff --git a/python/tests/integration/memory/test_vector_store.py b/python/tests/integration/memory/test_vector_store.py
index 84a6dc69d065..e4312f5ecbb9 100644
--- a/python/tests/integration/memory/test_vector_store.py
+++ b/python/tests/integration/memory/test_vector_store.py
@@ -9,7 +9,7 @@
 import pytest
 
 from semantic_kernel.connectors.memory.redis import RedisCollectionTypes
-from semantic_kernel.data import VectorStore
+from semantic_kernel.data.vectors import VectorStore
 from semantic_kernel.exceptions import MemoryConnectorConnectionException
 from tests.integration.memory.data_records import RAW_RECORD_ARRAY, RAW_RECORD_LIST
 from tests.integration.memory.vector_store_test_base import VectorStoreTestBase
diff --git a/python/tests/integration/memory/vector_store_test_base.py b/python/tests/integration/memory/vector_store_test_base.py
index 3b07392f9344..c383bc65d2c6 100644
--- a/python/tests/integration/memory/vector_store_test_base.py
+++ b/python/tests/integration/memory/vector_store_test_base.py
@@ -4,7 +4,7 @@
 
 import pytest
 
-from semantic_kernel.data import VectorStore
+from semantic_kernel.data.vectors import VectorStore
 
 
 def get_redis_store():
diff --git a/python/tests/unit/connectors/memory/azure_cosmos_db/test_azure_cosmos_db_mongodb_collection.py b/python/tests/unit/connectors/memory/azure_cosmos_db/test_azure_cosmos_db_mongodb_collection.py
index 04d8b412d979..403c95d54fdc 100644
--- a/python/tests/unit/connectors/memory/azure_cosmos_db/test_azure_cosmos_db_mongodb_collection.py
+++ b/python/tests/unit/connectors/memory/azure_cosmos_db/test_azure_cosmos_db_mongodb_collection.py
@@ -6,7 +6,7 @@
 from pymongo import AsyncMongoClient
 
 from semantic_kernel.connectors.memory.azure_cosmos_db import CosmosMongoCollection
-from semantic_kernel.data import VectorStoreCollectionDefinition, VectorStoreField
+from semantic_kernel.data.vectors import VectorStoreCollectionDefinition, VectorStoreField
 from semantic_kernel.exceptions import VectorStoreInitializationException
 
 
diff --git a/python/tests/unit/connectors/memory/azure_cosmos_db/test_azure_cosmos_db_no_sql_collection.py b/python/tests/unit/connectors/memory/azure_cosmos_db/test_azure_cosmos_db_no_sql_collection.py
index 6242e212eca0..aff9f1477cb9 100644
--- a/python/tests/unit/connectors/memory/azure_cosmos_db/test_azure_cosmos_db_no_sql_collection.py
+++ b/python/tests/unit/connectors/memory/azure_cosmos_db/test_azure_cosmos_db_no_sql_collection.py
@@ -13,8 +13,11 @@
     _create_default_indexing_policy_nosql,
     _create_default_vector_embedding_policy,
 )
-from semantic_kernel.exceptions import VectorStoreInitializationException
-from semantic_kernel.exceptions.vector_store_exceptions import VectorStoreModelException, VectorStoreOperationException
+from semantic_kernel.exceptions import (
+    VectorStoreInitializationException,
+    VectorStoreModelException,
+    VectorStoreOperationException,
+)
 
 
 def test_azure_cosmos_db_no_sql_collection_init(
@@ -300,7 +303,6 @@ async def test_azure_cosmos_db_no_sql_collection_create_collection_allow_custom_
     [
         ("hnsw", "cosine_similarity", "float"),  # unsupported index kind
         ("flat", "hamming", "float"),  # unsupported distance function
-        ("flat", "cosine_similarity", "double"),  # unsupported property type
     ],
 )
 async def test_azure_cosmos_db_no_sql_collection_create_collection_unsupported_vector_field_property(
diff --git a/python/tests/unit/connectors/memory/test_azure_ai_search.py b/python/tests/unit/connectors/memory/test_azure_ai_search.py
index 31cb21af436d..a8dbe2bb7e9f 100644
--- a/python/tests/unit/connectors/memory/test_azure_ai_search.py
+++ b/python/tests/unit/connectors/memory/test_azure_ai_search.py
@@ -201,11 +201,13 @@ async def test_get(collection, mock_get):
 @mark.parametrize(
     "order_by, ordering",
     [
-        param({"field": "id"}, ["id"], id="single id"),
-        param({"field": "id", "ascending": True}, ["id"], id="ascending id"),
-        param({"field": "id", "ascending": False}, ["id desc"], id="descending id"),
-        param([{"field": "id", "ascending": True}], ["id"], id="ascending id list"),
-        param([{"field": "id"}, {"field": "content"}], ["id", "content"], id="multiple"),
+        param("id", ["id"], id="single id"),
+        param({"id": True}, ["id"], id="ascending id"),
+        param({"id": False}, ["id desc"], id="descending id"),
+        param(["id"], ["id"], id="ascending id list"),
+        param(["id", "content"], ["id", "content"], id="multiple"),
+        param([{"id": True}, {"content": False}], ["id", "content desc"], id="multiple desc"),
+        param(["id", {"content": False}], ["id", "content desc"], id="multiple mix"),
     ],
 )
 async def test_get_without_key(collection, mock_get, mock_search, order_by, ordering):
diff --git a/python/tests/unit/connectors/memory/test_faiss.py b/python/tests/unit/connectors/memory/test_faiss.py
index 2db5919e6cd6..af1147279dd4 100644
--- a/python/tests/unit/connectors/memory/test_faiss.py
+++ b/python/tests/unit/connectors/memory/test_faiss.py
@@ -4,7 +4,7 @@
 from pytest import fixture, mark, raises
 
 from semantic_kernel.connectors.memory.faiss import FaissCollection, FaissStore
-from semantic_kernel.data import DistanceFunction, VectorStoreCollectionDefinition, VectorStoreField
+from semantic_kernel.data.vectors import DistanceFunction, VectorStoreCollectionDefinition, VectorStoreField
 from semantic_kernel.exceptions import VectorStoreInitializationException
 
 
diff --git a/python/tests/unit/connectors/memory/test_in_memory.py b/python/tests/unit/connectors/memory/test_in_memory.py
index e49ada897269..4706a08bb3f5 100644
--- a/python/tests/unit/connectors/memory/test_in_memory.py
+++ b/python/tests/unit/connectors/memory/test_in_memory.py
@@ -3,7 +3,7 @@
 from pytest import fixture, mark, raises
 
 from semantic_kernel.connectors.memory.in_memory import InMemoryCollection, InMemoryStore
-from semantic_kernel.data.const import DistanceFunction
+from semantic_kernel.data.vectors import DistanceFunction
 from semantic_kernel.exceptions.vector_store_exceptions import VectorStoreOperationException
 
 
diff --git a/python/tests/unit/connectors/memory/test_postgres_store.py b/python/tests/unit/connectors/memory/test_postgres_store.py
index d7e129c74b0f..889465d67768 100644
--- a/python/tests/unit/connectors/memory/test_postgres_store.py
+++ b/python/tests/unit/connectors/memory/test_postgres_store.py
@@ -17,8 +17,7 @@
     PostgresSettings,
     PostgresStore,
 )
-from semantic_kernel.data.const import DistanceFunction, IndexKind
-from semantic_kernel.data.definitions import VectorStoreField, vectorstoremodel
+from semantic_kernel.data.vectors import DistanceFunction, IndexKind, VectorStoreField, vectorstoremodel
 
 
 @fixture(scope="function")
diff --git a/python/tests/unit/connectors/memory/test_qdrant.py b/python/tests/unit/connectors/memory/test_qdrant.py
index db278c8756fb..d0b71ea3a27f 100644
--- a/python/tests/unit/connectors/memory/test_qdrant.py
+++ b/python/tests/unit/connectors/memory/test_qdrant.py
@@ -7,8 +7,7 @@
 from qdrant_client.models import Datatype, Distance, FieldCondition, MatchValue, VectorParams
 
 from semantic_kernel.connectors.memory.qdrant import QdrantCollection, QdrantStore
-from semantic_kernel.data.const import DistanceFunction
-from semantic_kernel.data.definitions import VectorStoreField
+from semantic_kernel.data.vectors import DistanceFunction, VectorStoreField
 from semantic_kernel.exceptions import (
     VectorSearchExecutionException,
     VectorStoreInitializationException,
diff --git a/python/tests/unit/connectors/memory/test_sql_server.py b/python/tests/unit/connectors/memory/test_sql_server.py
index b287064c4e54..674b957b29fb 100644
--- a/python/tests/unit/connectors/memory/test_sql_server.py
+++ b/python/tests/unit/connectors/memory/test_sql_server.py
@@ -21,9 +21,7 @@
     _build_select_query,
     _build_select_table_names_query,
 )
-from semantic_kernel.data.const import DistanceFunction, IndexKind
-from semantic_kernel.data.definitions import VectorStoreField
-from semantic_kernel.data.vectors import VectorSearchOptions
+from semantic_kernel.data.vectors import DistanceFunction, IndexKind, VectorSearchOptions, VectorStoreField
 from semantic_kernel.exceptions.vector_store_exceptions import (
     VectorStoreInitializationException,
     VectorStoreOperationException,
diff --git a/python/tests/unit/connectors/search/test_brave_search.py b/python/tests/unit/connectors/search/test_brave_search.py
index 5f53d7dc74ce..59a6b4db3da3 100644
--- a/python/tests/unit/connectors/search/test_brave_search.py
+++ b/python/tests/unit/connectors/search/test_brave_search.py
@@ -6,7 +6,8 @@
 import pytest
 
 from semantic_kernel.connectors.search.brave import BraveSearch, BraveSearchResponse, BraveWebPage, BraveWebPages
-from semantic_kernel.data.search import KernelSearchResults, SearchOptions, TextSearchResult
+from semantic_kernel.data._search import KernelSearchResults, SearchOptions
+from semantic_kernel.data.text_search import TextSearchResult
 from semantic_kernel.exceptions import ServiceInitializationError, ServiceInvalidRequestError
 
 
diff --git a/python/tests/unit/connectors/search/test_google_search.py b/python/tests/unit/connectors/search/test_google_search.py
index a1de242f12f8..d61d61bd76e9 100644
--- a/python/tests/unit/connectors/search/test_google_search.py
+++ b/python/tests/unit/connectors/search/test_google_search.py
@@ -11,7 +11,7 @@
     GoogleSearchResponse,
     GoogleSearchResult,
 )
-from semantic_kernel.data.search import TextSearchResult
+from semantic_kernel.data.text_search import TextSearchResult
 from semantic_kernel.exceptions import ServiceInitializationError, ServiceInvalidRequestError
 
 
diff --git a/python/tests/unit/data/conftest.py b/python/tests/unit/data/conftest.py
index 3533483f4610..5d9dd478ddd1 100644
--- a/python/tests/unit/data/conftest.py
+++ b/python/tests/unit/data/conftest.py
@@ -10,23 +10,26 @@
 from pydantic import BaseModel, Field
 from pytest import fixture
 
-from semantic_kernel.data import (
+from semantic_kernel.data.vectors import (
     KernelSearchResults,
+    SearchType,
+    VectorSearch,
+    VectorSearchResult,
+    VectorStoreCollection,
     VectorStoreCollectionDefinition,
-    VectorStoreRecordCollection,
+    VectorStoreField,
     vectorstoremodel,
 )
-from semantic_kernel.data.definitions import VectorStoreField
-from semantic_kernel.data.vectors import VectorSearch, VectorSearchResult
 from semantic_kernel.kernel_types import OptionalOneOrMany
 
 
 @fixture
 def DictVectorStoreRecordCollection() -> type[VectorSearch]:
     class DictVectorStoreRecordCollection(
-        VectorStoreRecordCollection[str, Any],
+        VectorStoreCollection[str, Any],
         VectorSearch[str, Any],
     ):
+        supported_search_types = {SearchType.VECTOR}
         inner_storage: dict[str, Any] = Field(default_factory=dict)
 
         async def _inner_delete(self, keys: Sequence[str], **kwargs: Any) -> None:
diff --git a/python/tests/unit/data/test_filter.py b/python/tests/unit/data/test_filter.py
deleted file mode 100644
index 825390d304d6..000000000000
--- a/python/tests/unit/data/test_filter.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright (c) Microsoft. All rights reserved.
-
-from semantic_kernel.data.vectors import VectorSearchOptions
-
-
-def test_lambda_filter():
-    options = VectorSearchOptions(filter=lambda x: x.tag == "value")
-    assert options.filter is not None
-
-
-def test_lambda_filter_str():
-    options = VectorSearchOptions(filter='lambda x: x.tag == "value"')
-    assert options.filter is not None
diff --git a/python/tests/unit/data/test_text_search.py b/python/tests/unit/data/test_text_search.py
index 9900a67e88de..fb8cf0acc738 100644
--- a/python/tests/unit/data/test_text_search.py
+++ b/python/tests/unit/data/test_text_search.py
@@ -8,9 +8,15 @@
 from pydantic import BaseModel
 
 from semantic_kernel import Kernel
-from semantic_kernel.data import TextSearch
-from semantic_kernel.data.const import DEFAULT_DESCRIPTION, DEFAULT_FUNCTION_NAME
-from semantic_kernel.data.search import KernelSearchResults, SearchOptions, TextSearchResult, create_options
+from semantic_kernel.data.text_search import (
+    DEFAULT_DESCRIPTION,
+    DEFAULT_FUNCTION_NAME,
+    KernelSearchResults,
+    SearchOptions,
+    TextSearch,
+    TextSearchResult,
+    create_options,
+)
 from semantic_kernel.data.vectors import VectorSearchOptions
 from semantic_kernel.exceptions import TextSearchException
 from semantic_kernel.functions import KernelArguments, KernelParameterMetadata
diff --git a/python/tests/unit/data/test_vector_search_base.py b/python/tests/unit/data/test_vector_search_base.py
index 66256bd77460..a4c3d5de817d 100644
--- a/python/tests/unit/data/test_vector_search_base.py
+++ b/python/tests/unit/data/test_vector_search_base.py
@@ -3,13 +3,14 @@
 
 import pytest
 
-from semantic_kernel.data.vectors import VectorSearch, VectorSearchOptions
+from semantic_kernel.data.vectors import VectorSearch, VectorSearchOptions, VectorSearchProtocol
 
 
 async def test_search(vector_store_record_collection: VectorSearch):
+    assert isinstance(vector_store_record_collection, VectorSearchProtocol)
     record = {"id": "test_id", "content": "test_content", "vector": [1.0, 2.0, 3.0]}
     await vector_store_record_collection.upsert(record)
-    results = await vector_store_record_collection._inner_search(options=VectorSearchOptions(), keywords="test_content")
+    results = await vector_store_record_collection.search(vector=[1.0, 2.0, 3.0])
     records = [rec async for rec in results.results]
     assert records[0].record == record
 
diff --git a/python/tests/unit/data/test_vector_store_model_decorator.py b/python/tests/unit/data/test_vector_store_model_decorator.py
index 3e6cbdccf3e7..23ce3ceab5f2 100644
--- a/python/tests/unit/data/test_vector_store_model_decorator.py
+++ b/python/tests/unit/data/test_vector_store_model_decorator.py
@@ -9,8 +9,7 @@
 from pydantic.dataclasses import dataclass as pydantic_dataclass
 from pytest import raises
 
-from semantic_kernel.data import VectorStoreCollectionDefinition, VectorStoreField
-from semantic_kernel.data.definitions import vectorstoremodel
+from semantic_kernel.data.vectors import VectorStoreCollectionDefinition, VectorStoreField, vectorstoremodel
 from semantic_kernel.exceptions import VectorStoreModelException
 
 
diff --git a/python/tests/unit/data/test_vector_store_record_collection.py b/python/tests/unit/data/test_vector_store_record_collection.py
index 12bc3868faeb..ff6f1dcffe0a 100644
--- a/python/tests/unit/data/test_vector_store_record_collection.py
+++ b/python/tests/unit/data/test_vector_store_record_collection.py
@@ -6,7 +6,7 @@
 from pandas import DataFrame
 from pytest import mark, raises
 
-from semantic_kernel.data.definitions import SerializeMethodProtocol, ToDictMethodProtocol
+from semantic_kernel.data.vectors import SerializeMethodProtocol, ToDictMethodProtocol
 from semantic_kernel.exceptions import (
     VectorStoreModelDeserializationException,
     VectorStoreModelSerializationException,
@@ -276,7 +276,7 @@ async def test_get_fail_multiple(DictVectorStoreRecordCollection, definition):
     await vector_store_record_collection.upsert(record)
     assert len(vector_store_record_collection.inner_storage) == 1
     with (
-        patch("semantic_kernel.data.vectors.VectorStoreRecordCollection.deserialize") as deserialize_mock,
+        patch("semantic_kernel.data.vectors.VectorStoreCollection.deserialize") as deserialize_mock,
         raises(
             VectorStoreModelDeserializationException, match="Error deserializing record, multiple records returned:"
         ),
diff --git a/python/tests/unit/data/test_vector_store_record_definition.py b/python/tests/unit/data/test_vector_store_record_definition.py
index 7f88bcfffa4c..4b137f53fa0f 100644
--- a/python/tests/unit/data/test_vector_store_record_definition.py
+++ b/python/tests/unit/data/test_vector_store_record_definition.py
@@ -2,7 +2,7 @@
 
 from pytest import raises
 
-from semantic_kernel.data import VectorStoreCollectionDefinition, VectorStoreField
+from semantic_kernel.data.vectors import VectorStoreCollectionDefinition, VectorStoreField
 from semantic_kernel.exceptions import VectorStoreModelException