From ca2b6ab62dc98a4253ffcb14366e24b4f66a195f Mon Sep 17 00:00:00 2001 From: Harutaka Kawamura Date: Tue, 8 Apr 2025 10:07:38 +0900 Subject: [PATCH 1/3] Remove regex validation for `index_name` (#95) Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> --- .../langchain/src/databricks_langchain/vectorstores.py | 7 ++----- .../langchain/tests/unit_tests/test_vectorstores.py | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/integrations/langchain/src/databricks_langchain/vectorstores.py b/integrations/langchain/src/databricks_langchain/vectorstores.py index 48c01553..ae24f7b4 100644 --- a/integrations/langchain/src/databricks_langchain/vectorstores.py +++ b/integrations/langchain/src/databricks_langchain/vectorstores.py @@ -2,7 +2,6 @@ import asyncio import logging -import re import uuid from functools import partial from typing import ( @@ -35,7 +34,6 @@ _DIRECT_ACCESS_ONLY_MSG = "`%s` is only supported for direct-access index." _NON_MANAGED_EMB_ONLY_MSG = "`%s` is not supported for index with Databricks-managed embeddings." -_INDEX_NAME_PATTERN = re.compile(r"^[a-zA-Z0-9_]+\.[a-zA-Z0-9_]+\.[a-zA-Z0-9_]+$") class DatabricksVectorSearch(VectorStore): @@ -227,10 +225,9 @@ def __init__( workspace_client: Optional[WorkspaceClient] = None, client_args: Optional[Dict[str, Any]] = None, ): - if not (isinstance(index_name, str) and _INDEX_NAME_PATTERN.match(index_name)): + if not isinstance(index_name, str): raise ValueError( - "The `index_name` parameter must be a string in the format " - f"'catalog.schema.index'. Received: {index_name}" + f"The `index_name` parameter must be a string, but got {type(index_name).__name__}." ) try: diff --git a/integrations/langchain/tests/unit_tests/test_vectorstores.py b/integrations/langchain/tests/unit_tests/test_vectorstores.py index 7aab6b45..dd92be1f 100644 --- a/integrations/langchain/tests/unit_tests/test_vectorstores.py +++ b/integrations/langchain/tests/unit_tests/test_vectorstores.py @@ -52,7 +52,7 @@ def test_init_with_endpoint_name() -> None: assert vectorsearch.index.describe() == INDEX_DETAILS[DELTA_SYNC_INDEX] -@pytest.mark.parametrize("index_name", [None, "invalid", 123, MagicMock(spec=VectorSearchIndex)]) +@pytest.mark.parametrize("index_name", [None, 123, MagicMock(spec=VectorSearchIndex)]) def test_init_fail_invalid_index_name(index_name) -> None: with pytest.raises(ValueError, match="The `index_name` parameter must be"): DatabricksVectorSearch(index_name=index_name) From 02fa39ec11d172455deee84828d893152b1ba79e Mon Sep 17 00:00:00 2001 From: Harutaka Kawamura Date: Tue, 8 Apr 2025 11:15:15 +0900 Subject: [PATCH 2/3] Validate that index name is a UC index (#96) Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> --- .../langchain/src/databricks_langchain/vectorstores.py | 5 +++++ integrations/langchain/tests/unit_tests/test_vectorstores.py | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/integrations/langchain/src/databricks_langchain/vectorstores.py b/integrations/langchain/src/databricks_langchain/vectorstores.py index ae24f7b4..1ec73856 100644 --- a/integrations/langchain/src/databricks_langchain/vectorstores.py +++ b/integrations/langchain/src/databricks_langchain/vectorstores.py @@ -230,6 +230,11 @@ def __init__( f"The `index_name` parameter must be a string, but got {type(index_name).__name__}." ) + if index_name.count(".") != 2: + raise ValueError( + f"The `index_name` parameter must be in the format 'catalog.schema.name', but got {index_name!r}." + ) + try: from databricks.vector_search.client import ( # type: ignore[import] VectorSearchClient, diff --git a/integrations/langchain/tests/unit_tests/test_vectorstores.py b/integrations/langchain/tests/unit_tests/test_vectorstores.py index dd92be1f..b352b548 100644 --- a/integrations/langchain/tests/unit_tests/test_vectorstores.py +++ b/integrations/langchain/tests/unit_tests/test_vectorstores.py @@ -52,7 +52,9 @@ def test_init_with_endpoint_name() -> None: assert vectorsearch.index.describe() == INDEX_DETAILS[DELTA_SYNC_INDEX] -@pytest.mark.parametrize("index_name", [None, 123, MagicMock(spec=VectorSearchIndex)]) +@pytest.mark.parametrize( + "index_name", [None, 123, "invalid.name", MagicMock(spec=VectorSearchIndex)] +) def test_init_fail_invalid_index_name(index_name) -> None: with pytest.raises(ValueError, match="The `index_name` parameter must be"): DatabricksVectorSearch(index_name=index_name) From 9057edc765342fd1579474bc4f71717655655850 Mon Sep 17 00:00:00 2001 From: Sunish Sheth Date: Tue, 8 Apr 2025 17:40:08 -0700 Subject: [PATCH 3/3] Updating version and release notes for databricks-langchain 0.4.2 version release --- CHANGELOG.md | 9 +++++++-- integrations/langchain/pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ced6ec6d..75c95464 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # CHANGELOG +## databricks-langchain 0.4.2, (2025-04-08) + +### Bugfix +- Improve validation for `index_name` for vectorIndex + ## databrick-ai-bridge 0.4.1, databricks-langchain 0.4.1, databricks-openai 0.3.1 (2025-03-27) ### Improvements @@ -43,6 +48,6 @@ Initial version of databricks-ai-bridge and databricks-langchain packages Features: - Support for Databricks AI/BI Genie via the `databricks_langchain.GenieAgent` API in `databricks-langchain` -- Support for most functionality in the existing `langchain-databricks` under `databricks-langchain`. Specifically, this +- Support for most functionality in the existing `langchain-databricks` under `databricks-langchain`. Specifically, this release introduces `databricks_langchain.ChatDatabricks`, `databricks_langchain.DatabricksEmbeddings`, and - `databricks_langchain.DatabricksVectorSearch` APIs. + `databricks_langchain.DatabricksVectorSearch` APIs. diff --git a/integrations/langchain/pyproject.toml b/integrations/langchain/pyproject.toml index 92d1beec..d17b6793 100644 --- a/integrations/langchain/pyproject.toml +++ b/integrations/langchain/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "databricks-langchain" -version = "0.4.1.dev0" +version = "0.4.2" description = "Support for Databricks AI support in LangChain" authors = [ { name="Databricks", email="agent-feedback@databricks.com" },