Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
dd726b5
feat: integrate Switch transpiler with Lakebridge installer
hiroyukinakazato-db Sep 30, 2025
febb62d
Merge branch 'main' into feature/switch-installer-integration
hiroyukinakazato-db Sep 30, 2025
fa26b4c
fix: remove undefined URLError from exception handling
hiroyukinakazato-db Sep 30, 2025
6511e20
refactor: streamline SwitchInstaller deployment logic and update tests
hiroyukinakazato-db Oct 3, 2025
33ea7de
refactor: simplify SwitchInstaller test structure and improve assertions
hiroyukinakazato-db Oct 3, 2025
d0c63c3
Merge remote-tracking branch 'origin/main' into feature/switch-instal…
hiroyukinakazato-db Oct 3, 2025
7cb9ea9
feat: add Switch transpiler installer for Lakebridge integration
hiroyukinakazato-db Oct 7, 2025
467dea9
fix: support case-insensitive config lookup in SwitchInstaller
hiroyukinakazato-db Oct 8, 2025
57298b0
Merge branch 'main' into feature/switch-installer-integration
hiroyukinakazato-db Oct 8, 2025
09c0eb8
Merge branch 'main' into feature/switch-installer-integration
hiroyukinakazato-db Oct 9, 2025
8439314
refactor: separate Switch installation from workspace deployment
hiroyukinakazato-db Oct 9, 2025
5f66f3f
Merge branch 'main' into feature/switch-installer-integration
hiroyukinakazato-db Oct 9, 2025
9dc4b04
refactor: encapsulate Switch package path resolution in SwitchDeployment
hiroyukinakazato-db Oct 9, 2025
7637234
test: update Switch installation tests for refactored interface
hiroyukinakazato-db Oct 9, 2025
729cb0d
Merge branch 'main' into feature/switch-installer-integration
hiroyukinakazato-db Oct 9, 2025
42ce0df
fix: exclude wait_for_completion from Switch job parameters
hiroyukinakazato-db Oct 10, 2025
f698470
Merge branch 'main' into feature/switch-installer-integration
hiroyukinakazato-db Oct 14, 2025
22cadc9
Defaults in `labs.yml` are strings.
asnare Oct 14, 2025
b3d2441
Update flag description to use placeholder syntax.
asnare Oct 14, 2025
ac7e2a4
Disable flag pending completion of integration.
asnare Oct 14, 2025
f0426e1
Leave pylint's max-args as-is.
asnare Oct 15, 2025
934c2e8
Remove unnecessary include_llm arguments.
asnare Oct 15, 2025
74923cc
Refactor Switch installation.
asnare Oct 16, 2025
084f90f
upgrade to latest switch plugin
sundarshankar89 Oct 21, 2025
61f796f
Merge branch 'main' into feature/switch-installer-integration
sundarshankar89 Oct 21, 2025
0c1d1d5
fixed package dependencies
sundarshankar89 Oct 21, 2025
6aeea25
Merge branch 'main' into feature/switch-installer-integration
sundarshankar89 Oct 22, 2025
468f8de
added additional configuration for making switch
sundarshankar89 Oct 22, 2025
6a57570
Latest Switch
sundarshankar89 Oct 22, 2025
2c3d153
Sorted List for FMAPI
sundarshankar89 Oct 22, 2025
f41dee8
setting logging level
sundarshankar89 Oct 22, 2025
950c1b8
setting logging level
sundarshankar89 Oct 22, 2025
6ca78ed
setting logging level
sundarshankar89 Oct 22, 2025
ba65df4
setting logging level
sundarshankar89 Oct 22, 2025
2e2abcb
setting logging level
sundarshankar89 Oct 22, 2025
bcbe4df
make default as first choice
sundarshankar89 Oct 22, 2025
486250f
fix tests
sundarshankar89 Oct 22, 2025
fc1ddca
fix tests
sundarshankar89 Oct 22, 2025
42c9c4e
fixes few bugs
sundarshankar89 Oct 23, 2025
1831076
update databricks-switch-plugin dependency to version 0.1.4
hiroyukinakazato-db Oct 26, 2025
01a0c87
Review Comments
sundarshankar89 Oct 27, 2025
ccce0f2
Review Comments
sundarshankar89 Oct 27, 2025
eae5997
Merge branch 'main' into feature/switch-installer-integration
sundarshankar89 Oct 28, 2025
34c9f8f
Merge branch 'main' into feature/switch-installer-integration
sundarshankar89 Oct 28, 2025
23df37b
added tests for configurator
sundarshankar89 Oct 29, 2025
4a0bf49
added tests for installer
sundarshankar89 Oct 29, 2025
c49c5b3
added tests for installer
sundarshankar89 Oct 29, 2025
078a0bc
added tests for switch
sundarshankar89 Oct 29, 2025
69f93b2
Merge branch 'main' into feature/switch-installer-integration
sundarshankar89 Oct 30, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions labs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ commands:
- name: interactive
description: (Optional) Whether installing in interactive mode (`true|false|auto`); configuration settings are prompted for when interactive
default: auto
- name: include-llm-transpiler
description: (Optional) Whether to include LLM-based transpiler in installation (`true|false`)
default: "false"

- name: describe-transpile
description: Describe installed transpilers
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ dependencies = [
"SQLAlchemy~=2.0.40",
"pygls~=2.0.0a2",
"duckdb~=1.2.2",
"databricks-switch-plugin~=0.1.4", # Temporary, until Switch is migrated to be a transpiler (LSP) plugin.
"requests>=2.28.1,<3" # Matches databricks-sdk (and 'types-requests' below), to avoid conflicts.

]

[project.urls]
Expand Down
7 changes: 6 additions & 1 deletion src/databricks/labs/lakebridge/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,7 @@ def install_transpile(
w: WorkspaceClient,
artifact: str | None = None,
interactive: str | None = None,
include_llm_transpiler: bool = False,
transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(),
) -> None:
"""Install or upgrade the Lakebridge transpilers."""
Expand All @@ -738,9 +739,13 @@ def install_transpile(
ctx.add_user_agent_extra("cmd", "install-transpile")
if artifact:
ctx.add_user_agent_extra("artifact-overload", Path(artifact).name)
if include_llm_transpiler:
ctx.add_user_agent_extra("include-llm-transpiler", "true")
user = w.current_user
logger.debug(f"User: {user}")
transpile_installer = installer(w, transpiler_repository, is_interactive=is_interactive)
transpile_installer = installer(
w, transpiler_repository, is_interactive=is_interactive, include_llm=include_llm_transpiler
)
transpile_installer.run(module="transpile", artifact=artifact)


Expand Down
6 changes: 4 additions & 2 deletions src/databricks/labs/lakebridge/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,5 +274,7 @@ class ReconcileConfig:

@dataclass
class LakebridgeConfiguration:
transpile: TranspileConfig | None = None
reconcile: ReconcileConfig | None = None
transpile: TranspileConfig | None
reconcile: ReconcileConfig | None
# Temporary flag, indicating whether to include the LLM-based Switch transpiler.
include_switch: bool = False
12 changes: 12 additions & 0 deletions src/databricks/labs/lakebridge/contexts/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from databricks.labs.lakebridge.deployment.dashboard import DashboardDeployment
from databricks.labs.lakebridge.deployment.installation import WorkspaceInstallation
from databricks.labs.lakebridge.deployment.recon import TableDeployment, JobDeployment, ReconDeployment
from databricks.labs.lakebridge.deployment.switch import SwitchDeployment
from databricks.labs.lakebridge.helpers.metastore import CatalogOperations

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -119,13 +120,24 @@ def recon_deployment(self) -> ReconDeployment:
self.dashboard_deployment,
)

@cached_property
def switch_deployment(self) -> SwitchDeployment:
return SwitchDeployment(
self.workspace_client,
self.installation,
self.install_state,
self.product_info,
self.job_deployment,
)

@cached_property
def workspace_installation(self) -> WorkspaceInstallation:
return WorkspaceInstallation(
self.workspace_client,
self.prompts,
self.installation,
self.recon_deployment,
self.switch_deployment,
self.product_info,
self.upgrades,
)
Expand Down
36 changes: 35 additions & 1 deletion src/databricks/labs/lakebridge/deployment/configurator.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
import logging
import time

from collections.abc import Iterator

from databricks.labs.blueprint.tui import Prompts
from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import DatabricksError
from databricks.sdk.service.catalog import Privilege, SecurableType
from databricks.sdk.service.sql import (
CreateWarehouseRequestWarehouseType,
EndpointInfoWarehouseType,
SpotInstancePolicy,
)
from databricks.sdk.service.serving import ServingEndpoint

from databricks.labs.lakebridge.helpers.metastore import CatalogOperations

Expand All @@ -29,8 +33,9 @@ def __init__(self, ws: WorkspaceClient, prompts: Prompts, catalog_ops: CatalogOp

def prompt_for_catalog_setup(
self,
default_catalog_name: str = "remorph",
) -> str:
catalog_name = self._prompts.question("Enter catalog name", default="remorph")
catalog_name = self._prompts.question("Enter catalog name", default=default_catalog_name)
catalog = self._catalog_ops.get_catalog(catalog_name)
if catalog:
logger.info(f"Found existing catalog `{catalog_name}`")
Expand Down Expand Up @@ -103,6 +108,35 @@ def warehouse_type(_):
raise SystemExit("Cannot continue installation, without a valid warehouse. Aborting the installation.")
return warehouse_id

def prompt_for_foundation_model_choice(self, default_choice: str = "databricks-claude-sonnet-4-5") -> str:
    """List serving endpoints that expose a foundation model and prompt the user to pick one.

    Args:
        default_choice: Endpoint name offered as the pre-selected default; it is
            always shown first in the choice list, prefixed with ``[DEFAULT]``.

    Returns:
        The name of the selected serving endpoint.

    Raises:
        DatabricksError: If no foundation-model serving endpoints are found.
    """
    endpoints: Iterator[ServingEndpoint] = self._ws.serving_endpoints.list()

    # Keep only endpoints that actually serve at least one foundation model.
    model_endpoints = [
        ep
        for ep in endpoints
        if ep.name
        and ep.config
        and ep.config.served_entities
        and any(getattr(se, "foundation_model", None) is not None for se in ep.config.served_entities)
    ]

    foundational_model_names = [ep.name for ep in model_endpoints if ep.name]

    # BUG FIX: a list comprehension never yields None, so the previous
    # `is None` check could not fire; use a falsy check so an empty result
    # aborts instead of prompting with only a nonexistent default entry.
    if not foundational_model_names:
        raise DatabricksError("No Foundation Model serving endpoints found. Aborting the installation.")
    # Present the default first: the "[DEFAULT] " prefix sorts ahead of
    # alphanumeric endpoint names even with sort=True below.
    other_models = sorted(set(foundational_model_names) - {default_choice})
    choices = [f"[DEFAULT] {default_choice}", *other_models]
    selected = self._prompts.choice("Select a Foundation Model serving endpoint:", choices, sort=True)

    if selected.startswith("[DEFAULT]"):
        selected = default_choice
    return selected

def has_necessary_catalog_access(
self, catalog_name: str, user_name: str, privilege_sets: tuple[set[Privilege], ...]
):
Expand Down
11 changes: 11 additions & 0 deletions src/databricks/labs/lakebridge/deployment/installation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

from databricks.labs.lakebridge.config import LakebridgeConfiguration
from databricks.labs.lakebridge.deployment.recon import ReconDeployment
from databricks.labs.lakebridge.deployment.switch import SwitchDeployment

logger = logging.getLogger("databricks.labs.lakebridge.install")

Expand All @@ -24,13 +25,15 @@ def __init__(
prompts: Prompts,
installation: Installation,
recon_deployment: ReconDeployment,
switch_deployment: SwitchDeployment,
product_info: ProductInfo,
upgrades: Upgrades,
):
self._ws = ws
self._prompts = prompts
self._installation = installation
self._recon_deployment = recon_deployment
self._switch_deployment = switch_deployment
self._product_info = product_info
self._upgrades = upgrades

Expand Down Expand Up @@ -96,6 +99,9 @@ def install(self, config: LakebridgeConfiguration):
if config.reconcile:
logger.info("Installing Lakebridge reconcile Metadata components.")
self._recon_deployment.install(config.reconcile, wheel_path)
if config.include_switch:
logger.info("Installing Switch transpiler to workspace.")
self._switch_deployment.install()

def uninstall(self, config: LakebridgeConfiguration):
# This will remove all the Lakebridge modules
Expand All @@ -116,9 +122,14 @@ def uninstall(self, config: LakebridgeConfiguration):
f"Won't remove transpile validation schema `{config.transpile.schema_name}` "
f"from catalog `{config.transpile.catalog_name}`. Please remove it manually."
)
self._uninstall_switch_job()

if config.reconcile:
self._recon_deployment.uninstall(config.reconcile)

self._installation.remove()
logger.info("Uninstallation completed successfully.")

def _uninstall_switch_job(self) -> None:
"""Remove Switch transpiler job if exists."""
self._switch_deployment.uninstall()
182 changes: 182 additions & 0 deletions src/databricks/labs/lakebridge/deployment/switch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
import importlib.resources
import logging
from collections.abc import Generator, Sequence
from importlib.abc import Traversable
from pathlib import PurePosixPath
from typing import Any

from databricks.labs import switch
from databricks.labs.switch.__about__ import __version__ as switch_version
from databricks.labs.blueprint.installation import Installation
from databricks.labs.blueprint.installer import InstallState
from databricks.labs.blueprint.paths import WorkspacePath
from databricks.labs.blueprint.wheels import ProductInfo
from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import InvalidParameterValue, NotFound
from databricks.sdk.service.jobs import JobParameterDefinition, JobSettings, NotebookTask, Source, Task

from databricks.labs.lakebridge.deployment.job import JobDeployment

logger = logging.getLogger(__name__)


class SwitchDeployment:
    """Deploys the Switch (LLM-based) transpiler into a Databricks workspace.

    Copies the Switch package sources into the Lakebridge installation folder
    and creates (or updates) the workspace job that runs transpilations. Job
    identity is tracked via InstallState under the key ``Switch``.
    """

    # Key under which the Switch job id is recorded in InstallState.jobs.
    _INSTALL_STATE_KEY = "Switch"
    # Transpiler identifier; not referenced within this class — presumably
    # consumed by callers elsewhere. TODO confirm.
    _TRANSPILER_ID = "switch"

    def __init__(
        self,
        ws: WorkspaceClient,
        installation: Installation,
        install_state: InstallState,
        product_info: ProductInfo,
        job_deployer: JobDeployment,
    ):
        self._ws = ws
        self._installation = installation
        self._install_state = install_state
        self._product_info = product_info
        # NOTE(review): _job_deployer is stored but not used by any method in
        # this class as written — confirm whether it is still needed.
        self._job_deployer = job_deployer

    def install(self) -> None:
        """Deploy Switch to workspace and configure resources."""
        logger.debug("Deploying Switch resources to workspace...")
        self._deploy_resources_to_workspace()
        self._setup_job()
        logger.debug("Switch deployment completed")

    def uninstall(self) -> None:
        """Remove Switch job from workspace."""
        if self._INSTALL_STATE_KEY not in self._install_state.jobs:
            logger.debug("No Switch job found in InstallState")
            return

        job_id = int(self._install_state.jobs[self._INSTALL_STATE_KEY])
        try:
            logger.info(f"Removing Switch job with job_id={job_id}")
            # The state entry is removed before the API delete; state is saved
            # on both the success and the already-deleted paths below.
            del self._install_state.jobs[self._INSTALL_STATE_KEY]
            self._ws.jobs.delete(job_id)
            self._install_state.save()
        except (InvalidParameterValue, NotFound):
            # Job was already removed out-of-band; still persist the state change.
            logger.warning(f"Switch job {job_id} doesn't exist anymore")
            self._install_state.save()

    def _get_switch_workspace_path(self) -> WorkspacePath:
        """Return the workspace folder holding the copied Switch sources."""
        installation_root = self._installation.install_folder()
        return WorkspacePath(self._ws, installation_root) / "switch"

    def _deploy_resources_to_workspace(self) -> None:
        """Replicate the Switch package sources to the workspace."""
        # TODO: This is temporary, instead the jobs should directly run the code from the deployed wheel/package.
        resource_root = self._get_switch_workspace_path()
        # Replace existing resources, to avoid stale files and potential confusion.
        if resource_root.exists():
            resource_root.rmdir(recursive=True)
        resource_root.mkdir(parents=True)
        # Track directories already created to avoid redundant mkdir calls.
        already_created = {resource_root}
        logger.info(f"Copying resources to {resource_root} in workspace.......")
        for resource_path, resource in self._enumerate_package_files(switch):
            # Resource path has a leading 'switch' that we want to strip off.
            nested_path = resource_path.relative_to(PurePosixPath("switch"))
            upload_path = resource_root / nested_path
            if (parent := upload_path.parent) not in already_created:
                logger.debug(f"Creating workspace directory: {parent}")
                # NOTE(review): mkdir() is called without parents=True; this
                # assumes either WorkspacePath creates missing ancestors or the
                # depth-first enumeration always creates parent directories
                # before deeper ones — TODO confirm for dirs containing only
                # subdirectories (no files of their own).
                parent.mkdir()
                already_created.add(parent)
            logger.debug(f"Uploading: {resource_path} -> {upload_path}")
            upload_path.write_bytes(resource.read_bytes())
        logger.info(f"Completed Copying resources to {resource_root} in workspace...")

    @staticmethod
    def _enumerate_package_files(package) -> Generator[tuple[PurePosixPath, Traversable]]:
        """Yield (relative path, resource) pairs for every file in the package."""
        # Locate the root of the package, and then enumerate all its files recursively.
        root = importlib.resources.files(package)

        def _enumerate_resources(
            resource: Traversable, parent: PurePosixPath = PurePosixPath(".")
        ) -> Generator[tuple[PurePosixPath, Traversable]]:
            if resource.name.startswith("."):
                # Skip hidden files and directories
                return
            if resource.is_dir():
                next_parent = parent / resource.name
                for child in resource.iterdir():
                    yield from _enumerate_resources(child, next_parent)
            elif resource.is_file():
                # Skip hidden files and compiled Python files
                if not (name := resource.name).endswith((".pyc", ".pyo")):
                    yield parent / name, resource

        yield from _enumerate_resources(root)

    def _setup_job(self) -> None:
        """Create or update Switch job."""
        existing_job_id = self._get_existing_job_id()
        logger.info("Setting up Switch job in workspace...")
        try:
            job_id = self._create_or_update_switch_job(existing_job_id)
            self._install_state.jobs[self._INSTALL_STATE_KEY] = job_id
            self._install_state.save()
            job_url = f"{self._ws.config.host}/jobs/{job_id}"
            logger.info(f"Switch job created/updated: {job_url}")
        except (RuntimeError, ValueError, InvalidParameterValue) as e:
            # Failures are logged, not re-raised: installation continues
            # without the Switch job rather than aborting entirely.
            logger.error(f"Failed to create/update Switch job: {e}")

    def _get_existing_job_id(self) -> str | None:
        """Check if Switch job already exists in workspace.

        Returns the job id recorded in InstallState if the job still exists,
        otherwise None (including when the recorded id is stale or malformed).
        """
        if self._INSTALL_STATE_KEY not in self._install_state.jobs:
            return None
        try:
            job_id = self._install_state.jobs[self._INSTALL_STATE_KEY]
            self._ws.jobs.get(int(job_id))
            return job_id
        except (InvalidParameterValue, NotFound, ValueError):
            return None

    def _create_or_update_switch_job(self, job_id: str | None) -> str:
        """Create or update Switch job, returning job ID."""
        job_settings = self._get_switch_job_settings()

        # Try to update existing job
        if job_id:
            try:
                logger.info(f"Updating Switch job: {job_id}")
                # reset() replaces the job's settings wholesale.
                self._ws.jobs.reset(int(job_id), JobSettings(**job_settings))
                return job_id
            except (ValueError, InvalidParameterValue):
                logger.warning("Previous Switch job not found, creating new one")

        # Create new job
        logger.info("Creating new Switch job")
        new_job = self._ws.jobs.create(**job_settings)
        new_job_id = str(new_job.job_id)
        # NOTE(review): str() never returns None, so this assert can never
        # fire; it would only catch the literal string "None" if checked
        # against that instead.
        assert new_job_id is not None
        return new_job_id

    def _get_switch_job_settings(self) -> dict[str, Any]:
        """Build job settings for Switch transpiler."""
        job_name = "Lakebridge_Switch"
        # The notebook entry point lives under the sources copied by
        # _deploy_resources_to_workspace().
        notebook_path = self._get_switch_workspace_path() / "notebooks" / "00_main"

        task = Task(
            task_key="run_transpilation",
            notebook_task=NotebookTask(notebook_path=str(notebook_path), source=Source.WORKSPACE),
            disable_auto_optimization=True,  # To disable retries on failure
        )

        return {
            "name": job_name,
            "tags": {"created_by": self._ws.current_user.me().user_name, "switch_version": f"v{switch_version}"},
            "tasks": [task],
            "parameters": self._get_switch_job_parameters(),
            "max_concurrent_runs": 100,  # Allow simultaneous transpilations
        }

    def _get_switch_job_parameters(self) -> Sequence[JobParameterDefinition]:
        """Return the static job-level parameter definitions for Switch runs."""
        # Add required runtime parameters, static for now.
        parameters = {
            "source_tech": "",
            "input_dir": "",
            "output_dir": "",
        }
        return [JobParameterDefinition(name=key, default=value) for key, value in parameters.items()]
Loading
Loading