Skip to content
Open
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
fae9880
feat: add llm-transpile command with Switch integration
hiroyukinakazato-db Oct 7, 2025
2ee157f
refactor: encapsulate Switch package path resolution in SwitchDeployment
hiroyukinakazato-db Oct 9, 2025
b736965
test: update Switch installation tests for refactored interface
hiroyukinakazato-db Oct 9, 2025
bacd5f6
fix: update error messages to include 'true' flag for install-transpi…
hiroyukinakazato-db Oct 9, 2025
21b6629
Merge branch 'main' into feature/llm-transpile
hiroyukinakazato-db Oct 9, 2025
81c32e5
fix: exclude wait_for_completion from Switch job parameters
hiroyukinakazato-db Oct 10, 2025
13bcc15
chore: update Switch wheel with wait_for_completion fix
hiroyukinakazato-db Oct 10, 2025
8dcf8f3
feat: add E2E test for Switch transpiler with environment variable co…
hiroyukinakazato-db Oct 14, 2025
83678b8
feat: enhance E2E testing for Switch with resource management and uni…
hiroyukinakazato-db Oct 14, 2025
ee5c892
chore: merge main into feature/llm-transpile
hiroyukinakazato-db Oct 14, 2025
ac382d6
Merge branch 'main' into feature/llm-transpile
sundarshankar89 Oct 27, 2025
4600583
Merge branch 'feature/switch-installer-integration' into feature/llm-…
sundarshankar89 Oct 27, 2025
7f0eaa4
Rebased from switch installer integration
sundarshankar89 Oct 27, 2025
0e22abe
Rebased from switch installer integration
sundarshankar89 Oct 27, 2025
43cc0f5
Intermediate check in
sundarshankar89 Oct 27, 2025
bb7c3d6
Intermediate check in
sundarshankar89 Oct 27, 2025
bd70638
Intermediate check in
sundarshankar89 Oct 27, 2025
0eb1570
Intermediate check in
sundarshankar89 Oct 27, 2025
9823201
Merge branch 'feature/switch-installer-integration' into feature/llm-…
sundarshankar89 Oct 28, 2025
eb46f24
initial tests
sundarshankar89 Oct 28, 2025
c54e68f
Merge branch 'feature/switch-installer-integration' into feature/llm-…
sundarshankar89 Oct 28, 2025
fab0e87
Merge branch 'feature/switch-installer-integration' into feature/llm-…
sundarshankar89 Oct 29, 2025
1e10b60
added flag to fail if users use regular transpile after installing sw…
sundarshankar89 Oct 30, 2025
394ad9d
Merge branch 'feature/switch-installer-integration' into feature/llm-…
sundarshankar89 Oct 30, 2025
aeff475
Merge branch 'feature/switch-installer-integration' into feature/llm-…
sundarshankar89 Oct 30, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ remorph_transpile/
/linter/src/main/antlr4/library/gen/
.databricks-login.json
.mypy_cache
.env
13 changes: 13 additions & 0 deletions labs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,19 @@ commands:
{{range .}}{{.total_files_processed}}\t{{.total_queries_processed}}\t{{.analysis_error_count}}\t{{.parsing_error_count}}\t{{.validation_error_count}}\t{{.generation_error_count}}\t{{.error_log_file}}
{{end}}

- name: llm-transpile
description: Transpile source code to Databricks using LLM Transpiler (Switch)
flags:
- name: input-source
description: Input Script Folder or File (local path)
default: null
- name: output-ws-folder
description: Output folder path (Databricks Workspace path starting with /Workspace/)
default: null
- name: source-dialect
description: Source dialect name (e.g., 'snowflake', 'teradata')
default: null

- name: reconcile
description: Reconcile source and target data residing on Databricks

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ bad-functions = ["map", "input"]
# ignored-parents =

# Maximum number of arguments for function / method.
max-args = 12
max-args = 13

# Maximum number of attributes for a class (see R0902).
max-attributes = 13
Expand Down
78 changes: 77 additions & 1 deletion src/databricks/labs/lakebridge/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import LSPEngine
from databricks.labs.lakebridge.transpiler.repository import TranspilerRepository
from databricks.labs.lakebridge.transpiler.sqlglot.sqlglot_engine import SqlglotEngine
from databricks.labs.lakebridge.transpiler.switch_runner import SwitchRunner
from databricks.labs.lakebridge.transpiler.transpile_engine import TranspileEngine

from databricks.labs.lakebridge.transpiler.transpile_status import ErrorSeverity
Expand Down Expand Up @@ -92,7 +93,7 @@ def _remove_warehouse(ws: WorkspaceClient, warehouse_id: str):


@lakebridge.command
def transpile( # pylint: disable=too-many-arguments
def transpile(
*,
w: WorkspaceClient,
transpiler_config_path: str | None = None,
Expand Down Expand Up @@ -234,6 +235,10 @@ def _validate_transpiler_config_path(transpiler_config_path: str, msg: str) -> N
def use_transpiler_config_path(self, transpiler_config_path: str | None) -> None:
if transpiler_config_path is not None:
logger.debug(f"Setting transpiler_config_path to: {transpiler_config_path!r}")
# Switch is installed inside "/Users/<>/.lakebridge/transpilers/Switch/lsp/config.yml
if Path(transpiler_config_path).parent.parent.name == "Switch":
msg = "Switch transpiler is not supported through `transpile` run `llm-transpile` instead."
raise_validation_exception(msg)
self._validate_transpiler_config_path(
transpiler_config_path,
f"Invalid path for '--transpiler-config-path', does not exist: {transpiler_config_path}",
Expand Down Expand Up @@ -637,6 +642,77 @@ def _override_workspace_client_config(ctx: ApplicationContext, overrides: dict[s
ctx.connect_config.cluster_id = cluster_id


@lakebridge.command
def llm_transpile(
    *,
    w: WorkspaceClient,
    input_source: str,
    output_ws_folder: str,
    source_dialect: str,
    ctx: ApplicationContext | None = None,
) -> None:
    """Transpile source code to Databricks using LLM Transpiler (Switch).

    Uploads ``input_source`` (a local file or directory) to the UC Volume
    configured during ``install-transpile``, then triggers the pre-installed
    Switch job to write transpiled output under ``output_ws_folder``.

    Raises:
        RuntimeError: if Switch was not installed, the transpile config is
            missing/malformed, or the job submission fails.
    """
    if ctx is None:
        ctx = ApplicationContext(w)
    del w
    # Tag telemetry with the actual command name: the command is `llm-transpile`,
    # not `transpile-switch` (per review feedback).
    ctx.add_user_agent_extra("cmd", "llm-transpile")
    user = ctx.current_user
    logger.debug(f"User: {user}")

    # The Switch job is registered in InstallState by the installer; without it
    # there is nothing to run.
    job_list = ctx.install_state.jobs
    if "Switch" not in job_list:
        raise RuntimeError(
            "Switch Job ID not found. "
            "Please run 'databricks labs lakebridge install-transpile --include-llm-transpiler true' first."
        )
    logger.debug("Switch job ID found in InstallState")
    job_id = int(job_list["Switch"])

    # The installer persists catalog/schema/volume under transpiler_options;
    # all three are required to build the Volume upload path.
    transpile_config = ctx.transpile_config
    if transpile_config is None or transpile_config.transpiler_options is None:
        raise RuntimeError(
            "Transpile configuration config.yml not found in workspace. "
            "Please run 'databricks labs lakebridge install-transpile --include-llm-transpiler true' first."
        )

    transpile_options = transpile_config.transpiler_options
    logger.debug(f"Transpiler options: {transpile_options}")
    if not isinstance(transpile_options, Mapping):
        raise RuntimeError(
            "Invalid transpile configuration: transpiler_options must be a mapping. "
            "Please run 'databricks labs lakebridge install-transpile --include-llm-transpiler true' first."
        )
    catalog = transpile_options.get("catalog", None)
    schema = transpile_options.get("schema", None)
    volume = transpile_options.get("volume", None)

    if catalog is None or schema is None or volume is None:
        raise RuntimeError(
            "Invalid transpile configuration: catalog, schema or volume is missing. "
            "Please run 'databricks labs lakebridge install-transpile --include-llm-transpiler true' first."
        )
    # Validate with an explicit raise rather than `assert`: asserts are stripped
    # under `python -O`, which would let non-string values through silently.
    if not (isinstance(catalog, str) and isinstance(schema, str) and isinstance(volume, str)):
        raise RuntimeError(
            "Invalid transpile configuration: catalog, schema and volume must be strings. "
            "Please run 'databricks labs lakebridge install-transpile --include-llm-transpiler true' first."
        )

    try:
        job_runner = SwitchRunner(ctx.workspace_client, ctx.installation)
        volume_input_path = job_runner.upload_to_volume(
            local_path=Path(input_source), catalog=catalog, schema=schema, volume=volume
        )

        response = job_runner.run(
            volume_input_path=volume_input_path,
            output_ws_folder=output_ws_folder,
            source_tech=source_dialect,
            job_id=job_id,
        )
        # Emit the run info as JSON so callers/scripts can parse the result.
        json.dump(response, sys.stdout, indent=2)
    except Exception as ex:
        raise RuntimeError(ex) from ex


@lakebridge.command
def reconcile(*, w: WorkspaceClient) -> None:
"""[EXPERIMENTAL] Reconciles source to Databricks datasets"""
Expand Down
1 change: 1 addition & 0 deletions src/databricks/labs/lakebridge/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ class TranspileConfig:
error_file_path: str | None = None
sdk_config: dict[str, str] | None = None
skip_validation: bool = False
include_llm: bool = False
catalog_name: str = "remorph"
schema_name: str = "transpiler"
transpiler_options: JsonValue = None
Expand Down
2 changes: 1 addition & 1 deletion src/databricks/labs/lakebridge/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@

class WorkspaceInstaller:
# TODO: Temporary suppression, is_interactive is pending removal.
def __init__( # pylint: disable=too-many-arguments
def __init__(
self,
ws: WorkspaceClient,
prompts: Prompts,
Expand Down
146 changes: 146 additions & 0 deletions src/databricks/labs/lakebridge/transpiler/switch_runner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import io
import logging
import os
import random
import string
from datetime import datetime, timezone
from pathlib import Path

from databricks.labs.blueprint.installation import Installation, RootJsonValue
from databricks.sdk import WorkspaceClient

logger = logging.getLogger(__name__)


class SwitchRunner:
    """Runner for Switch LLM transpilation jobs.

    Uploads local sources to a UC Volume and triggers the pre-deployed Switch
    job via the Databricks Jobs API.
    """

    def __init__(
        self,
        ws: WorkspaceClient,
        installation: Installation,
    ):
        self._ws = ws
        self._installation = installation

    def run(
        self,
        volume_input_path: str,
        output_ws_folder: str,
        source_tech: str,
        job_id: int,
        wait_for_completion: bool = False,
    ) -> RootJsonValue:
        """Upload local files to Volume and trigger Switch job.

        Args:
            volume_input_path: UC Volume path holding the uploaded sources.
            output_ws_folder: Workspace folder the job writes results to.
            source_tech: Source dialect name passed to the job.
            job_id: Job ID of the deployed Switch job.
            wait_for_completion: When True, block until the run finishes.

        Returns:
            A one-element list describing the triggered (or completed) run.
        """

        job_params = self._build_job_parameters(
            input_dir=volume_input_path,
            output_dir=output_ws_folder,
            source_tech=source_tech,
        )
        logger.info(f"Triggering Switch job with job_id: {job_id}")

        return self._run_job(job_id, job_params, wait_for_completion)

    def upload_to_volume(
        self,
        local_path: Path,
        catalog: str,
        schema: str,
        volume: str,
    ) -> str:
        """Upload local files to UC Volume with unique timestamped path.

        A timestamp plus random suffix keeps concurrent uploads from colliding
        in the shared Volume.

        Returns:
            The Volume directory path the file(s) were uploaded under.
        """
        now = datetime.now(timezone.utc)
        time_part = now.strftime("%Y%m%d%H%M%S")
        # Non-cryptographic randomness is fine here: the suffix only needs to
        # avoid path collisions, not be unguessable.
        random_part = ''.join(random.choices(string.ascii_lowercase + string.digits, k=4))
        timestamp_suffix = f"{time_part}_{random_part}"
        volume_base_path = f"/Volumes/{catalog}/{schema}/{volume}"
        volume_input_path = f"{volume_base_path}/input_{timestamp_suffix}"

        logger.info(f"Uploading {local_path} to {volume_input_path}...")

        if local_path.is_file():
            # Single-file upload keeps the original file name.
            self._upload_file(local_path, f"{volume_input_path}/{local_path.name}")
        else:
            # Directory upload: mirror the relative layout under the Volume path.
            for root, _, files in os.walk(local_path):
                for file in files:
                    local_file = Path(root) / file
                    relative_path = local_file.relative_to(local_path)
                    self._upload_file(local_file, f"{volume_input_path}/{relative_path}")

        logger.info(f"Upload complete: {volume_input_path}")
        return volume_input_path

    def _upload_file(self, local_file: Path, volume_file_path: str) -> None:
        """Upload a single local file to the given Volume file path."""
        # NOTE(review): reads the whole file into memory; acceptable for SQL
        # scripts, but large inputs would benefit from streaming.
        content = local_file.read_bytes()
        self._ws.files.upload(file_path=volume_file_path, contents=io.BytesIO(content), overwrite=True)
        logger.debug(f"Uploaded: {local_file} -> {volume_file_path}")

    def _build_job_parameters(
        self,
        input_dir: str,
        output_dir: str,
        source_tech: str,
        switch_options: dict | None = None,
    ) -> dict[str, str]:
        """Build Switch job parameters; extra options override nothing required."""
        if switch_options is None:
            switch_options = {}
        return {
            "input_dir": input_dir,
            "output_dir": output_dir,
            "source_tech": source_tech,
            **switch_options,
        }

    def _run_job(
        self,
        job_id: int,
        job_params: dict[str, str],
        wait_for_completion: bool,
    ) -> RootJsonValue:
        """Execute Switch job and return run information.

        Blocking mode includes terminal state information in the result;
        fire-and-forget mode returns only identifiers and the run URL.
        """
        if wait_for_completion:
            run = self._ws.jobs.run_now_and_wait(job_id, job_parameters=job_params)

            if not run.run_id:
                raise SystemExit(f"Job {job_id} execution failed.")

            job_run_url = f"{self._ws.config.host}/jobs/{job_id}/runs/{run.run_id}"
            logger.info(f"Switch LLM transpilation job completed: {job_run_url}")

            return [
                {
                    "job_id": job_id,
                    "run_id": run.run_id,
                    "run_url": job_run_url,
                    # Guard against absent state objects on the returned run.
                    "state": (
                        run.state.life_cycle_state.value if run.state and run.state.life_cycle_state else "UNKNOWN"
                    ),
                    "result_state": run.state.result_state.value if run.state and run.state.result_state else None,
                }
            ]

        wait = self._ws.jobs.run_now(job_id, job_parameters=job_params)

        if not wait.run_id:
            raise SystemExit(f"Job {job_id} execution failed.")

        job_run_url = f"{self._ws.config.host}/jobs/{job_id}/runs/{wait.run_id}"
        logger.info(f"Switch LLM transpilation job started: {job_run_url}")

        return [
            {
                "job_id": job_id,
                "run_id": wait.run_id,
                "run_url": job_run_url,
            }
        ]
15 changes: 15 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,21 @@ def morpheus_artifact() -> Path:
return artifact


@pytest.fixture
def switch_artifact() -> Path:
    """Get Switch wheel for testing."""
    wheel_dir = Path(__file__).parent / "resources" / "transpiler_configs" / "switch" / "wheel"
    artifact = wheel_dir / "databricks_switch_plugin-0.1.2-py3-none-any.whl"
    assert artifact.exists(), f"Switch artifact not found: {artifact}"
    return artifact


class FakeDataSource(DataSource):

def __init__(self, start_delimiter: str, end_delimiter: str):
Expand Down
Loading
Loading