|  | 
| 17 | 17 | from databricks.labs.blueprint.cli import App | 
| 18 | 18 | from databricks.labs.blueprint.entrypoint import get_logger, is_in_debug | 
| 19 | 19 | from databricks.labs.blueprint.installation import RootJsonValue | 
|  | 20 | +from databricks.labs.blueprint.installer import InstallState | 
| 20 | 21 | from databricks.labs.blueprint.tui import Prompts | 
| 21 | 22 | 
 | 
| 22 | 23 | 
 | 
|  | 
| 35 | 36 | from databricks.labs.lakebridge.reconcile.recon_config import RECONCILE_OPERATION_NAME, AGG_RECONCILE_OPERATION_NAME | 
| 36 | 37 | from databricks.labs.lakebridge.transpiler.describe import TranspilersDescription | 
| 37 | 38 | from databricks.labs.lakebridge.transpiler.execute import transpile as do_transpile | 
| 38 |  | -from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import LSPEngine | 
|  | 39 | +from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import LSPConfig, LSPEngine | 
| 39 | 40 | from databricks.labs.lakebridge.transpiler.repository import TranspilerRepository | 
| 40 | 41 | from databricks.labs.lakebridge.transpiler.sqlglot.sqlglot_engine import SqlglotEngine | 
|  | 42 | +from databricks.labs.lakebridge.transpiler.switch_runner import SwitchConfig, SwitchRunner | 
| 41 | 43 | from databricks.labs.lakebridge.transpiler.transpile_engine import TranspileEngine | 
| 42 | 44 | 
 | 
| 43 | 45 | from databricks.labs.lakebridge.transpiler.transpile_status import ErrorSeverity | 
| @@ -534,6 +536,234 @@ def _override_workspace_client_config(ctx: ApplicationContext, overrides: dict[s | 
| 534 | 536 |         ctx.connect_config.cluster_id = cluster_id | 
| 535 | 537 | 
 | 
| 536 | 538 | 
 | 
@lakebridge.command
def llm_transpile(
    *,
    w: WorkspaceClient,
    input_source: str | None = None,
    output_ws_folder: str | None = None,
    source_dialect: str | None = None,
    transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(),
) -> None:
    """Transpile source code to Databricks using LLM Transpiler (Switch)"""
    app_context = ApplicationContext(w)
    app_context.add_user_agent_extra("cmd", "llm-transpile")
    logger.debug(f"User: {app_context.current_user}")

    # Values given on the command line are registered first; check() then prompts
    # for anything still missing and validates the complete set.
    config_checker = _LLMTranspileConfigChecker(
        transpile_config=app_context.transpile_config,
        prompts=app_context.prompts,
        install_state=app_context.install_state,
        transpiler_repository=transpiler_repository,
    )
    config_checker.use_input_source(input_source)
    config_checker.use_output_ws_folder(output_ws_folder)
    config_checker.use_source_dialect(source_dialect)
    transpile_params = config_checker.check()

    # The outcome of the run is written to stdout as a JSON document.
    print(json.dumps(_llm_transpile(app_context, transpile_params)))
|  | 562 | + | 
|  | 563 | + | 
class _LLMTranspileConfigChecker:
    """Helper class for 'llm-transpile' command configuration validation.

    Values supplied on the command line are registered through the ``use_*()``
    methods. ``check()`` then prompts for whatever is still missing, validates
    the result and returns the parameters as a dict.
    """

    _transpile_config: TranspileConfig | None
    _prompts: Prompts
    _install_state: InstallState
    _transpiler_repository: TranspilerRepository
    _input_source: str | None = None
    _output_ws_folder: str | None = None
    _source_dialect: str | None = None

    def __init__(
        self,
        transpile_config: TranspileConfig | None,
        prompts: Prompts,
        install_state: InstallState,
        transpiler_repository: TranspilerRepository,
    ):
        self._transpile_config = transpile_config
        self._prompts = prompts
        self._install_state = install_state
        self._transpiler_repository = transpiler_repository

    @staticmethod
    def _validate_input_source_path(input_source: str, msg: str) -> None:
        """Validate the input source: it must be a non-blank path that exists.

        A validation exception carrying ``msg`` is raised otherwise.
        """
        # Path("") normalises to "." (the current directory), so a blank value would
        # pass the existence check; reject it explicitly.
        if not input_source.strip() or not Path(input_source).exists():
            raise_validation_exception(msg)

    def use_input_source(self, input_source: str | None) -> None:
        """Register the '--input-source' value, if one was given, after validating it."""
        if input_source is not None:
            logger.debug(f"Setting input_source to: {input_source!r}")
            self._validate_input_source_path(input_source, f"Invalid path for '--input-source': {input_source}")
            self._input_source = input_source

    def _prompt_input_source(self) -> None:
        """Ask for the input source, offering the configured one (if any) as the default."""
        default_input = self._transpile_config.input_source if self._transpile_config else None

        if default_input:
            prompt_text = f"Enter input source path (press <enter> for default: {default_input})"
            # Hand the default to the prompt as well, so that an empty answer resolves to it
            # instead of depending on how the prompt implementation treats empty input.
            prompted = self._prompts.question(prompt_text, default=default_input).strip()
            input_source = prompted or default_input
        else:
            input_source = self._prompts.question("Enter input source path (directory or file)").strip()

        logger.debug(f"Setting input_source to: {input_source!r}")
        # Only store the value once it is known to be valid, like the other setters do.
        self._validate_input_source_path(input_source, f"Invalid input source: {input_source}")
        self._input_source = input_source

    def _check_input_source(self) -> None:
        """Prompt for the input source unless it was already provided."""
        if self._input_source is None:
            self._prompt_input_source()

    def use_output_ws_folder(self, output_ws_folder: str | None) -> None:
        """Register the '--output-ws-folder' value, if one was given, after validating it."""
        if output_ws_folder is not None:
            logger.debug(f"Setting output_ws_folder to: {output_ws_folder!r}")
            self._validate_output_ws_folder_path(
                output_ws_folder, f"Invalid path for '--output-ws-folder': {output_ws_folder}"
            )
            self._output_ws_folder = output_ws_folder

    @staticmethod
    def _validate_output_ws_folder_path(output_ws_folder: str, msg: str) -> None:
        """Validate output folder is a Workspace path."""
        if not output_ws_folder.startswith("/Workspace/"):
            raise_validation_exception(f"{msg}. Must start with /Workspace/")

    def _prompt_output_ws_folder(self) -> None:
        """Ask for the output folder and store it once validated."""
        prompted_output_ws_folder = self._prompts.question(
            "Enter output folder path (Databricks Workspace path starting with /Workspace/)"
        ).strip()
        logger.debug(f"Setting output_ws_folder to: {prompted_output_ws_folder!r}")
        self._validate_output_ws_folder_path(
            prompted_output_ws_folder, f"Invalid output folder: {prompted_output_ws_folder}"
        )
        self._output_ws_folder = prompted_output_ws_folder

    def _check_output_ws_folder(self) -> None:
        """Prompt for the output folder unless it was already provided."""
        if self._output_ws_folder is None:
            self._prompt_output_ws_folder()

    def use_source_dialect(self, source_dialect: str | None) -> None:
        """Register the source dialect, if one was given; it is validated later by check()."""
        if source_dialect is not None:
            logger.debug(f"Setting source_dialect to: {source_dialect!r}")
            self._source_dialect = source_dialect

    def _prompt_source_dialect(self) -> None:
        """Prompt for source dialect from Switch dialects."""
        available_dialects = self._get_switch_dialects()

        if not available_dialects:
            raise_validation_exception(
                "No Switch dialects available. "
                "Install with: databricks labs lakebridge install-transpile --include-llm-transpiler"
            )

        logger.debug(f"Available dialects: {available_dialects!r}")
        self._source_dialect = self._prompts.choice("Select the source dialect:", sorted(available_dialects))

    def _check_source_dialect(self) -> None:
        """Validate and prompt for source dialect if not provided."""
        if self._source_dialect is None:
            # The prompt loads the available dialects itself; loading them here too would
            # read the Switch configuration twice.
            self._prompt_source_dialect()
            return

        available_dialects = self._get_switch_dialects()
        if self._source_dialect not in available_dialects:
            supported = ", ".join(sorted(available_dialects))
            raise_validation_exception(f"Invalid source-dialect: '{self._source_dialect}'. Available: {supported}")

    def _load_switch_config(self, failure_msg: str) -> LSPConfig | None:
        """Load the config.yml of Switch using LSPConfig.

        Returns None when the file does not exist, or when loading it fails with
        OSError/ValueError; in the latter case a warning prefixed with
        ``failure_msg`` is logged.
        """
        config_path = self._transpiler_repository.transpiler_config_path("Switch")
        if not config_path.exists():
            return None

        try:
            return LSPConfig.load(config_path)
        except (OSError, ValueError) as e:
            logger.warning(f"{failure_msg}: {e}")
            return None

    def _get_switch_dialects(self) -> set[str]:
        """Get Switch dialects from config.yml; empty if the config is unavailable."""
        lsp_config = self._load_switch_config("Failed to load Switch dialects")
        if lsp_config is None:
            return set()
        return set(lsp_config.remorph.dialects)

    def _get_switch_options_with_defaults(self) -> dict[str, str]:
        """Get default values for Switch options from config.yml.

        Only options declared for dialect "all" are considered; options whose
        default is falsy or the "<none>" placeholder are left out.
        """
        lsp_config = self._load_switch_config("Failed to load Switch options")
        if lsp_config is None:
            return {}

        result = {
            option.flag: option.default
            for option in lsp_config.options_for_dialect("all")
            if option.default and option.default != "<none>"
        }

        logger.debug(f"Loaded {len(result)} Switch options with defaults from config.yml")
        return result

    def _validate_switch_options(self, options: dict[str, str]) -> None:
        """Validate options against config.yml choices.

        Options without declared choices, and options not present in ``options``,
        are not checked. Nothing is validated if the config is unavailable.
        """
        lsp_config = self._load_switch_config("Failed to validate Switch options")
        if lsp_config is None:
            return

        for option in lsp_config.options_for_dialect("all"):
            if option.flag not in options or not option.choices:
                continue
            value = options[option.flag]
            if value not in option.choices:
                raise_validation_exception(
                    f"Invalid value for '{option.flag}': {value!r}. Must be one of: {', '.join(option.choices)}"
                )

    def check(self) -> dict:
        """Validate all parameters and return configuration dict.

        The returned dict has the keys "input_source", "output_ws_folder",
        "source_dialect", "switch_options" and "wait_for_completion".
        """
        logger.debug("Checking llm-transpile configuration")

        self._check_input_source()
        self._check_output_ws_folder()
        self._check_source_dialect()

        switch_options = self._get_switch_options_with_defaults()
        self._validate_switch_options(switch_options)

        # 'wait_for_completion' is reported as its own boolean entry, so it is removed
        # from the options that are passed along.
        wait_for_completion = str(switch_options.pop("wait_for_completion", "false")).lower() == "true"

        return {
            "input_source": self._input_source,
            "output_ws_folder": self._output_ws_folder,
            "source_dialect": self._source_dialect,
            "switch_options": switch_options,
            "wait_for_completion": wait_for_completion,
        }
|  | 752 | + | 
|  | 753 | + | 
def _llm_transpile(ctx: ApplicationContext, params: dict) -> RootJsonValue:
    """Execute LLM transpilation via Switch job.

    The catalog, schema, volume and job id are read from a SwitchConfig built on
    the install state, and passed to SwitchRunner.run() together with ``params``.
    """
    switch_config = SwitchConfig(ctx.install_state)
    switch_resources = switch_config.get_resources()
    switch_job_id = switch_config.get_job_id()

    switch_runner = SwitchRunner(ctx.workspace_client, ctx.installation)

    # Installation-derived arguments; the caller's parameters are merged in at call time.
    installed_args = {
        "catalog": switch_resources["catalog"],
        "schema": switch_resources["schema"],
        "volume": switch_resources["volume"],
        "job_id": switch_job_id,
    }
    return switch_runner.run(**installed_args, **params)
|  | 765 | + | 
|  | 766 | + | 
| 537 | 767 | @lakebridge.command | 
| 538 | 768 | def reconcile(*, w: WorkspaceClient) -> None: | 
| 539 | 769 |     """[EXPERIMENTAL] Reconciles source to Databricks datasets""" | 
|  | 
0 commit comments