remove unnecessary backend changes

varun-edachali-dbx · varun-edachali-dbx · commit 20822e462e8a · 2025-06-23T06:04:10.000Z
Signed-off-by: varun-edachali-dbx <varun.edachali@databricks.com>
diff --git a/src/databricks/sql/backend/sea/backend.py b/src/databricks/sql/backend/sea/backend.py
@@ -1,16 +1,16 @@
 import logging
-import uuid
 import time
 import re
-from typing import Dict, Tuple, List, Optional, Any, Union, TYPE_CHECKING, Set
+from typing import Any, Dict, Tuple, List, Optional, Union, TYPE_CHECKING, Set
 
-from databricks.sql.backend.sea.models.base import ExternalLink
+from databricks.sql.backend.sea.models.base import ResultManifest
 from databricks.sql.backend.sea.utils.constants import (
     ALLOWED_SESSION_CONF_TO_DEFAULT_VALUES_MAP,
     ResultFormat,
     ResultDisposition,
     ResultCompression,
     WaitTimeout,
+    MetadataCommands,
 )
 
 if TYPE_CHECKING:
@@ -25,9 +25,8 @@
     BackendType,
     ExecuteResponse,
 )
-from databricks.sql.exc import ServerOperationError
+from databricks.sql.exc import DatabaseError, ServerOperationError
 from databricks.sql.backend.sea.utils.http_client import SeaHttpClient
-from databricks.sql.thrift_api.TCLIService import ttypes
 from databricks.sql.types import SSLOptions
 
 from databricks.sql.backend.sea.models import (
@@ -41,12 +40,11 @@
     ExecuteStatementResponse,
     GetStatementResponse,
     CreateSessionResponse,
-    GetChunksResponse,
 )
 from databricks.sql.backend.sea.models.responses import (
-    parse_status,
-    parse_manifest,
-    parse_result,
+    _parse_status,
+    _parse_manifest,
+    _parse_result,
 )
 
 logger = logging.getLogger(__name__)
@@ -92,7 +90,9 @@ class SeaDatabricksClient(DatabricksClient):
     STATEMENT_PATH = BASE_PATH + "statements"
     STATEMENT_PATH_WITH_ID = STATEMENT_PATH + "/{}"
     CANCEL_STATEMENT_PATH_WITH_ID = STATEMENT_PATH + "/{}/cancel"
-    CHUNK_PATH_WITH_ID_AND_INDEX = STATEMENT_PATH + "/{}/result/chunks/{}"
+
+    # SEA constants
+    POLL_INTERVAL_SECONDS = 0.2
 
     def __init__(
         self,
@@ -124,7 +124,7 @@ def __init__(
             http_path,
         )
 
-        super().__init__(ssl_options, **kwargs)
+        self._max_download_threads = kwargs.get("max_download_threads", 10)
 
         # Extract warehouse ID from http_path
         self.warehouse_id = self._extract_warehouse_id(http_path)
@@ -136,7 +136,7 @@ def __init__(
             http_path=http_path,
             http_headers=http_headers,
             auth_provider=auth_provider,
-            ssl_options=self._ssl_options,
+            ssl_options=ssl_options,
             **kwargs,
         )
 
@@ -291,28 +291,28 @@ def get_allowed_session_configurations() -> List[str]:
         """
         return list(ALLOWED_SESSION_CONF_TO_DEFAULT_VALUES_MAP.keys())
 
-    def _extract_description_from_manifest(self, manifest_obj) -> Optional[List]:
+    def _extract_description_from_manifest(
+        self, manifest: ResultManifest
+    ) -> Optional[List]:
         """
-        Extract column description from a manifest object.
+        Extract column description from a manifest object, in the format defined by
+        the spec: https://peps.python.org/pep-0249/#description
 
         Args:
-            manifest_obj: The ResultManifest object containing schema information
+            manifest: The ResultManifest object containing schema information
 
         Returns:
             Optional[List]: A list of column tuples or None if no columns are found
         """
 
-        schema_data = manifest_obj.schema
+        schema_data = manifest.schema
         columns_data = schema_data.get("columns", [])
 
         if not columns_data:
             return None
 
         columns = []
         for col_data in columns_data:
-            if not isinstance(col_data, dict):
-                continue
-
             # Format: (name, type_code, display_size, internal_size, precision, scale, null_ok)
             columns.append(
                 (
@@ -328,38 +328,9 @@ def _extract_description_from_manifest(self, manifest_obj) -> Optional[List]:
 
         return columns if columns else None
 
-    def get_chunk_link(self, statement_id: str, chunk_index: int) -> ExternalLink:
-        """
-        Get links for chunks starting from the specified index.
-
-        Args:
-            statement_id: The statement ID
-            chunk_index: The starting chunk index
-
-        Returns:
-            ExternalLink: External link for the chunk
-        """
-
-        response_data = self.http_client._make_request(
-            method="GET",
-            path=self.CHUNK_PATH_WITH_ID_AND_INDEX.format(statement_id, chunk_index),
-        )
-        response = GetChunksResponse.from_dict(response_data)
-
-        links = response.external_links
-        link = next((l for l in links if l.chunk_index == chunk_index), None)
-        if not link:
-            raise ServerOperationError(
-                f"No link found for chunk index {chunk_index}",
-                {
-                    "operation-id": statement_id,
-                    "diagnostic-info": None,
-                },
-            )
-
-        return link
-
-    def _results_message_to_execute_response(self, sea_response, command_id):
+    def _results_message_to_execute_response(
+        self, response: GetStatementResponse
+    ) -> ExecuteResponse:
         """
         Convert a SEA response to an ExecuteResponse and extract result data.
 
@@ -368,33 +339,65 @@ def _results_message_to_execute_response(self, sea_response, command_id):
             command_id: The command ID
 
         Returns:
-            tuple: (ExecuteResponse, ResultData, ResultManifest) - The normalized execute response,
-                  result data object, and manifest object
+            ExecuteResponse: The normalized execute response
         """
 
-        # Parse the response
-        status = parse_status(sea_response)
-        manifest_obj = parse_manifest(sea_response)
-        result_data_obj = parse_result(sea_response)
-
         # Extract description from manifest schema
-        description = self._extract_description_from_manifest(manifest_obj)
+        description = self._extract_description_from_manifest(response.manifest)
 
         # Check for compression
-        lz4_compressed = manifest_obj.result_compression == "LZ4_FRAME"
+        lz4_compressed = (
+            response.manifest.result_compression == ResultCompression.LZ4_FRAME
+        )
 
         execute_response = ExecuteResponse(
-            command_id=command_id,
-            status=status.state,
+            command_id=CommandId.from_sea_statement_id(response.statement_id),
+            status=response.status.state,
             description=description,
             has_been_closed_server_side=False,
             lz4_compressed=lz4_compressed,
             is_staging_operation=False,
             arrow_schema_bytes=None,
-            result_format=manifest_obj.format,
+            result_format=response.manifest.format,
         )
 
-        return execute_response, result_data_obj, manifest_obj
+        return execute_response
+
+    def _check_command_not_in_failed_or_closed_state(
+        self, state: CommandState, command_id: CommandId
+    ) -> None:
+        if state == CommandState.CLOSED:
+            raise DatabaseError(
+                "Command {} unexpectedly closed server side".format(command_id),
+                {
+                    "operation-id": command_id,
+                },
+            )
+        if state == CommandState.FAILED:
+            raise ServerOperationError(
+                "Command {} failed".format(command_id),
+                {
+                    "operation-id": command_id,
+                },
+            )
+
+    def _wait_until_command_done(
+        self, response: ExecuteStatementResponse
+    ) -> CommandState:
+        """
+        Wait until a command is done.
+        """
+
+        state = response.status.state
+        command_id = CommandId.from_sea_statement_id(response.statement_id)
+
+        while state in [CommandState.PENDING, CommandState.RUNNING]:
+            time.sleep(self.POLL_INTERVAL_SECONDS)
+            state = self.get_query_state(command_id)
+
+        self._check_command_not_in_failed_or_closed_state(state, command_id)
+
+        return state
 
     def execute_command(
         self,
@@ -405,7 +408,7 @@ def execute_command(
         lz4_compression: bool,
         cursor: "Cursor",
         use_cloud_fetch: bool,
-        parameters: List,
+        parameters: List[Dict[str, Any]],
         async_op: bool,
         enforce_embedded_schema_correctness: bool,
     ) -> Union["ResultSet", None]:
@@ -439,9 +442,9 @@ def execute_command(
             for param in parameters:
                 sea_parameters.append(
                     StatementParameter(
-                        name=param.name,
-                        value=param.value,
-                        type=param.type if hasattr(param, "type") else None,
+                        name=param["name"],
+                        value=param["value"],
+                        type=param["type"] if "type" in param else None,
                     )
                 )
 
@@ -493,24 +496,7 @@ def execute_command(
         if async_op:
             return None
 
-        # For synchronous operation, wait for the statement to complete
-        status = response.status
-        state = status.state
-
-        # Keep polling until we reach a terminal state
-        while state in [CommandState.PENDING, CommandState.RUNNING]:
-            time.sleep(0.5)  # add a small delay to avoid excessive API calls
-            state = self.get_query_state(command_id)
-
-        if state != CommandState.SUCCEEDED:
-            raise ServerOperationError(
-                f"Statement execution did not succeed: {status.error.message if status.error else 'Unknown error'}",
-                {
-                    "operation-id": command_id.to_sea_statement_id(),
-                    "diagnostic-info": None,
-                },
-            )
-
+        self._wait_until_command_done(response)
         return self.get_execution_result(command_id, cursor)
 
     def cancel_command(self, command_id: CommandId) -> None:
@@ -622,25 +608,21 @@ def get_execution_result(
             path=self.STATEMENT_PATH_WITH_ID.format(sea_statement_id),
             data=request.to_dict(),
         )
+        response = GetStatementResponse.from_dict(response_data)
 
         # Create and return a SeaResultSet
         from databricks.sql.result_set import SeaResultSet
 
-        # Convert the response to an ExecuteResponse and extract result data
-        (
-            execute_response,
-            result_data,
-            manifest,
-        ) = self._results_message_to_execute_response(response_data, command_id)
+        execute_response = self._results_message_to_execute_response(response)
 
         return SeaResultSet(
             connection=cursor.connection,
             execute_response=execute_response,
             sea_client=self,
             buffer_size_bytes=cursor.buffer_size_bytes,
             arraysize=cursor.arraysize,
-            result_data=result_data,
-            manifest=manifest,
+            result_data=response.result,
+            manifest=response.manifest,
         )
 
     # == Metadata Operations ==
@@ -654,7 +636,7 @@ def get_catalogs(
     ) -> "ResultSet":
         """Get available catalogs by executing 'SHOW CATALOGS'."""
         result = self.execute_command(
-            operation="SHOW CATALOGS",
+            operation=MetadataCommands.SHOW_CATALOGS.value,
             session_id=session_id,
             max_rows=max_rows,
             max_bytes=max_bytes,
@@ -681,10 +663,10 @@ def get_schemas(
         if not catalog_name:
             raise ValueError("Catalog name is required for get_schemas")
 
-        operation = f"SHOW SCHEMAS IN `{catalog_name}`"
+        operation = MetadataCommands.SHOW_SCHEMAS.value.format(catalog_name)
 
         if schema_name:
-            operation += f" LIKE '{schema_name}'"
+            operation += MetadataCommands.LIKE_PATTERN.value.format(schema_name)
 
         result = self.execute_command(
             operation=operation,
@@ -716,17 +698,19 @@ def get_tables(
         if not catalog_name:
             raise ValueError("Catalog name is required for get_tables")
 
-        operation = "SHOW TABLES IN " + (
-            "ALL CATALOGS"
+        operation = (
+            MetadataCommands.SHOW_TABLES_ALL_CATALOGS.value
             if catalog_name in [None, "*", "%"]
-            else f"CATALOG `{catalog_name}`"
+            else MetadataCommands.SHOW_TABLES.value.format(
+                MetadataCommands.CATALOG_SPECIFIC.value.format(catalog_name)
+            )
         )
 
         if schema_name:
-            operation += f" SCHEMA LIKE '{schema_name}'"
+            operation += MetadataCommands.SCHEMA_LIKE_PATTERN.value.format(schema_name)
 
         if table_name:
-            operation += f" LIKE '{table_name}'"
+            operation += MetadataCommands.LIKE_PATTERN.value.format(table_name)
 
         result = self.execute_command(
             operation=operation,
@@ -742,7 +726,7 @@ def get_tables(
         )
         assert result is not None, "execute_command returned None in synchronous mode"
 
-        # Apply client-side filtering by table_types if specified
+        # Apply client-side filtering by table_types
         from databricks.sql.backend.filters import ResultSetFilter
 
         result = ResultSetFilter.filter_tables_by_type(result, table_types)
@@ -764,16 +748,16 @@ def get_columns(
         if not catalog_name:
             raise ValueError("Catalog name is required for get_columns")
 
-        operation = f"SHOW COLUMNS IN CATALOG `{catalog_name}`"
+        operation = MetadataCommands.SHOW_COLUMNS.value.format(catalog_name)
 
         if schema_name:
-            operation += f" SCHEMA LIKE '{schema_name}'"
+            operation += MetadataCommands.SCHEMA_LIKE_PATTERN.value.format(schema_name)
 
         if table_name:
-            operation += f" TABLE LIKE '{table_name}'"
+            operation += MetadataCommands.TABLE_LIKE_PATTERN.value.format(table_name)
 
         if column_name:
-            operation += f" LIKE '{column_name}'"
+            operation += MetadataCommands.LIKE_PATTERN.value.format(column_name)
 
         result = self.execute_command(
             operation=operation,