1
1
import logging
2
- import uuid
3
2
import time
4
3
import re
5
- from typing import Dict , Tuple , List , Optional , Any , Union , TYPE_CHECKING , Set
6
-
7
- from databricks .sql .backend .sea .models .base import ExternalLink
4
+ from typing import Any , Dict , Tuple , List , Optional , Union , TYPE_CHECKING , Set
8
5
6
+ from databricks .sql .backend .sea .models .base import ExternalLink , ResultManifest
9
7
from databricks .sql .backend .sea .utils .constants import (
10
8
ALLOWED_SESSION_CONF_TO_DEFAULT_VALUES_MAP ,
11
- MetadataCommands ,
12
9
ResultFormat ,
13
10
ResultDisposition ,
14
11
ResultCompression ,
15
12
WaitTimeout ,
13
+ MetadataCommands ,
16
14
)
17
15
18
16
if TYPE_CHECKING :
29
27
)
30
28
from databricks .sql .exc import DatabaseError , ServerOperationError
31
29
from databricks .sql .backend .sea .utils .http_client import SeaHttpClient
32
- from databricks .sql .thrift_api .TCLIService import ttypes
33
30
from databricks .sql .types import SSLOptions
34
31
35
32
from databricks .sql .backend .sea .models import (
43
40
ExecuteStatementResponse ,
44
41
GetStatementResponse ,
45
42
CreateSessionResponse ,
43
+ )
44
+ from databricks .sql .backend .sea .models .responses import (
46
45
GetChunksResponse ,
47
46
)
48
47
@@ -91,6 +90,9 @@ class SeaDatabricksClient(DatabricksClient):
91
90
CANCEL_STATEMENT_PATH_WITH_ID = STATEMENT_PATH + "/{}/cancel"
92
91
CHUNK_PATH_WITH_ID_AND_INDEX = STATEMENT_PATH + "/{}/result/chunks/{}"
93
92
93
+ # SEA constants
94
+ POLL_INTERVAL_SECONDS = 0.2
95
+
94
96
def __init__ (
95
97
self ,
96
98
server_hostname : str ,
@@ -121,7 +123,7 @@ def __init__(
121
123
http_path ,
122
124
)
123
125
124
- super ().__init__ (ssl_options , ** kwargs )
126
+ super ().__init__ (ssl_options = ssl_options , ** kwargs )
125
127
126
128
# Extract warehouse ID from http_path
127
129
self .warehouse_id = self ._extract_warehouse_id (http_path )
@@ -288,28 +290,28 @@ def get_allowed_session_configurations() -> List[str]:
288
290
"""
289
291
return list (ALLOWED_SESSION_CONF_TO_DEFAULT_VALUES_MAP .keys ())
290
292
291
- def _extract_description_from_manifest (self , manifest_obj ) -> Optional [List ]:
293
+ def _extract_description_from_manifest (
294
+ self , manifest : ResultManifest
295
+ ) -> Optional [List ]:
292
296
"""
293
- Extract column description from a manifest object.
297
+ Extract column description from a manifest object, in the format defined by
298
+ the spec: https://peps.python.org/pep-0249/#description
294
299
295
300
Args:
296
- manifest_obj : The ResultManifest object containing schema information
301
+ manifest : The ResultManifest object containing schema information
297
302
298
303
Returns:
299
304
Optional[List]: A list of column tuples or None if no columns are found
300
305
"""
301
306
302
- schema_data = manifest_obj .schema
307
+ schema_data = manifest .schema
303
308
columns_data = schema_data .get ("columns" , [])
304
309
305
310
if not columns_data :
306
311
return None
307
312
308
313
columns = []
309
314
for col_data in columns_data :
310
- if not isinstance (col_data , dict ):
311
- continue
312
-
313
315
# Format: (name, type_code, display_size, internal_size, precision, scale, null_ok)
314
316
columns .append (
315
317
(
@@ -325,38 +327,9 @@ def _extract_description_from_manifest(self, manifest_obj) -> Optional[List]:
325
327
326
328
return columns if columns else None
327
329
328
- def get_chunk_link (self , statement_id : str , chunk_index : int ) -> ExternalLink :
329
- """
330
- Get links for chunks starting from the specified index.
331
-
332
- Args:
333
- statement_id: The statement ID
334
- chunk_index: The starting chunk index
335
-
336
- Returns:
337
- ExternalLink: External link for the chunk
338
- """
339
-
340
- response_data = self .http_client ._make_request (
341
- method = "GET" ,
342
- path = self .CHUNK_PATH_WITH_ID_AND_INDEX .format (statement_id , chunk_index ),
343
- )
344
- response = GetChunksResponse .from_dict (response_data )
345
-
346
- links = response .external_links
347
- link = next ((l for l in links if l .chunk_index == chunk_index ), None )
348
- if not link :
349
- raise ServerOperationError (
350
- f"No link found for chunk index { chunk_index } " ,
351
- {
352
- "operation-id" : statement_id ,
353
- "diagnostic-info" : None ,
354
- },
355
- )
356
-
357
- return link
358
-
359
- def _results_message_to_execute_response (self , response : GetStatementResponse , command_id : CommandId ):
330
+ def _results_message_to_execute_response (
331
+ self , response : GetStatementResponse
332
+ ) -> ExecuteResponse :
360
333
"""
361
334
Convert a SEA response to an ExecuteResponse and extract result data.
362
335
@@ -365,18 +338,19 @@ def _results_message_to_execute_response(self, response: GetStatementResponse, c
365
338
command_id: The command ID
366
339
367
340
Returns:
368
- tuple: (ExecuteResponse, ResultData, ResultManifest) - The normalized execute response,
369
- result data object, and manifest object
341
+ ExecuteResponse: The normalized execute response
370
342
"""
371
343
372
344
# Extract description from manifest schema
373
345
description = self ._extract_description_from_manifest (response .manifest )
374
346
375
347
# Check for compression
376
- lz4_compressed = response .manifest .result_compression == ResultCompression .LZ4_FRAME .value
348
+ lz4_compressed = (
349
+ response .manifest .result_compression == ResultCompression .LZ4_FRAME .value
350
+ )
377
351
378
352
execute_response = ExecuteResponse (
379
- command_id = command_id ,
353
+ command_id = CommandId . from_sea_statement_id ( response . statement_id ) ,
380
354
status = response .status .state ,
381
355
description = description ,
382
356
has_been_closed_server_side = False ,
@@ -433,7 +407,7 @@ def execute_command(
433
407
lz4_compression : bool ,
434
408
cursor : "Cursor" ,
435
409
use_cloud_fetch : bool ,
436
- parameters : List ,
410
+ parameters : List [ Dict [ str , Any ]] ,
437
411
async_op : bool ,
438
412
enforce_embedded_schema_correctness : bool ,
439
413
) -> Union ["ResultSet" , None ]:
@@ -467,9 +441,9 @@ def execute_command(
467
441
for param in parameters :
468
442
sea_parameters .append (
469
443
StatementParameter (
470
- name = param . name ,
471
- value = param . value ,
472
- type = param . type if hasattr ( param , "type" ) else None ,
444
+ name = param [ " name" ] ,
445
+ value = param [ " value" ] ,
446
+ type = param [ " type" ] if "type" in param else None ,
473
447
)
474
448
)
475
449
@@ -638,8 +612,7 @@ def get_execution_result(
638
612
# Create and return a SeaResultSet
639
613
from databricks .sql .result_set import SeaResultSet
640
614
641
- # Convert the response to an ExecuteResponse and extract result data
642
- execute_response = self ._results_message_to_execute_response (response , command_id )
615
+ execute_response = self ._results_message_to_execute_response (response )
643
616
644
617
return SeaResultSet (
645
618
connection = cursor .connection ,
@@ -651,6 +624,35 @@ def get_execution_result(
651
624
manifest = response .manifest ,
652
625
)
653
626
627
+ def get_chunk_link (self , statement_id : str , chunk_index : int ) -> ExternalLink :
628
+ """
629
+ Get links for chunks starting from the specified index.
630
+ Args:
631
+ statement_id: The statement ID
632
+ chunk_index: The starting chunk index
633
+ Returns:
634
+ ExternalLink: External link for the chunk
635
+ """
636
+
637
+ response_data = self .http_client ._make_request (
638
+ method = "GET" ,
639
+ path = self .CHUNK_PATH_WITH_ID_AND_INDEX .format (statement_id , chunk_index ),
640
+ )
641
+ response = GetChunksResponse .from_dict (response_data )
642
+
643
+ links = response .external_links
644
+ link = next ((l for l in links if l .chunk_index == chunk_index ), None )
645
+ if not link :
646
+ raise ServerOperationError (
647
+ f"No link found for chunk index { chunk_index } " ,
648
+ {
649
+ "operation-id" : statement_id ,
650
+ "diagnostic-info" : None ,
651
+ },
652
+ )
653
+
654
+ return link
655
+
654
656
# == Metadata Operations ==
655
657
656
658
def get_catalogs (
@@ -692,7 +694,7 @@ def get_schemas(
692
694
operation = MetadataCommands .SHOW_SCHEMAS .value .format (catalog_name )
693
695
694
696
if schema_name :
695
- operation += f" LIKE ' { schema_name } '"
697
+ operation += MetadataCommands . LIKE_PATTERN . value . format ( schema_name )
696
698
697
699
result = self .execute_command (
698
700
operation = operation ,
@@ -724,17 +726,19 @@ def get_tables(
724
726
if not catalog_name :
725
727
raise ValueError ("Catalog name is required for get_tables" )
726
728
727
- operation = MetadataCommands . SHOW_TABLES . value . format (
729
+ operation = (
728
730
MetadataCommands .SHOW_TABLES_ALL_CATALOGS .value
729
731
if catalog_name in [None , "*" , "%" ]
730
- else MetadataCommands .CATALOG_SPECIFIC .value .format (catalog_name )
732
+ else MetadataCommands .SHOW_TABLES .value .format (
733
+ MetadataCommands .CATALOG_SPECIFIC .value .format (catalog_name )
734
+ )
731
735
)
732
736
733
737
if schema_name :
734
- operation += f" SCHEMA LIKE ' { schema_name } '"
738
+ operation += MetadataCommands . SCHEMA_LIKE_PATTERN . value . format ( schema_name )
735
739
736
740
if table_name :
737
- operation += f" LIKE ' { table_name } '"
741
+ operation += MetadataCommands . LIKE_PATTERN . value . format ( table_name )
738
742
739
743
result = self .execute_command (
740
744
operation = operation ,
@@ -750,7 +754,7 @@ def get_tables(
750
754
)
751
755
assert result is not None , "execute_command returned None in synchronous mode"
752
756
753
- # Apply client-side filtering by table_types if specified
757
+ # Apply client-side filtering by table_types
754
758
from databricks .sql .backend .filters import ResultSetFilter
755
759
756
760
result = ResultSetFilter .filter_tables_by_type (result , table_types )
0 commit comments