Skip to content

Commit b1acc5b

Browse files
remove _get_schema_bytes (for now)
Signed-off-by: varun-edachali-dbx <varun.edachali@databricks.com>
1 parent 398ca70 commit b1acc5b

File tree

1 file changed

+1
-76
lines changed

1 file changed

+1
-76
lines changed

src/databricks/sql/backend/sea/backend.py

Lines changed: 1 addition & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -301,74 +301,6 @@ def get_allowed_session_configurations() -> List[str]:
301301
"""
302302
return list(ALLOWED_SESSION_CONF_TO_DEFAULT_VALUES_MAP.keys())
303303

304-
def _get_schema_bytes(self, sea_response) -> Optional[bytes]:
305-
"""
306-
Extract schema bytes from the SEA response.
307-
308-
For ARROW format, we need to get the schema bytes from the first chunk.
309-
If the first chunk is not available, we need to get it from the server.
310-
311-
Args:
312-
sea_response: The response from the SEA API
313-
314-
Returns:
315-
bytes: The schema bytes or None if not available
316-
"""
317-
import requests
318-
import lz4.frame
319-
320-
# Check if we have the first chunk in the response
321-
result_data = sea_response.get("result", {})
322-
external_links = result_data.get("external_links", [])
323-
324-
if not external_links:
325-
return None
326-
327-
# Find the first chunk (chunk_index = 0)
328-
first_chunk = None
329-
for link in external_links:
330-
if link.get("chunk_index") == 0:
331-
first_chunk = link
332-
break
333-
334-
if not first_chunk:
335-
# Try to fetch the first chunk from the server
336-
statement_id = sea_response.get("statement_id")
337-
if not statement_id:
338-
return None
339-
340-
chunks_response = self.get_chunk_links(statement_id, 0)
341-
if not chunks_response.external_links:
342-
return None
343-
344-
first_chunk = chunks_response.external_links[0].__dict__
345-
346-
# Download the first chunk to get the schema bytes
347-
external_link = first_chunk.get("external_link")
348-
http_headers = first_chunk.get("http_headers", {})
349-
350-
if not external_link:
351-
return None
352-
353-
# Use requests to download the first chunk
354-
http_response = requests.get(
355-
external_link,
356-
headers=http_headers,
357-
verify=self.ssl_options.tls_verify,
358-
)
359-
360-
if http_response.status_code != 200:
361-
raise Error(f"Failed to download schema bytes: {http_response.text}")
362-
363-
# Extract schema bytes from the Arrow file
364-
# The schema is at the beginning of the file
365-
data = http_response.content
366-
if sea_response.get("manifest", {}).get("result_compression") == "LZ4_FRAME":
367-
data = lz4.frame.decompress(data)
368-
369-
# Return the schema bytes
370-
return data
371-
372304
def _results_message_to_execute_response(self, sea_response, command_id):
373305
"""
374306
Convert a SEA response to an ExecuteResponse and extract result data.
@@ -411,13 +343,6 @@ def _results_message_to_execute_response(self, sea_response, command_id):
411343
)
412344
description = columns if columns else None
413345

414-
# Extract schema bytes for Arrow format
415-
schema_bytes = None
416-
format = manifest_data.get("format")
417-
if format == "ARROW_STREAM":
418-
# For ARROW format, we need to get the schema bytes
419-
schema_bytes = self._get_schema_bytes(sea_response)
420-
421346
# Check for compression
422347
lz4_compressed = manifest_data.get("result_compression") == "LZ4_FRAME"
423348

@@ -472,7 +397,7 @@ def _results_message_to_execute_response(self, sea_response, command_id):
472397
has_been_closed_server_side=False,
473398
lz4_compressed=lz4_compressed,
474399
is_staging_operation=False,
475-
arrow_schema_bytes=schema_bytes,
400+
arrow_schema_bytes=None, # to be extracted during fetch phase for ARROW
476401
result_format=manifest_data.get("format"),
477402
)
478403

0 commit comments

Comments
 (0)