Skip to content

Commit be17812

Browse files
Merge branch 'fetch-json-inline' into ext-links-sea
2 parents d68e4ea + 715cc13 commit be17812

File tree

4 files changed

+467
-348
lines changed

4 files changed

+467
-348
lines changed

examples/experimental/tests/test_sea_async_query.py

Lines changed: 9 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -77,33 +77,12 @@ def test_sea_async_query_with_cloud_fetch():
7777

7878
logger.info("Query is no longer pending, getting results...")
7979
cursor.get_async_execution_result()
80-
81-
# Use a mix of fetch methods to retrieve all rows
82-
logger.info("Retrieving data using a mix of fetch methods")
83-
84-
# First, get one row with fetchone
85-
first_row = cursor.fetchone()
86-
if not first_row:
87-
logger.error("FAIL: fetchone returned None, expected a row")
88-
return False
89-
90-
logger.info(f"Successfully retrieved first row with ID: {first_row[0]}")
91-
retrieved_rows = [first_row]
92-
93-
# Then, get a batch of rows with fetchmany
94-
batch_size = 100
95-
batch_rows = cursor.fetchmany(batch_size)
96-
logger.info(f"Successfully retrieved {len(batch_rows)} rows with fetchmany")
97-
retrieved_rows.extend(batch_rows)
98-
99-
# Finally, get all remaining rows with fetchall
100-
remaining_rows = cursor.fetchall()
101-
logger.info(f"Successfully retrieved {len(remaining_rows)} rows with fetchall")
102-
retrieved_rows.extend(remaining_rows)
103-
104-
# Calculate total row count
105-
actual_row_count = len(retrieved_rows)
10680

81+
results = [cursor.fetchone()]
82+
results.extend(cursor.fetchmany(10))
83+
results.extend(cursor.fetchall())
84+
logger.info(f"{len(results)} rows retrieved against 100 requested")
85+
10786
logger.info(
10887
f"Requested {requested_row_count} rows, received {actual_row_count} rows"
10988
)
@@ -200,33 +179,11 @@ def test_sea_async_query_without_cloud_fetch():
200179

201180
logger.info("Query is no longer pending, getting results...")
202181
cursor.get_async_execution_result()
182+
results = [cursor.fetchone()]
183+
results.extend(cursor.fetchmany(10))
184+
results.extend(cursor.fetchall())
185+
logger.info(f"{len(results)} rows retrieved against 100 requested")
203186

204-
# Use a mix of fetch methods to retrieve all rows
205-
logger.info("Retrieving data using a mix of fetch methods")
206-
207-
# First, get one row with fetchone
208-
first_row = cursor.fetchone()
209-
if not first_row:
210-
logger.error("FAIL: fetchone returned None, expected a row")
211-
return False
212-
213-
logger.info(f"Successfully retrieved first row with ID: {first_row[0]}")
214-
retrieved_rows = [first_row]
215-
216-
# Then, get a batch of rows with fetchmany
217-
batch_size = 10 # Smaller batch size for non-cloud fetch
218-
batch_rows = cursor.fetchmany(batch_size)
219-
logger.info(f"Successfully retrieved {len(batch_rows)} rows with fetchmany")
220-
retrieved_rows.extend(batch_rows)
221-
222-
# Finally, get all remaining rows with fetchall
223-
remaining_rows = cursor.fetchall()
224-
logger.info(f"Successfully retrieved {len(remaining_rows)} rows with fetchall")
225-
retrieved_rows.extend(remaining_rows)
226-
227-
# Calculate total row count
228-
actual_row_count = len(retrieved_rows)
229-
230187
logger.info(
231188
f"Requested {requested_row_count} rows, received {actual_row_count} rows"
232189
)

examples/experimental/tests/test_sea_sync_query.py

Lines changed: 12 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -62,46 +62,10 @@ def test_sea_sync_query_with_cloud_fetch():
6262
logger.info(
6363
f"Executing synchronous query with cloud fetch to generate {requested_row_count} rows"
6464
)
65-
cursor.execute(query)
66-
67-
# Use a mix of fetch methods to retrieve all rows
68-
logger.info("Retrieving data using a mix of fetch methods")
69-
70-
# First, get one row with fetchone
71-
first_row = cursor.fetchone()
72-
if not first_row:
73-
logger.error("FAIL: fetchone returned None, expected a row")
74-
return False
75-
76-
logger.info(f"Successfully retrieved first row with ID: {first_row[0]}")
77-
retrieved_rows = [first_row]
78-
79-
# Then, get a batch of rows with fetchmany
80-
batch_size = 100
81-
batch_rows = cursor.fetchmany(batch_size)
82-
logger.info(f"Successfully retrieved {len(batch_rows)} rows with fetchmany")
83-
retrieved_rows.extend(batch_rows)
84-
85-
# Finally, get all remaining rows with fetchall
86-
remaining_rows = cursor.fetchall()
87-
logger.info(f"Successfully retrieved {len(remaining_rows)} rows with fetchall")
88-
retrieved_rows.extend(remaining_rows)
89-
90-
# Calculate total row count
91-
actual_row_count = len(retrieved_rows)
92-
93-
logger.info(
94-
f"Requested {requested_row_count} rows, received {actual_row_count} rows"
95-
)
96-
97-
# Verify total row count
98-
if actual_row_count != requested_row_count:
99-
logger.error(
100-
f"FAIL: Row count mismatch. Expected {requested_row_count}, got {actual_row_count}"
101-
)
102-
return False
103-
104-
logger.info("PASS: Received correct number of rows with cloud fetch and all fetch methods work correctly")
65+
results = [cursor.fetchone()]
66+
results.extend(cursor.fetchmany(10))
67+
results.extend(cursor.fetchall())
68+
logger.info(f"{len(results)} rows retrieved against 100 requested")
10569

10670
# Close resources
10771
cursor.close()
@@ -163,56 +127,15 @@ def test_sea_sync_query_without_cloud_fetch():
163127
# For non-cloud fetch, use a smaller row count to avoid exceeding inline limits
164128
requested_row_count = 100
165129
cursor = connection.cursor()
166-
query = f"""
167-
SELECT
168-
id,
169-
concat('value_', repeat('a', 100)) as test_value
170-
FROM range(1, {requested_row_count} + 1) AS t(id)
171-
"""
172-
173-
logger.info(
174-
f"Executing synchronous query without cloud fetch to generate {requested_row_count} rows"
130+
logger.info("Executing synchronous query without cloud fetch: SELECT 100 rows")
131+
cursor.execute(
132+
"SELECT id, 'test_value_' || CAST(id as STRING) as test_value FROM range(1, 101)"
175133
)
176-
cursor.execute(query)
177-
178-
# Use a mix of fetch methods to retrieve all rows
179-
logger.info("Retrieving data using a mix of fetch methods")
180-
181-
# First, get one row with fetchone
182-
first_row = cursor.fetchone()
183-
if not first_row:
184-
logger.error("FAIL: fetchone returned None, expected a row")
185-
return False
186-
187-
logger.info(f"Successfully retrieved first row with ID: {first_row[0]}")
188-
retrieved_rows = [first_row]
189-
190-
# Then, get a batch of rows with fetchmany
191-
batch_size = 10 # Smaller batch size for non-cloud fetch
192-
batch_rows = cursor.fetchmany(batch_size)
193-
logger.info(f"Successfully retrieved {len(batch_rows)} rows with fetchmany")
194-
retrieved_rows.extend(batch_rows)
195-
196-
# Finally, get all remaining rows with fetchall
197-
remaining_rows = cursor.fetchall()
198-
logger.info(f"Successfully retrieved {len(remaining_rows)} rows with fetchall")
199-
retrieved_rows.extend(remaining_rows)
200-
201-
# Calculate total row count
202-
actual_row_count = len(retrieved_rows)
203-
204-
logger.info(
205-
f"Requested {requested_row_count} rows, received {actual_row_count} rows"
206-
)
207-
208-
# Verify total row count
209-
if actual_row_count != requested_row_count:
210-
logger.error(
211-
f"FAIL: Row count mismatch. Expected {requested_row_count}, got {actual_row_count}"
212-
)
213-
return False
214-
215-
logger.info("PASS: Received correct number of rows without cloud fetch and all fetch methods work correctly")
134+
135+
results = [cursor.fetchone()]
136+
results.extend(cursor.fetchmany(10))
137+
results.extend(cursor.fetchall())
138+
logger.info(f"{len(results)} rows retrieved against 100 requested")
216139

217140
# Close resources
218141
cursor.close()

src/databricks/sql/result_set.py

Lines changed: 3 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -155,16 +155,6 @@ def fetchall(self) -> List[Row]:
155155
"""Fetch all remaining rows of a query result."""
156156
pass
157157

158-
@abstractmethod
159-
def fetchmany_arrow(self, size: int) -> "pyarrow.Table":
160-
"""Fetch the next set of rows as an Arrow table."""
161-
pass
162-
163-
@abstractmethod
164-
def fetchall_arrow(self) -> "pyarrow.Table":
165-
"""Fetch all remaining rows as an Arrow table."""
166-
pass
167-
168158
def close(self) -> None:
169159
"""
170160
Close the result set.
@@ -478,19 +468,13 @@ def __init__(
478468
# Build the results queue
479469
results_queue = None
480470

471+
results_queue = None
481472
if result_data:
482-
from typing import cast, List
483-
484-
# Convert description to the expected format
485-
desc = None
486-
if execute_response.description:
487-
desc = cast(List[Tuple[Any, ...]], execute_response.description)
488-
489473
results_queue = SeaResultSetQueueFactory.build_queue(
490474
result_data,
491475
manifest,
492-
str(self.statement_id),
493-
description=desc,
476+
str(execute_response.command_id.to_sea_statement_id()),
477+
description=execute_response.description,
494478
max_download_threads=sea_client.max_download_threads,
495479
ssl_options=sea_client.ssl_options,
496480
sea_client=sea_client,
@@ -536,38 +520,6 @@ def fetchmany_arrow(self, size: int) -> "pyarrow.Table":
536520
n_remaining_rows = size - results.num_rows
537521
self._next_row_index += results.num_rows
538522

539-
while n_remaining_rows > 0:
540-
partial_results = self.results.next_n_rows(n_remaining_rows)
541-
results = pyarrow.concat_tables([results, partial_results])
542-
n_remaining_rows = n_remaining_rows - partial_results.num_rows
543-
self._next_row_index += partial_results.num_rows
544-
545-
return results
546-
547-
def fetchall_arrow(self) -> "pyarrow.Table":
548-
"""
549-
Fetch all remaining rows as an Arrow table.
550-
551-
Returns:
552-
PyArrow Table containing all remaining rows
553-
554-
Raises:
555-
ImportError: If PyArrow is not installed
556-
"""
557-
results = self.results.remaining_rows()
558-
self._next_row_index += results.num_rows
559-
560-
# If PyArrow is installed and we have a ColumnTable result, convert it to PyArrow Table
561-
# Valid only for metadata commands result set
562-
if isinstance(results, ColumnTable) and pyarrow:
563-
data = {
564-
name: col
565-
for name, col in zip(results.column_names, results.column_table)
566-
}
567-
return pyarrow.Table.from_pydict(data)
568-
569-
return results
570-
571523
def fetchmany_json(self, size: int):
572524
"""
573525
Fetch the next set of rows as a columnar table.
@@ -585,15 +537,8 @@ def fetchmany_json(self, size: int):
585537
raise ValueError(f"size argument for fetchmany is {size} but must be >= 0")
586538

587539
results = self.results.next_n_rows(size)
588-
n_remaining_rows = size - len(results)
589540
self._next_row_index += len(results)
590541

591-
while n_remaining_rows > 0:
592-
partial_results = self.results.next_n_rows(n_remaining_rows)
593-
results = results + partial_results
594-
n_remaining_rows = n_remaining_rows - len(partial_results)
595-
self._next_row_index += len(partial_results)
596-
597542
return results
598543

599544
def fetchall_json(self):

0 commit comments

Comments (0)