Skip to content

Commit 7ce9d28

Browse files
Ease log warnings
Signed-off-by: varun-edachali-dbx <varun.edachali@databricks.com>
1 parent b765e33 commit 7ce9d28

File tree

2 files changed

+20
-8
lines changed

2 files changed

+20
-8
lines changed

examples/experimental/sea_connector_test.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -508,8 +508,8 @@ def test_sea_result_set_with_multiple_chunks():
508508

509509
# Verify results using row IDs instead of row counts
510510
# Calculate the sum of rows from the manifest chunks
511-
manifest_rows_sum = sum(chunk.get('row_count', 0) for chunk in manifest.get('chunks', []))
512-
logger.info(f"Expected rows from manifest chunks: {manifest_rows_sum}")
511+
total_rows_from_manifest = sum(chunk.get('row_count', 0) for chunk in manifest.get('chunks', []))
512+
logger.info(f"Expected rows from manifest chunks: {total_rows_from_manifest}")
513513

514514
# Collect all row IDs to check for duplicates and completeness
515515
all_row_ids = set()
@@ -561,7 +561,7 @@ def test_sea_result_set_with_multiple_chunks():
561561
logger.warning("⚠️ Duplicate row IDs detected")
562562

563563
# Check if we got all expected rows
564-
if max_id == manifest_rows_sum:
564+
if max_id == total_rows_from_manifest:
565565
logger.info("✅ Last row ID matches expected row count from manifest")
566566

567567
# Let's try one more time with a fresh cursor to fetch all rows at once
@@ -603,7 +603,7 @@ def test_sea_result_set_with_multiple_chunks():
603603
logger.warning("⚠️ Arrow results: Duplicate row IDs detected")
604604

605605
# Compare with manifest row count
606-
if arrow_max_id == manifest_rows_sum:
606+
if arrow_max_id == total_rows_from_manifest:
607607
logger.info("✅ Arrow results: Last row ID matches expected row count from manifest")
608608

609609
# Compare with sequential fetch results
@@ -619,8 +619,16 @@ def test_sea_result_set_with_multiple_chunks():
619619
logger.warning(f"IDs only in sequential fetch: {len(only_in_sequential)} rows")
620620

621621
# Check if we got all rows
622-
logger.info(f"Expected rows from manifest chunks: {manifest_rows_sum}")
622+
total_rows_from_manifest = sum(chunk.get('row_count', 0) for chunk in manifest.get('chunks', []))
623+
logger.info(f"Expected rows from manifest chunks: {total_rows_from_manifest}")
623624
logger.info(f"Actual rows in arrow table: {arrow_table.num_rows}")
625+
626+
# Note: The server might return more rows than specified in the manifest due to query optimization
627+
# This is expected behavior and not an error
628+
if arrow_table.num_rows >= total_rows_from_manifest:
629+
logger.info("✅ Retrieved at least as many rows as expected from the manifest")
630+
else:
631+
logger.warning(f"⚠️ Retrieved fewer rows ({arrow_table.num_rows}) than expected from manifest ({total_rows_from_manifest})")
624632
except Exception as e:
625633
logger.error(f"Error fetching all rows as Arrow: {e}")
626634

src/databricks/sql/cloudfetch/downloader.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,9 +113,13 @@ def run(self) -> DownloadedFile:
113113
else compressed_data
114114
)
115115

116-
# The size of the downloaded file should match the size specified from TSparkArrowResultLink
117-
if len(decompressed_data) != self.link.bytesNum:
118-
logger.debug(
116+
# For compressed files, the decompressed size will be larger than the bytesNum (which is the compressed size)
117+
# Only log a warning if the file is not compressed and sizes don't match
118+
if (
119+
not self.settings.is_lz4_compressed
120+
and len(decompressed_data) != self.link.bytesNum
121+
):
122+
logger.warning(
119123
"ResultSetDownloadHandler: downloaded file size {} does not match the expected value {}".format(
120124
len(decompressed_data), self.link.bytesNum
121125
)

0 commit comments

Comments
 (0)