@@ -334,6 +334,7 @@ def __init__(
334
334
335
335
# Track the current chunk we're processing
336
336
self ._current_chunk_link : Optional ["ExternalLink" ] = initial_link
337
+ self ._download_current_link ()
337
338
338
339
# Initialize table and position
339
340
self .table = self ._create_next_table ()
@@ -351,8 +352,22 @@ def _convert_to_thrift_link(self, link: "ExternalLink") -> TSparkArrowResultLink
351
352
httpHeaders = link .http_headers or {},
352
353
)
353
354
355
+ def _download_current_link (self ):
356
+ """Download the current chunk link."""
357
+ if not self ._current_chunk_link :
358
+ return None
359
+
360
+ if not self .download_manager :
361
+ logger .debug ("SeaCloudFetchQueue: No download manager, returning" )
362
+ return None
363
+
364
+ thrift_link = self ._convert_to_thrift_link (self ._current_chunk_link )
365
+ self .download_manager .add_link (thrift_link )
366
+
354
367
def _progress_chunk_link (self ):
355
368
"""Progress to the next chunk link."""
369
+ if not self ._current_chunk_link :
370
+ return None
356
371
357
372
next_chunk_index = self ._current_chunk_link .next_chunk_index
358
373
@@ -369,24 +384,19 @@ def _progress_chunk_link(self):
369
384
next_chunk_index , e
370
385
)
371
386
)
387
+ return None
388
+
372
389
logger .debug (
373
390
f"SeaCloudFetchQueue: Progressed to link for chunk { next_chunk_index } : { self ._current_chunk_link } "
374
391
)
392
+ self ._download_current_link ()
375
393
376
394
def _create_next_table (self ) -> Union ["pyarrow.Table" , None ]:
377
395
"""Create next table by retrieving the logical next downloaded file."""
378
396
if not self ._current_chunk_link :
379
- logger .debug ("SeaCloudFetchQueue: No current chunk link, returning None " )
397
+ logger .debug ("SeaCloudFetchQueue: No current chunk link, returning" )
380
398
return None
381
399
382
- logger .debug (
383
- f"SeaCloudFetchQueue: Trying to get downloaded file for chunk { self ._current_chunk_link .chunk_index } "
384
- )
385
-
386
- if self .download_manager :
387
- thrift_link = self ._convert_to_thrift_link (self ._current_chunk_link )
388
- self .download_manager .add_link (thrift_link )
389
-
390
400
row_offset = self ._current_chunk_link .row_offset
391
401
arrow_table = self ._create_table_at_offset (row_offset )
392
402
0 commit comments