From 9c46399b935e997184232813de2b1497e997834b Mon Sep 17 00:00:00 2001 From: Nick Smith Date: Wed, 14 May 2025 18:46:46 -0500 Subject: [PATCH] Only dispatch requests to Source if we actually will use them The notifications queue in _ranges_or_baskets_to_arrays cannot be aware of chunk requests in flight because the queue only receives the returned chunks. If all the necessary arrays are already in the cache, we exit this function immediately and the chunk requests we already dispatched become fire-and-forget. This causes extra network traffic and, more to the point, potential race conditions between receiving chunks and closing the file on teardown. Fixes https://github.com/scikit-hep/coffea/issues/1331 --- src/uproot/behaviors/TBranch.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/uproot/behaviors/TBranch.py b/src/uproot/behaviors/TBranch.py index ebf86b6e8..7da4d0243 100644 --- a/src/uproot/behaviors/TBranch.py +++ b/src/uproot/behaviors/TBranch.py @@ -3096,8 +3096,6 @@ def _ranges_or_baskets_to_arrays( ): branchid_to_branch[cache_key]._awkward_check(interpretation) - hasbranches._file.source.chunks(ranges, notifications=notifications) - def replace(ranges_or_baskets, original_index, basket): branch, basket_num, range_or_basket = ranges_or_baskets[original_index] ranges_or_baskets[original_index] = branch, basket_num, basket @@ -3176,6 +3174,13 @@ def basket_to_array(basket): else: notifications.put(None) + if len(arrays) == len(branchid_interpretation): + # all arrays are already in the cache + return + + # Request all chunks and then poll notifications queue until we have all the arrays we expect + hasbranches._file.source.chunks(ranges, notifications=notifications) + while len(arrays) < len(branchid_interpretation): obj = notifications.get()