@@ -48,6 +48,14 @@ class StreamInfoPath:
48
48
file : str
49
49
50
50
51
@dataclass
class StreamInfoData:
    '''One converted result handed back while streaming a ServiceX query.

    Instances are built by `_stream_return`, which pairs the output of the
    converter with the `file` value of the local file it was produced from.
    '''
    # Converted payload; the concrete type depends on the converter that was used.
    data: Any
    # `file` value copied from the local-file record the data was converted from.
    file: str
57
+
58
+
51
59
class ServiceXDataset (ServiceXABC ):
52
60
'''
53
61
Used to access an instance of ServiceX at an end point on the internet. Support convieration
@@ -220,6 +228,16 @@ async def get_data_awkward_async(self, selection_query: str):
220
228
return self ._converter .combine_awkward (await self ._data_return (
221
229
selection_query , lambda f : self ._converter .convert_to_awkward (f )))
222
230
231
async def get_data_awkward_stream(self, selection_query: str) \
        -> AsyncIterator[StreamInfoData]:
    '''Returns, as an async iterator, each file of the query result converted
    to awkward data.

    Items are produced by `_stream_return`, so they come back in the order the
    underlying files arrive rather than in any sorted order.

    Arguments:

        selection_query     The `qastle` string specifying the data to be queried.

    Yields:

        StreamInfoData      `data` holds the result of `convert_to_awkward` for one
                            file; `file` is that file's `file` value.
    '''
    async for item in self._stream_return(
            selection_query,
            lambda f: self._converter.convert_to_awkward(f)):
        yield item
235
+
236
async def get_data_pandas_stream(self, selection_query: str) \
        -> AsyncIterator[StreamInfoData]:
    '''Returns, as an async iterator, each file of the query result converted
    to pandas data.

    Items are produced by `_stream_return`, so they come back in the order the
    underlying files arrive rather than in any sorted order.

    Arguments:

        selection_query     The `qastle` string specifying the data to be queried.

    Yields:

        StreamInfoData      `data` holds the result of `convert_to_pandas` for one
                            file; `file` is that file's `file` value.
    '''
    # NOTE(review): no `data_format` is passed, so `_stream_return` uses its
    # default of 'root-file' - confirm that is the intended format for pandas.
    async for item in self._stream_return(
            selection_query,
            lambda f: self._converter.convert_to_pandas(f)):
        yield item
240
+
223
241
async def get_data_rootfiles_url_stream (self , selection_query : str ) \
224
242
-> AsyncIterator [StreamInfoUrl ]:
225
243
'''Returns, as an async iterator, each completed batch of work from ServiceX.
@@ -345,10 +363,10 @@ async def _data_return(self, selection_query: str,
345
363
data Data converted to the "proper" format, depending
346
364
on the converter call.
347
365
'''
348
- as_data = (( f . file , asyncio . ensure_future ( converter ( f . path )))
349
- async for f in self . _stream_local_files ( selection_query , data_format ))
350
-
351
- all_data = { d [ 0 ]: await d [ 1 ] async for d in as_data }
366
+ all_data = {
367
+ f . file : f . data
368
+ async for f in self . _stream_return ( selection_query , converter , data_format )
369
+ }
352
370
353
371
# Convert them to the proper format
354
372
@@ -358,6 +376,33 @@ async def _data_return(self, selection_query: str,
358
376
359
377
return ordered_data
360
378
379
async def _stream_return(self, selection_query: str,
                         converter: Callable[[Path], Awaitable[Any]],
                         data_format: str = 'root-file') -> AsyncIterator[StreamInfoData]:
    '''Given a query, yield the data, in the order it arrives back,
    converted as appropriate.

    Each local file produced by `_stream_local_files` is passed to `converter`,
    and the converted result is yielded before the next file is requested.
    Nothing is retried in this method; any re-submission of the query comes
    from `_stream_local_files`.

    Arguments:

        selection_query     `qastle` data that makes up the selection request.
        converter           A `Callable` that is given the local `Path` of one
                            returned file and whose awaited result is the
                            converted data.
        data_format         File format to request; passed straight through to
                            `_stream_local_files`. Defaults to 'root-file'.

    Yields:

        StreamInfoData      The converted data for one file, along with that
                            file's `file` value.
    '''
    async for local_file in self._stream_local_files(selection_query, data_format):
        # The conversion is awaited right away, so wrapping it in a Task with
        # `asyncio.ensure_future` would add nothing; await the converter directly.
        converted = await converter(local_file.path)
        yield StreamInfoData(converted, local_file.file)
405
+
361
406
@on_exception (backoff .constant , ServiceXUnknownRequestID , interval = 0.1 , max_tries = 3 )
362
407
async def _stream_local_files (self , selection_query : str ,
363
408
data_format : str = 'root-file' ):
0 commit comments