31
31
32
32
33
33
class StreamInfoBase :
34
+ '''Contains base information about results that are streamed back from
35
+ ServiceX.
36
+ '''
34
37
def __init__ (self , file : str ):
35
38
self ._file = file
36
39
37
40
@property
38
41
def file (self ) -> str :
42
+ '''Returns the ServiceX filename
43
+
44
+ This filename is unique in the dataset, and will be the same accross different queries
45
+ against the dataset. It can be used as a key to sort results.
46
+
47
+ Notes:
48
+
49
+ - May contains non-file system characters
50
+
51
+ Returns:
52
+ str: servicex filename
53
+ '''
39
54
return self ._file
40
55
41
56
42
57
class StreamInfoUrl (StreamInfoBase ):
58
+ '''Contains information about results that are streamed back from ServiceX.
59
+ Used when a URL to access the data directly from ServiceX is requested.
60
+ '''
43
61
def __init__ (self , file : str , url : str , bucket : str ):
44
62
super ().__init__ (file )
45
63
self ._url = url
46
64
self ._bucket = bucket
47
65
48
66
@property
49
67
def url (self ) -> str :
68
+ '''URL that can can be used to stream data back from ServiceX.
69
+
70
+ Returns:
71
+ str: The URL of the transformed data for this file.
72
+ '''
50
73
return self ._url
51
74
52
75
@property
@@ -55,22 +78,40 @@ def bucket(self) -> str:
55
78
56
79
57
80
class StreamInfoPath (StreamInfoBase ):
81
+ '''Contains information about results that are streamed back from ServiceX.
82
+ Used when the user has requested streaming, but copying the file locally first.
83
+ '''
58
84
def __init__ (self , file : str , path : Path ):
59
85
super ().__init__ (file )
60
86
self ._path = path
61
87
62
88
@property
63
89
def path (self ) -> Path :
90
+ '''Path the the local file of ServiceX data that represents this query.
91
+
92
+ Returns:
93
+ Path: The path object that points to the data requested.
94
+ '''
64
95
return self ._path
65
96
66
97
67
98
class StreamInfoData (StreamInfoBase ):
99
+ '''Contains information about results that are streamed back from ServiceX.
100
+ Used when data (`pandas` or `awkward`) is requested. The data is downloaded from
101
+ ServiceX, converted into the requested format, and then streamed to the user in these
102
+ chunks. There is a single chunk per file.
103
+ '''
68
104
def __init__ (self , file : str , data : Any ):
69
105
super ().__init__ (file )
70
106
self ._data = data
71
107
72
108
@property
73
109
def data (self ) -> Any :
110
+ '''The `pandas.DataFrame` or `awkward` array return
111
+
112
+ Returns:
113
+ Any: The ServiceX transformed data for this file.
114
+ '''
74
115
return self ._data
75
116
76
117
@@ -246,12 +287,38 @@ async def get_data_awkward_async(self, selection_query: str):
246
287
return self ._converter .combine_awkward (await self ._data_return (
247
288
selection_query , lambda f : self ._converter .convert_to_awkward (f )))
248
289
249
- async def get_data_awkward_stream (self , selection_query : str ):
290
+ async def get_data_awkward_stream (self , selection_query : str ) \
291
+ -> AsyncGenerator [StreamInfoData , None ]:
292
+ '''Returns, as an async iterator, each completed batch of work from Servicex
293
+ as a separate `awkward` array. The data is returned in a `StreamInfoData` object.
294
+
295
+ Args:
296
+ selection_query (str): The `qastle` query for the data to retreive.
297
+
298
+ Yields:
299
+ AsyncIterator[StreamInfoData]: As ServiceX completes the data, and it is downloaded
300
+ to the local machine, the async iterator returns
301
+ a `StreamInfoData` which can be used to access the
302
+ data that has been loaded from the file.
303
+ '''
250
304
async for a in self ._stream_return (selection_query ,
251
305
lambda f : self ._converter .convert_to_awkward (f )):
252
306
yield a
253
307
254
- async def get_data_pandas_stream (self , selection_query : str ):
308
+ async def get_data_pandas_stream (self , selection_query : str ) \
309
+ -> AsyncGenerator [StreamInfoData , None ]:
310
+ '''Returns, as an async iterator, each completed batch of work from Servicex
311
+ as a separate `pandas.DataFrame` array. The data is returned in a `StreamInfoData` object.
312
+
313
+ Args:
314
+ selection_query (str): The `qastle` query for the data to retreive.
315
+
316
+ Yields:
317
+ AsyncIterator[StreamInfoData]: As ServiceX completes the data, and it is downloaded
318
+ to the local machine, the async iterator returns
319
+ a `StreamInfoData` which can be used to access the
320
+ data that has been loaded from the file.
321
+ '''
255
322
async for a in self ._stream_return (selection_query ,
256
323
lambda f : self ._converter .convert_to_pandas (f )):
257
324
yield a
0 commit comments