Skip to content

Commit 6550120

Browse files
committed
Fix up documentation
1 parent f8a1293 commit 6550120

File tree

1 file changed

+69
-2
lines changed

1 file changed

+69
-2
lines changed

servicex/servicex.py

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,22 +31,45 @@
3131

3232

3333
class StreamInfoBase:
34+
'''Contains base information about results that are streamed back from
35+
ServiceX.
36+
'''
3437
def __init__(self, file: str):
3538
self._file = file
3639

3740
@property
3841
def file(self) -> str:
42+
'''Returns the ServiceX filename
43+
44+
This filename is unique in the dataset, and will be the same accross different queries
45+
against the dataset. It can be used as a key to sort results.
46+
47+
Notes:
48+
49+
- May contains non-file system characters
50+
51+
Returns:
52+
str: servicex filename
53+
'''
3954
return self._file
4055

4156

4257
class StreamInfoUrl(StreamInfoBase):
58+
'''Contains information about results that are streamed back from ServiceX.
59+
Used when a URL to access the data directly from ServiceX is requested.
60+
'''
4361
def __init__(self, file: str, url: str, bucket: str):
4462
super().__init__(file)
4563
self._url = url
4664
self._bucket = bucket
4765

4866
@property
4967
def url(self) -> str:
68+
'''URL that can can be used to stream data back from ServiceX.
69+
70+
Returns:
71+
str: The URL of the transformed data for this file.
72+
'''
5073
return self._url
5174

5275
@property
@@ -55,22 +78,40 @@ def bucket(self) -> str:
5578

5679

5780
class StreamInfoPath(StreamInfoBase):
81+
'''Contains information about results that are streamed back from ServiceX.
82+
Used when the user has requested streaming, but copying the file locally first.
83+
'''
5884
def __init__(self, file: str, path: Path):
5985
super().__init__(file)
6086
self._path = path
6187

6288
@property
6389
def path(self) -> Path:
90+
'''Path the the local file of ServiceX data that represents this query.
91+
92+
Returns:
93+
Path: The path object that points to the data requested.
94+
'''
6495
return self._path
6596

6697

6798
class StreamInfoData(StreamInfoBase):
99+
'''Contains information about results that are streamed back from ServiceX.
100+
Used when data (`pandas` or `awkward`) is requested. The data is downloaded from
101+
ServiceX, converted into the requested format, and then streamed to the user in these
102+
chunks. There is a single chunk per file.
103+
'''
68104
def __init__(self, file: str, data: Any):
69105
super().__init__(file)
70106
self._data = data
71107

72108
@property
73109
def data(self) -> Any:
110+
'''The `pandas.DataFrame` or `awkward` array return
111+
112+
Returns:
113+
Any: The ServiceX transformed data for this file.
114+
'''
74115
return self._data
75116

76117

@@ -246,12 +287,38 @@ async def get_data_awkward_async(self, selection_query: str):
246287
return self._converter.combine_awkward(await self._data_return(
247288
selection_query, lambda f: self._converter.convert_to_awkward(f)))
248289

249-
async def get_data_awkward_stream(self, selection_query: str):
290+
async def get_data_awkward_stream(self, selection_query: str) \
291+
-> AsyncGenerator[StreamInfoData, None]:
292+
'''Returns, as an async iterator, each completed batch of work from Servicex
293+
as a separate `awkward` array. The data is returned in a `StreamInfoData` object.
294+
295+
Args:
296+
selection_query (str): The `qastle` query for the data to retreive.
297+
298+
Yields:
299+
AsyncIterator[StreamInfoData]: As ServiceX completes the data, and it is downloaded
300+
to the local machine, the async iterator returns
301+
a `StreamInfoData` which can be used to access the
302+
data that has been loaded from the file.
303+
'''
250304
async for a in self._stream_return(selection_query,
251305
lambda f: self._converter.convert_to_awkward(f)):
252306
yield a
253307

254-
async def get_data_pandas_stream(self, selection_query: str):
308+
async def get_data_pandas_stream(self, selection_query: str) \
309+
-> AsyncGenerator[StreamInfoData, None]:
310+
'''Returns, as an async iterator, each completed batch of work from Servicex
311+
as a separate `pandas.DataFrame` array. The data is returned in a `StreamInfoData` object.
312+
313+
Args:
314+
selection_query (str): The `qastle` query for the data to retreive.
315+
316+
Yields:
317+
AsyncIterator[StreamInfoData]: As ServiceX completes the data, and it is downloaded
318+
to the local machine, the async iterator returns
319+
a `StreamInfoData` which can be used to access the
320+
data that has been loaded from the file.
321+
'''
255322
async for a in self._stream_return(selection_query,
256323
lambda f: self._converter.convert_to_pandas(f)):
257324
yield a

0 commit comments

Comments
 (0)