@@ -106,7 +106,7 @@ def url(self) -> str:
106
106
107
107
@property
108
108
def bucket (self ) -> str :
109
- """Returns the buck name - unique and constant accross transformations.
109
+ """Returns the bucket name - unique and constant across transformations.
110
110
Can be used to order the results
111
111
112
112
Returns:
@@ -166,7 +166,7 @@ def data(self) -> Any:
166
166
167
167
class ServiceXDataset (ServiceXABC ):
168
168
"""
169
- Used to access an instance of ServiceX at an end point on the internet. Support convieration
169
+ Used to access an instance of ServiceX at an end point on the internet. Supports configuration
170
170
by configuration object `config_adaptor` or by creating the adaptors defined in the `__init__`
171
171
function.
172
172
"""
@@ -210,7 +210,7 @@ def __init__(
210
210
Defaults to object-store, but could be used to save
211
211
results to a posix volume
212
212
servicex_adaptor Object to control communication with the servicex instance
213
- at a particular ip address with certian login credentials.
213
+ at a particular ip address with certain login credentials.
214
214
Will be configured via the `config_adaptor` by default.
215
215
minio_adaptor Object to control communication with the minio servicex
216
216
instance.
@@ -347,7 +347,7 @@ async def get_data_rootfiles_stream(
347
347
access the data.
348
348
349
349
Args:
350
- selection_query (str): The `qastle` query for the data to retreive .
350
+ selection_query (str): The `qastle` query for the data to retrieve.
351
351
352
352
Yields:
353
353
AsyncIterator[StreamInfoPath]: As ServiceX completes the data, and it is downloaded
@@ -375,7 +375,7 @@ async def get_data_parquet_stream(
375
375
access the data.
376
376
377
377
Args:
378
- selection_query (str): The `qastle` query for the data to retreive .
378
+ selection_query (str): The `qastle` query for the data to retrieve.
379
379
380
380
Yields:
381
381
AsyncIterator[StreamInfoPath]: As ServiceX completes the data, and it is downloaded
@@ -393,9 +393,13 @@ async def get_data_parquet_stream(
393
393
async def get_data_pandas_df_async (
394
394
self , selection_query : str , title : Optional [str ] = None
395
395
):
396
+ data_format = self ._return_types [0 ]
396
397
return self ._converter .combine_pandas (
397
398
await self ._data_return (
398
- selection_query , lambda f : self ._converter .convert_to_pandas (f ), title
399
+ selection_query ,
400
+ lambda f : self ._converter .convert_to_pandas (f ),
401
+ title ,
402
+ data_format = data_format ,
399
403
)
400
404
)
401
405
@@ -404,9 +408,13 @@ async def get_data_pandas_df_async(
404
408
async def get_data_awkward_async (
405
409
self , selection_query : str , title : Optional [str ] = None
406
410
):
411
+ data_format = self ._return_types [0 ]
407
412
return self ._converter .combine_awkward (
408
413
await self ._data_return (
409
- selection_query , lambda f : self ._converter .convert_to_awkward (f ), title
414
+ selection_query ,
415
+ lambda f : self ._converter .convert_to_awkward (f ),
416
+ title ,
417
+ data_format = data_format ,
410
418
)
411
419
)
412
420
@@ -417,7 +425,7 @@ async def get_data_awkward_stream(
417
425
as a separate `awkward` array. The data is returned in a `StreamInfoData` object.
418
426
419
427
Args:
420
- selection_query (str): The `qastle` query for the data to retreive .
428
+ selection_query (str): The `qastle` query for the data to retrieve.
421
429
422
430
Yields:
423
431
AsyncIterator[StreamInfoData]: As ServiceX completes the data, and it is downloaded
@@ -437,7 +445,7 @@ async def get_data_pandas_stream(
437
445
as a separate `pandas.DataFrame` array. The data is returned in a `StreamInfoData` object.
438
446
439
447
Args:
440
- selection_query (str): The `qastle` query for the data to retreive .
448
+ selection_query (str): The `qastle` query for the data to retrieve.
441
449
442
450
Yields:
443
451
AsyncIterator[StreamInfoData]: As ServiceX completes the data, and it is downloaded
@@ -539,7 +547,7 @@ async def _file_return(
539
547
Given a query, return the list of files, in a unique order, that hold
540
548
the data for the query.
541
549
542
- For certian types of exceptions, the queries will be repeated. For example,
550
+ For certain types of exceptions, the queries will be repeated. For example,
543
551
if `ServiceX` indicates that it was restarted in the middle of the query, then
544
552
the query will be re-submitted.
545
553
@@ -670,7 +678,7 @@ async def _data_return(
670
678
"""Given a query, return the data, in a unique order, that hold
671
679
the data for the query.
672
680
673
- For certian types of exceptions, the queries will be repeated. For example,
681
+ For certain types of exceptions, the queries will be repeated. For example,
674
682
if `ServiceX` indicates that it was restarted in the middle of the query, then
675
683
the query will be re-submitted.
676
684
@@ -710,7 +718,7 @@ async def _stream_return(
710
718
"""Given a query, return the data, in the order it arrives back
711
719
converted as appropriate.
712
720
713
- For certian types of exceptions, the queries will be repeated. For example,
721
+ For certain types of exceptions, the queries will be repeated. For example,
714
722
if `ServiceX` indicates that it was restarted in the middle of the query, then
715
723
the query will be re-submitted.
716
724
@@ -741,7 +749,7 @@ async def _stream_local_files(
741
749
that contain the results of the query. This is an async generator, and files
742
750
are returned as they arrive.
743
751
744
- For certian types of exceptions, the queries will be repeated. For example,
752
+ For certain types of exceptions, the queries will be repeated. For example,
745
753
if `ServiceX` indicates that it was restarted in the middle of the query, then
746
754
the query will be re-submitted.
747
755
@@ -779,7 +787,7 @@ async def _get_files(
779
787
Return a list of files from servicex as they have been downloaded to this machine. The
780
788
return type is an awaitable that will yield the path to the file.
781
789
782
- For certian types of `ServiceX` failures we will automatically attempt a few retries:
790
+ For certain types of `ServiceX` failures we will automatically attempt a few retries:
783
791
784
792
- When `ServiceX` forgets the query. This sometimes happens when a user submits a
785
793
query, and then disconnects from the network, `ServiceX` is restarted, and then the
@@ -815,7 +823,7 @@ async def _get_files(
815
823
self ._cache .lookup_query_status (request_id )
816
824
)
817
825
818
- # Look up the cache, and then fetch an iterator going thorugh the results
826
+ # Look up the cache, and then fetch an iterator going through the results
819
827
# from either servicex or the cache, depending.
820
828
try :
821
829
cached_files = self ._cache .lookup_files (request_id )
@@ -1052,7 +1060,7 @@ def _build_json_query(
1052
1060
json_query : Dict [str , Union [str , Iterable [str ]]] = {
1053
1061
"selection" : selection_query ,
1054
1062
"result-destination" : self ._result_destination ,
1055
- "result-format" : "parquet" if data_format == "parquet" else "root-file" ,
1063
+ "result-format" : data_format ,
1056
1064
"chunk-size" : "1000" ,
1057
1065
"workers" : str (self ._max_workers ),
1058
1066
}
0 commit comments