@@ -38,7 +38,7 @@ class ServiceXDataset(ServiceXABC):
38
38
def __init__ (self ,
39
39
dataset : str ,
40
40
backend_type : Optional [str ] = None ,
41
- image : str = 'sslhep/servicex_func_adl_xaod_transformer:v1.0.0-rc.2' , # NOQA
41
+ image : str = None ,
42
42
max_workers : int = 20 ,
43
43
servicex_adaptor : ServiceXAdaptor = None ,
44
44
minio_adaptor : Union [MinioAdaptor , MinioAdaptorFactory ] = None ,
@@ -60,7 +60,9 @@ def __init__(self,
60
60
will default to xaod, unless you have any endpoint listed
61
61
in your servicex file. It will default to best match there,
62
62
in that case.
63
- image Name of transformer image to use to transform the data
63
+ image Name of transformer image to use to transform the data. If
64
+ left as default, `None`, then the default image for the
65
+ ServiceX backend will be used.
64
66
max_workers Maximum number of transformers to run simultaneously on
65
67
ServiceX.
66
68
servicex_adaptor Object to control communication with the servicex instance
@@ -83,8 +85,8 @@ def __init__(self,
83
85
and `awkward`, including default settings for expected
84
86
datatypes from the backend.
85
87
ignore_cache Always ignore the cache on any query for this dataset. This
86
- is only meaningful if no cache adaptor is provided. Defaults
87
- to false - the cache is used if possible.
88
+ is only meaningful if no cache adaptor is provided.
89
+ Defaults to false - the cache is used if possible.
88
90
89
91
Notes:
90
92
@@ -95,6 +97,8 @@ def __init__(self,
95
97
takes `(total_files, transformed, downloaded, skipped)` as an argument. The
96
98
`total_files` parameter may be `None` until the system knows how many files need to
97
99
be processed (and some files can even be completed before that is known).
100
+ - The full description of calling parameters is recorded in the local cache, including
101
+ things like `image` name and tag.
98
102
'''
99
103
ServiceXABC .__init__ (self , dataset , image , max_workers ,
100
104
status_callback_factory ,
@@ -414,16 +418,20 @@ def _build_json_query(self, selection_query: str, data_type: str) -> Dict[str, s
414
418
Notes:
415
419
- Internal routine.
416
420
'''
421
+ # Items that must always be present
417
422
json_query : Dict [str , str ] = {
418
423
"did" : self ._dataset ,
419
424
"selection" : selection_query ,
420
- "image" : self ._image ,
421
425
"result-destination" : "object-store" ,
422
426
"result-format" : 'parquet' if data_type == 'parquet' else "root-file" ,
423
427
"chunk-size" : '1000' ,
424
428
"workers" : str (self ._max_workers )
425
429
}
426
430
431
+ # Optional items
432
+ if self ._image is not None :
433
+ json_query ['image' ] = self ._image
434
+
427
435
logging .getLogger (__name__ ).debug (f'JSON to be sent to servicex: { str (json_query )} ' )
428
436
429
437
return json_query
0 commit comments