Skip to content

Commit c7b1585

Browse files
authored
Merge pull request #263 from ssl-hep/UX_multiple_codegen
Merging the latest production release (2.6.2) changes into the master.
2 parents 59f07c3 + e06b1dd commit c7b1585

File tree

8 files changed

+209
-18
lines changed

8 files changed

+209
-18
lines changed

.vscode/settings.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
"aiohttp",
1010
"AOD's",
1111
"asyncio",
12+
"atlasxaod",
1213
"awks",
1314
"AZNLOCTEQ",
1415
"backend's",
@@ -19,6 +20,7 @@
1920
"caplog",
2021
"cernopendata",
2122
"codecov",
23+
"codegen",
2224
"Comming",
2325
"coveragerc",
2426
"DAOD",

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,10 @@ The file can contain an `api_endpoint` as mentioned earlier. In addition the oth
194194

195195
All strings are expanded using Python's [os.path.expandvars](https://docs.python.org/3/library/os.path.html#os.path.expandvars) function - so `$NAME` and `${NAME}` will work to expand existing environment variables.
196196

197+
For non-standard use cases, the user can specify:
198+
- The code generator that is used by the backend. This is done by passing a `codegen` argument to `ServiceXDataset`. This argument is normally inherited from the backend type set in `servicex.yaml`, but can be overridden with any valid `codegen` contained in the default type listing. A `codegen` entry can also be added to a backend in the yaml file to use as the default.
199+
- The type of backend, using the `backend_type` argument on ServiceXDataset. This overrides the backend type setting in the `servicex.yaml` file.
200+
197201
## Features
198202

199203
Implemented:

servicex/config_default.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,13 @@ default_return_data: parquet
3131
backend_types:
3232
- type: xaod
3333
return_data: root-file
34+
codegen: atlasxaod
3435
- type: uproot
3536
return_data: parquet
37+
codegen: uproot
3638
- type: cms_run1_aod
3739
return_data: root-file
40+
codegen: cms
41+
- type: python
42+
return_data: parquet
43+
codegen: python

servicex/servicex.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,8 @@ def __init__(
175175
self,
176176
dataset: DatasetType,
177177
backend_name: Optional[str] = None,
178+
backend_type: Optional[str] = None,
179+
codegen: Optional[str] = None,
178180
image: Optional[str] = None,
179181
max_workers: int = 20,
180182
result_destination: str = "object-store",
@@ -201,6 +203,13 @@ def __init__(
201203
will default to xaod, unless you have any endpoint listed
202204
in your servicex file. It will default to best match there,
203205
or fail if a name has been given.
206+
backend_type The type of backend. Overrides the `type` in the `yaml`
207+
config file.
208+
codegen The type of code generator passed to the backend to
209+
generate the code that powers the requested transform.
210+
Don't use unless you know what you are doing - sometimes
211+
the return filetype is also needed. Better to use
212+
`backend_type`!
204213
image Name of transformer image to use to transform the data. If
205214
left as default, `None`, then the default image for the
206215
ServiceX backend will be used.
@@ -246,6 +255,7 @@ def __init__(
246255
ServiceXABC.__init__(
247256
self,
248257
dataset,
258+
codegen,
249259
image,
250260
max_workers,
251261
result_destination,
@@ -272,7 +282,9 @@ def __init__(
272282

273283
if not servicex_adaptor:
274284
# Given servicex adaptor is none, this should be ok. Fixes type checkers
275-
end_point, token = config.get_servicex_adaptor_config(backend_name)
285+
end_point, token = config.get_servicex_adaptor_config(
286+
backend_name, backend_type=backend_type
287+
)
276288
servicex_adaptor = ServiceXAdaptor(end_point, token)
277289
self._servicex_adaptor = servicex_adaptor
278290

@@ -292,13 +304,24 @@ def __init__(
292304
else default_client_session
293305
)
294306

295-
self._return_types = [config.get_default_returned_datatype(backend_name)]
307+
self._return_types = [
308+
config.get_default_returned_datatype(
309+
backend_name, backend_type=backend_type
310+
)
311+
]
296312
self._converter = (
297313
data_convert_adaptor
298314
if data_convert_adaptor is not None
299315
else DataConverterAdaptor(self._return_types[0])
300316
)
301317

318+
# TODO - this should not be in the ABC backend since we have to have some intelligence
319+
# in setting it, it turns out.
320+
if self._codegen is None:
321+
self._codegen = config.get_backend_info(
322+
backend_name, "codegen", backend_type=backend_type
323+
)
324+
302325
def first_supported_datatype(
303326
self, datatypes: Union[List[str], str]
304327
) -> Optional[str]:
@@ -1057,12 +1080,15 @@ def _build_json_query(
10571080
assert data_format in g_allowed_formats
10581081

10591082
# Items that must always be present
1083+
codegen = self._codegen if self._codegen is not None else "default"
1084+
10601085
json_query: Dict[str, Union[str, Iterable[str]]] = {
10611086
"selection": selection_query,
10621087
"result-destination": self._result_destination,
10631088
"result-format": data_format,
10641089
"chunk-size": "1000",
10651090
"workers": str(self._max_workers),
1091+
"codegen": codegen,
10661092
}
10671093

10681094
# Add the appropriate did.

servicex/servicex_config.py

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ class ServiceXConfigAdaptor:
1616
def __init__(self, config: Optional[ConfigView] = None):
1717
"""The config needed for the app.
1818
19-
Note: The config is held onto and only queired when the information is required.
19+
Note: The config is held onto and only queried when the information is required.
2020
2121
Args:
2222
config (ConfigView): The config information for the app. If null, then we just use the
@@ -37,8 +37,10 @@ def settings(self) -> ConfigView:
3737
"""
3838
return self._settings
3939

40-
def get_default_returned_datatype(self, backend_name: Optional[str]) -> str:
41-
"""Return the default return data type, given the backend is a certian type.
40+
def get_default_returned_datatype(
41+
self, backend_name: Optional[str], backend_type: Optional[str] = None
42+
) -> str:
43+
"""Return the default return data type, given the backend is a certain type.
4244
4345
Args:
4446
backend_name (Optional[str]): The backend type string (`xaod`, `uproot`, etc)
@@ -47,7 +49,9 @@ def get_default_returned_datatype(self, backend_name: Optional[str]) -> str:
4749
str: The backend datatype, like `root` or `parquet`.
4850
"""
4951
# Check to see if we know about the backend info
50-
r = self.get_backend_info(backend_name, "return_data")
52+
r = self.get_backend_info(
53+
backend_name, "return_data", backend_type=backend_type
54+
)
5155

5256
if r is None:
5357
raise ServiceXException(
@@ -57,22 +61,27 @@ def get_default_returned_datatype(self, backend_name: Optional[str]) -> str:
5761

5862
return r
5963

60-
def get_backend_info(self, backend_name: Optional[str], key: str) -> Optional[str]:
64+
def get_backend_info(
65+
self, backend_name: Optional[str], key: str, backend_type: Optional[str] = None
66+
) -> Optional[str]:
6167
"""Find an item in the backend info, searching first for the backend
6268
name/type and then the defaults with a given type.
6369
6470
Args:
6571
backend_name (str): Backend name
6672
key (str): The key for the info we are after
73+
backend_type (str): The backend type, if we are overriding normal lookup
6774
6875
Returns:
6976
Optional[str]: Return a string for the info we are after, or return None if we can't
7077
find it.
7178
"""
72-
config = self._get_backend_info(backend_name)
79+
config = self._get_backend_info(backend_name, backend_type)
7380
return config[key] if key in config else None
7481

75-
def _get_backend_info(self, backend_name: Optional[str]) -> Dict[str, str]:
82+
def _get_backend_info(
83+
self, backend_name: Optional[str], backend_type: Optional[str]
84+
) -> Dict[str, str]:
7685
"""Returns all the info for a backend name/type.
7786
7887
Search algorithm is non-trivial:
@@ -89,6 +98,8 @@ def _get_backend_info(self, backend_name: Optional[str]) -> Dict[str, str]:
8998
9099
Args:
91100
backend_name (str): Name or type of the api end point we are going to look up.
101+
backend_type (str): Override the backend type, if we are looking for a specific
102+
type behavior.
92103
93104
Returns:
94105
Dict[str, str]: Attributes for this backend's configuration
@@ -159,8 +170,14 @@ def _get_backend_info(self, backend_name: Optional[str]) -> Dict[str, str]:
159170
)
160171

161172
# Now, extract the type and see if we can figure out any defaults from the
162-
# `backend_types` info.
163-
type_lookup = config["type"] if "type" in config else backend_name
173+
# `backend_types` info. Skip this if we have a type we are passed in.
174+
type_lookup = (
175+
backend_type
176+
if backend_type is not None
177+
else config["type"]
178+
if "type" in config
179+
else backend_name
180+
)
164181
if type_lookup is None:
165182
return config
166183

@@ -181,7 +198,9 @@ def _get_backend_info(self, backend_name: Optional[str]) -> Dict[str, str]:
181198
return config
182199

183200
def get_servicex_adaptor_config(
184-
self, backend_name: Optional[str] = None
201+
self,
202+
backend_name: Optional[str] = None,
203+
backend_type: Optional[str] = None,
185204
) -> Tuple[str, Optional[str]]:
186205
"""Return the servicex (endpoint, token) from a given backend configuration.
187206
@@ -193,7 +212,7 @@ def get_servicex_adaptor_config(
193212
Tuple[str, str]: The tuple of info to create a `ServiceXAdaptor`: end point,
194213
token (optionally).
195214
"""
196-
config = self._get_backend_info(backend_name)
215+
config = self._get_backend_info(backend_name, backend_type)
197216

198217
endpoint = config["endpoint"]
199218
token = config["token"] if "token" in config else None

servicex/servicexabc.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ class ServiceXABC(ABC):
2929
def __init__(
3030
self,
3131
dataset: DatasetType,
32+
codegen: Optional[str] = None,
3233
image: Optional[str] = None,
3334
max_workers: int = 20,
3435
result_destination: str = "object-store",
@@ -40,6 +41,8 @@ def __init__(
4041
Arguments
4142
4243
dataset Name of a dataset from which queries will be selected.
44+
codegen The type of code generator passed to the backend to
45+
generate the code that powers the requested transform.
4346
image Name of transformer image to use to transform the data. If
4447
None the default implementation is used.
4548
cache_adaptor Runs the caching for data and queries that are sent up and
@@ -65,6 +68,7 @@ def __init__(
6568
be processed (and some files can even be completed before that is known).
6669
"""
6770
self._dataset = dataset
71+
self._codegen = codegen
6872
self._image = image
6973
self._max_workers = max_workers
7074
self._result_destination = result_destination

tests/test_servicex.py

Lines changed: 95 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,12 @@ def test_default_ctor(mocker):
4949

5050
fe.ServiceXDataset("localds://dude", "uproot-ftw", config_adaptor=config)
5151

52-
config.get_servicex_adaptor_config.assert_called_with("uproot-ftw")
53-
config.get_default_returned_datatype.assert_called_with("uproot-ftw")
52+
config.get_servicex_adaptor_config.assert_called_with(
53+
"uproot-ftw", backend_type=None
54+
)
55+
config.get_default_returned_datatype.assert_called_with(
56+
"uproot-ftw", backend_type=None
57+
)
5458

5559

5660
def test_sx_name(mocker):
@@ -79,8 +83,8 @@ def test_default_ctor_no_type(mocker):
7983

8084
fe.ServiceXDataset("localds://dude", config_adaptor=config)
8185

82-
config.get_servicex_adaptor_config.assert_called_with(None)
83-
config.get_default_returned_datatype.assert_called_with(None)
86+
config.get_servicex_adaptor_config.assert_called_with(None, backend_type=None)
87+
config.get_default_returned_datatype.assert_called_with(None, backend_type=None)
8488

8589

8690
def test_default_ctor_cache(mocker):
@@ -166,6 +170,21 @@ def test_get_datatypes_good(mocker):
166170
assert r.first_supported_datatype(["root", "parquet"]) == "root"
167171

168172

173+
def test_get_datatypes_background_type(mocker):
174+
"Test that we return a good datatype"
175+
176+
# config = mocker.MagicMock(spec=ServiceXConfigAdaptor)
177+
# config.settings = Configuration("servicex", "servicex")
178+
# config.get_servicex_adaptor_config.return_value = (
179+
# "http://no-way.dude",
180+
# "no_spoon_there_is",
181+
# )
182+
183+
r = fe.ServiceXDataset("localds://dude", "uproot", backend_type="xaod")
184+
185+
assert r.first_supported_datatype(["root-file", "parquet"]) == "root-file"
186+
187+
169188
def test_get_datatypes_single(mocker):
170189
"Test that we return a good datatype"
171190

@@ -716,8 +735,8 @@ async def test_stream_root_files_from_minio(mocker):
716735
assert r.file == "one_minio_entry"
717736
assert r.url == "http://the.url.com"
718737

719-
assert mock_servicex_adaptor.query_json["result-format"] == "root-file"
720738
assert mock_minio_adaptor.access_called_with == ("123-456", "one_minio_entry")
739+
assert mock_servicex_adaptor.query_json["result-format"] == "root-file"
721740

722741

723742
@pytest.mark.asyncio
@@ -1300,6 +1319,77 @@ async def test_no_title_spec(mocker, good_awkward_file_data):
13001319
assert "title" not in called
13011320

13021321

1322+
@pytest.mark.asyncio
1323+
async def test_codegen_override(mocker, good_awkward_file_data):
1324+
mock_cache = build_cache_mock(mocker)
1325+
mock_logger = mocker.MagicMock(spec=log_adaptor)
1326+
mock_servicex_adaptor = MockServiceXAdaptor(mocker, "123-456")
1327+
mock_minio_adaptor = MockMinioAdaptor(mocker, files=["one_minio_entry"])
1328+
data_adaptor = mocker.MagicMock(spec=DataConverterAdaptor)
1329+
1330+
ds = fe.ServiceXDataset(
1331+
"localds://mc16_tev:13",
1332+
servicex_adaptor=mock_servicex_adaptor, # type: ignore
1333+
minio_adaptor=mock_minio_adaptor, # type: ignore
1334+
cache_adaptor=mock_cache,
1335+
data_convert_adaptor=data_adaptor,
1336+
local_log=mock_logger,
1337+
max_workers=50,
1338+
codegen="good_codegen",
1339+
)
1340+
await ds.get_data_rootfiles_async("(valid qastle string)")
1341+
1342+
called = mock_servicex_adaptor.query_json
1343+
assert called["codegen"] == "good_codegen"
1344+
1345+
1346+
@pytest.mark.asyncio
1347+
async def test_codegen_backend_type(mocker, good_awkward_file_data):
1348+
mock_cache = build_cache_mock(mocker)
1349+
mock_logger = mocker.MagicMock(spec=log_adaptor)
1350+
mock_servicex_adaptor = MockServiceXAdaptor(mocker, "123-456")
1351+
mock_minio_adaptor = MockMinioAdaptor(mocker, files=["one_minio_entry"])
1352+
data_adaptor = mocker.MagicMock(spec=DataConverterAdaptor)
1353+
1354+
ds = fe.ServiceXDataset(
1355+
"localds://mc16_tev:13",
1356+
servicex_adaptor=mock_servicex_adaptor, # type: ignore
1357+
minio_adaptor=mock_minio_adaptor, # type: ignore
1358+
cache_adaptor=mock_cache,
1359+
data_convert_adaptor=data_adaptor,
1360+
local_log=mock_logger,
1361+
max_workers=50,
1362+
backend_type="uproot",
1363+
)
1364+
await ds.get_data_rootfiles_async("(valid qastle string)")
1365+
1366+
called = mock_servicex_adaptor.query_json
1367+
assert called["codegen"] == "uproot"
1368+
1369+
1370+
@pytest.mark.asyncio
1371+
async def test_codegen_default_by_backend(mocker, good_awkward_file_data):
1372+
mock_cache = build_cache_mock(mocker)
1373+
mock_logger = mocker.MagicMock(spec=log_adaptor)
1374+
mock_servicex_adaptor = MockServiceXAdaptor(mocker, "123-456")
1375+
mock_minio_adaptor = MockMinioAdaptor(mocker, files=["one_minio_entry"])
1376+
data_adaptor = mocker.MagicMock(spec=DataConverterAdaptor)
1377+
1378+
ds = fe.ServiceXDataset(
1379+
"localds://mc16_tev:13",
1380+
servicex_adaptor=mock_servicex_adaptor, # type: ignore
1381+
minio_adaptor=mock_minio_adaptor, # type: ignore
1382+
cache_adaptor=mock_cache,
1383+
data_convert_adaptor=data_adaptor,
1384+
local_log=mock_logger,
1385+
max_workers=50,
1386+
)
1387+
await ds.get_data_rootfiles_async("(valid qastle string)")
1388+
1389+
called = mock_servicex_adaptor.query_json
1390+
assert called["codegen"] == "atlasxaod"
1391+
1392+
13031393
@pytest.mark.asyncio
13041394
@pytest.mark.parametrize(
13051395
"n_ds, n_query", [(1, 4), (4, 1), (1, 100), (100, 1), (4, 4), (20, 20)]

0 commit comments

Comments
 (0)