Skip to content

Commit aaba3a9

Browse files
authored
Allow multiple endpoints in a .servicex file (#103)
- Add .servicex entries for doing multiple end-points - Update the documentation Fixes #101 This is a breaking change. What worked before will no longer work!
1 parent add49dd commit aaba3a9

File tree

7 files changed

+121
-29
lines changed

7 files changed

+121
-29
lines changed

.vscode/settings.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
"Reconstructor",
1717
"STDM",
1818
"SXPASS",
19+
"SXTYPE",
1920
"SXUSER",
2021
"Servivce",
2122
"Topo",
@@ -39,6 +40,7 @@
3940
"leftfoot",
4041
"linq",
4142
"localds",
43+
"mcrn",
4244
"minio",
4345
"miniouser",
4446
"mino",

README.md

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,17 @@ If no endpoint is specified, then the library defaults to the developer endpoint
3838
Create a `.servicex` file, in the `yaml` format, in the appropriate place for your work that contains the following:
3939

4040
```yaml
41-
api_endpoint:
42-
endpoint: <your-endpoint>
43-
email: <api-email>
44-
password: <api-password>
41+
api_endpoints:
42+
- endpoint: <your-endpoint>
43+
email: <api-email>
44+
password: <api-password>
45+
type: xaod
4546
```
4647
4748
All strings are expanded using Python's [os.path.expandvars](https://docs.python.org/3/library/os.path.html#os.path.expandvars) function, so `$NAME` and `${NAME}` will be replaced by the values of existing environment variables.
4849

50+
You can list multiple endpoints by repeating the block of four dictionary items, giving each entry a different `type` (for example, `uproot`).
51+
4952
Finally, you can create the objects `ServiceXAdaptor` and `MinioAdaptor` by hand in your code, passing them as arguments to `ServiceXDataset` and inject custom endpoints and credentials, avoiding the configuration system. This is probably only useful for advanced users.
5053

5154
## Usage
@@ -56,7 +59,7 @@ The following lines will return a `pandas.DataFrame` containing all the jet pT's
5659
from servicex import ServiceXDataset
5760
query = "(call ResultTTree (call Select (call SelectMany (call EventDataset (list 'localds:bogus')) (lambda (list e) (call (attr e 'Jets') 'AntiKt4EMTopoJets'))) (lambda (list j) (/ (call (attr j 'pt')) 1000.0))) (list 'JetPt') 'analysis' 'junk.root')"
5861
dataset = "mc15_13TeV:mc15_13TeV.361106.PowhegPythia8EvtGen_AZNLOCTEQ6L1_Zee.merge.DAOD_STDM3.e3601_s2576_s2132_r6630_r6264_p2363_tid05630052_00"
59-
ds = ServiceXDataset(dataset)
62+
ds = ServiceXDataset(dataset, 'xaod')
6063
r = ds.get_data_pandas_df(query)
6164
print(r)
6265
```
@@ -140,6 +143,7 @@ Everything is based around the `ServiceXDataset` object. Below is the documentat
140143

141144
```python
142145
ServiceXDataset(dataset: str,
146+
backend_type: Optional[str] = None,
143147
image: str = 'sslhep/servicex_func_adl_xaod_transformer:v0.4',
144148
max_workers: int = 20,
145149
servicex_adaptor: ServiceXAdaptor = None,
@@ -148,12 +152,17 @@ Everything is based around the `ServiceXDataset` object. Below is the documentat
148152
status_callback_factory: Optional[StatusUpdateFactory] = _run_default_wrapper,
149153
local_log: log_adaptor = None,
150154
session_generator: Callable[[], Awaitable[aiohttp.ClientSession]] = None,
151-
config_adaptor: ConfigView = None)
155+
config_adaptor: ConfigView = None):
156+
'''
152157
Create and configure a ServiceX object for a dataset.
153158
154159
Arguments
155160
156161
dataset Name of a dataset from which queries will be selected.
162+
backend_type The type of backend. Used only if we need to find an
163+
end-point. If we do not have a `servicex_adaptor` then this
164+
cannot be null. Possible types are `uproot`, `xaod`,
165+
and anything that finds a match in the `.servicex` file.
157166
image Name of transformer image to use to transform the data
158167
max_workers Maximum number of transformers to run simultaneously on
159168
ServiceX.
@@ -183,6 +192,7 @@ Everything is based around the `ServiceXDataset` object. Below is the documentat
183192
takes `(total_files, transformed, downloaded, skipped)` as an argument. The
184193
`total_files` parameter may be `None` until the system knows how many files need to
185194
be processed (and some files can even be completed before that is known).
195+
'''
186196
```
187197
188198
To get the data, use one of the `get_data` methods. They all have the same API, differing only in what they return.

scripts/run_test.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
def run_query(endpoint: Optional[ServiceXAdaptor]) -> None:
1313
ds = ServiceXDataset(
1414
"mc16_13TeV:mc16_13TeV.361106.PowhegPythia8EvtGen_AZNLOCTEQ6L1_Zee.deriv.DAOD_STDM3.e3601_e5984_s3126_r10201_r10210_p3975_tid20425969_00", # NOQA
15+
backend_type='xaod',
1516
max_workers=100,
1617
servicex_adaptor=endpoint)
1718

servicex/servicex.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ class ServiceXDataset(ServiceXABC):
3737
'''
3838
def __init__(self,
3939
dataset: str,
40+
backend_type: Optional[str] = None,
4041
image: str = 'sslhep/servicex_func_adl_xaod_transformer:v1.0.0-rc.2', # NOQA
4142
max_workers: int = 20,
4243
servicex_adaptor: ServiceXAdaptor = None,
@@ -52,6 +53,10 @@ def __init__(self,
5253
Arguments
5354
5455
dataset Name of a dataset from which queries will be selected.
56+
backend_type The type of backend. Used only if we need to find an
57+
end-point. If we do not have a `servicex_adaptor` then this
58+
cannot be null. Possible types are `uproot`, `xaod`,
59+
and anything that finds a match in the `.servicex` file.
5560
image Name of transformer image to use to transform the data
5661
max_workers Maximum number of transformers to run simultaneously on
5762
ServiceX.
@@ -86,6 +91,10 @@ def __init__(self,
8691
status_callback_factory,
8792
)
8893

94+
# Make sure the arguments are reasonable
95+
if backend_type is None and servicex_adaptor is None:
96+
raise ServiceXException('Specify backend_type or servicex_adaptor')
97+
8998
# Get the local settings
9099
config = config_adaptor if config_adaptor is not None \
91100
else ConfigSettings('servicex', 'servicex')
@@ -96,7 +105,9 @@ def __init__(self,
96105
else cache_adaptor
97106

98107
if not servicex_adaptor:
99-
servicex_adaptor = servicex_adaptor_factory(config)
108+
# Given servicex adaptor is none, this should be ok. Fixes type checkers
109+
assert backend_type is not None
110+
servicex_adaptor = servicex_adaptor_factory(config, backend_type)
100111
self._servicex_adaptor = servicex_adaptor
101112

102113
if not minio_adaptor:

servicex/servicex_adaptor.py

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,6 @@
2020
servicex_status_poll_time = 5.0
2121

2222

23-
def servicex_adaptor_factory(c: ConfigView):
24-
# It is an error if this is not specified somewhere.
25-
endpoint = c['api_endpoint']['endpoint'].as_str_expanded()
26-
27-
# We can default these to "None"
28-
email = c['api_endpoint']['email'].as_str_expanded() if 'email' in c['api_endpoint'] else None
29-
password = c['api_endpoint']['password'].as_str_expanded() if 'password' in c['api_endpoint'] \
30-
else None
31-
return ServiceXAdaptor(endpoint, email, password)
32-
33-
3423
# Low level routines for interacting with a ServiceX instance via the WebAPI
3524
class ServiceXAdaptor:
3625
def __init__(self, endpoint, email=None, password=None):
@@ -260,3 +249,35 @@ async def trap_servicex_failures(stream: AsyncIterator[TransformTuple]) \
260249
f'processed: {processed}).')
261250

262251
yield p
252+
253+
254+
def servicex_adaptor_factory(c: ConfigView, backend_type: str) -> ServiceXAdaptor:
255+
'''Given a configuration and the backend, find an appropriate configuration
256+
for us to grab and create a `servicex_adaptor`.
257+
258+
Args:
259+
c (ConfigView): The config information loaded from files.
260+
backend_type (str): The backend type we need to match
261+
262+
Returns:
263+
[ServiceXAdaptor]: A servicex adaptor.
264+
'''
265+
# Find a list of all endpoints.
266+
# It is an error if this is not specified somewhere.
267+
endpoints = c['api_endpoints']
268+
seen_types = []
269+
for ep in endpoints:
270+
if ep['type'].as_str_expanded() == backend_type:
271+
endpoint = ep['endpoint'].as_str_expanded()
272+
email = ep['email'].as_str_expanded() if 'email' in ep else None
273+
password = ep['password'].as_str_expanded() if 'password' in ep \
274+
else None
275+
276+
# We can default these to "None"
277+
return ServiceXAdaptor(endpoint, email, password)
278+
else:
279+
seen_types.append(ep['type'].as_str_expanded())
280+
281+
# If we are here, we found no matching type.
282+
raise ServiceXException(f'Unable to find type {backend_type} '
283+
f'in configuration. Saw: {", ".join(seen_types)}')

tests/test_servicex.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import asyncio
22
from pathlib import Path
3+
from servicex.servicex_adaptor import servicex_adaptor_factory
34
from typing import Optional
45

56
import pandas as pd
@@ -22,8 +23,23 @@ def clean_fname(fname: str):
2223
.replace(':', '_')
2324

2425

25-
def test_default_ctor():
26-
fe.ServiceXDataset('localds://dude')
26+
def test_default_ctor(mocker):
27+
'''Test the default ctor. This requires that a .servicex file be present to work.
28+
'''
29+
factory = mocker.MagicMock(spec=servicex_adaptor_factory)
30+
mocker.patch('servicex.servicex.servicex_adaptor_factory', factory)
31+
32+
fe.ServiceXDataset('localds://dude', "uproot-ftw")
33+
34+
factory.assert_called_once()
35+
assert factory.call_args[0][1] == 'uproot-ftw'
36+
37+
38+
def test_default_ctor_no_type():
39+
with pytest.raises(ServiceXException) as e:
40+
fe.ServiceXDataset('localds://dude')
41+
42+
assert "type" in str(e.value)
2743

2844

2945
@pytest.mark.asyncio

tests/test_servicex_adaptor.py

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -501,30 +501,61 @@ def test_servicex_adaptor_settings():
501501
from confuse import Configuration
502502
c = Configuration('bogus', 'bogus')
503503
c.clear()
504-
c['api_endpoint']['endpoint'] = 'http://my-left-foot.com:5000'
505-
c['api_endpoint']['email'] = 'thegoodplace@example.com'
506-
c['api_endpoint']['password'] = 'forkingshirtballs'
507-
508-
sx = servicex_adaptor_factory(c)
504+
c['api_endpoints'] = [
505+
{
506+
'type': 'my-type',
507+
'endpoint': 'http://my-left-foot.com:5000',
508+
'email': 'thegoodplace@example.com',
509+
'password': 'forkingshirtballs',
510+
}
511+
]
512+
513+
sx = servicex_adaptor_factory(c, 'my-type')
509514
assert sx._endpoint == 'http://my-left-foot.com:5000'
510515
assert sx._email == 'thegoodplace@example.com'
511516
assert sx._password == 'forkingshirtballs'
512517

513518

519+
def test_servicex_adaptor_settings_wrong_type():
520+
from confuse import Configuration
521+
c = Configuration('bogus', 'bogus')
522+
c.clear()
523+
c['api_endpoints'] = [
524+
{
525+
'type': 'my-type',
526+
'endpoint': 'http://my-left-foot.com:5000',
527+
'email': 'thegoodplace@example.com',
528+
'password': 'forkingshirtballs',
529+
}
530+
]
531+
532+
with pytest.raises(ServiceXException) as e:
533+
servicex_adaptor_factory(c, 'your-type')
534+
535+
assert 'Unable to find type' in str(e.value)
536+
assert 'my-type' in str(e.value)
537+
538+
514539
def test_servicex_adaptor_settings_env():
515540
from confuse import Configuration
516541
c = Configuration('bogus', 'bogus')
517542
c.clear()
518-
c['api_endpoint']['endpoint'] = '${ENDPOINT}:5000'
519-
c['api_endpoint']['email'] = '${SXUSER}'
520-
c['api_endpoint']['password'] = '${SXPASS}'
543+
c['api_endpoints'] = [
544+
{
545+
'type': '${SXTYPE}',
546+
'endpoint': '${ENDPOINT}:5000',
547+
'email': '${SXUSER}',
548+
'password': '${SXPASS}',
549+
}
550+
]
521551

522552
from os import environ
523553
environ['ENDPOINT'] = 'http://tachi.com'
524554
environ['SXUSER'] = 'Holden'
525555
environ['SXPASS'] = 'protomolecule'
556+
environ['SXTYPE'] = 'mcrn'
526557

527-
sx = servicex_adaptor_factory(c)
558+
sx = servicex_adaptor_factory(c, 'mcrn')
528559
assert sx._endpoint == 'http://tachi.com:5000'
529560
assert sx._email == 'Holden'
530561
assert sx._password == 'protomolecule'

0 commit comments

Comments
 (0)