Skip to content

Commit 973723f

Browse files
committed
Merge branch 'master' into gordonwatts/issue132
2 parents 6550120 + 29b475d commit 973723f

11 files changed

+133
-143
lines changed

README.md

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,12 @@ Before you can use this library you'll need:
2828

2929
The API access information is normally placed in a `.servicex` file (to keep this confidential information form accidentally getting checked into a public repository). The `servicex` library searches for configuration information in several locations to determine what end-point it should connect to, in the following order:
3030

31-
1. A `.servicex` file in the current working directory
31+
1. A `.servicex` file in the current working directory (it can also be named `servicex.yaml` or `servicex.yml`)
3232
1. A `.servicex` file in the user's home directory (`$HOME` on Linux and Mac, and your profile
3333
directory on Windows).
3434
1. The `config_defaults.yaml` file distributed with the `servicex` package.
3535

36-
If no endpoint is specified, then the library defaults to the developer endpoint, which is `http://localhost:5000` for the web-service API, and `localhost:9000` for the `minio` endpoint. No passwords are required.
36+
If no endpoint is specified, then the library defaults to the developer endpoint, which is `http://localhost:5000` for the web-service API. No passwords are used in this case.
3737

3838
Create a `.servicex` file, in the `yaml` format, in the appropriate place for your work that contains the following (for the `xaod` backend; use `uproot` for the uproot backend):
3939

@@ -136,15 +136,21 @@ do_query(ds) # Cache is not ignored
136136
As mentioned above, the `.servicex` file is read to pull a configuration. The search path for this file:
137137

138138
1. Your current working directory
139-
2. Your home directory
139+
2. Any working directory above your current working directory.
140+
3. Your home directory
141+
142+
The file can be named any of the following (ordered by precedence):
143+
144+
- `servicex.yaml`
145+
- `servicex.yml`
146+
- `.servicex`
140147

141148
The file can contain an `api_endpoint` as mentioned above. In addition the other following things can be put in:
142149

143150
- `cache_path`: Location where queries, data, and a record of queries are written. This should be an absolute path the person running the library has r/w access to. On windows, make sure to escape `\` - and best to follow standard `yaml` conventions and put the path in quotes - especially if it contains a space. Top level yaml item (don't indent it accidentally!). Defaults to `/tmp/servicex` (with the temp directory as appropriate for your platform) Examples:
144151
- Windows: `cache_path: "C:\\Users\\gordo\\Desktop\\cacheme"`
145152
- Linux: `cache_path: "/home/servicex-cache"`
146153

147-
- `minio_endpoint`, `minio_username`, `minio_password` - these are only interesting if you are using a pre-RC2 release of `servicex` - when the `minio` information wasn't part of the API exchange. This feature is depreciated and will be removed around the time `servicex` moves to RC3.
148154
- `backend_types` - a list of yaml dictionaries that contains some defaults for the backends. By default only the `return_data` is there, which for `xaod` is `root` and `uproot` is `parquet`. Allows `servicex` to convert to `pandas.DataFrame` or `awkward` if requested by the user.
149155

150156
All strings are expanded using python's [os.path.expand](https://docs.python.org/3/library/os.path.html#os.path.expandvars) method - so `$NAME` and `${NAME}` will work to expand existing environment variables.

servicex/ConfigSettings.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,35 @@ def _add_local_source(self):
2121
'''
2222
Look for a '.xxx" file in the local directory
2323
'''
24-
p = Path(f'.{self.appname}')
25-
self._add_from_path(p)
24+
self._add_from_path(Path(f'{self.appname}.yaml'), walk_up_tree=True)
25+
self._add_from_path(Path(f'{self.appname}.yml'), walk_up_tree=True)
26+
self._add_from_path(Path(f'.{self.appname}'), walk_up_tree=True)
2627

27-
def _add_from_path(self, p: Path):
28-
if p.exists():
29-
yaml_data = yaml_util.load_yaml(p, loader=self.loader) or {}
30-
self.add(ConfigSource(yaml_data, str(p.resolve())))
28+
# def _add_from_path(self, p: Path):
29+
# if p.exists():
30+
# yaml_data = yaml_util.load_yaml(p, loader=self.loader) or {}
31+
# self.add(ConfigSource(yaml_data, str(p.resolve())))
32+
33+
def _add_from_path(self, p: Path, walk_up_tree: bool = False):
34+
p.resolve()
35+
name = p.name
36+
dir = p.parent.resolve()
37+
while True:
38+
f = dir / name
39+
if f.exists():
40+
yaml_data = yaml_util.load_yaml(f, loader=self.loader) or {}
41+
self.add(ConfigSource(yaml_data, str(f)))
42+
break
43+
if not walk_up_tree:
44+
break
45+
if dir == dir.parent:
46+
break
47+
dir = dir.parent
3148

3249
def _add_home_source(self):
3350
'''
3451
Look for a '.xxx" file in the local directory
3552
'''
53+
self._add_from_path(Path.home() / f'{self.appname}.yaml')
54+
self._add_from_path(Path.home() / f'{self.appname}.yml')
3655
self._add_from_path(Path.home() / f'.{self.appname}')

servicex/cache.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77

88
_ignore_cache = False
99

10+
# Make sure that generated download path names are below this to avoid os errors
11+
MAX_PATH_LEN = 235
12+
1013

1114
@contextmanager
1215
def ignore_cache():
@@ -175,6 +178,11 @@ def data_file_location(self, request_id: str, data_name: str) -> Path:
175178
'''
176179
Return the path to the file that should be written out for this
177180
data_name. This is where the output file should get stored.
181+
Truncate the leftmost characters from filenames to avoid throwing a
182+
OSError: [Errno 63] File name too long error Assume that the most unique part of
183+
the name is the right hand side
178184
'''
179-
(self._path / 'data' / request_id).mkdir(parents=True, exist_ok=True)
180-
return self._path / 'data' / request_id / sanitize_filename(data_name)
185+
parent = self._path / 'data' / request_id
186+
parent.mkdir(parents=True, exist_ok=True)
187+
sanitized = sanitize_filename(data_name)
188+
return parent / sanitized[-1 * (MAX_PATH_LEN - len(parent.name)):]

servicex/config_default.yaml

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,6 @@ api_endpoints:
55
- endpoint: http://localhost:5000
66
# token: xxx
77

8-
# These are default settings, depreciated, and should not be used.
9-
# They will be removed in the next version.
10-
api_endpoint:
11-
minio_endpoint: localhost:9000
12-
# The username and password for accessing files generated by servicex.
13-
# NOTE:
14-
# 1. If minio_username and password are specified they will be used.
15-
# 2. If only an endpoint username password is specified they will be used.
16-
# 3. If nothing is specified, then the default_xxxx username passwords will be used.
17-
# minio_password: xxxx
18-
# minio_username: xxxx
19-
default_minio_username: miniouser
20-
default_minio_password: leftfoot1
21-
228
# This is the path of the cache. The "/tmp" will be translated, platform appropriate.
239
cache_path: /tmp/servicex
2410

servicex/minio_adaptor.py

Lines changed: 7 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,11 @@
2828
import asyncio
2929
from concurrent.futures.thread import ThreadPoolExecutor
3030
from pathlib import Path
31-
from typing import Any, AsyncIterator, Optional, cast, Dict
31+
from typing import Any, AsyncIterator, List, Optional, Dict, cast
3232
import logging
3333

3434
import backoff
3535
from backoff import on_exception
36-
from confuse import ConfigView
3736
from minio import Minio, ResponseError
3837

3938
from .utils import ServiceXException
@@ -59,8 +58,8 @@ def __init__(self, minio_endpoint: str,
5958
secure=self._secured)
6059

6160
@on_exception(backoff.constant, ResponseError, interval=0.1)
62-
def get_files(self, request_id):
63-
return [f.object_name for f in self._client.list_objects(request_id)]
61+
def get_files(self, request_id) -> List[str]:
62+
return [str(f.object_name) for f in self._client.list_objects(request_id)]
6463

6564
def get_access_url(self, request_id: str, object_name: str) -> str:
6665
'''Given a request ID and the file name in that request id, return a URL
@@ -122,8 +121,7 @@ class MinioAdaptorFactory:
122121
'''A factor that will return, when asked, the proper minio adaptor to use for a request
123122
to get files from ServiceX.
124123
'''
125-
def __init__(self, c: Optional[ConfigView] = None,
126-
always_return: Optional[MinioAdaptor] = None):
124+
def __init__(self, always_return: Optional[MinioAdaptor] = None):
127125
'''Create the factor with a possible adaptor to always use
128126
129127
Args:
@@ -133,8 +131,6 @@ def __init__(self, c: Optional[ConfigView] = None,
133131
# Get the defaults setup.
134132
self._always = always_return
135133
self._config_adaptor = None
136-
if self._always is None and c is not None:
137-
self._config_adaptor = self._from_config(c)
138134

139135
def from_best(self, transaction_info: Optional[Dict[str, str]] = None) -> MinioAdaptor:
140136
'''Using the information we have, create the proper Minio Adaptor with the correct
@@ -162,38 +158,8 @@ def from_best(self, transaction_info: Optional[Dict[str, str]] = None) -> MinioA
162158
bool(transaction_info['minio-secured']),
163159
transaction_info['minio-access-key'],
164160
transaction_info['minio-secret-key'])
165-
if self._config_adaptor is not None:
166-
logging.getLogger(__name__).debug('Using the config-file minio_adaptor')
167-
return self._config_adaptor
168-
raise ServiceXException("Do not know how to create a Minio Login info")
169-
170-
def _from_config(self, c: ConfigView) -> MinioAdaptor:
171-
'''Extract the Minio config information from the config file(s). This will be used
172-
if minio login information isn't returned from the request.
173-
174-
Args:
175-
c (ConfigView): The loaded config
176-
177-
Returns:
178-
MinioAdaptor: The adaptor that uses the config's login information.
179-
'''
180-
c_api = c['api_endpoint']
181-
end_point = cast(str, c_api['minio_endpoint'].as_str_expanded())
182-
183-
# Grab the username and password if they are explicitly listed.
184-
if 'minio_username' in c_api:
185-
username = c_api['minio_username'].as_str_expanded()
186-
password = c_api['minio_password'].as_str_expanded()
187-
elif 'username' in c_api:
188-
username = c_api['username'].as_str_expanded()
189-
password = c_api['password'].as_str_expanded()
190-
else:
191-
username = c_api['default_minio_username'].as_str_expanded()
192-
password = c_api['default_minio_password'].as_str_expanded()
193-
194-
return MinioAdaptor(end_point,
195-
access_key=cast(str, username),
196-
secretkey=cast(str, password))
161+
raise ServiceXException("Do not know or have enough information to create a Minio "
162+
f"Login info ({transaction_info})")
197163

198164

199165
async def find_new_bucket_files(adaptor: MinioAdaptor,
@@ -206,7 +172,7 @@ async def find_new_bucket_files(adaptor: MinioAdaptor,
206172
seen = []
207173
async for _ in update:
208174
# Sadly, this is blocking, and so may hold things up
209-
files = adaptor.get_files(request_id)
175+
files = cast(List[str], adaptor.get_files(request_id))
210176

211177
# If there are new files, pass them on
212178
for f in files:

servicex/servicex.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -572,7 +572,8 @@ async def _get_files(self, selection_query: str, data_type: str,
572572
yield r
573573

574574
# Cache the final status
575-
await self._update_query_status(client, request_id)
575+
if cached_files is None:
576+
await self._update_query_status(client, request_id)
576577

577578
except ServiceXUnknownRequestID as e:
578579
self._cache.remove_query(query)

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
author_email="gwatts@uw.edu",
3333
maintainer="Gordon Watts (IRIS-HEP/UW Seattle)",
3434
maintainer_email="gwatts@uw.edu",
35-
url="https://github.com/iris-hep/func_adl_xAOD",
35+
url="https://github.com/ssl-hep/ServiceX_frontend",
3636
license="TBD",
3737
python_requires='>=3.6',
3838
test_suite="tests",
@@ -47,7 +47,7 @@
4747
'make_it_sync==1.0.0',
4848
'google-auth==1.17',
4949
'confuse==1.3.0',
50-
'pyarrow>=1.0, <2.0'
50+
'pyarrow>=1.0, <3.0'
5151
],
5252
extras_require={
5353
'test': [

tests/test_ConfigSettings.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,59 @@ def test_home_file_default(confuse_config):
4848

4949
finally:
5050
config_file.unlink()
51+
52+
53+
def test_local_more_important_than_home(confuse_config):
54+
'Make sure that we pick the local directory over the home one'
55+
local_config_file = Path('.servicex_test_settings')
56+
home_config_file = Path.home() / '.servicex_test_settings'
57+
try:
58+
with home_config_file.open('w') as f:
59+
f.write('testing_value: 30\n')
60+
with local_config_file.open('w') as f:
61+
f.write('testing_value: 20\n')
62+
63+
confuse_config.clear()
64+
confuse_config.read()
65+
66+
assert get_it(confuse_config) == 20
67+
68+
finally:
69+
local_config_file.unlink()
70+
home_config_file.unlink()
71+
72+
73+
def test_one_level_up(confuse_config):
74+
'Make sure the config file that is one file up is found'
75+
config_file = Path('.').resolve().parent / ('.servicex_test_settings')
76+
try:
77+
with config_file.open('w') as f:
78+
f.write('testing_value: 30\n')
79+
80+
confuse_config.clear()
81+
confuse_config.read()
82+
83+
assert get_it(confuse_config) == 30
84+
85+
finally:
86+
config_file.unlink()
87+
88+
89+
def test_local_more_importnat_than_one_level_up(confuse_config):
90+
'Assure that our local file is found first'
91+
one_up_config_file = Path('.').resolve().parent / ('.servicex_test_settings')
92+
config_file = Path('.').resolve() / ('.servicex_test_settings')
93+
try:
94+
with config_file.open('w') as f:
95+
f.write('testing_value: 30\n')
96+
with one_up_config_file.open('w') as f:
97+
f.write('testing_value: 20\n')
98+
99+
confuse_config.clear()
100+
confuse_config.read()
101+
102+
assert get_it(confuse_config) == 30
103+
104+
finally:
105+
config_file.unlink()
106+
one_up_config_file.unlink()

tests/test_cache.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import string
2+
import random
13
from servicex.utils import ServiceXException
24
import pytest
35

@@ -148,6 +150,18 @@ def test_data_file_location(tmp_path):
148150
assert str(p).startswith(str(tmp_path))
149151

150152

153+
def test_data_file_location_long_path(tmp_path):
154+
c = Cache(tmp_path)
155+
letters = string.ascii_lowercase
156+
file_significant_name = 'junk.root'
157+
long_file_name = ''.join(random.choice(letters) for i in range(230))
158+
159+
p = c.data_file_location('123-456', long_file_name+file_significant_name)
160+
161+
assert(len(p.name) == 235 - len(p.parent.name))
162+
assert p.name.endswith(file_significant_name)
163+
164+
151165
def test_data_file_location_twice(tmp_path):
152166
c = Cache(tmp_path)
153167
_ = c.data_file_location('123-456', 'junk1.root')

0 commit comments

Comments
 (0)