Skip to content

Commit 00bc94c

Browse files
authored
Add Path-Like Arguments for Querying Files (#87)
* add shortcut-args: directory, path, path-end, path-start, & path-regex * bump circleci to py3.8 (to match Dockerfile) * add filename arg (replace start & end) * "filename" arg fix * add module block comments to tests * add test_argbuilder.py * _handle_path_args() -> _resolve_path_args(): make in-place updates * "directory" & "filename" wildcard bug fix * test_00_path_args now passing * misc * remove needless variables * _resolve_path_args RETURNS the resolved path (& only POPS from kwargs) * add test_13_files_path_like_args() integration test * simplify directory/filename regex
1 parent a68b27f commit 00bc94c

File tree

6 files changed

+237
-26
lines changed

6 files changed

+237
-26
lines changed

.circleci/config.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ version: 2
22
jobs:
33
test:
44
docker:
5-
- image: circleci/python:3.7.2
5+
- image: circleci/python:3.8
66
environment:
77
# Both TEST_DATABASE_HOST and TEST_DATABASE_PORT must be defined.
88
# Things will break if host is defined and port isn't or vice-versa.

file_catalog/argbuilder.py

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
"""Builder utility functions for arg/kwargs dicts."""
22

3-
from typing import Any, Dict
43

5-
from tornado.escape import json_decode
4+
from typing import Any, Dict, Optional, Union
65

7-
# local imports
86
from file_catalog.mongo import AllKeys
7+
from tornado.escape import json_decode
98

109

1110
def build_limit(kwargs: Dict[str, Any], config: Dict[str, Any]) -> None:
@@ -31,6 +30,34 @@ def build_start(kwargs: Dict[str, Any]) -> None:
3130
raise Exception("start is negative")
3231

3332

33+
def _resolve_path_args(kwargs: Dict[str, Any]) -> Optional[Union[Dict[str, Any], str]]:
    """Collapse the path-type shortcut arguments into a single query value.

    Every recognized key ("path-regex", "path", "logical_name", "directory",
    "filename") is popped from `kwargs`, whether or not its value is used.

    When more than one is supplied, the winner is (highest precedence first):
    "directory"/"filename" (combined into one regex), "logical_name", "path",
    "path-regex".

    Returns:
        A plain string for an exact match, a `{"$regex": ...}` dict for a
        pattern match, or None when none of the keys were present.
    """
    absent = object()  # sentinel: distinguishes "key missing" from any real value
    resolved: Optional[Union[Dict[str, Any], str]] = None

    # lowest precedence: raw regex
    pattern = kwargs.pop("path-regex", absent)
    if pattern is not absent:
        resolved = {"$regex": pattern}

    # exact-match paths; "logical_name" overrides "path"
    for key in ("path", "logical_name"):
        exact = kwargs.pop(key, absent)
        if exact is not absent:
            resolved = exact

    # highest precedence: directory & filename
    if not kwargs.keys() & {"directory", "filename"}:
        return resolved

    # a missing/empty part becomes a wildcard; the slash between the two parts
    # is supplied by the template below, so strip it from the inputs
    dir_part = kwargs.pop("directory", "").rstrip("/") or r".*"
    file_part = kwargs.pop("filename", "").lstrip("/") or r".*"
    return {"$regex": rf"^{dir_part}/(.*/)?{file_part}$"}
59+
60+
3461
def build_files_query(kwargs: Dict[str, Any]) -> None:
3562
"""Build `"query"` dict with formatted/fully-named arguments.
3663
@@ -49,8 +76,8 @@ def build_files_query(kwargs: Dict[str, Any]) -> None:
4976
query["locations.archive"] = None
5077

5178
# shortcut query params
52-
if "logical_name" in kwargs:
53-
query["logical_name"] = kwargs.pop("logical_name")
79+
if path := _resolve_path_args(kwargs):
80+
query["logical_name"] = path
5481
if "run_number" in kwargs:
5582
query["run.run_number"] = kwargs.pop("run_number")
5683
if "dataset" in kwargs:

tests/test_argbuilder.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
"""Test argbuilder.py functions."""
2+
3+
# pylint: disable=W0212
4+
5+
import pprint
6+
from typing import Any, Dict, List, Optional, TypedDict, Union
7+
8+
from file_catalog import argbuilder
9+
10+
11+
def test_00_path_args() -> None:
    """Test _resolve_path_args.

    Checks, for each path-like shortcut argument on its own, both the value
    returned and the state `kwargs` is left in; then checks the precedence
    order when several path-like arguments are supplied at once.
    """

    class KwargsTests(TypedDict):  # pylint: disable=C0115
        kwargs_in: Dict[str, Any]  # passed to (and mutated by) the function
        kwargs_after: Dict[str, Any]  # expected state of `kwargs_in` afterward
        ret: Optional[Union[Dict[str, Any], str]]  # expected return value

    kwargs_test_dicts: List[KwargsTests] = [
        # null case
        {"kwargs_in": {}, "kwargs_after": {}, "ret": None},
        # no path-args
        {
            "kwargs_in": {"an-extra-argument": [12, 34, 56]},
            "kwargs_after": {"an-extra-argument": [12, 34, 56]},
            "ret": None,
        },
        # only "path-regex"
        {
            "kwargs_in": {"path-regex": r"/reg-ex/this.*/(file/)?path"},
            "kwargs_after": {},
            "ret": {"$regex": r"/reg-ex/this.*/(file/)?path"},
        },
        # only "path"
        {
            "kwargs_in": {"an-extra-argument": [12, 34, 56], "path": "PATH"},
            "kwargs_after": {"an-extra-argument": [12, 34, 56]},
            "ret": "PATH",
        },
        # only "logical_name"
        {
            "kwargs_in": {"logical_name": "LOGICAL_NAME"},
            "kwargs_after": {},
            "ret": "LOGICAL_NAME",
        },
        # only "directory"
        {
            "kwargs_in": {"directory": "/path/to/dir/"},
            "kwargs_after": {},
            "ret": {"$regex": r"^/path/to/dir/(.*/)?.*$"},
        },
        # only "directory" w/o trailing '/'
        {
            "kwargs_in": {"directory": "/path/to/dir"},
            "kwargs_after": {},
            "ret": {"$regex": r"^/path/to/dir/(.*/)?.*$"},
        },
        # only "filename"
        {
            "kwargs_in": {"filename": "my-file"},
            "kwargs_after": {},
            "ret": {"$regex": r"^.*/(.*/)?my-file$"},
        },
        # only "filename" w/ a sub-directory
        {
            "kwargs_in": {"filename": "/sub-dir/my-file"},
            "kwargs_after": {},
            "ret": {"$regex": r"^.*/(.*/)?sub-dir/my-file$"},
        },
        # "directory" & "filename"
        {
            "kwargs_in": {"directory": "/path/to/dir/", "filename": "my-file"},
            "kwargs_after": {},
            "ret": {"$regex": r"^/path/to/dir/(.*/)?my-file$"},
        },
    ]

    for ktd in kwargs_test_dicts:
        pprint.pprint(ktd)
        print()
        assert argbuilder._resolve_path_args(ktd["kwargs_in"]) == ktd["ret"]
        # the call pops path-args in place, so compare the mutated input
        assert ktd["kwargs_in"] == ktd["kwargs_after"]

    # test multiple path-args (each loop pops the arg of the highest precedence)
    args = [  # list in decreasing order of precedence
        ("directory", "/path/to/dir/", {"$regex": r"^/path/to/dir/(.*/)?.*$"}),
        # not testing "filename" b/c that is equal to "directory" in precedence
        ("logical_name", "LOGICAL_NAME", "LOGICAL_NAME"),
        ("path", "PATH", "PATH"),
        ("path-regex", r"this.*is?a.path", {"$regex": r"this.*is?a.path"}),
    ]
    while args:
        kwargs = {k: v for (k, v, _) in args}
        pprint.pprint(kwargs)
        # the head of `args` outranks everything after it
        assert argbuilder._resolve_path_args(kwargs) == args[0][2]
        assert not kwargs  # everything was popped
        args.pop(0)

tests/test_collections.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,18 @@
1+
"""Test /api/collections."""
2+
3+
# fmt:off
4+
# pylint: skip-file
5+
16
from __future__ import absolute_import, division, print_function
27

38
import os
49
import unittest
510

611
from rest_tools.client import RestClient
712

8-
from .test_server import TestServerAPI
913
from .test_files import hex
14+
from .test_server import TestServerAPI
15+
1016

1117
class TestCollectionsAPI(TestServerAPI):
1218
def test_10_collections(self):
@@ -183,7 +189,7 @@ def test_71_snapshot_find(self):
183189
self.assertIn('file', data)
184190
url = data['file']
185191
file_uid = url.split('/')[-1]
186-
192+
187193
# old snapshot stays empty
188194
data = r.request_seq('GET', '/api/snapshots/{}/files'.format(snap_uid))
189195
self.assertEqual(data['files'], [])
@@ -201,7 +207,7 @@ def test_71_snapshot_find(self):
201207
self.assertEqual(len(data['files']), 1)
202208
self.assertEqual(data['files'][0]['uuid'], file_uid)
203209
self.assertEqual(data['files'][0]['checksum'], metadata['checksum'])
204-
210+
205211

206212
if __name__ == '__main__':
207213
suite = unittest.TestLoader().loadTestsFromTestCase(TestStringMethods)

tests/test_files.py

Lines changed: 84 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,28 @@
1+
"""Test /api/files."""
2+
13
# fmt:off
4+
# pylint: skip-file
25

36
from __future__ import absolute_import, division, print_function
47

58
import hashlib
69
import os
710
import unittest
11+
from typing import Any, Dict, List, Optional
812

9-
from tornado.escape import json_decode, json_encode
10-
11-
# local imports
12-
from rest_tools.client import RestClient
13+
from rest_tools.client import RestClient # type: ignore[import]
14+
from tornado.escape import json_encode
1315

1416
from .test_server import TestServerAPI
1517

1618

17-
def hex(data):
19+
def hex(data: Any) -> str:
    """Return the SHA-512 hex digest of `data`.

    A `str` is UTF-8 encoded before hashing; anything else is handed to
    `hashlib.sha512` unchanged.
    """
    payload = data.encode("utf-8") if isinstance(data, str) else data
    return hashlib.sha512(payload).hexdigest()
2124

25+
2226
class TestFilesAPI(TestServerAPI):
2327
def test_10_files(self):
2428
self.start_server()
@@ -111,7 +115,7 @@ def test_12_files_keys(self):
111115
"locations",
112116
"extra",
113117
"supplemental",
114-
"meta_modify_date"
118+
"meta_modify_date",
115119
}
116120

117121
# w/ all-keys = False
@@ -130,7 +134,7 @@ def test_12_files_keys(self):
130134
"locations",
131135
"extra",
132136
"supplemental",
133-
"meta_modify_date"
137+
"meta_modify_date",
134138
}
135139

136140
# w/ all-keys = False & keys
@@ -143,6 +147,78 @@ def test_12_files_keys(self):
143147
data = r.request_seq("GET", "/api/files", args)
144148
assert set(data["files"][0].keys()) == {"checksum", "file_size"}
145149

150+
def test_13_files_path_like_args(self):
    """Query /api/files with each path-like base/shortcut argument.

    Arguments covered: "logical_name", "directory", "filename", "path",
    & "path-regex".
    """
    self.start_server()  # type: ignore[no-untyped-call]
    token = self.get_token()  # type: ignore[no-untyped-call]
    client = RestClient(self.address, token, timeout=1, retries=1)

    # (logical_name, location path); the 1-based position doubles as the
    # checksum seed and the file size.
    # NOTE: the first location path has no leading '/', unlike the others.
    fixtures = [
        ("/foo/bar/baz/bat.txt", "foo/bar/baz/bat.txt"),
        ("/foo/bar/ham.txt", "/foo/bar/ham.txt"),
        ("/green/eggs/and/ham.txt", "/green/eggs/and/ham.txt"),
        ("/john/paul/george/ringo/ham.txt", "/john/paul/george/ringo/ham.txt"),
    ]
    for size, (logical_name, location) in enumerate(fixtures, start=1):
        client.request_seq(
            "POST",
            "/api/files",
            {
                "logical_name": logical_name,
                "checksum": {"sha512": hex(str(size))},
                "file_size": size,
                "locations": [{"site": "test", "path": location}],
            },
        )

    def get_paths(args: Optional[Dict[str, str]] = None) -> List[str]:
        """Return the logical names of the files matched by `args`."""
        response = client.request_seq("GET", "/api/files", args or {})
        print(response)
        return [entry["logical_name"] for entry in response["files"]]

    # no filter: every posted file comes back
    assert len(get_paths()) == 4

    # logical_name
    assert len(get_paths({"logical_name": "/foo/bar/ham.txt"})) == 1

    # path
    assert len(get_paths({"path": "/green/eggs/and/ham.txt"})) == 1

    # directory: matches nested files, but not a partial directory name
    found = get_paths({"directory": "/foo/bar"})
    assert set(found) == {"/foo/bar/ham.txt", "/foo/bar/baz/bat.txt"}
    assert not get_paths({"directory": "/fo"})

    # filename: matches at any depth, but not a partial file name
    found = get_paths({"filename": "ham.txt"})
    assert set(found) == {
        "/foo/bar/ham.txt",
        "/green/eggs/and/ham.txt",
        "/john/paul/george/ringo/ham.txt",
    }
    assert not get_paths({"filename": ".txt"})

    # directory & filename
    found = get_paths({"directory": "/foo", "filename": "ham.txt"})
    assert found == ["/foo/bar/ham.txt"]

    # path-regex
    found = get_paths({"path-regex": r".*george/ringo.*"})
    assert found == ["/john/paul/george/ringo/ham.txt"]
    assert len(get_paths({"path-regex": r".*"})) == 4
221+
146222
def test_15_files_auth(self):
147223
self.start_server(config_override={'SECRET':'secret'})
148224
token = self.get_token()

tests/test_server.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,27 @@
1+
"""Test REST Server."""
2+
3+
# fmt:off
4+
# pylint: skip-file
5+
16
from __future__ import absolute_import, division, print_function
27

8+
import hashlib
39
import os
4-
import time
5-
import tempfile
6-
import shutil
710
import random
11+
import shutil
812
import subprocess
13+
import tempfile
14+
import time
15+
import unittest
916
from functools import partial
1017
from threading import Thread
11-
import unittest
12-
import hashlib
1318

14-
from tornado.escape import json_encode,json_decode
15-
from tornado.ioloop import IOLoop
1619
import requests
20+
from file_catalog.urlargparse import encode as jquery_encode
1721
from pymongo import MongoClient
22+
from tornado.escape import json_decode, json_encode
23+
from tornado.ioloop import IOLoop
1824

19-
from file_catalog.urlargparse import encode as jquery_encode
2025

2126
class TestServerAPI(unittest.TestCase):
2227
def setUp(self):
@@ -35,7 +40,7 @@ def setUp(self):
3540
'--logpath', dblog])
3641
self.addCleanup(partial(time.sleep, 0.3))
3742
self.addCleanup(m.terminate)
38-
43+
3944
def clean_db(self, host, port):
4045
db = MongoClient(host=host, port=port).file_catalog
4146
colls = db.list_collection_names()

0 commit comments

Comments
 (0)