Skip to content

Commit 054776d

Browse files
committed
Changes for more generic query support, uproot-raw
1 parent a7f11f5 commit 054776d

File tree

4 files changed

+153
-17
lines changed

4 files changed

+153
-17
lines changed

servicex/databinder_models.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from pydantic import BaseModel, Field, root_validator, constr, validator
3131

3232
from servicex.dataset_identifier import RucioDatasetIdentifier, FileListDataset
33-
from servicex.func_adl import func_adl_dataset
33+
from servicex.query import Query as SXQuery, QueryStringGenerator
3434

3535

3636
class Sample(BaseModel):
@@ -40,7 +40,7 @@ class Sample(BaseModel):
4040
XRootDFiles: Optional[Union[str, List[str]]]
4141
NFiles: Optional[int] = Field(default=None)
4242
Function: Optional[Union[str, Callable]] = Field(default=None)
43-
Query: Optional[Union[str, func_adl_dataset.Query]] = Field(default=None)
43+
Query: Optional[Union[str, SXQuery, QueryStringGenerator]] = Field(default=None)
4444
Tree: Optional[str] = Field(default=None)
4545
IgnoreLocalCache: bool = False
4646

servicex/query.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -492,3 +492,32 @@ async def as_signed_urls_async(self, display_progress: bool = True,
492492
dataset_group=dataset_group)
493493

494494
as_signed_urls = make_sync(as_signed_urls_async)
495+
496+
497+
class QueryStringGenerator(ABC):
    """Interface for objects that can produce a selection string.

    Concrete subclasses must implement ``generate_selection_string``;
    the base class itself cannot be instantiated.
    """

    @abc.abstractmethod
    def generate_selection_string(self) -> str:
        """Return the selection string represented by this object."""
        ...
502+
503+
504+
class GenericQueryStringGenerator(QueryStringGenerator):
    """Selection-string generator that hands back the string it was built with."""

    def __init__(self, query: str):
        # Stored verbatim; no parsing or validation is done here.
        self.query = query

    def generate_selection_string(self) -> str:
        """Return the query string supplied to the initializer, unchanged."""
        selection = self.query
        return selection
511+
512+
513+
class GenericQuery(Query):
    """
    A "generic" Query whose selection string comes from a pluggable
    QueryStringGenerator, so the constructor does not need to be overloaded.
    """
    # Attached after construction; stays None until a generator is assigned.
    query_string_generator: Optional[QueryStringGenerator] = None

    def generate_selection_string(self) -> str:
        """Delegate to the attached generator.

        :raises RuntimeError: if no query string generator has been set
        """
        generator = self.query_string_generator
        if generator is not None:
            return generator.generate_selection_string()
        raise RuntimeError('query string generator not set')

servicex/servicex_client.py

Lines changed: 67 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,14 @@
2525
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
2626
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
2727
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28-
from typing import Optional, List, TypeVar, Any, Type
28+
from typing import Optional, List, TypeVar, Any, Type, Union
2929

3030
from servicex.configuration import Configuration
3131
from servicex.func_adl.func_adl_dataset import FuncADLQuery
3232
from servicex.models import ResultFormat, TransformStatus
3333
from servicex.query_cache import QueryCache
3434
from servicex.servicex_adapter import ServiceXAdapter
35+
from servicex.query import Query, GenericQuery, QueryStringGenerator, GenericQueryStringGenerator
3536
from servicex.types import DID
3637
from servicex.python_dataset import PythonQuery
3738
from servicex.dataset_group import DatasetGroup
@@ -57,21 +58,26 @@ def get_codegen(_sample: Sample, _general: General):
5758
datasets = []
5859
for sample in config.Sample:
5960
if sample.Query:
60-
if type(sample.Query) is str:
61-
qastle_query = qastle.python_ast_to_text_ast(ast.parse(sample.Query)) # NOQA E501
62-
sample.Query = FuncADLQuery()
61+
# if string or QueryStringGenerator, turn into a Query
62+
if isinstance(sample.Query, str) or isinstance(sample.Query, QueryStringGenerator):
63+
sample.Query = sx.generic_query(dataset_identifier=sample.dataset_identifier,
64+
title=sample.Name,
65+
codegen=get_codegen(sample, config.General),
66+
result_format=config.General.OutputFormat,
67+
ignore_cache=sample.IgnoreLocalCache,
68+
query=sample.Query)
69+
# query._q_ast = sample.Query._q_ast
70+
# query._item_type = sample.Query._item_type
71+
if isinstance(sample.Query, FuncADLQuery):
72+
query = sx.func_adl_dataset(sample.dataset_identifier, sample.Name,
73+
get_codegen(sample, config.General),
74+
config.General.OutputFormat)
75+
query._q_ast = sample.Query._q_ast
76+
query._item_type = sample.Query._item_type
77+
if sample.Tree:
78+
query = query.set_tree(sample.Tree)
79+
sample.Query = query
6380

64-
sample.Query.set_provided_qastle(qastle_query)
65-
66-
query = sx.func_adl_dataset(sample.dataset_identifier, sample.Name,
67-
get_codegen(sample, config.General),
68-
config.General.OutputFormat)
69-
query._q_ast = sample.Query._q_ast
70-
query._item_type = sample.Query._item_type
71-
if sample.Tree:
72-
query = query.set_tree(sample.Tree)
73-
74-
sample.Query = query
7581
sample.Query.ignore_cache = sample.IgnoreLocalCache
7682

7783
datasets.append(sample.Query)
@@ -258,3 +264,49 @@ def python_dataset(
258264
result_format=result_format,
259265
ignore_cache=ignore_cache
260266
)
267+
268+
def generic_query(
    self,
    dataset_identifier: DID,
    codegen: str,
    query: Union[str, QueryStringGenerator],
    title: str = "ServiceX Client",
    result_format: ResultFormat = ResultFormat.parquet,
    ignore_cache: bool = False
) -> Query:
    r"""
    Generate a Query object for a generic codegen specification

    :param dataset_identifier: The dataset identifier or filelist to be the source of files
    :param codegen: Name of the code generator to use with this transform
    :param query: The selection to run. Either a plain string, which is wrapped
                  in a GenericQueryStringGenerator, or a QueryStringGenerator
                  instance, which is used as given.
    :param title: Title to be applied to the transform. This is also useful for
                  relating transform results.
    :param result_format: Do you want Parquet or Root? This can be set later with
                          the set_result_format method
    :param ignore_cache: Ignore the query cache and always run the query
    :return: A Query object
    :raises NameError: if ``codegen`` is not among this client's code generators
    :raises ValueError: if ``query`` is neither a string nor a QueryStringGenerator

    """
    if codegen not in self.code_generators:
        raise NameError(
            f"{codegen} code generator not supported by serviceX "
            f"deployment at {self.servicex.url}"
        )

    # Normalise the query to a QueryStringGenerator. A wrapped string is
    # always valid, so only non-string inputs need the type check.
    if isinstance(query, str):
        query = GenericQueryStringGenerator(query)
    elif not isinstance(query, QueryStringGenerator):
        raise ValueError("query argument must be string or QueryStringGenerator")

    qobj = GenericQuery(
        dataset_identifier=dataset_identifier,
        sx_adapter=self.servicex,
        title=title,
        codegen=codegen,
        config=self.config,
        query_cache=self.query_cache,
        result_format=result_format,
        ignore_cache=ignore_cache,
    )
    # The generator is attached after construction rather than passed to
    # the GenericQuery constructor.
    qobj.query_string_generator = query
    return qobj

servicex/uproot_raw/uproot_raw.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Copyright (c) 2024, IRIS-HEP
2+
# All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are met:
6+
#
7+
# * Redistributions of source code must retain the above copyright notice, this
8+
# list of conditions and the following disclaimer.
9+
#
10+
# * Redistributions in binary form must reproduce the above copyright notice,
11+
# this list of conditions and the following disclaimer in the documentation
12+
# and/or other materials provided with the distribution.
13+
#
14+
# * Neither the name of the copyright holder nor the names of its
15+
# contributors may be used to endorse or promote products derived from
16+
# this software without specific prior written permission.
17+
#
18+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22+
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24+
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26+
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
29+
# pydantic 1.10 API
30+
31+
import pydantic
32+
from typing import List, Union, Mapping, Optional
33+
from ..query import QueryStringGenerator
34+
35+
class SubQuery(pydantic.BaseModel):
    """One sub-query of an uproot-raw request.

    Only ``treename`` is required; every other field defaults to ``None``.
    The defaults are spelled out explicitly so the model does not depend on
    pydantic 1.x's implicit "Optional means default None" rule.
    """
    # NOTE(review): field semantics are defined by the uproot-raw code
    # generator, which is not visible here - confirm before documenting further.
    treename: Union[Mapping[str, str], List[str], str]
    expressions: Optional[Union[List[str], str]] = None
    cut: Optional[str] = None
    filter_name: Optional[Union[List[str], str]] = None
    filter_typename: Optional[Union[List[str], str]] = None
    aliases: Optional[Mapping[str, str]] = None
    copy_histogram: Optional[bool] = None
43+
44+
45+
@pydantic.dataclasses.dataclass
class UprootRawQuery(QueryStringGenerator):
    """Query string generator that serialises one or more SubQuery objects to JSON."""
    query: Union[List[SubQuery], SubQuery]

    def generate_selection_string(self) -> str:
        """Return the sub-queries encoded as a JSON array.

        A single SubQuery is treated as a one-element list, so the result is
        always a JSON list with one object per sub-query.
        """
        import json
        if isinstance(self.query, SubQuery):
            subqueries = [self.query]
        else:
            subqueries = self.query
        # Each model is round-tripped through its own .json() output so that
        # json.dumps receives plain dicts.
        return json.dumps([json.loads(subquery.json()) for subquery in subqueries])

0 commit comments

Comments
 (0)