Skip to content

Commit c548ed2

Browse files
authored
Add TopCP transformer query (#566)
add query class for TopCP transformer
1 parent 3468b90 commit c548ed2

File tree

8 files changed

+301
-0
lines changed

8 files changed

+301
-0
lines changed

examples/TopCP_Dict.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from pathlib import Path

from servicex import query, dataset, deliver

# Resolve reco.yaml next to this script so the example runs from any working
# directory, not only when it is launched from inside examples/.
RECO_YAML = Path(__file__).resolve().parent / "reco.yaml"

# Dictionary form of a ServiceX request: one sample, three input files,
# processed with the TopCP query built from the reco configuration.
spec = {
    "Sample": [
        {
            "Name": "TopCP_Dict",
            "Dataset": dataset.FileList(
                [
                    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000001.pool.root.1",  # noqa: E501
                    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000002.pool.root.1",  # noqa: E501
                    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000003.pool.root.1",  # noqa: E501
                ]
            ),
            "Query": query.TopCP(reco=RECO_YAML, max_events=1000),
        }
    ]
}

print(f"Files: {deliver(spec)}")

examples/TopCP_Typed.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from pathlib import Path

from servicex import Sample, ServiceXSpec, query, dataset, deliver

# Resolve reco.yaml next to this script so the example runs from any working
# directory, not only when it is launched from inside examples/.
RECO_YAML = Path(__file__).resolve().parent / "reco.yaml"

# Typed form of a ServiceX request: one sample, three input files,
# processed with the TopCP query built from the reco configuration.
spec = ServiceXSpec(
    Sample=[
        Sample(
            Name="TopCP_Typed",
            Dataset=dataset.FileList(
                [
                    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000001.pool.root.1",  # noqa: E501
                    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000002.pool.root.1",  # noqa: E501
                    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000003.pool.root.1",  # noqa: E501
                ]
            ),
            Query=query.TopCP(reco=RECO_YAML, max_events=1000),
        )
    ]
)

print(f"Files: {deliver(spec)}")

examples/config_TopCP.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# File: config_TopCP.yaml
2+
# Query arguments are comma-separated key=value pairs, e.g. reco=examples/reco.yaml, max_events="1000"
3+
4+
Sample:
5+
- Name: TopCP_YAML
6+
Dataset: !FileList
7+
[
8+
"root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000001.pool.root.1",
9+
"root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000002.pool.root.1",
10+
"root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000003.pool.root.1",
11+
]
12+
Query: !TopCP |
13+
reco=examples/reco.yaml,
14+
max_events="1000"

examples/reco.yaml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
CommonServices:
2+
runSystematics: False
3+
4+
Muons:
5+
- containerName: AnaMuons
6+
WorkingPoint:
7+
- selectionName: loose
8+
quality: Loose
9+
isolation: NonIso
10+
- selectionName: medium
11+
quality: Medium
12+
isolation: NonIso
13+
14+
Output:
15+
treeName: 'reco'
16+
vars: []
17+
metVars: []
18+
containers:
19+
# Format should follow: '<suffix>:<output container>'
20+
mu_: 'AnaMuons'
21+
'': 'EventInfo'
22+
commands: []

pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ FuncADL_ATLASxAOD = "servicex.func_adl.func_adl_dataset:FuncADLQuery_ATLASxAOD"
104104
FuncADL_CMS = "servicex.func_adl.func_adl_dataset:FuncADLQuery_CMS"
105105
PythonFunction = "servicex.python_dataset:PythonFunction"
106106
UprootRaw = "servicex.uproot_raw.uproot_raw:UprootRawQuery"
107+
TopCP = "servicex.topcp.topcp:TopCPQuery"
107108

108109
[project.entry-points.'servicex.dataset']
109110
Rucio = "servicex.dataset_identifier:RucioDatasetIdentifier"
@@ -124,3 +125,6 @@ packages = ["servicex"]
124125

125126
[tool.coverage.run]
126127
dynamic_context = "test_function"
128+
129+
[tool.pytest.ini_options]
130+
asyncio_mode = "auto"

servicex/topcp/topcp.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
# Copyright (c) 2025, IRIS-HEP
2+
# All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are met:
6+
#
7+
# * Redistributions of source code must retain the above copyright notice, this
8+
# list of conditions and the following disclaimer.
9+
#
10+
# * Redistributions in binary form must reproduce the above copyright notice,
11+
# this list of conditions and the following disclaimer in the documentation
12+
# and/or other materials provided with the distribution.
13+
#
14+
# * Neither the name of the copyright holder nor the names of its
15+
# contributors may be used to endorse or promote products derived from
16+
# this software without specific prior written permission.
17+
#
18+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22+
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24+
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26+
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
29+
# pydantic 2 API
30+
31+
import pydantic
32+
from pathlib import Path
33+
34+
from typing import Optional, Union
35+
from ..query_core import QueryStringGenerator
36+
37+
38+
@pydantic.dataclasses.dataclass
class TopCPQuery(QueryStringGenerator):
    """Query description for the TopCP code generator.

    Holds the paths of up to three configuration YAML files (``reco``,
    ``parton``, ``particle``) together with a few run options.
    ``generate_selection_string`` reads those files and packs their text and
    the options into a single JSON string.  At least one YAML path is required.
    """

    yaml_tag = "!TopCP"
    default_codegen = "topcp"

    reco: Optional[Union[Path, str]] = None
    """Path to the reco.yaml"""
    parton: Optional[Union[Path, str]] = None
    """Path to the parton.yaml"""
    particle: Optional[Union[Path, str]] = None
    """Path to the particle.yaml"""
    max_events: Optional[int] = -1
    """Number of events to process"""
    no_systematics: Optional[bool] = True
    """Toggles off the computation of systematics"""
    no_filter: Optional[bool] = False
    """Save all events regardless of analysis filters (still saves the decision)"""

    @pydantic.model_validator(mode="after")
    def no_input_yaml(self):
        """Reject a query that names no configuration YAML at all.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: if ``reco``, ``parton`` and ``particle`` are all unset
                (``None`` or an empty string).
        """
        # Truthiness rather than ``is None`` so this check agrees with
        # generate_selection_string, which also skips empty paths; otherwise an
        # empty string would pass validation and produce a query with no YAML.
        if not (self.reco or self.parton or self.particle):
            raise ValueError("No yaml provided!")
        return self

    @staticmethod
    def _read_yaml(path):
        """Return the text of the file at ``path``, or ``None`` when unset."""
        if not path:
            return None
        # Explicit UTF-8 so the result does not depend on the platform locale.
        return Path(path).read_text(encoding="utf-8")

    def generate_selection_string(self) -> str:
        """Serialise the query to a JSON string.

        Returns:
            A JSON object with the keys ``reco``, ``parton`` and ``particle``
            (file contents, or ``null`` when the path is unset) and
            ``max_events``, ``no_systematics`` and ``no_filter`` (the option
            values as stored on the instance).

        Raises:
            OSError: if a configured YAML file cannot be read.
        """
        import json

        query = {
            "reco": self._read_yaml(self.reco),
            "parton": self._read_yaml(self.parton),
            "particle": self._read_yaml(self.particle),
            "max_events": self.max_events,
            "no_systematics": self.no_systematics,
            "no_filter": self.no_filter,
        }
        return json.dumps(query)

    @classmethod
    def from_yaml(cls, _, node):
        """Build a query from a ``!TopCP`` YAML node.

        ``node.value`` is expected to hold ``key=value`` pairs separated by
        commas and/or line breaks; values may optionally be wrapped in double
        quotes.  The values are handed to the constructor as strings, so any
        conversion to the declared field types is left to pydantic validation.

        Args:
            _: unused (the YAML loader/constructor passed by the caller).
            node: YAML node whose ``value`` is the argument string.

        Returns:
            A new instance built from the parsed pairs.
        """
        import re

        # MULTILINE lets ``$`` terminate a pair at every line break, so pairs
        # separated only by newlines (no trailing comma) are no longer dropped.
        # ``[ \t]*`` tolerates spaces around ``=`` and before the separator
        # without ever matching across lines.
        pairs = re.findall(
            r'(\w+)[ \t]*=[ \t]*"?(.*?)"?[ \t]*(?:,|$)',
            node.value,
            flags=re.MULTILINE,
        )

        return cls(**dict(pairs))

tests/test_databinder.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,13 +453,17 @@ def run_query(input_filenames=None):
453453
Query: !UprootRaw '[{"treename": "nominal"}]'
454454
- Name: ttH7
455455
Dataset: !XRootD root://eosatlas.cern.ch//eos/atlas/path/*/file.root
456+
- Name: ttH8
457+
Dataset: !Rucio user.kchoi:user.kchoi.fcnc_tHq_ML.ttH.v113
458+
Query: !TopCP 'reco="examples/reco.yaml"'
456459
"""
457460
)
458461
f.flush()
459462
result = _load_ServiceXSpec(path)
460463
assert type(result.Sample[0].Query).__name__ == "PythonFunction"
461464
assert type(result.Sample[1].Query).__name__ == "FuncADLQuery_Uproot"
462465
assert type(result.Sample[2].Query).__name__ == "UprootRawQuery"
466+
assert type(result.Sample[7].Query).__name__ == "TopCPQuery"
463467
assert isinstance(result.Sample[3].dataset_identifier, Rucio)
464468
assert (
465469
result.Sample[3].dataset_identifier.did

tests/test_topcp_dataset.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
# Copyright (c) 2025, IRIS-HEP
2+
# All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are met:
6+
#
7+
# * Redistributions of source code must retain the above copyright notice, this
8+
# list of conditions and the following disclaimer.
9+
#
10+
# * Redistributions in binary form must reproduce the above copyright notice,
11+
# this list of conditions and the following disclaimer in the documentation
12+
# and/or other materials provided with the distribution.
13+
#
14+
# * Neither the name of the copyright holder nor the names of its
15+
# contributors may be used to endorse or promote products derived from
16+
# this software without specific prior written permission.
17+
#
18+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22+
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24+
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26+
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
from servicex.topcp.topcp import TopCPQuery
29+
import tempfile
30+
from pathlib import Path
31+
import os
32+
import json
33+
import pytest
34+
35+
36+
def test_default_keys():
37+
with tempfile.NamedTemporaryFile(mode="w", delete=False) as fp:
38+
test_logfile_path = Path(fp.name)
39+
fp.write(
40+
"""
41+
CommonServices:
42+
runSystematics: False
43+
"""
44+
)
45+
fp.close()
46+
47+
topcp_query = TopCPQuery(reco=test_logfile_path)
48+
query_string = topcp_query.generate_selection_string()
49+
query = json.loads(query_string)
50+
51+
query_keys = [
52+
"reco",
53+
"parton",
54+
"particle",
55+
"max_events",
56+
"no_systematics",
57+
"no_filter",
58+
]
59+
for key in query_keys:
60+
assert key in query, f"Missing key: {key}"
61+
os.remove(test_logfile_path)
62+
63+
64+
def test_yaml_serialization():
65+
with (
66+
tempfile.NamedTemporaryFile(mode="w", delete=False) as f1,
67+
tempfile.NamedTemporaryFile(mode="w", delete=False) as f2,
68+
):
69+
test_parton_yaml = Path(f1.name)
70+
f1.write(
71+
"""
72+
CommonServices:
73+
systematicsHistogram: 'listOfSystematicsPartonLevel'
74+
runSystematics: True
75+
76+
GeneratorLevelAnalysis: {}
77+
78+
PartonHistory:
79+
- histories: 'Ttbar'
80+
"""
81+
)
82+
f1.close()
83+
test_particle_yaml = Path(f2.name)
84+
f2.write(
85+
"""
86+
CommonServices:
87+
systematicsHistogram: 'listOfSystematicsParticleLevel'
88+
runSystematics: True
89+
90+
GeneratorLevelAnalysis: {}
91+
92+
PL_Electrons:
93+
notFromTau: False
94+
PtEtaSelection:
95+
useDressedProperties: True
96+
minPt: 25000.0
97+
maxEta: 2.5
98+
"""
99+
)
100+
f2.close()
101+
102+
topcp_query = TopCPQuery(parton=test_parton_yaml, particle=test_particle_yaml)
103+
query_string = topcp_query.generate_selection_string()
104+
query = json.loads(query_string)
105+
assert (
106+
"systematicsHistogram: 'listOfSystematicsParticleLevel'\n"
107+
in query["particle"]
108+
)
109+
os.remove(test_parton_yaml)
110+
os.remove(test_particle_yaml)
111+
112+
113+
def test_no_yaml():
114+
with pytest.raises(ValueError):
115+
TopCPQuery()

0 commit comments

Comments
 (0)