Skip to content

Commit e85864c

Browse files
feat: Only allocate shapesys parameter if yield and uncrt are positive, nonzero (#775)
- improve handling of shapesys by only allocating nuisance parameters for valid, physically meaningful bins (yield and uncertainty are nonzero and positive) - add documentation about this particular "feature" of shapesys
1 parent 5a73625 commit e85864c

File tree

6 files changed

+274
-14
lines changed

6 files changed

+274
-14
lines changed

docs/likelihood.rst

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,27 @@ shown below:
132132
133133
{ "name": "mod_name", "type": "shapesys", "data": [1.0, 1.5, 2.0] }
134134
135-
An example of an uncorrelated shape modifier with three absolute uncertainty terms for a 3-bin channel.
135+
An example of an uncorrelated shape modifier with three absolute uncertainty
136+
terms for a 3-bin channel.
137+
138+
.. warning::
139+
140+
Nuisance parameters will not be allocated for any bins where either
141+
142+
* the samples nominal expected rate is zero, or
143+
* the absolute uncertainty is zero.
144+
145+
These values are, in the context of uncorrelated shape uncertainties,
146+
unphysical. If this situation occurs, one needs to go back and understand
147+
the inputs as this is undefined behavior in HistFactory.
148+
149+
The previous example will allocate three nuisance parameters for ``mod_name``.
150+
The following example will allocate only two nuisance parameters for a 3-bin
151+
channel:
152+
153+
.. code:: json
154+
155+
{ "name": "mod_name", "type": "shapesys", "data": [1.0, 0.0, 2.0] }
136156
137157
Correlated Shape (histosys)
138158
~~~~~~~~~~~~~~~~~~~~~~~~~~~

src/pyhf/modifiers/shapesys.py

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,24 @@
1313
class shapesys(object):
1414
@classmethod
1515
def required_parset(cls, sample_data, modifier_data):
16+
# count the number of bins with nonzero, positive yields
17+
valid_bins = [
18+
(sample_bin > 0 and modifier_bin > 0)
19+
for sample_bin, modifier_bin in zip(modifier_data, sample_data)
20+
]
21+
n_parameters = sum(valid_bins)
1622
return {
1723
'paramset_type': constrained_by_poisson,
18-
'n_parameters': len(sample_data),
24+
'n_parameters': n_parameters,
1925
'modifier': cls.__name__,
2026
'is_constrained': cls.is_constrained,
2127
'is_shared': False,
22-
'inits': (1.0,) * len(sample_data),
23-
'bounds': ((1e-10, 10.0),) * len(sample_data),
28+
'inits': (1.0,) * n_parameters,
29+
'bounds': ((1e-10, 10.0),) * n_parameters,
2430
# nb: auxdata/factors set by finalize. Set to non-numeric to crash
2531
# if we fail to set auxdata/factors correctly
26-
'auxdata': (None,) * len(sample_data),
27-
'factors': (None,) * len(sample_data),
32+
'auxdata': (None,) * n_parameters,
33+
'factors': (None,) * n_parameters,
2834
}
2935

3036

@@ -66,17 +72,36 @@ def __init__(self, shapesys_mods, pdfconfig, mega_mods, batch_size=None):
6672
(len(shapesys_mods), self.batch_size or 1, 1),
6773
)
6874
# access field is shape (sys, batch, globalbin)
69-
for s, syst_access in enumerate(self._access_field):
70-
for t, batch_access in enumerate(syst_access):
71-
selection = self.param_viewer.index_selection[s][t]
72-
for b, bin_access in enumerate(batch_access):
73-
self._access_field[s, t, b] = (
74-
selection[bin_access] if bin_access < len(selection) else 0
75-
)
75+
76+
# reindex it based on current masking
77+
self._reindex_access_field(pdfconfig)
7678

7779
self._precompute()
7880
events.subscribe('tensorlib_changed')(self._precompute)
7981

82+
def _reindex_access_field(self, pdfconfig):
83+
for syst_index, syst_access in enumerate(self._access_field):
84+
if not pdfconfig.param_set(self._shapesys_mods[syst_index]).n_parameters:
85+
self._access_field[syst_index] = 0
86+
continue
87+
for batch_index, batch_access in enumerate(syst_access):
88+
selection = self.param_viewer.index_selection[syst_index][batch_index]
89+
access_field_for_syst_and_batch = default_backend.zeros(
90+
len(batch_access)
91+
)
92+
singular_sample_index = [
93+
idx
94+
for idx, syst in enumerate(
95+
default_backend.astensor(self._shapesys_mask)[syst_index, :, 0]
96+
)
97+
if any(syst)
98+
][-1]
99+
sample_mask = self._shapesys_mask[syst_index][singular_sample_index][0]
100+
access_field_for_syst_and_batch[sample_mask] = selection
101+
self._access_field[
102+
syst_index, batch_index
103+
] = access_field_for_syst_and_batch
104+
80105
def _precompute(self):
81106
tensorlib, _ = get_backend()
82107
if not self.param_viewer.index_selection:
@@ -91,6 +116,8 @@ def _precompute(self):
91116

92117
def finalize(self, pdfconfig):
93118
for uncert_this_mod, pname in zip(self.__shapesys_uncrt, self._shapesys_mods):
119+
if not pdfconfig.param_set(pname).n_parameters:
120+
continue
94121
unc_nom = default_backend.astensor(
95122
[x for x in uncert_this_mod[:, :, :] if any(x[0][x[0] > 0])]
96123
)

src/pyhf/pdf.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,10 @@ def _nominal_and_modifiers_from_spec(config, spec):
188188
) # broadcasting
189189
elif mtype in ['shapesys', 'staterror']:
190190
uncrt = thismod['data'] if thismod else [0.0] * len(nom)
191-
maskval = [True if thismod else False] * len(nom)
191+
if mtype == 'shapesys':
192+
maskval = [(x > 0 and y > 0) for x, y in zip(uncrt, nom)]
193+
else:
194+
maskval = [True if thismod else False] * len(nom)
192195
mega_mods[key][s]['data']['mask'] += maskval
193196
mega_mods[key][s]['data']['uncrt'] += uncrt
194197
mega_mods[key][s]['data']['nom_data'] += nom

tests/test_combined_modifiers.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,88 @@ def test_normfactor(backend):
504504
assert np.allclose(mod[1, 0, 3], [1.0, 8.0, 8.0])
505505

506506

507+
def test_shapesys_zero(backend):
508+
mc = MockConfig(
509+
par_map={
510+
'SigXsecOverSM': {
511+
'paramset': paramset(n_parameters=1, inits=[0], bounds=[[0, 10]]),
512+
'slice': slice(0, 1),
513+
},
514+
'syst': {
515+
'paramset': paramset(
516+
n_parameters=5, inits=[0] * 5, bounds=[[0, 10]] * 5
517+
),
518+
'slice': slice(1, 6),
519+
},
520+
'syst_lowstats': {
521+
'paramset': paramset(
522+
n_parameters=0, inits=[0] * 0, bounds=[[0, 10]] * 0
523+
),
524+
'slice': slice(6, 6),
525+
},
526+
},
527+
channels=['channel1'],
528+
channel_nbins={'channel1': 6},
529+
par_order=['SigXsecOverSM', 'syst', 'syst_lowstats'],
530+
samples=['signal', 'background'],
531+
)
532+
533+
mega_mods = {
534+
'shapesys/syst': {
535+
'background': {
536+
'type': 'shapesys',
537+
'name': 'syst',
538+
'data': {
539+
'mask': [True, True, False, True, True, True],
540+
'nom_data': [100.0, 90.0, 0.0, 70, 0.1, 50],
541+
'uncrt': [10, 9, 1, 0.0, 0.1, 5],
542+
},
543+
},
544+
'signal': {
545+
'type': 'shapesys',
546+
'name': 'syst',
547+
'data': {
548+
'mask': [False, False, False, False, False, False],
549+
'nom_data': [20.0, 10.0, 5.0, 3.0, 2.0, 1.0],
550+
'uncrt': [10, 9, 1, 0.0, 0.1, 5],
551+
},
552+
},
553+
},
554+
'shapesys/syst_lowstats': {
555+
'background': {
556+
'type': 'shapesys',
557+
'name': 'syst_lowstats',
558+
'data': {
559+
'mask': [False, False, False, False, False, False],
560+
'nom_data': [100.0, 90.0, 0.0, 70, 0.1, 50],
561+
'uncrt': [0, 0, 0, 0, 0, 0],
562+
},
563+
},
564+
'signal': {
565+
'type': 'shapesys',
566+
'name': 'syst',
567+
'data': {
568+
'mask': [False, False, False, False, False, False],
569+
'nom_data': [20.0, 10.0, 5.0, 3.0, 2.0, 1.0],
570+
'uncrt': [10, 9, 1, 0.0, 0.1, 5],
571+
},
572+
},
573+
},
574+
}
575+
hsc = shapesys_combined(
576+
[('syst', 'shapesys'), ('syst_lowstats', 'shapesys')], mc, mega_mods
577+
)
578+
579+
mod = hsc.apply(pyhf.tensorlib.astensor([-10, 1.1, 1.2, 1.3, -20, -30]))
580+
shape = pyhf.tensorlib.shape(mod)
581+
assert shape == (2, 2, 1, 6)
582+
583+
# expect the 'background' sample to have a single masked bin for 'syst'
584+
assert mod[0, 1, 0, 2] == 1.0
585+
# expect the 'background' sample to have all bins masked for 'syst_lowstats'
586+
assert np.all(kappa == 1 for kappa in mod[1, 1, 0])
587+
588+
507589
def test_shapefactor(backend):
508590
mc = MockConfig(
509591
par_map={

tests/test_pdf.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,56 @@ def test_pdf_integration_staterror(backend):
182182
)
183183

184184

185+
def test_pdf_integration_shapesys_zeros(backend):
186+
spec = {
187+
"channels": [
188+
{
189+
"name": "channel1",
190+
"samples": [
191+
{
192+
"data": [20.0, 10.0, 5.0, 3.0, 2.0, 1.0],
193+
"modifiers": [
194+
{"data": None, "name": "mu", "type": "normfactor"}
195+
],
196+
"name": "signal",
197+
},
198+
{
199+
"data": [100.0, 90, 0.0, 70, 0.1, 50],
200+
"modifiers": [
201+
{
202+
"data": [10, 9, 1, 0.0, 0.1, 5],
203+
"name": "syst",
204+
"type": "shapesys",
205+
},
206+
{
207+
"data": [0, 0, 0, 0, 0, 0],
208+
"name": "syst_lowstats",
209+
"type": "shapesys",
210+
},
211+
],
212+
"name": "background1",
213+
},
214+
],
215+
}
216+
]
217+
}
218+
pdf = pyhf.Model(spec)
219+
par_set_syst = pdf.config.param_set('syst')
220+
par_set_syst_lowstats = pdf.config.param_set('syst_lowstats')
221+
222+
assert par_set_syst.n_parameters == 4
223+
assert par_set_syst_lowstats.n_parameters == 0
224+
tensorlib, _ = backend
225+
nominal_sq = tensorlib.power(tensorlib.astensor([100.0, 90, 0.0, 70, 0.1, 50]), 2)
226+
uncerts_sq = tensorlib.power(tensorlib.astensor([10, 9, 1, 0.0, 0.1, 5]), 2)
227+
factors = tensorlib.divide(nominal_sq, uncerts_sq)
228+
indices = tensorlib.astensor([0, 1, 4, 5], dtype='int')
229+
assert pytest.approx(tensorlib.tolist(par_set_syst.factors)) == tensorlib.tolist(
230+
tensorlib.gather(factors, indices)
231+
)
232+
assert getattr(par_set_syst_lowstats, 'factors', None) is None
233+
234+
185235
@pytest.mark.only_numpy
186236
def test_pdf_integration_histosys(backend):
187237
source = json.load(open('validation/data/2bin_histosys_example2.json'))

tests/test_validation.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import json
55
import pytest
66
import os
7+
import numpy as np
78

89

910
@pytest.fixture(scope='module')
@@ -846,3 +847,80 @@ def test_import_roundtrip(tmpdir, toplvl, basedir):
846847
assert abs(CLs_obs_after - CLs_obs_before) / CLs_obs_before < tolerance
847848
for result, expected_result in zip(CLs_exp_set_after, CLs_exp_set_before):
848849
assert abs(result - expected_result) / expected_result < tolerance
850+
851+
852+
def test_shapesys_nuisparfilter_validation():
853+
reference_root_results = {
854+
"CLs_exp": [
855+
2.702197937866914e-05,
856+
0.00037099917612576155,
857+
0.004360634386335687,
858+
0.03815031509701916,
859+
0.20203027564155074,
860+
],
861+
"CLs_obs": 0.004360634405484502,
862+
}
863+
null = None
864+
spec = {
865+
"channels": [
866+
{
867+
"name": "channel1",
868+
"samples": [
869+
{
870+
"data": [20, 10],
871+
"modifiers": [
872+
{
873+
"data": null,
874+
"name": "SigXsecOverSM",
875+
"type": "normfactor",
876+
}
877+
],
878+
"name": "signal",
879+
},
880+
{
881+
"data": [100, 10],
882+
"modifiers": [
883+
{"data": [10, 0], "name": "syst", "type": "shapesys"}
884+
],
885+
"name": "background1",
886+
},
887+
],
888+
}
889+
],
890+
"measurements": [
891+
{
892+
"config": {
893+
"parameters": [
894+
{
895+
"auxdata": [1],
896+
"bounds": [[0.5, 1.5]],
897+
"inits": [1],
898+
"name": "lumi",
899+
"sigmas": [0.1],
900+
}
901+
],
902+
"poi": "SigXsecOverSM",
903+
},
904+
"name": "GaussExample",
905+
}
906+
],
907+
"observations": [{"data": [100, 10], "name": "channel1"}],
908+
"version": "1.0.0",
909+
}
910+
w = pyhf.Workspace(spec)
911+
m = w.model(
912+
modifier_settings={
913+
'normsys': {'interpcode': 'code4'},
914+
'histosys': {'interpcode': 'code4p'},
915+
},
916+
)
917+
d = w.data(m)
918+
obs, exp = pyhf.infer.hypotest(1.0, d, m, return_expected_set=True)
919+
pyhf_results = {'CLs_obs': obs[0], 'CLs_exp': [e[0] for e in exp]}
920+
921+
assert np.allclose(
922+
reference_root_results['CLs_obs'], pyhf_results['CLs_obs'], atol=1e-4, rtol=1e-5
923+
)
924+
assert np.allclose(
925+
reference_root_results['CLs_exp'], pyhf_results['CLs_exp'], atol=1e-4, rtol=1e-5
926+
)

0 commit comments

Comments
 (0)