Skip to content

Commit 35044d4

Browse files
authored
fix: Uncorrelated shape systematic with proper masking (#766)
* Apply a mask to skip cases where given shapesys modifier doesn't affect any samples * Remove RuntimeError check that is never reached
1 parent a3082bd commit 35044d4

File tree

2 files changed

+41
-36
lines changed

2 files changed

+41
-36
lines changed

src/pyhf/modifiers/shapesys.py

Lines changed: 40 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,13 @@ def __init__(self, shapesys_mods, pdfconfig, mega_mods, batch_size=None):
4949
self._shapesys_mask = [
5050
[[mega_mods[m][s]['data']['mask']] for s in pdfconfig.samples] for m in keys
5151
]
52-
self.__shapesys_uncrt = default_backend.astensor(
52+
self.__shapesys_info = default_backend.astensor(
5353
[
5454
[
5555
[
56-
mega_mods[m][s]['data']['uncrt'],
56+
mega_mods[m][s]['data']['mask'],
5757
mega_mods[m][s]['data']['nom_data'],
58+
mega_mods[m][s]['data']['uncrt'],
5859
]
5960
for s in pdfconfig.samples
6061
]
@@ -84,18 +85,21 @@ def _reindex_access_field(self, pdfconfig):
8485
if not pdfconfig.param_set(self._shapesys_mods[syst_index]).n_parameters:
8586
self._access_field[syst_index] = 0
8687
continue
88+
89+
singular_sample_index = [
90+
idx
91+
for idx, syst in enumerate(
92+
default_backend.astensor(self._shapesys_mask)[syst_index, :, 0]
93+
)
94+
if any(syst)
95+
][-1]
96+
8797
for batch_index, batch_access in enumerate(syst_access):
8898
selection = self.param_viewer.index_selection[syst_index][batch_index]
8999
access_field_for_syst_and_batch = default_backend.zeros(
90100
len(batch_access)
91101
)
92-
singular_sample_index = [
93-
idx
94-
for idx, syst in enumerate(
95-
default_backend.astensor(self._shapesys_mask)[syst_index, :, 0]
96-
)
97-
if any(syst)
98-
][-1]
102+
99103
sample_mask = self._shapesys_mask[syst_index][singular_sample_index][0]
100104
access_field_for_syst_and_batch[sample_mask] = selection
101105
self._access_field[
@@ -115,30 +119,36 @@ def _precompute(self):
115119
self.shapesys_default = tensorlib.ones(tensorlib.shape(self.shapesys_mask))
116120

117121
def finalize(self, pdfconfig):
118-
for uncert_this_mod, pname in zip(self.__shapesys_uncrt, self._shapesys_mods):
122+
# self.__shapesys_info: (parameter, sample, [mask, nominal rate, uncertainty], bin)
123+
for mod_uncert_info, pname in zip(self.__shapesys_info, self._shapesys_mods):
124+
# skip cases where given shapesys modifier affects zero samples
119125
if not pdfconfig.param_set(pname).n_parameters:
120126
continue
121-
unc_nom = default_backend.astensor(
122-
[x for x in uncert_this_mod[:, :, :] if any(x[0][x[0] > 0])]
123-
)
124-
unc = unc_nom[0, 0]
125-
nom = unc_nom[0, 1]
126-
unc_sq = default_backend.power(unc, 2)
127-
nom_sq = default_backend.power(nom, 2)
128-
129-
# the below tries to filter cases in which
130-
# this modifier is not used by checking non
131-
# zeroness.. shoudl probably use mask
132-
numerator = default_backend.where(
133-
unc_sq > 0, nom_sq, default_backend.zeros(unc_sq.shape)
134-
)
135-
denominator = default_backend.where(
136-
unc_sq > 0, unc_sq, default_backend.ones(unc_sq.shape)
137-
)
138-
139-
factors = numerator / denominator
140-
factors = factors[factors > 0]
127+
128+
# identify the information for the sample that the given parameter
129+
# affects. shapesys is not shared, so there should only ever be at
130+
# most one sample
131+
# sample_uncert_info: ([mask, nominal rate, uncertainty], bin)
132+
sample_uncert_info = mod_uncert_info[
133+
default_backend.astensor(
134+
default_backend.sum(mod_uncert_info[:, 0] > 0, axis=1), dtype='bool'
135+
)
136+
][0]
137+
138+
# bin_mask: ([mask], bin)
139+
bin_mask = default_backend.astensor(sample_uncert_info[0], dtype='bool')
140+
# nom_unc: ([nominal, uncertainty], bin)
141+
nom_unc = sample_uncert_info[1:]
142+
143+
# compute gamma**2 and sigma**2
144+
nom_unc_sq = default_backend.power(nom_unc, 2)
145+
# when the nominal rate = 0 OR uncertainty = 0, set = 1
146+
nom_unc_sq[nom_unc_sq == 0] = 1
147+
# divide (gamma**2 / sigma**2) and mask to set factors for only the
148+
# parameters we have allocated
149+
factors = (nom_unc_sq[0] / nom_unc_sq[1])[bin_mask]
141150
assert len(factors) == pdfconfig.param_set(pname).n_parameters
151+
142152
pdfconfig.param_set(pname).factors = default_backend.tolist(factors)
143153
pdfconfig.param_set(pname).auxdata = default_backend.tolist(factors)
144154

src/pyhf/pdf.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -195,12 +195,7 @@ def _nominal_and_modifiers_from_spec(config, spec):
195195
mega_mods[key][s]['data']['mask'] += maskval
196196
mega_mods[key][s]['data']['uncrt'] += uncrt
197197
mega_mods[key][s]['data']['nom_data'] += nom
198-
else:
199-
raise RuntimeError(
200-
'not sure how to combine {mtype} into the mega-channel'.format(
201-
mtype=mtype
202-
)
203-
)
198+
204199
sample_dict = {'name': 'mega_{}'.format(s), 'nom': mega_nom}
205200
mega_samples[s] = sample_dict
206201

0 commit comments

Comments
 (0)