Skip to content

Commit 0e1ab4d

Browse files
authored
Merge pull request #1325 from cta-observatory/tailcut_finder
Tailcut finder function and script
2 parents 14201de + 563bc26 commit 0e1ab4d

File tree

3 files changed

+343
-36
lines changed

3 files changed

+343
-36
lines changed

lstchain/image/cleaning.py

Lines changed: 248 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,20 @@
1+
import glob
2+
import logging
13
import numpy as np
4+
from pathlib import Path
5+
from lstchain.paths import run_to_dl1_filename
6+
from lstchain.io.io import dl1_params_lstcam_key, dl1_images_lstcam_key
7+
from lstchain.io.io import dl1_params_tel_mon_cal_key
8+
from lstchain.io.config import get_standard_config
9+
from ctapipe.containers import EventType
10+
from scipy.stats import median_abs_deviation
211

3-
__all__ = ['apply_dynamic_cleaning']
12+
from ctapipe.io import read_table
13+
__all__ = ['apply_dynamic_cleaning',
14+
'find_tailcuts',
15+
'pic_th']
16+
17+
log = logging.getLogger(__name__)
418

519

620
def apply_dynamic_cleaning(image, signal_pixels, threshold, fraction):
@@ -36,3 +50,236 @@ def apply_dynamic_cleaning(image, signal_pixels, threshold, fraction):
3650
mask_dynamic_cleaning = (image >= dynamic_threshold) & signal_pixels
3751

3852
return mask_dynamic_cleaning
53+
54+
55+
def find_tailcuts(input_dir, run_number):
    """
    Determine tailcuts adequate for the bulk of the pixels in a given run.

    This function uses DL1 files to determine tailcuts which are adequate
    for the bulk of the pixels in a given run. It also returns the
    suggested NSB adjustment needed in the "dark-sky" MC to match the data.
    The function uses the median (for the whole camera, excluding outliers)
    of the 95% quantile of the pixel charge for pedestal events to deduce
    the NSB level. It is good to use a large quantile of the pixel charge
    distribution (vs. e.g. using the median) because what matters for having
    a realistic noise simulation is the tail on the right-side, i.e. for
    large pulses.
    For reasons of stability & simplicity of analysis, we cannot decide the
    cleaning levels (or the NSB tuning) on a subrun-by-subrun basis. We
    select values which are more or less valid for the whole run.

    Only a subset of the subruns (~10, hardcoded) of the run is processed,
    distributed uniformly through it.

    Parameters
    ----------
    input_dir: `Path`
        directory where the DL1 files (subrun-wise, i.e., including
        DL1a) are stored

    run_number : int
        run number to be processed

    Returns
    -------
    qped : float
        median (across camera and processed subruns) of the 95% quantile
        of pedestal pixel charge; NaN if the calculation failed
    additional_nsb_rate : float
        p.e./ns rate of NSB to be added to "dark MC" to match the noise in
        the data; NaN if the calculation failed
    newconfig : dict
        cleaning configuration for running the DL1ab stage; None if the
        calculation failed
    """

    # subrun-wise dl1 file names: build the subrun-0 name and glob over the
    # subrun index:
    dl1_filenames = Path(input_dir,
                         run_to_dl1_filename(1, run_number, 0).replace(
                             '.0000.h5', '.????.h5'))
    all_dl1_files = glob.glob(str(dl1_filenames))
    all_dl1_files.sort()

    # Number of median absolute deviations (mad) away from the median that a
    # value has to be to be considered an outlier:
    mad_max = 5  # would exclude 8e-4 of the pdf for a gaussian

    # Minimum number of interleaved pedestals in subrun to proceed with
    # calculation:
    min_number_of_ped_events = 100

    # Minimum number of valid pixels to consider the calculation of NSB level
    # acceptable:
    min_number_of_valid_pixels = 1000

    # Approx. maximum number of subruns (uniformly distributed through the
    # run) to be processed:
    max_number_of_processed_subruns = 10
    # Keep only ~max_number_of_processed_subruns subruns, distributed
    # along the run:
    dl1_files = all_dl1_files[::int(1+len(all_dl1_files) /
                                    max_number_of_processed_subruns)]

    # Per processed subrun: number of pedestal events, and camera-median of
    # the per-pixel 95% quantile of pedestal charge:
    number_of_pedestals = []
    median_ped_qt95_pix_charge = []

    for dl1_file in dl1_files:
        log.info('\nInput file: %s', dl1_file)

        data_parameters = read_table(dl1_file, dl1_params_lstcam_key)
        event_type_data = data_parameters['event_type'].data
        pedestal_mask = event_type_data == EventType.SKY_PEDESTAL.value

        num_pedestals = pedestal_mask.sum()
        if num_pedestals < min_number_of_ped_events:
            log.warning(f'  Too few interleaved pedestals ('
                        f'{num_pedestals}) - skipped subrun!')
            continue

        number_of_pedestals.append(pedestal_mask.sum())
        data_images = read_table(dl1_file, dl1_images_lstcam_key)
        data_calib = read_table(dl1_file, dl1_params_tel_mon_cal_key)
        # data_calib['unusable_pixels'] , indices: (Gain  Calib_id  Pixel)

        # Get the "unusable" flags from the pedcal file:
        unusable_hg = data_calib['unusable_pixels'][0][0]
        unusable_lg = data_calib['unusable_pixels'][0][1]

        # A pixel is unreliable if flagged in either gain channel:
        unreliable_pixels = unusable_hg | unusable_lg
        if unreliable_pixels.sum() > 0:
            log.info(f'  Removed {unreliable_pixels.sum()/unreliable_pixels.size:.2%} of pixels '
                     f'due to unreliable calibration!')

        reliable_pixels = ~unreliable_pixels

        charges_data = data_images['image']
        charges_pedestals = charges_data[pedestal_mask]
        # pixel-wise 95% quantile of ped pix charge through the subrun
        # (#pixels):
        qt95_pix_charge = np.nanquantile(charges_pedestals, 0.95, axis=0)
        # ignore pixels with 0 signal:
        qt95_pix_charge = np.where(qt95_pix_charge > 0, qt95_pix_charge, np.nan)
        # camera-wide median of the per-pixel qt95 values:
        median_qt95_pix_charge = np.nanmedian(qt95_pix_charge)
        # median abs deviation of pixel qt95 values:
        qt95_pix_charge_dev = median_abs_deviation(qt95_pix_charge,
                                                   nan_policy='omit')

        # Just a cut to remove outliers (pixels):
        outliers = (np.abs(qt95_pix_charge - median_qt95_pix_charge) /
                    qt95_pix_charge_dev) > mad_max

        if outliers.sum() > 0:
            removed_fraction = outliers.sum() / outliers.size
            log.info(f'  Removed {removed_fraction:.2%} of pixels (outliers) '
                     f'from pedestal median calculation')

        # Now compute the median (for the whole camera) of the qt95's (for
        # each pixel) of the charges in pedestal events. Use only reliable
        # pixels for this, and exclude outliers:
        n_valid_pixels = np.isfinite(qt95_pix_charge[reliable_pixels]).sum()
        if n_valid_pixels < min_number_of_valid_pixels:
            # NOTE: use the module logger (was logging.warning, i.e. the
            # root logger, inconsistent with the rest of this module):
            log.warning(f'  Too few valid pixels ({n_valid_pixels}) for '
                        f'calculation!')
            median_ped_qt95_pix_charge.append(np.nan)
        else:
            median_ped_qt95_pix_charge.append(np.nanmedian(qt95_pix_charge[
                                                               reliable_pixels &
                                                               ~outliers]))
    # convert to ndarray:
    median_ped_qt95_pix_charge = np.array(median_ped_qt95_pix_charge)
    number_of_pedestals = np.array(number_of_pedestals)

    # Now compute the median for all processed subruns, which is more robust
    # against e.g. subruns affected by car flashes. We exclude subruns
    # which have less than half of the median statistics per subrun.
    good_stats = number_of_pedestals > 0.5 * np.median(number_of_pedestals)

    # First check if we have any valid subruns at all:
    if np.isfinite(median_ped_qt95_pix_charge[good_stats]).sum() == 0:
        qped = np.nan
        additional_nsb_rate = np.nan
        log.error(f'No valid computation was possible for run {run_number} with any of the processed subruns!')
        return qped, additional_nsb_rate, None

    qped = np.nanmedian(median_ped_qt95_pix_charge[good_stats])
    # Now we also remove outliers (subruns) if any:
    qped_dev = median_abs_deviation(median_ped_qt95_pix_charge[good_stats])
    not_outlier = (np.abs(median_ped_qt95_pix_charge - qped) /
                   qped_dev) < mad_max

    if (~good_stats).sum() > 0:
        log.info(f'\nNumber of subruns with low statistics: '
                 f'{(~good_stats).sum()} - removed from pedestal median '
                 f'calculation')
    if (~not_outlier).sum() > 0:
        log.info(f'\nRemoved {(~not_outlier).sum()} outlier subruns '
                 f'(out of {not_outlier.size}) from pedestal median '
                 f'calculation')

    # If less than half of the processed files are valid for the final calculation,
    # we declare it unsuccessful (data probably have problems):
    if (good_stats & not_outlier).sum() < 0.5 * len(dl1_files):
        qped = np.nan
        additional_nsb_rate = np.nan
        log.error(f'Calculation failed for more than half of the processed subruns of run {run_number}!')
        return qped, additional_nsb_rate, None

    # recompute with good-statistics and well-behaving runs:
    qped = np.nanmedian(median_ped_qt95_pix_charge[good_stats & not_outlier])
    log.info(f'\nNumber of subruns used in calculations: '
             f'{(good_stats & not_outlier).sum()}')

    picture_threshold = pic_th(qped)
    boundary_threshold = picture_threshold / 2

    # We now create a .json files with recommended image cleaning
    # settings for lstchain_dl1ab.
    newconfig = get_standard_config()['tailcuts_clean_with_pedestal_threshold']
    # casts below are needed, json does not like numpy's int64:
    newconfig['picture_thresh'] = int(picture_threshold)
    newconfig['boundary_thresh'] = int(boundary_threshold)

    additional_nsb_rate = get_nsb(qped)

    return qped, additional_nsb_rate, newconfig
243+
244+
245+
def pic_th(qt95_ped):
    """
    Look up the recommended picture threshold for a given NSB level.

    Parameters
    ----------
    qt95_ped : float
        95% quantile of pixel charge in pedestal events (for the standard
        LocalPeakWindowSearch algo & settings in lstchain)

    Returns
    -------
    int
        recommended picture threshold for image cleaning (from a table)

    """
    # Upper bin edges of the pedestal-charge quantile, and the picture
    # threshold assigned to each bin. There is one more threshold than
    # edges: the last one covers everything at/above the highest edge.
    bin_edges = np.array([5.85, 7.25, 8.75, 10.3, 11.67])
    thresholds = np.array([8, 10, 12, 14, 16, 18])

    if qt95_ped >= bin_edges[-1]:
        return thresholds[-1]
    # Index of the first edge strictly above qt95_ped selects the bin:
    bin_index = np.where(bin_edges > qt95_ped)[0][0]
    return thresholds[bin_index]
265+
266+
267+
def get_nsb(qt95_ped):
    """
    Estimate the additional NSB rate needed to match the data's noise.

    Parameters
    ----------
    qt95_ped: `float`
        95% quantile of pixel charge in pedestal events

    Returns
    -------
    float
        (from a parabolic parametrization) the recommended additional NSB
        (in p.e. / ns) that has to be added to the "dark MC" waveforms in
        order to match the data for which the 95% quantile of pedestal pixel
        charge is qt95_ped

    """
    # Empirical parabola coefficients: x-offset, linear and quadratic terms.
    # There is no constant term, so the curve is zero at the offset.
    x_offset = 3.95147396
    linear_coeff = 0.12642504
    quadratic_coeff = 0.01718627

    delta = qt95_ped - x_offset
    return linear_coeff * delta + quadratic_coeff * delta ** 2

lstchain/scripts/lstchain_dvr_pixselector.py

Lines changed: 1 addition & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,7 @@
2727
The program creates in the output directory (which is the current by default)
2828
a file DVR_settings_LST-1.Run12469.h5 which contains a table "run_summary"
2929
which includes the DVR algorithm parameters determined for each processed
30-
subrun. It also creates a file with recommended cleaning settings for running
31-
DL1ab, based on the NSB level measured in the processed runs. We use
32-
as picture threshold the closest even number not smaller than the charge
33-
"min_charge_for_certain_selection" (averaged for all subruns and rounded)
34-
which is the value from which a pixel will be certainly kept by the Data Volume
35-
Reduction.
30+
subrun.
3631
3732
Then we run again the script over all subruns, and using the option to create
3833
the pixel maks (this can also be done subrun by subrun, to parallelize the
@@ -68,7 +63,6 @@
6863
from lstchain.paths import parse_dl1_filename
6964
from lstchain.io.io import dl1_params_lstcam_key, dl1_images_lstcam_key
7065
from lstchain.io.io import dl1_params_tel_mon_cal_key
71-
from lstchain.io.config import get_standard_config, dump_config
7266

7367
from ctapipe.io import read_table
7468
from ctapipe.core import Container, Field
@@ -506,34 +500,6 @@ def main():
506500
time.sleep(300)
507501
continue
508502

509-
510-
# We now create also .json files with recommended image cleaning
511-
# settings for lstchain_dl1ab. We determine the picture threshold
512-
# from the values of min_charge_for_certain_selection:
513-
514-
if not write_pixel_masks:
515-
log.info('Output files:')
516-
for file in list_of_output_files:
517-
log.info(file)
518-
dvrtable = read_table(file, "/run_summary")
519-
picture_threshold = get_typical_dvr_min_charge(dvrtable)
520-
521-
# we round it to an even number of p.e., just to limit the amount
522-
# of different settings in the analysis (i.e. we change
523-
# picture_threshold in steps of 2 p.e.):
524-
if picture_threshold % 2 != 0:
525-
picture_threshold += 1
526-
boundary_threshold = picture_threshold / 2
527-
newconfig = get_standard_config()['tailcuts_clean_with_pedestal_threshold']
528-
newconfig['picture_thresh'] = picture_threshold
529-
newconfig['boundary_thresh'] = boundary_threshold
530-
run = int(file.name[file.name.find('Run')+3:-3])
531-
json_filename = Path(output_dir, f'dl1ab_Run{run:05d}.json')
532-
dump_config({'tailcuts_clean_with_pedestal_threshold': newconfig,
533-
'dynamic_cleaning': get_standard_config()['dynamic_cleaning']},
534-
json_filename, overwrite=True)
535-
log.info(json_filename)
536-
537503
log.info('lstchain_dvr_pixselector finished successfully!')
538504

539505

0 commit comments

Comments
 (0)