|
| 1 | +import glob |
| 2 | +import logging |
1 | 3 | import numpy as np
|
| 4 | +from pathlib import Path |
| 5 | +from lstchain.paths import run_to_dl1_filename |
| 6 | +from lstchain.io.io import dl1_params_lstcam_key, dl1_images_lstcam_key |
| 7 | +from lstchain.io.io import dl1_params_tel_mon_cal_key |
| 8 | +from lstchain.io.config import get_standard_config |
| 9 | +from ctapipe.containers import EventType |
| 10 | +from scipy.stats import median_abs_deviation |
2 | 11 |
|
3 |
| -__all__ = ['apply_dynamic_cleaning'] |
| 12 | +from ctapipe.io import read_table |
# Public names exported by this module:
__all__ = ['apply_dynamic_cleaning',
           'find_tailcuts',
           'pic_th']

# Module-level logger (named after the module, per logging convention):
log = logging.getLogger(__name__)
4 | 18 |
|
5 | 19 |
|
6 | 20 | def apply_dynamic_cleaning(image, signal_pixels, threshold, fraction):
|
@@ -36,3 +50,236 @@ def apply_dynamic_cleaning(image, signal_pixels, threshold, fraction):
|
36 | 50 | mask_dynamic_cleaning = (image >= dynamic_threshold) & signal_pixels
|
37 | 51 |
|
38 | 52 | return mask_dynamic_cleaning
|
| 53 | + |
| 54 | + |
def find_tailcuts(input_dir, run_number):
    """
    Determine tailcuts which are adequate for the bulk of the pixels in a
    given run, and the NSB adjustment needed in the "dark-sky" MC to match
    the data.

    The function uses the median (for the whole camera, excluding outliers)
    of the 95% quantile of the pixel charge for pedestal events to deduce the
    NSB level. It is good to use a large quantile of the pixel charge
    distribution (vs. e.g. using the median) because what matters for having
    a realistic noise simulation is the tail on the right-side, i.e. for
    large pulses.
    For reasons of stability & simplicity of analysis, we cannot decide the
    cleaning levels (or the NSB tuning) on a subrun-by-subrun basis. We select
    values which are more or less valid for the whole run.

    The function will process a subset of the subruns (~10, hardcoded) of
    the run, distributed uniformly through it.

    Parameters
    ----------
    input_dir : `Path`
        directory where the DL1 files (subrun-wise, i.e., including
        DL1a) are stored

    run_number : int
        run number to be processed

    Returns
    -------
    qped : float
        median (over camera and processed subruns) of the 95% quantile of
        the pedestal pixel charge; np.nan if the calculation failed
    additional_nsb_rate : float
        p.e./ns rate of NSB to be added to "dark MC" to match the noise in
        the data; np.nan if the calculation failed
    newconfig : dict or None
        cleaning configuration for running the DL1ab stage; None if the
        calculation failed
    """

    # subrun-wise dl1 file names:
    dl1_filenames = Path(input_dir,
                         run_to_dl1_filename(1, run_number, 0).replace(
                             '.0000.h5', '.????.h5'))
    all_dl1_files = glob.glob(str(dl1_filenames))
    all_dl1_files.sort()

    # Bail out early if there is nothing to process (avoids taking the
    # median of an empty array further down):
    if len(all_dl1_files) == 0:
        log.error(f'No DL1 files found for run {run_number}!')
        return np.nan, np.nan, None

    # Number of median absolute deviations (mad) away from the median that a
    # value has to be to be considered an outlier:
    mad_max = 5  # would exclude 8e-4 of the pdf for a gaussian

    # Minimum number of interleaved pedestals in subrun to proceed with
    # calculation:
    min_number_of_ped_events = 100

    # Minimum number of valid pixels to consider the calculation of NSB level
    # acceptable:
    min_number_of_valid_pixels = 1000

    # Approx. maximum number of subruns (uniformly distributed through the
    # run) to be processed:
    max_number_of_processed_subruns = 10
    # Keep only ~max_number_of_processed_subruns subruns, distributed
    # along the run:
    dl1_files = all_dl1_files[::int(1 + len(all_dl1_files) /
                                    max_number_of_processed_subruns)]

    number_of_pedestals = []
    median_ped_qt95_pix_charge = []

    for dl1_file in dl1_files:
        log.info('\nInput file: %s', dl1_file)

        data_parameters = read_table(dl1_file, dl1_params_lstcam_key)
        event_type_data = data_parameters['event_type'].data
        pedestal_mask = event_type_data == EventType.SKY_PEDESTAL.value

        num_pedestals = pedestal_mask.sum()
        if num_pedestals < min_number_of_ped_events:
            log.warning(f' Too few interleaved pedestals ('
                        f'{num_pedestals}) - skipped subrun!')
            continue

        number_of_pedestals.append(num_pedestals)
        data_images = read_table(dl1_file, dl1_images_lstcam_key)
        data_calib = read_table(dl1_file, dl1_params_tel_mon_cal_key)
        # data_calib['unusable_pixels'] , indices: (Gain  Calib_id  Pixel)

        # Get the "unusable" flags from the pedcal file:
        unusable_hg = data_calib['unusable_pixels'][0][0]
        unusable_lg = data_calib['unusable_pixels'][0][1]

        unreliable_pixels = unusable_hg | unusable_lg
        if unreliable_pixels.sum() > 0:
            log.info(f' Removed {unreliable_pixels.sum()/unreliable_pixels.size:.2%} of pixels '
                     f'due to unreliable calibration!')

        reliable_pixels = ~unreliable_pixels

        charges_data = data_images['image']
        charges_pedestals = charges_data[pedestal_mask]
        # pixel-wise 95% quantile of ped pix charge through the subrun
        # (#pixels):
        qt95_pix_charge = np.nanquantile(charges_pedestals, 0.95, axis=0)
        # ignore pixels with 0 signal:
        qt95_pix_charge = np.where(qt95_pix_charge > 0, qt95_pix_charge,
                                   np.nan)
        # median across the camera of the pixel-wise 95% quantiles:
        median_qt95_pix_charge = np.nanmedian(qt95_pix_charge)
        # median abs deviation of pixel qt95 values:
        qt95_pix_charge_dev = median_abs_deviation(qt95_pix_charge,
                                                   nan_policy='omit')

        # Just a cut to remove outliers (pixels):
        outliers = (np.abs(qt95_pix_charge - median_qt95_pix_charge) /
                    qt95_pix_charge_dev) > mad_max

        if outliers.sum() > 0:
            removed_fraction = outliers.sum() / outliers.size
            log.info(f' Removed {removed_fraction:.2%} of pixels (outliers) '
                     f'from pedestal median calculation')

        # Now compute the median (for the whole camera) of the qt95's (for
        # each pixel) of the charges in pedestal events. Use only reliable
        # pixels for this, and exclude outliers:
        n_valid_pixels = np.isfinite(qt95_pix_charge[reliable_pixels]).sum()
        if n_valid_pixels < min_number_of_valid_pixels:
            # NOTE: use the module logger (was logging.warning), consistent
            # with the rest of the function:
            log.warning(f' Too few valid pixels ({n_valid_pixels}) for '
                        f'calculation!')
            median_ped_qt95_pix_charge.append(np.nan)
        else:
            median_ped_qt95_pix_charge.append(
                np.nanmedian(qt95_pix_charge[reliable_pixels & ~outliers]))

    # convert to ndarray:
    median_ped_qt95_pix_charge = np.array(median_ped_qt95_pix_charge)
    number_of_pedestals = np.array(number_of_pedestals)

    # Now compute the median for all processed subruns, which is more robust
    # against e.g. subruns affected by car flashes. We exclude subruns
    # which have less than half of the median statistics per subrun.
    good_stats = number_of_pedestals > 0.5 * np.median(number_of_pedestals)

    # First check if we have any valid subruns at all:
    if np.isfinite(median_ped_qt95_pix_charge[good_stats]).sum() == 0:
        log.error(f'No valid computation was possible for run {run_number} '
                  f'with any of the processed subruns!')
        return np.nan, np.nan, None

    qped = np.nanmedian(median_ped_qt95_pix_charge[good_stats])
    # Now we also remove outliers (subruns) if any:
    qped_dev = median_abs_deviation(median_ped_qt95_pix_charge[good_stats])
    not_outlier = (np.abs(median_ped_qt95_pix_charge - qped) /
                   qped_dev) < mad_max

    if (~good_stats).sum() > 0:
        log.info(f'\nNumber of subruns with low statistics: '
                 f'{(~good_stats).sum()} - removed from pedestal median '
                 f'calculation')
    if (~not_outlier).sum() > 0:
        log.info(f'\nRemoved {(~not_outlier).sum()} outlier subruns '
                 f'(out of {not_outlier.size}) from pedestal median '
                 f'calculation')

    # If less than half of the processed files are valid for the final
    # calculation, we declare it unsuccessful (data probably have problems):
    if (good_stats & not_outlier).sum() < 0.5 * len(dl1_files):
        log.error(f'Calculation failed for more than half of the processed '
                  f'subruns of run {run_number}!')
        return np.nan, np.nan, None

    # recompute with good-statistics and well-behaving runs:
    qped = np.nanmedian(median_ped_qt95_pix_charge[good_stats & not_outlier])
    log.info(f'\nNumber of subruns used in calculations: '
             f'{(good_stats & not_outlier).sum()}')

    picture_threshold = pic_th(qped)
    boundary_threshold = picture_threshold / 2

    # Recommended image cleaning settings for lstchain_dl1ab:
    newconfig = get_standard_config()['tailcuts_clean_with_pedestal_threshold']
    # casts below are needed, json does not like numpy's int64:
    newconfig['picture_thresh'] = int(picture_threshold)
    newconfig['boundary_thresh'] = int(boundary_threshold)

    additional_nsb_rate = get_nsb(qped)

    return qped, additional_nsb_rate, newconfig
| 243 | + |
| 244 | + |
def pic_th(qt95_ped):
    """
    Look up the recommended picture threshold for a given pedestal noise
    level, from a hard-coded table.

    Parameters
    ----------
    qt95_ped : float
        95% quantile of pixel charge in pedestal events (for the standard
        LocalPeakWindowSearch algo & settings in lstchain)

    Returns
    -------
    int
        recommended picture threshold for image cleaning (from a table)

    """
    # Upper edges of the noise bins and the threshold assigned to each bin;
    # anything at or above the last edge gets the largest threshold.
    bin_edges = [5.85, 7.25, 8.75, 10.3, 11.67]
    thresholds = [8, 10, 12, 14, 16, 18]

    for upper_edge, threshold in zip(bin_edges, thresholds):
        if qt95_ped < upper_edge:
            return threshold
    return thresholds[-1]
| 265 | + |
| 266 | + |
def get_nsb(qt95_ped):
    """
    Parabolic parametrization of the additional NSB rate as a function of
    the pedestal noise level.

    Parameters
    ----------
    qt95_ped: `float`
        95% quantile of pixel charge in pedestal events

    Returns
    -------
    float
        (from a parabolic parametrization) the recommended additional NSB
        (in p.e. / ns) that has to be added to the "dark MC" waveforms in
        order to match the data for which the 95% quantile of pedestal pixel
        charge is qt95_ped

    """
    # Reference charge (dark-sky level), linear and quadratic coefficients
    # of the fitted parabola:
    ref_charge = 3.95147396
    linear_coeff = 0.12642504
    quadratic_coeff = 0.01718627

    delta = qt95_ped - ref_charge
    return linear_coeff * delta + quadratic_coeff * delta ** 2
0 commit comments