7
7
2013-2014 Claudio Satriano <satriano@ipgp.fr>,
8
8
Agnes Chounet <chounet@ipgp.fr>
9
9
10
- 2015-2025 Claudio Satriano <satriano@ipgp.fr>
10
+ 2015-2025 Claudio Satriano <satriano@ipgp.fr>.
11
+ Kris Vanneste <kris.vanneste@oma.be>
11
12
:license:
12
13
CeCILL Free Software License Agreement v2.1
13
14
(http://www.cecill.info/licences.en.html)
14
15
"""
15
16
import sys
16
17
import os
18
+ from glob import glob
17
19
from collections import defaultdict
18
20
from argparse import ArgumentParser
19
21
import numpy as np
@@ -41,26 +43,43 @@ def parse_args():
41
43
parser .add_argument (
42
44
'-r' , '--runid' , dest = 'runid' , action = 'store' ,
43
45
default = None , metavar = 'RUNID' ,
44
- help = 'Select a specific run when multiple runs exist for the same '
46
+ help = 'select a specific run when multiple runs exist for the same '
45
47
'event. If omitted, residuals will be computed using all '
46
48
'available runs. You can provide a specific RUNID or use '
47
49
'"latest" or "earliest" to select the most recent or the '
48
- 'earliest run, based on alphabetical or numerical order.'
50
+ 'earliest run, based on alphabetical or numerical order. '
51
+ 'If provided, the runid will be also used to create a '
52
+ 'subdirectory within the output directory, unless the '
53
+ 'provided output directory name (see option "--outdir") is '
54
+ 'already the runid.'
49
55
)
50
56
parser .add_argument (
51
57
'-o' , '--outdir' , dest = 'outdir' , action = 'store' ,
52
58
default = 'sspec_residuals' ,
53
59
help = 'output directory (default="sspec_residuals")'
54
60
)
61
+ parser .add_argument (
62
+ '-e' , '--exclude' , dest = 'exclude_subdirs' , action = "append" ,
63
+ default = None ,
64
+ help = 'subfolder to exclude (repeat for multiple subfolders)'
65
+ )
55
66
parser .add_argument (
56
67
'-p' , '--plot' , dest = 'plot' , action = 'store_true' ,
57
68
default = False , help = 'save residuals plots to file'
58
69
)
70
+ parser .add_argument (
71
+ '-y' , '--yrange' , dest = 'yrange' , nargs = 2 , type = float ,
72
+ action = 'store' , default = [- 2.5 , 2.5 ],
73
+ help = 'min/max range for Y axis (default: -2.5, 2.5)'
74
+ )
59
75
parser .add_argument (
60
76
'residual_files_dir' ,
61
- help = 'directory containing source_spec residual files '
62
- 'in HDF5 format. Residual files can be in subdirectories '
63
- '(e.g., a subdirectory for each event).' )
77
+ help = 'path to a directory containing SourceSpec residual files in '
78
+ 'HDF5 format. Files may be organized in subdirectories '
79
+ '(e.g., by event). You may use wildcards to match specific '
80
+ 'subdirectories — remember to quote them to avoid shell '
81
+ 'expansion. Example: "sspec_out/*/run1/"'
82
+ )
64
83
return parser .parse_args ()
65
84
66
85
@@ -83,8 +102,20 @@ def _filter_by_runid(residual_dict, runid='latest'):
83
102
"""
84
103
if runid is None :
85
104
return residual_dict
105
+ # Residuals generated by older versions of SourceSpec may not have a runid
106
+ # attribute. In this case, we cannot filter by runid.
107
+ if not all (
108
+ hasattr (spec .stats , 'runid' )
109
+ for spec_st in residual_dict .values ()
110
+ for spec in spec_st
111
+ ):
112
+ print (
113
+ 'Warning: not all residuals have a "runid" attribute. '
114
+ 'Filtering by runid will not be applied.'
115
+ )
116
+ return residual_dict
86
117
if not isinstance (runid , str ):
87
- raise ValueError (" runid must be a string." )
118
+ raise ValueError (' runid must be a string.' )
88
119
filt_residual_dict = defaultdict (SpectrumStream )
89
120
if runid not in ['latest' , 'earliest' ]:
90
121
for spec_st in residual_dict .values ():
@@ -118,39 +149,81 @@ def _filter_by_runid(residual_dict, runid='latest'):
118
149
return filt_residual_dict
119
150
120
151
121
- def read_residuals (resfiles_dir , runid = None ):
152
+ def _read_residuals_from_dir (
153
+ residual_dict , resfiles_dir , exclude_subdirs = None ):
122
154
"""
123
- Read residuals from HDF5 files in resfiles_dir.
155
+ Read residuals from HDF5 files from a specified directory and all its
156
+ subdirectories.
124
157
125
158
Parameters
126
159
----------
160
+ residual_dict : dict
161
+ Dictionary to store residuals for each station.
127
162
resfiles_dir : str
128
163
Directory containing source_spec residual files in HDF5 format.
129
164
Residual files can be in subdirectories (e.g., a subdirectory for
130
165
each event).
131
-
132
- Returns
133
- -------
134
- residual_dict : dict
135
- Dictionary containing residuals for each station.
166
+ exclude_subdirs: list of str
167
+ List of subdirectories (potentially containing residual files)
168
+ that should not be parsed
169
+ (default: None, which means all subdirectories are parsed).
136
170
"""
171
+ if exclude_subdirs is None :
172
+ exclude_subdirs = []
137
173
if not os .path .exists (resfiles_dir ):
138
- sys . exit (f'Error: directory " { resfiles_dir } " does not exist. ' )
174
+ print (f'Skipping directory: { resfiles_dir } : does not exist' )
139
175
resfiles = []
140
176
for root , _dirs , files in os .walk (resfiles_dir ):
177
+ _dirs [:] = [d for d in _dirs if d not in exclude_subdirs ]
141
178
resfiles .extend (
142
179
os .path .join (root , file )
143
180
for file in files
144
181
if file .endswith ('residuals.hdf5' )
145
182
)
146
183
if not resfiles :
147
- sys .exit (f'No residual file found in directory: { resfiles_dir } ' )
148
- residual_dict = defaultdict (SpectrumStream )
184
+ print (f'No residual file found in directory: { resfiles_dir } ' )
149
185
for resfile in sorted (resfiles ):
150
186
print (f'Found residual file: { resfile } ' )
151
187
residual_st = read_spectra (resfile )
152
188
for spec in residual_st :
153
189
residual_dict [spec .id ].append (spec )
190
+
191
+
192
+ def read_residuals (resfiles_dir , runid = None , exclude_subdirs = None ):
193
+ """
194
+ Read residuals from HDF5 files from a specified directory and all its
195
+ subdirectories. The directory can be specified using wildcards.
196
+
197
+ Parameters
198
+ ----------
199
+ resfiles_dir : str
200
+ Directory containing source_spec residual files in HDF5 format.
201
+ Residual files can be in subdirectories (e.g., a subdirectory for
202
+ each event). The subdirectory names can be specified using wildcards.
203
+ runid : str
204
+ Only select residuals with the specified runid. If 'latest' or
205
+ 'earliest', select the most recent or the earliest runid,
206
+ respectively. If None, select all residuals.
207
+ (default: None)
208
+ exclude_subdirs: list of str
209
+ List of subdirectories (potentially containing residual files)
210
+ that should not be parsed
211
+ (default: None, which means all subdirectories are parsed).
212
+
213
+ Returns
214
+ -------
215
+ residual_dict : dict
216
+ Dictionary containing residuals for each station.
217
+ """
218
+ if exclude_subdirs is None :
219
+ exclude_subdirs = []
220
+ residual_dict = defaultdict (SpectrumStream )
221
+ for _resfiles_dir in glob (resfiles_dir ):
222
+ _read_residuals_from_dir (
223
+ residual_dict ,
224
+ _resfiles_dir ,
225
+ exclude_subdirs = exclude_subdirs
226
+ )
154
227
return _filter_by_runid (residual_dict , runid )
155
228
156
229
@@ -183,7 +256,9 @@ def compute_mean_residuals(residual_dict, min_spectra=20):
183
256
delta_f_min = min (spec .stats .delta for spec in res )
184
257
freq_min = min (freqs_min )
185
258
freq_max = max (freqs_max )
186
- freq_array = np .arange (freq_min , freq_max + delta_f_min , delta_f_min )
259
+ # stop two steps before the maximum frequency,
260
+ # to avoid border effects
261
+ freq_array = np .arange (freq_min , freq_max - delta_f_min , delta_f_min )
187
262
188
263
spec_mean = None
189
264
for spec in res :
@@ -193,10 +268,12 @@ def compute_mean_residuals(residual_dict, min_spectra=20):
193
268
# spec_slice.data must exist, so we create it as zeros
194
269
spec_interp .data = np .zeros_like (freq_array )
195
270
# interpolate data_mag to the new frequency array
196
- f = interp1d (spec .freq , spec .data_mag , fill_value = 'extrapolate' )
271
+ f = interp1d (spec .freq , spec .data_mag , bounds_error = False )
197
272
spec_interp .data_mag = f (freq_array )
198
- # norm is 1 where data_mag is not zero, 0 otherwise
199
- norm = (spec_interp .data_mag != 0 ).astype (float )
273
+ # norm is 1 where interpolated data_mag is not nan, 0 otherwise
274
+ norm = (~ np .isnan (spec_interp .data_mag )).astype (float )
275
+ # Replace nan data_mag values with zeros for summation
276
+ spec_interp .data_mag [np .isnan (spec_interp .data_mag )] = 0
200
277
if spec_mean is None :
201
278
spec_mean = spec_interp
202
279
norm_mean = norm
@@ -213,7 +290,8 @@ def compute_mean_residuals(residual_dict, min_spectra=20):
213
290
return residual_mean
214
291
215
292
216
- def plot_residuals (residual_dict , residual_mean , outdir ):
293
+ def plot_residuals (residual_dict , residual_mean , outdir ,
294
+ ymin = None , ymax = None , runid = None ):
217
295
"""
218
296
Plot residuals.
219
297
@@ -225,6 +303,15 @@ def plot_residuals(residual_dict, residual_mean, outdir):
225
303
SpectrumStream containing mean residuals for each station.
226
304
outdir : str
227
305
Output directory.
306
+ ymin : float
307
+ Lower limit of Y axis
308
+ (default: None)
309
+ ymax : float
310
+ Upper limit of Y axis
311
+ (default: None)
312
+ runid : str
313
+ Run ID, to be shown in title
314
+ (default: None)
228
315
"""
229
316
for spec_mean in residual_mean :
230
317
stat_id = spec_mean .id
@@ -234,9 +321,16 @@ def plot_residuals(residual_dict, residual_mean, outdir):
234
321
for spec in res :
235
322
ax .semilogx (spec .freq , spec .data_mag , 'b-' , alpha = 0.5 )
236
323
ax .semilogx (spec_mean .freq , spec_mean .data_mag , 'r-' , linewidth = 2 )
324
+ ax .set_ylim ([ymin , ymax ])
325
+ ax .grid (
326
+ True , which = 'both' , linestyle = 'solid' , color = '#DDDDDD' , zorder = 0 )
237
327
ax .set_xlabel ('frequency (Hz)' )
238
328
ax .set_ylabel ('residual amplitude (obs - synth) in magnitude units' )
239
- ax .set_title (f'residuals: { stat_id } – { len (res )} records' )
329
+ if runid :
330
+ title = f'{ stat_id } – runid: { runid } - { len (res )} records'
331
+ else :
332
+ title = f'{ stat_id } – { len (res )} records'
333
+ ax .set_title (title )
240
334
fig .savefig (figurefile , bbox_inches = 'tight' )
241
335
plt .close (fig )
242
336
print (f'Residual plot saved to: { figurefile } ' )
@@ -246,27 +340,47 @@ def main():
246
340
"""Main function."""
247
341
args = parse_args ()
248
342
249
- residual_dict = read_residuals (args .residual_files_dir , args .runid )
343
+ runid = args .runid
344
+ exclude_subdirs = args .exclude_subdirs
345
+ residual_dict = read_residuals (
346
+ args .residual_files_dir , runid ,
347
+ exclude_subdirs = exclude_subdirs
348
+ )
349
+ if not residual_dict :
350
+ sys .exit (
351
+ 'No residuals found. Please check the provided input directory.'
352
+ )
250
353
251
354
outdir = args .outdir
355
+ # If runid is not None, create a subdirectory with the runid, unless
356
+ # the outdir name is already the runid
357
+ if runid and os .path .split (outdir )[- 1 ] != runid :
358
+ outdir = os .path .join (outdir , runid )
252
359
if not os .path .exists (outdir ):
253
360
os .makedirs (outdir )
254
361
255
362
min_spectra = int (args .min_spectra )
256
363
residual_mean = compute_mean_residuals (residual_dict , min_spectra )
257
364
258
365
if args .plot :
259
- plot_residuals (residual_dict , residual_mean , outdir )
366
+ ymin , ymax = args .yrange
367
+ plot_residuals (
368
+ residual_dict , residual_mean , outdir ,
369
+ ymin = ymin , ymax = ymax , runid = runid
370
+ )
260
371
261
372
# write the mean residuals (the stations corrections)
262
373
res_mean_file = 'residual_mean.hdf5'
263
374
res_mean_file = os .path .join (outdir , res_mean_file )
264
375
if not residual_mean :
265
- print (
376
+ sys . exit (
266
377
'Mean residuals not computed: not enough spectra.\n '
267
378
'Minimum number of spectra ("--min_spectra") currently set to: '
268
379
f'{ min_spectra } '
269
380
)
270
- sys .exit (0 )
271
381
residual_mean .write (res_mean_file , format = 'HDF5' )
272
382
print (f'Mean station residuals saved to: { res_mean_file } ' )
383
+
384
+
385
+ if __name__ == '__main__' :
386
+ main ()
0 commit comments