Skip to content

Commit 00c8160

Browse files
Bugfix and improvements in source_residuals.py (#68)
* Removed extrapolation of individual residuals beyond their frequency range, which was leading to incorrect mean residuals at high frequencies. * Added __main__ block in source_residuals.py. * Added '--exclude' command-line argument and added 'exclude_subdirs' argument to read_residuals function. * Added '--yrange' command-line argument and added 'ymin' and 'ymax' arguments to plot_residuals function. * Save plots in subfolder if runid is specified. * Added bugfix and new options in source_residuals.py to changelog. * Added gridlines to plot in plot_residuals function. * source_residuals: do not filter by runid if not all residuals have a runid attribute * source_residuals: code linting and updated copyright * Explain that the runid will be used to create a subdirectory within the output directory * Default exclude subdirs to None * source_residuals: accept wildcards in directory name * Write a line differently for better readability * Remove two points from the frequency array to avoid border effects * Added 'runid' argument to plot_residuals function, to be displayed in the title. * PR reference in CHANGELOG + style improvements --------- Co-authored-by: Claudio Satriano <satriano@gmail.com>
1 parent e277973 commit 00c8160

File tree

2 files changed

+151
-29
lines changed

2 files changed

+151
-29
lines changed

CHANGELOG.md

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,12 @@ Copyright (c) 2011-2025 Claudio Satriano <satriano@ipgp.fr>
2727

2828
### Post-Inversion
2929

30-
- New option for `source_residuals`: `--runid` to select a specific run when
31-
multiple runs exist for the same event
30+
- New options for `source_residuals`:
31+
- `--runid` to select a specific run when multiple runs exist for the same
32+
event
33+
- `--exclude` to exclude 1 or more subfolders when parsing residuals files
34+
(see [#68])
35+
- `--yrange` to specify a fixed range for the Y axis in the plots (see [#68])
3236

3337
### Plotting
3438

@@ -83,6 +87,9 @@ Copyright (c) 2011-2025 Claudio Satriano <satriano@ipgp.fr>
8387
`sensitivity` is a numerical value, any file format is accepted
8488
- Improved estimation of `fc_0` (initial corner frequency) when inverse
8589
frequency weighting is used (see [#67])
90+
- Fix in `source_residuals`: removed extrapolation of individual station
91+
residuals beyond their valid frequency range, which was leading to incorrect
92+
mean residuals at high frequencies (see [#68])
8693

8794
## v1.8 - 2024-04-07
8895

@@ -818,3 +825,4 @@ Initial Python port.
818825
[#48]: https://github.com/SeismicSource/sourcespec/issues/48
819826
[#49]: https://github.com/SeismicSource/sourcespec/issues/49
820827
[#67]: https://github.com/SeismicSource/sourcespec/issues/67
828+
[#68]: https://github.com/SeismicSource/sourcespec/issues/68

sourcespec/source_residuals.py

Lines changed: 141 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,15 @@
77
2013-2014 Claudio Satriano <satriano@ipgp.fr>,
88
Agnes Chounet <chounet@ipgp.fr>
99
10-
2015-2025 Claudio Satriano <satriano@ipgp.fr>
10+
2015-2025 Claudio Satriano <satriano@ipgp.fr>.
11+
Kris Vanneste <kris.vanneste@oma.be>
1112
:license:
1213
CeCILL Free Software License Agreement v2.1
1314
(http://www.cecill.info/licences.en.html)
1415
"""
1516
import sys
1617
import os
18+
from glob import glob
1719
from collections import defaultdict
1820
from argparse import ArgumentParser
1921
import numpy as np
@@ -41,26 +43,43 @@ def parse_args():
4143
parser.add_argument(
4244
'-r', '--runid', dest='runid', action='store',
4345
default=None, metavar='RUNID',
44-
help='Select a specific run when multiple runs exist for the same '
46+
help='select a specific run when multiple runs exist for the same '
4547
'event. If omitted, residuals will be computed using all '
4648
'available runs. You can provide a specific RUNID or use '
4749
'"latest" or "earliest" to select the most recent or the '
48-
'earliest run, based on alphabetical or numerical order.'
50+
'earliest run, based on alphabetical or numerical order. '
51+
'If provided, the runid will be also used to create a '
52+
'subdirectory within the output directory, unless the '
53+
'provided output directory name (see option "--outdir") is '
54+
'already the runid.'
4955
)
5056
parser.add_argument(
5157
'-o', '--outdir', dest='outdir', action='store',
5258
default='sspec_residuals',
5359
help='output directory (default="sspec_residuals")'
5460
)
61+
parser.add_argument(
62+
'-e', '--exclude', dest='exclude_subdirs', action="append",
63+
default=None,
64+
help='subfolder to exclude (repeat for multiple subfolders)'
65+
)
5566
parser.add_argument(
5667
'-p', '--plot', dest='plot', action='store_true',
5768
default=False, help='save residuals plots to file'
5869
)
70+
parser.add_argument(
71+
'-y', '--yrange', dest='yrange', nargs=2, type=float,
72+
action='store', default=[-2.5, 2.5],
73+
help='min/max range for Y axis (default: -2.5, 2.5)'
74+
)
5975
parser.add_argument(
6076
'residual_files_dir',
61-
help='directory containing source_spec residual files '
62-
'in HDF5 format. Residual files can be in subdirectories '
63-
'(e.g., a subdirectory for each event).')
77+
help='path to a directory containing SourceSpec residual files in '
78+
'HDF5 format. Files may be organized in subdirectories '
79+
'(e.g., by event). You may use wildcards to match specific '
80+
'subdirectories — remember to quote them to avoid shell '
81+
'expansion. Example: "sspec_out/*/run1/"'
82+
)
6483
return parser.parse_args()
6584

6685

@@ -83,8 +102,20 @@ def _filter_by_runid(residual_dict, runid='latest'):
83102
"""
84103
if runid is None:
85104
return residual_dict
105+
# Residuals generated by older versions of SourceSpec may not have a runid
106+
# attribute. In this case, we cannot filter by runid.
107+
if not all(
108+
hasattr(spec.stats, 'runid')
109+
for spec_st in residual_dict.values()
110+
for spec in spec_st
111+
):
112+
print(
113+
'Warning: not all residuals have a "runid" attribute. '
114+
'Filtering by runid will not be applied.'
115+
)
116+
return residual_dict
86117
if not isinstance(runid, str):
87-
raise ValueError("runid must be a string.")
118+
raise ValueError('runid must be a string.')
88119
filt_residual_dict = defaultdict(SpectrumStream)
89120
if runid not in ['latest', 'earliest']:
90121
for spec_st in residual_dict.values():
@@ -118,39 +149,81 @@ def _filter_by_runid(residual_dict, runid='latest'):
118149
return filt_residual_dict
119150

120151

121-
def read_residuals(resfiles_dir, runid=None):
152+
def _read_residuals_from_dir(
153+
residual_dict, resfiles_dir, exclude_subdirs=None):
122154
"""
123-
Read residuals from HDF5 files in resfiles_dir.
155+
Read residuals from HDF5 files from a specified directory and all its
156+
subdirectories.
124157
125158
Parameters
126159
----------
160+
residual_dict : dict
161+
Dictionary to store residuals for each station.
127162
resfiles_dir : str
128163
Directory containing source_spec residual files in HDF5 format.
129164
Residual files can be in subdirectories (e.g., a subdirectory for
130165
each event).
131-
132-
Returns
133-
-------
134-
residual_dict : dict
135-
Dictionary containing residuals for each station.
166+
exclude_subdirs: list of str
167+
List of subdirectories (potentially containing residual files)
168+
that should not be parsed
169+
(default: None, which means all subdirectories are parsed).
136170
"""
171+
if exclude_subdirs is None:
172+
exclude_subdirs = []
137173
if not os.path.exists(resfiles_dir):
138-
sys.exit(f'Error: directory "{resfiles_dir}" does not exist.')
174+
print(f'Skipping directory: {resfiles_dir}: does not exist')
139175
resfiles = []
140176
for root, _dirs, files in os.walk(resfiles_dir):
177+
_dirs[:] = [d for d in _dirs if d not in exclude_subdirs]
141178
resfiles.extend(
142179
os.path.join(root, file)
143180
for file in files
144181
if file.endswith('residuals.hdf5')
145182
)
146183
if not resfiles:
147-
sys.exit(f'No residual file found in directory: {resfiles_dir}')
148-
residual_dict = defaultdict(SpectrumStream)
184+
print(f'No residual file found in directory: {resfiles_dir}')
149185
for resfile in sorted(resfiles):
150186
print(f'Found residual file: {resfile}')
151187
residual_st = read_spectra(resfile)
152188
for spec in residual_st:
153189
residual_dict[spec.id].append(spec)
190+
191+
192+
def read_residuals(resfiles_dir, runid=None, exclude_subdirs=None):
193+
"""
194+
Read residuals from HDF5 files from a specified directory and all its
195+
subdirectories. The directory can be specified using wildcards.
196+
197+
Parameters
198+
----------
199+
resfiles_dir : str
200+
Directory containing source_spec residual files in HDF5 format.
201+
Residual files can be in subdirectories (e.g., a subdirectory for
202+
each event). The subdirectory names can be specified using wildcards.
203+
runid : str
204+
Only select residuals with the specified runid. If 'latest' or
205+
'earliest', select the most recent or the earliest runid,
206+
respectively. If None, select all residuals.
207+
(default: None)
208+
exclude_subdirs: list of str
209+
List of subdirectories (potentially containing residual files)
210+
that should not be parsed
211+
(default: None, which means all subdirectories are parsed).
212+
213+
Returns
214+
-------
215+
residual_dict : dict
216+
Dictionary containing residuals for each station.
217+
"""
218+
if exclude_subdirs is None:
219+
exclude_subdirs = []
220+
residual_dict = defaultdict(SpectrumStream)
221+
for _resfiles_dir in glob(resfiles_dir):
222+
_read_residuals_from_dir(
223+
residual_dict,
224+
_resfiles_dir,
225+
exclude_subdirs=exclude_subdirs
226+
)
154227
return _filter_by_runid(residual_dict, runid)
155228

156229

@@ -183,7 +256,9 @@ def compute_mean_residuals(residual_dict, min_spectra=20):
183256
delta_f_min = min(spec.stats.delta for spec in res)
184257
freq_min = min(freqs_min)
185258
freq_max = max(freqs_max)
186-
freq_array = np.arange(freq_min, freq_max + delta_f_min, delta_f_min)
259+
# stop two steps before the maximum frequency,
260+
# to avoid border effects
261+
freq_array = np.arange(freq_min, freq_max - delta_f_min, delta_f_min)
187262

188263
spec_mean = None
189264
for spec in res:
@@ -193,10 +268,12 @@ def compute_mean_residuals(residual_dict, min_spectra=20):
193268
# spec_slice.data must exist, so we create it as zeros
194269
spec_interp.data = np.zeros_like(freq_array)
195270
# interpolate data_mag to the new frequency array
196-
f = interp1d(spec.freq, spec.data_mag, fill_value='extrapolate')
271+
f = interp1d(spec.freq, spec.data_mag, bounds_error=False)
197272
spec_interp.data_mag = f(freq_array)
198-
# norm is 1 where data_mag is not zero, 0 otherwise
199-
norm = (spec_interp.data_mag != 0).astype(float)
273+
# norm is 1 where interpolated data_mag is not nan, 0 otherwise
274+
norm = (~np.isnan(spec_interp.data_mag)).astype(float)
275+
# Replace nan data_mag values with zeros for summation
276+
spec_interp.data_mag[np.isnan(spec_interp.data_mag)] = 0
200277
if spec_mean is None:
201278
spec_mean = spec_interp
202279
norm_mean = norm
@@ -213,7 +290,8 @@ def compute_mean_residuals(residual_dict, min_spectra=20):
213290
return residual_mean
214291

215292

216-
def plot_residuals(residual_dict, residual_mean, outdir):
293+
def plot_residuals(residual_dict, residual_mean, outdir,
294+
ymin=None, ymax=None, runid=None):
217295
"""
218296
Plot residuals.
219297
@@ -225,6 +303,15 @@ def plot_residuals(residual_dict, residual_mean, outdir):
225303
SpectrumStream containing mean residuals for each station.
226304
outdir : str
227305
Output directory.
306+
ymin : float
307+
Lower limit of Y axis
308+
(default: None)
309+
ymax : float
310+
Upper limit of Y axis
311+
(default: None)
312+
runid : str
313+
Run ID, to be shown in title
314+
(default: None)
228315
"""
229316
for spec_mean in residual_mean:
230317
stat_id = spec_mean.id
@@ -234,9 +321,16 @@ def plot_residuals(residual_dict, residual_mean, outdir):
234321
for spec in res:
235322
ax.semilogx(spec.freq, spec.data_mag, 'b-', alpha=0.5)
236323
ax.semilogx(spec_mean.freq, spec_mean.data_mag, 'r-', linewidth=2)
324+
ax.set_ylim([ymin, ymax])
325+
ax.grid(
326+
True, which='both', linestyle='solid', color='#DDDDDD', zorder=0)
237327
ax.set_xlabel('frequency (Hz)')
238328
ax.set_ylabel('residual amplitude (obs - synth) in magnitude units')
239-
ax.set_title(f'residuals: {stat_id}{len(res)} records')
329+
if runid:
330+
title = f'{stat_id} – runid: {runid} - {len(res)} records'
331+
else:
332+
title = f'{stat_id}{len(res)} records'
333+
ax.set_title(title)
240334
fig.savefig(figurefile, bbox_inches='tight')
241335
plt.close(fig)
242336
print(f'Residual plot saved to: {figurefile}')
@@ -246,27 +340,47 @@ def main():
246340
"""Main function."""
247341
args = parse_args()
248342

249-
residual_dict = read_residuals(args.residual_files_dir, args.runid)
343+
runid = args.runid
344+
exclude_subdirs = args.exclude_subdirs
345+
residual_dict = read_residuals(
346+
args.residual_files_dir, runid,
347+
exclude_subdirs=exclude_subdirs
348+
)
349+
if not residual_dict:
350+
sys.exit(
351+
'No residuals found. Please check the provided input directory.'
352+
)
250353

251354
outdir = args.outdir
355+
# If runid is not None, create a subdirectory with the runid, unless
356+
# the outdir name is already the runid
357+
if runid and os.path.split(outdir)[-1] != runid:
358+
outdir = os.path.join(outdir, runid)
252359
if not os.path.exists(outdir):
253360
os.makedirs(outdir)
254361

255362
min_spectra = int(args.min_spectra)
256363
residual_mean = compute_mean_residuals(residual_dict, min_spectra)
257364

258365
if args.plot:
259-
plot_residuals(residual_dict, residual_mean, outdir)
366+
ymin, ymax = args.yrange
367+
plot_residuals(
368+
residual_dict, residual_mean, outdir,
369+
ymin=ymin, ymax=ymax, runid=runid
370+
)
260371

261372
# write the mean residuals (the stations corrections)
262373
res_mean_file = 'residual_mean.hdf5'
263374
res_mean_file = os.path.join(outdir, res_mean_file)
264375
if not residual_mean:
265-
print(
376+
sys.exit(
266377
'Mean residuals not computed: not enough spectra.\n'
267378
'Minimum number of spectra ("--min_spectra") currently set to: '
268379
f'{min_spectra}'
269380
)
270-
sys.exit(0)
271381
residual_mean.write(res_mean_file, format='HDF5')
272382
print(f'Mean station residuals saved to: {res_mean_file}')
383+
384+
385+
if __name__ == '__main__':
386+
main()

0 commit comments

Comments
 (0)