Commit 3296962
Merge branch 'meteoswiss-dev' into metoffice
2 parents: 090183f + 072e5fd

15 files changed: +146 -20 lines

mwr_raw2l1/config/config_0-20000-0-06610_A.yaml

Lines changed: 4 additions & 0 deletions

@@ -19,6 +19,10 @@ station_latitude: 46.81
 station_longitude: 6.94
 station_altitude: 491.

+# liquid cloud check parameters
+liquid_cloud_check:
+  do_check: False
+  multiplying_factor: 0.075

 # instrument parameters (frequency-dependent parameters go are associated to channels sorted by increasing frequency)
 # ---------------------
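The added block is plain YAML and can be inspected on its own. A minimal sketch of reading it (assuming PyYAML is available; within the package the file is loaded through get_inst_config instead):

# Illustrative only: load the new liquid cloud check block and read its two parameters.
# Assumes PyYAML; mwr_raw2l1 itself reads the instrument config via get_inst_config.
import yaml

snippet = '''
liquid_cloud_check:
  do_check: False
  multiplying_factor: 0.075
'''

conf = yaml.safe_load(snippet)
lwcl = conf.get('liquid_cloud_check', {})
print(lwcl.get('do_check', False))     # False for this station by default
print(lwcl.get('multiplying_factor'))  # 0.075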

mwr_raw2l1/config/qc_config.yaml

Lines changed: 0 additions & 3 deletions

@@ -13,6 +13,3 @@ check_Tb_offset: False
 delta_azi_sun: 7 # minimum accepted absolute azimuth offset between instrument pointing and sun
 delta_ele_sun: 7 # minimum accepted absolute elevation offset between instrument pointing and sun
 Tb_threshold: [2.7, 330.0] # Threshold for min and max Tb
-
-
-# potential future extension for liquid_cloud_flag

mwr_raw2l1/main.py

Lines changed: 9 additions & 0 deletions

@@ -58,6 +58,15 @@ def run(inst_config_file, nc_format_config_file=None, qc_config_file=None, conca
     conf_inst = get_inst_config(inst_config_file)
     conf_nc = get_nc_format_config(nc_format_config_file)
     conf_qc = get_qc_config(qc_config_file)
+    try:
+        if conf_inst['lwcl_check'] and 'do_check' in conf_inst['lwcl_check']:
+            logger.info('Liquid cloud check activated for this instrument.')
+            conf_qc['lwcl_check'] = conf_inst['lwcl_check']['do_check']
+            conf_qc['lwcl_multiplying_factor'] = conf_inst['lwcl_check']['multiplying_factor']
+    except KeyError:
+        conf_qc['lwcl_check'] = False
+        conf_qc['lwcl_multiplying_factor'] = None
+        logger.info('No liquid cloud check configured in instrument config file.')

     reader = get_reader(conf_inst['reader'])
     meas_constructor = get_meas_constructor(conf_inst['meas_constructor'])
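A standalone toy run of the new branch (hypothetical conf_inst dicts, not the package's real config objects) shows the two outcomes:

# Hedged sketch of how conf_qc ends up when the instrument config does or does not carry the section.
def lwcl_to_qc(conf_inst, conf_qc):  # hypothetical helper, for illustration only
    try:
        if conf_inst['lwcl_check'] and 'do_check' in conf_inst['lwcl_check']:
            conf_qc['lwcl_check'] = conf_inst['lwcl_check']['do_check']
            conf_qc['lwcl_multiplying_factor'] = conf_inst['lwcl_check']['multiplying_factor']
    except KeyError:
        conf_qc['lwcl_check'] = False
        conf_qc['lwcl_multiplying_factor'] = None
    return conf_qc

print(lwcl_to_qc({'lwcl_check': {'do_check': True, 'multiplying_factor': 0.075}}, {}))
# {'lwcl_check': True, 'lwcl_multiplying_factor': 0.075}
print(lwcl_to_qc({}, {}))
# {'lwcl_check': False, 'lwcl_multiplying_factor': None}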

mwr_raw2l1/measurement/measurement.py

Lines changed: 6 additions & 1 deletion

@@ -5,7 +5,7 @@
 from mwr_raw2l1.log import logger
 from mwr_raw2l1.measurement.measurement_constructors import MeasurementConstructors
 from mwr_raw2l1.measurement.measurement_helpers import channels2receiver, get_receiver_vars, is_var_in_data
-from mwr_raw2l1.measurement.measurement_qc_helpers import check_rain, check_receiver_sanity, check_sun
+from mwr_raw2l1.measurement.measurement_qc_helpers import check_rain, check_receiver_sanity, check_sun, find_lwcl_from_mwr
 from mwr_raw2l1.utils.num_utils import setbit, timedelta2s, unsetbit


@@ -252,6 +252,11 @@ def apply_quality_control(self, conf_qc):
         qc_thresholds = qc_thresholds[:-1]
         self.data['qc_thresholds'] = qc_thresholds

+        # Compute the liquid cloud flag using MWRpy threshold method:
+        if conf_qc['lwcl_check']:
+            self.data = find_lwcl_from_mwr(self.data, multiplying_factor=conf_qc['lwcl_multiplying_factor'])
+            self.data['liquid_cloud_flag_status'] = xr.ones_like(self.data['liquid_cloud_flag'], dtype=np.int32)
+
     def _setbits_qc(self, bit_nb, channel, mask_fail, mask_applied=None):
         """set values for quality_flag and quality_flag status for executed checks"""
         if mask_applied is None:
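As the helper below is written, liquid_cloud_flag takes the values 0 (cloud-free), 1 (liquid cloud suspected) and 2 (not evaluated, i.e. off-zenith or NaN), while the status variable is simply set to 1 for every sample. A tiny sketch of that last step (toy array, not the real dataset):

import numpy as np
import xarray as xr

flag = xr.DataArray(np.array([0, 1, 2, 2], dtype=np.int32), dims='time', name='liquid_cloud_flag')
status = xr.ones_like(flag, dtype=np.int32)  # same dims and coords as the flag, filled with 1
print(status.values)  # [1 1 1 1]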

mwr_raw2l1/measurement/measurement_construct_helpers.py

Lines changed: 16 additions & 4 deletions

@@ -1,5 +1,6 @@
 import numpy as np
 import xarray as xr
+import pandas as pd

 from mwr_raw2l1.errors import DimensionError, MissingInputArgument, TimeMismatch
 from mwr_raw2l1.log import logger
@@ -109,7 +110,10 @@ def rpg_to_si(all_data):
         all_data['met']['windspeed'] = all_data['met']['windspeed'] / 3.6 # km/h -> m/s
     except KeyError: # KeyError will only occur if quantity not in data, what can well happen. Do nothing in this case
         pass
-
+    try:
+        all_data['irt']['IRT'] = all_data['irt']['IRT'] + 273.15 # °C -> K
+    except KeyError: # KeyError will only occur if quantity not in data, what can well happen. Do nothing in this case
+        pass
     return all_data


@@ -149,7 +153,7 @@ def make_dataset(data, dims, vars, vars_opt=None, multidim_vars=None, time_vecto
     if data is None or not data:
         if time_vector is None:
             raise MissingInputArgument('if data is empty or None the input argument time_vector must be specified')
-        data = {'time': time_vector} # start overwriting empty data variable
+        data = {'time': pd.to_datetime(time_vector)} # start overwriting empty data variable
         for dim in dims[1:]: # assume first dimension to be 'time'
             data[dim] = np.array([missing_val]) # other dimensions all one-element
     for var in all_vars:
@@ -174,7 +178,11 @@ def make_dataset(data, dims, vars, vars_opt=None, multidim_vars=None, time_vecto
             raise DimensionError(dims, var, nd)
         spec[var] = dict(dims=dims[0:nd], data=data[var])

-    return xr.Dataset.from_dict(spec)
+    ds = xr.Dataset.from_dict(spec)
+    # For some reason, this does not keep the formatting of the time coordinates so we overwrite it again
+    if not isinstance(ds['time'].data[0], np.datetime64):
+        ds['time'] = spec['time']['data'].values
+    return ds


 def to_single_dataset(data_dicts, *args, **kwargs):
@@ -221,6 +229,8 @@ def merge_aux_data(mwr_data, all_data, srcs_to_ignore=None):
         all_data[src] = all_data[src].rename(varname_map)

         # interp to same time grid (time grid from blb now stems from some interp) and merge into out
+        # Note that this does not do any extrapolation which leaves some values (e.g. IRT) to NaN
+        # in case of a file starting with a scan
         srcdat_interp = all_data[src].interp(time=out['time'], method='nearest') # nearest: flags stay integer
         out = out.merge(srcdat_interp, join='left')

@@ -264,7 +274,9 @@ def merge_brt_blb(all_data):
             logger.warning(
                 'Skipping {} of {} scanning observations due to identical timestamp with zenith obs for {}'.format(
                     len(duplicate_times), len(blb_ts.time), duplicate_times))
-        out = out.merge(blb_ts, join='outer', compat='override')
+        # remove duplicate times from BRT and merge
+        out = out.sel(time=~out.time.isin(duplicate_times))
+        out = out.merge(blb_ts, join='outer')
     else:
         out = scan_to_timeseries_from_aux(all_data['blb'], hkd=all_data['hkd'])
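The duplicate handling above removes the clashing timestamps from the BRT series before the scan series is merged with join='outer'. A toy sketch of that isin-based selection (dummy data, not the package's own datasets):

import numpy as np
import xarray as xr

time = np.array(['2024-01-01T00:00', '2024-01-01T00:01', '2024-01-01T00:02'], dtype='datetime64[ns]')
out = xr.Dataset({'Tb': ('time', [100.0, 101.0, 102.0])}, coords={'time': time})
duplicate_times = time[[1]]  # timestamps also present in the scan data

out = out.sel(time=~out.time.isin(duplicate_times))
print(out.time.values)  # the 00:01 sample is dropped, so the outer merge can no longer clash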

mwr_raw2l1/measurement/measurement_constructors.py

Lines changed: 4 additions & 1 deletion

@@ -60,7 +60,7 @@ def from_radiometrics(cls, readin_data, conf_inst=None):
         # dimensions and variable names for usage with make_dataset
         dims = {'mwr': ['time', 'frequency'],
                 'aux': ['time']}
-        vars = {'mwr': ['Tb', 'ele', 'azi', 'quality'],
+        vars = {'mwr': ['Tb', 'ele', 'azi', 'quality', 'T_amb'],
                 'aux': ['IRT', 'p', 'T', 'RH', 'rainflag', 'quality']}
         vars_opt = {'mwr': [],
                     'aux': []}
@@ -72,6 +72,9 @@ def from_radiometrics(cls, readin_data, conf_inst=None):
         all_data['mwr']['scanflag'] = ('time', flags_here)
         data = merge_aux_data(all_data['mwr'], all_data)

+        # adapt the dimensions for the T_amb variable as only 1 temperature is given for 2 receivers
+        data['T_amb'] = data['T_amb'].expand_dims(dim={'receiver_nb':2}, axis=1)
+
         data['mfr'] = 'radiometrics' # manufacturer (lowercase)

         return cls(data, conf_inst)
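Radiometrics files report a single ambient-load temperature while two receivers are expected, so expand_dims repeats it along a new receiver_nb axis. A minimal sketch with dummy values:

import numpy as np
import xarray as xr

T_amb = xr.DataArray(np.array([293.1, 293.2, 293.4]), dims='time')  # one value per time step, in K
T_amb_2rec = T_amb.expand_dims(dim={'receiver_nb': 2}, axis=1)

print(T_amb_2rec.shape)  # (3, 2): the same temperature is repeated for both receivers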

mwr_raw2l1/measurement/measurement_qc_helpers.py

Lines changed: 76 additions & 0 deletions

@@ -1,5 +1,6 @@
 import ephem
 import numpy as np
+import xarray as xr

 from mwr_raw2l1.errors import UnknownManufacturer
 from mwr_raw2l1.log import logger
@@ -26,6 +27,11 @@ def check_receiver_sanity(data, channel):
     masks_and_checks = [] # collect all output tuples from flag_check here
     masks_and_checks.append(flag_check(data, 'channel_quality_ok', 0, channel))
     masks_and_checks.append(flag_check(data, 'alarm', 1, channel=None))
+    masks_and_checks.append(flag_check(data, 'noisediode_ok_hum', 0, channel=None))
+    masks_and_checks.append(flag_check(data, 'noisediode_ok_temp', 0, channel=None))
+    masks_and_checks.append(flag_check(data, 'Tstab_ok_hum', 0, channel=None))
+    masks_and_checks.append(flag_check(data, 'Tstab_ok_temp', 0, channel=None))
+    masks_and_checks.append(flag_check(data, 'Tstab_ok_amb', 0, channel=None))
     # TODO: could add checks for noisediode_ok_hum, noisediode_ok_temp, Tstab_ok_hum, Tstab_ok_temp, Tstab_ok_amb
     check_applied_all = [m[1] for m in masks_and_checks]
     if any(check_applied_all):
@@ -164,3 +170,73 @@ def flag_check(data, varname, value, channel=None):
     else:
         logger.info("Cannot apply check for '{}' during quality control as variable does not exist".format(varname))
         return None, False
+
+def find_lwcl_from_mwr(data, multiplying_factor=0.075):
+    """
+    This is a copy of the MWRpy function to find liquid water cloud free periods using 31.4 GHz TB variability.
+    It uses water vapor channel as proxy for a humidity dependent threshold.
+
+    For now, it works only for HATPRO instrument as this includes some empirically derived parameters.
+
+    Refactored to work directly with xarray data instead of dict
+
+    Args:
+        data: dataset, commonly Measurement.data
+        multiplying_factor: factor to multiply the threshold with, empirically derived
+
+    Returns:
+        dataset with liquid cloud flag set
+    """
+    # Different frequencies for window and water vapor channels depending on instrument type
+    freq_win = np.where(
+        (np.isclose(data["frequency"].values, 31.4, atol=2))
+        | (np.isclose(data["frequency"].values, 190.8, atol=1))
+    )[0]
+    freq_win = np.array([freq_win[0]]) if len(freq_win) > 1 else freq_win
+    freq_wv = np.where(
+        (np.isclose(np.round(data["frequency"][:], 1), 22.2))
+        | (np.isclose(np.round(data["frequency"][:], 1), 183.9))
+    )[0]
+
+    if len(freq_win) == 1 and len(freq_wv) == 1:
+        tb = data["Tb"].isel(frequency=freq_win)
+        tb = tb.squeeze(dim='frequency', drop=True)
+        tb_zenith = tb.where(data["scanflag"] == 0, drop=True).where((data["ele"] > 89.0) & (data["ele"] < 91.0), drop=True)
+        mean_diff_t = np.nanmean(tb.time.diff(dim='time').dt.seconds)
+        number_of_samples = 180/mean_diff_t.round() if mean_diff_t < 1.8 else 180/mean_diff_t.round()
+        # tb_std = tb_df.rolling(
+        #     pd.tseries.frequencies.to_offset(offset), center=True, min_periods=50
+        # ).std()
+        tb_std = tb_zenith.rolling(
+            time=int(number_of_samples), center=True
+        ).std()
+        number_of_samples = 600/mean_diff_t.round() if mean_diff_t < 1.8 else 600/mean_diff_t.round()
+        # tb_mx = tb_std.rolling(
+        #     pd.tseries.frequencies.to_offset(offset), center=True, min_periods=100
+        # ).max()
+        tb_mx = tb_std.rolling(
+            time=int(number_of_samples), center=True
+        ).max()
+        # tb_wv = np.squeeze(ds["tb"][:, freq_wv])
+        tb_wv = data["Tb"].isel(frequency=freq_wv)
+        tb_wv = tb_wv.squeeze(dim='frequency', drop=True)
+        # In order to compute the ratio, we need to get rid of the frequency coordinates
+
+        tb_rat = tb_wv / tb
+        tb_rat = tb_rat.rolling(
+            time=int(number_of_samples), center=True
+        ).max()
+
+        threshold_rat = tb_rat * multiplying_factor
+
+
+        data['liquid_cloud_flag'] = xr.where(
+            tb_mx < threshold_rat,
+            0,
+            1
+        )
+        data['liquid_cloud_flag'] = xr.where((data["ele"] > 89.0) & (data["ele"] < 91.0), data['liquid_cloud_flag'], 2)
+        # also fill nans with 2
+        data['liquid_cloud_flag'] = data['liquid_cloud_flag'].fillna(2)
+
+    return data
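A standalone sketch of calling the new helper on a synthetic zenith-only dataset (dummy values and dimensions; in the package the call is made from Measurement.apply_quality_control):

import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range('2024-01-01', periods=1000, freq='2s')
freq = np.array([22.24, 31.4])  # one water vapour and one window channel, as for a HATPRO
tb = 100 + np.random.randn(len(time), len(freq))  # synthetic brightness temperatures in K

data = xr.Dataset(
    {
        'Tb': (('time', 'frequency'), tb),
        'ele': ('time', np.full(len(time), 90.0)),   # zenith pointing only
        'scanflag': ('time', np.zeros(len(time))),   # 0 = no scan running
    },
    coords={'time': time, 'frequency': freq},
)

data = find_lwcl_from_mwr(data, multiplying_factor=0.075)
print(data['liquid_cloud_flag'].values[:10])  # 0 cloud-free, 1 liquid cloud suspected, 2 not evaluated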

mwr_raw2l1/measurement/scan_transform.py

Lines changed: 20 additions & 5 deletions

@@ -9,7 +9,7 @@
 from mwr_raw2l1.utils.num_utils import timedelta2s


-def scan_endtime_to_time(endtime, n_angles, time_per_angle=17):
+def scan_endtime_to_time(endtime, n_angles, time_per_angle=11, from_starttime=False):
     """
     RPG and Attex scan files only have one timestamp per scan. This function returns the approximate timestamp for the
     observation at each angle
@@ -20,6 +20,8 @@ def scan_endtime_to_time(endtime, n_angles, time_per_angle=17):
         n_angles: number of angles per scan.
         time_per_angle: total time for scanning one angle incl. integration time and the time for moving the mirror.
             Indicated in seconds. The default is 17.
+        from_starttime: if True, the timestamps will be calculated assuming the provided time is the start time of
+            the scan, otherwise from the end time. This arise from the change in timestamping operated in HATPRO instruments (TBC)

     Returns:
         time : :class:`numpy.ndarray` of :class:`datetime.datetime` objects of end times for each observed angle
@@ -33,11 +35,18 @@ def timedelta_method(seconds):
         # use ms as timedelta needs int. Will truncate to ms what should also avoid rounding errors in tests
         return np.timedelta64(int(seconds*1000), 'ms')

-    delta = [timedelta_method(n * time_per_angle) for n in reversed(range(n_angles))]
+    if from_starttime:
+        delta = [timedelta_method(n * time_per_angle) for n in range(n_angles)]
+    else:
+        delta = [timedelta_method(n * time_per_angle) for n in reversed(range(n_angles))]
     delta = np.array(delta)

     endtime = endtime.reshape(len(endtime), 1) # for letting numpy broadcast along dimension 1
-    time = endtime - delta # calculate time for each scan position (matrix)
+    if from_starttime:
+        time = endtime + delta # calculate time for each scan position (matrix)
+    else:
+        time = endtime - delta
+
     time = time.reshape((-1,)) # make one-dimenional vector out of time matrix

     return time
@@ -61,6 +70,7 @@ def scantime_from_aux(blb, hkd=None, brt=None):
     n_ele = len(blb['scan_ele'].values)

     endtime2time_params = dict(endtime=time_scan, n_angles=n_ele)
+    endtime2time_params['from_starttime'] = True # default assume time in blb is start time of scan
     if hkd is not None and 'BLscan_active' in hkd:
         time_scan_active = hkd.time[hkd.BLscan_active.values == 1].values
         time_zen_active = hkd.time[hkd.BLscan_active.values == 0].values
@@ -71,11 +81,16 @@ def scantime_from_aux(blb, hkd=None, brt=None):
             scan_duration = timedelta2s(time_last_scan_active[-1] - time_last_scan_active[0])
             endtime2time_params['time_per_angle'] = scan_duration / n_ele
         elif time_scan_active[0] > time_zen_active[0]: # sure to have full scan at beginning of hkd
-            scan_duration = timedelta2s(blb.time[0].values - time_scan_active[0])
+            scan_duration = timedelta2s(time_scan_active[-1] - time_scan_active[0])
             endtime2time_params['time_per_angle'] = scan_duration / n_ele
         else:
             logger.warning(
                 'Cannot infer scan duration as first scan might extend to previous period. Using default values')
+
+        if np.abs(timedelta2s(time_scan_active[0] - time_scan[0])) > 20 or np.abs(timedelta2s(time_scan_active[-1] - time_scan[-1])) < 20:
+            endtime2time_params['from_starttime'] = False
+            logger.info('Assuming that the time in BLB file is the endtime of the scan: TBC')
+
     elif brt is not None:
         # less accurate than hkd because things happen before scan starts (e.g. ambload obs).
         # Assume after last hkd measure it takes 2x time_per_angle before first scanobs ends.
@@ -86,7 +101,7 @@ def scantime_from_aux(blb, hkd=None, brt=None):
     else:
         logger.warning(
             'Cannot infer scan duration as first scan might extend to previous period. Using default values')
-
+
     return scan_endtime_to_time(**endtime2time_params)
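A worked example of the new from_starttime switch, using the same numpy timedelta arithmetic as scan_endtime_to_time (toy timestamp, 3 angles, 11 s per angle):

import numpy as np

stamp = np.array([np.datetime64('2024-01-01T12:00:00')])  # one timestamp per scan (toy value)
n_angles, time_per_angle = 3, 11

delta_end = np.array([np.timedelta64(n * time_per_angle, 's') for n in reversed(range(n_angles))])
delta_start = np.array([np.timedelta64(n * time_per_angle, 's') for n in range(n_angles)])

print((stamp.reshape(-1, 1) - delta_end).reshape(-1))
# stamp read as scan end time:   11:59:38, 11:59:49, 12:00:00
print((stamp.reshape(-1, 1) + delta_start).reshape(-1))
# stamp read as scan start time: 12:00:00, 12:00:11, 12:00:22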

mwr_raw2l1/readers/reader_radiometrics_helpers.py

Lines changed: 8 additions & 4 deletions

@@ -3,6 +3,7 @@
 from mwr_raw2l1.errors import CorruptRectype, EmptyLineError
 from mwr_raw2l1.readers.reader_helpers import get_time, simplify_header

+from mwr_raw2l1.log import logger

 def get_data(data_raw, header, no_mwr=False, **kwargs):
     """extract all known data from data_raw using header
@@ -17,13 +18,16 @@ def get_data(data_raw, header, no_mwr=False, **kwargs):
     """
     data = get_simple_vars(data_raw, header)
     try:
-        data['time'] = get_time(data_raw, header, 'date/time', '%m/%d/%y %H:%M:%S')
-    except ValueError:
         # Radiometrics changed its timestamps format with upgrade to VizMetPro.
         # The new format is '%Y/%m/%d %H:%M:%S' instead of '%m/%d/%y %H:%M:%S'.
-        # This is a workaround to support both formats but a better solution would be to
-        # add this pattern to the config file.
+        # TODO: This is a workaround to support both formats but a better solution would be to
+        # add this pattern to the config file (or get rid of older formats but we can not exclude
+        # that we integrate an old instrument once).
         data['time'] = get_time(data_raw, header, 'date/time', '%Y/%m/%d %H:%M:%S')
+    except ValueError:
+        logger.warning('Failed to parse time with new timestamp format %Y/%m/%d %H:%M:%S, trying with old version %m/%d/%y %H:%M:%S')
+        data['time'] = get_time(data_raw, header, 'date/time', '%m/%d/%y %H:%M:%S')
+
     if not no_mwr:
         data['Tb'], data['frequency'] = get_mwr(data_raw, header, **kwargs)
     return data
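The reordered try/except now parses the new VizMetPro format first and only falls back to the old pattern on a ValueError. A minimal sketch with datetime.strptime standing in for the package's get_time helper:

from datetime import datetime

def parse_radiometrics_time(s):  # hypothetical stand-in, for illustration only
    try:
        return datetime.strptime(s, '%Y/%m/%d %H:%M:%S')  # new VizMetPro format
    except ValueError:
        return datetime.strptime(s, '%m/%d/%y %H:%M:%S')  # pre-VizMetPro format kept as fallback

print(parse_radiometrics_time('2024/01/31 12:00:05'))  # new-style timestamp
print(parse_radiometrics_time('01/31/24 12:00:05'))    # old-style timestamp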

mwr_raw2l1/readers/reader_rpg_helpers.py

Lines changed: 3 additions & 2 deletions

@@ -4,6 +4,7 @@
 import datetime as dt

 import numpy as np
+import pandas as pd

 from mwr_raw2l1.errors import UnknownFlagValue, WrongInputFormat

@@ -18,8 +19,8 @@ def interpret_time(time_in):
         time_in = np.array([time_in])
         scalar_input = True

-    times = [dt.datetime.utcfromtimestamp(x + posix_offset) for x in time_in]
-    out = np.array(times)
+    times = [dt.datetime.fromtimestamp(x + posix_offset, dt.timezone.utc) for x in time_in]
+    out = pd.to_datetime(times)

     if scalar_input:
         out = out[0]
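dt.datetime.utcfromtimestamp() is deprecated since Python 3.12; the timezone-aware replacement keeps the values in UTC, and pd.to_datetime turns the list into a DatetimeIndex instead of an object array. A small sketch with dummy epochs (the reader adds posix_offset to the raw RPG times before this step):

import datetime as dt
import pandas as pd

raw = [1704067200.0, 1704067205.0]  # two sample POSIX times, 5 s apart

times = [dt.datetime.fromtimestamp(x, dt.timezone.utc) for x in raw]
out = pd.to_datetime(times)

print(out)     # DatetimeIndex(['2024-01-01 00:00:00+00:00', '2024-01-01 00:00:05+00:00'], dtype='datetime64[ns, UTC]', freq=None)
print(out.tz)  # UTC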
