"""
    var_missing(x; val = -9999)

Return `true` if `x` is equal to `val`, the sentinel value that marks
missing data, and `false` otherwise.

The default, `val = -9999`, is the value that Fluxnet uses for missing
data. This is a pointwise function; broadcast it over arrays, e.g.
`var_missing.(v; val)`.
"""
var_missing(x; val = -9999) = x == val
2
9
10
+ """
11
+ mask_data(t, v; val = -9999)
12
+
13
+ Returns t[id], v[id], where id corresponds
14
+ to the indices where v is not equal to `val`,
15
+ which indicates missing data.
16
+
17
+ Note that `v` can either be a 1d array or a 2d array;
18
+ in the latter case, id refers to rows where
19
+ none of the columns of `v` are missing. The returned
20
+ array is 2d: v[id, :]
21
+ """
3
22
function mask_data (t, v; val = - 9999 )
4
23
if ndims (v) == 1
5
- not_missing_mask = .~ var_missing .(v)
24
+ not_missing_mask = .~ var_missing .(v; val )
6
25
return t[not_missing_mask], v[not_missing_mask]
7
26
elseif ndims (v) == 2
8
- not_missing = .~ var_missing .(v)
27
+ not_missing = .~ var_missing .(v; val )
9
28
not_missing_mask = all (not_missing, dims = 2 )[:]
10
29
return t[not_missing_mask], v[not_missing_mask, :]
11
30
else
@@ -14,74 +33,157 @@ function mask_data(t, v; val = -9999)
14
33
end
15
34
"""
    time_varying_input_from_data(
        driver_data,
        varname::String,
        column_name_map,
        time_in_seconds::Vector;
        preprocess_func = identity,
        val = -9999,
    )

Return the `TimeVaryingInput` object corresponding to `varname` in the
`driver_data` matrix.

The `column_name_map` should map between `varname` and the column id, and
`time_in_seconds` should be the timestamp, in seconds relative to the
start date of the simulation, corresponding to each row of `driver_data`.

If you need to preprocess the data (e.g., unit conversion), pass a
pointwise function `preprocess_func(var)` as a keyword argument; it is
broadcast over the non-missing values.

Missing data, i.e. entries equal to `val`, are removed together with their
timestamps, because the `TimeVaryingInput` object is an interpolating
object (in time).
"""
function time_varying_input_from_data(
    driver_data,
    varname::String,
    column_name_map,
    time_in_seconds::Vector;
    preprocess_func = identity,
    val = -9999,
)
    var_data = driver_data[:, column_name_map[varname]]
    # The time varying input object interpolates over gaps as needed,
    # so we remove data that is marked as missing here.
    t, v = mask_data(time_in_seconds, var_data; val)
    return TimeVaryingInput(t, preprocess_func.(v))
end
33
71
"""
    time_varying_input_from_data(
        driver_data,
        varnames::Vector{String},
        column_name_map,
        time_in_seconds::Vector;
        preprocess_func = identity,
        val = -9999,
    )

Return a `TimeVaryingInput` object which is computed using
`preprocess_func` as a pointwise function of - in order - the columns in
`driver_data` specified by `varnames`.

For example, if you wish to compute specific humidity from temperature,
pressure, and vpd, you would pass
`varnames = ["TA_F", "PA_F", "VPD_F"]` along with a preprocess function of
the form

    function preprocess_func(T, P, VPD)
        # carries out unit conversion
        # computes q from T, P, VPD
        # returns q
    end

`preprocess_func` is called with one argument per entry of `varnames`, so
the default `identity` only works when `varnames` has a single entry.

The `column_name_map` should map between each varname and its column id,
and `time_in_seconds` should be the timestamp, in seconds relative to the
start date of the simulation, corresponding to each row of `driver_data`.

Missing data are handled by removing every row in which any of the
selected columns is equal to `val`, because the `TimeVaryingInput` object
is an interpolating object (in time).
"""
function time_varying_input_from_data(
    driver_data,
    varnames::Vector{String},
    column_name_map,
    time_in_seconds::Vector;
    preprocess_func = identity,
    val = -9999,
)
    var_ids = [column_name_map[varname] for varname in varnames]
    var_data = driver_data[:, var_ids]
    # The time varying input object interpolates over gaps as needed,
    # so we remove data that is marked as missing here.
    t, v = mask_data(time_in_seconds, var_data; val)
    # Each selected column becomes one positional argument of the
    # broadcast `preprocess_func` call, in the order given by `varnames`.
    return TimeVaryingInput(t, preprocess_func.(eachcol(v)...))
end
55
122
"""
    get_data_at_start_date(
        v::Vector,
        Δ_date::Vector;
        preprocess_func = identity,
        val = -9999,
    )

Return the value in the raw data `v` closest to where |Δ_date| = 0.

Entries of `v` equal to `val` (missing data) are removed first, together
with the matching entries of `Δ_date`; `preprocess_func` is then applied to
the single selected value.

If `Δ_date` corresponds to a vector of dates relative to the start date
of the simulation, the returned value can be used as an initial condition.

Throws an `ArgumentError` if no non-missing data remain.
"""
function get_data_at_start_date(
    v::Vector,
    Δ_date::Vector;
    preprocess_func = identity,
    val = -9999,
)
    Δ_valid, v_valid = mask_data(Δ_date, v; val)
    # `argmin` would fail on an empty collection with a generic message;
    # report the actual cause instead.
    isempty(Δ_valid) && throw(
        ArgumentError(
            "cannot select data at the start date: all values are marked as missing (val = $val)",
        ),
    )
    idx_start = argmin(abs.(Δ_valid))
    return preprocess_func(v_valid[idx_start])
end
68
148
"""
    get_comparison_data(
        driver_data,
        varname,
        column_name_map;
        preprocess_func = identity,
        val = -9999,
    )

Get and return a NamedTuple with the data identified by `varname` in the
`driver_data` matrix by looking up the column index of `varname` using the
`column_name_map`, replacing missing data (entries equal to `val`) with the
mean of the non-missing data, and preprocessing the data using
`preprocess_func`, which should be a pointwise function.

The NamedTuple has two keys: `absent` and `values`. If the data column
is missing completely (`column_name_map[varname]` is `nothing`), the value
of `absent` is `true`, and no values are returned. If the data column is
present, `absent` is set to `false`, and the data is returned with the key
`values`.

In the future, we can explore dropping the missing values to be
consistent with what is done for the time varying inputs, but this is
consistent with the current Fluxnet runs.
"""
function get_comparison_data(
    driver_data,
    varname,
    column_name_map;
    preprocess_func = identity,
    val = -9999,
)
    idx = column_name_map[varname]
    if isnothing(idx)
        return (; absent = true)
    else
        # Indexing with `:` copies the column, so filling in the gaps
        # below does not modify `driver_data`.
        v = driver_data[:, idx]
        missing_mask = var_missing.(v; val)
        not_missing_mask = .~missing_mask
        # Replace missing entries with the mean of the non-missing ones.
        v[missing_mask] .= sum(v[not_missing_mask]) / sum(not_missing_mask)
        return (; absent = false, values = preprocess_func.(v))
    end
end
0 commit comments