Skip to content

Commit 69af2b9

Browse files
committed
review comments [skip ci]
1 parent a581f57 commit 69af2b9

File tree

5 files changed

+158
-67
lines changed

5 files changed

+158
-67
lines changed

Project.toml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,14 @@ ClimaAnalysis = "29b5916a-a76c-4e73-9657-3c8fd22e65e6"
3131
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
3232
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
3333
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
34-
Format = "1fa38f19-a742-5d3f-a2b9-30dd87b9d5f8"
3534
GeoMakie = "db073c08-6b98-4ee5-b6a4-5efafb3259c6"
3635
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
3736
Poppler_jll = "9c32591e-4766-534b-9725-b71a8799265b"
3837
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
3938
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
4039

4140
[extensions]
42-
FluxnetSimulations = ["DelimitedFiles", "Format"]
41+
FluxnetSimulations = ["DelimitedFiles",]
4342
LandSimulationVisualization = ["CairoMakie", "ClimaAnalysis", "GeoMakie", "Poppler_jll", "Printf", "StatsBase"]
4443
NeuralSnowExt = ["CSV", "DataFrames", "HTTP", "Flux", "StatsBase", "BSON"]
4544

@@ -59,7 +58,6 @@ Dates = "1"
5958
DelimitedFiles = "1"
6059
DocStringExtensions = "0.9"
6160
Flux = "0.15, 0.16"
62-
Format = "1"
6361
GeoMakie = "< 0.7.13"
6462
HTTP = "1.10"
6563
Insolation = "0.9.2"

experiments/integrated/fluxnet/run_fluxnet.jl

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ import ClimaUtilities.OutputPathGenerator: generate_output_path
2222
using ClimaDiagnostics
2323
using ClimaUtilities
2424
using DelimitedFiles
25-
using Format
2625
FluxnetSimulations =
2726
Base.get_extension(ClimaLand, :FluxnetSimulations).FluxnetSimulations;
2827

@@ -500,7 +499,7 @@ lines!(
500499
label = "Ice, 1.25cm",
501500
)
502501

503-
if comparison_data.SWC.absent == false
502+
if !comparison_data.SWC.absent
504503
lines!(
505504
ax1,
506505
data_times ./ 3600 ./ 24,
@@ -562,7 +561,7 @@ lines!(
562561
color = "blue",
563562
)
564563

565-
if comparison_data.TS.absent == false
564+
if !comparison_data.TS.absent
566565
lines!(
567566
ax12,
568567
data_times ./ 3600 ./ 24,

ext/FluxnetSimulations.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import ClimaUtilities.TimeManager: ITime, date
55
using Thermodynamics
66
using Dates
77
using DelimitedFiles
8-
using Format
98
using DocStringExtensions
109
using Insolation
1110
import ClimaLand.Parameters as LP

ext/fluxnet_sims/data_processing.jl

Lines changed: 124 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,30 @@
1-
var_missing(array; val = -9999) = array == val
1+
"""
2+
var_missing(x; val = -9999)
3+
4+
A function that checks if the value of `x` is equal to
5+
-9999, which is the value that Fluxnet uses for missing
6+
data. Returns true if `x == val` (where `val` defaults to -9999).
7+
"""
8+
var_missing(x; val = -9999) = x == val
29

10+
"""
11+
mask_data(t, v; val = -9999)
12+
13+
Returns t[id], v[id], where id corresponds
14+
to the indices where v is not equal to `val`,
15+
which indicates missing data.
16+
17+
Note that `v` can either be a 1d array or a 2d array;
18+
in the latter case, id refers to rows where
19+
none of the columns of `v` are missing. The returned
20+
array is 2d: v[id, :]
21+
"""
322
function mask_data(t, v; val = -9999)
423
if ndims(v) == 1
5-
not_missing_mask = .~var_missing.(v)
24+
not_missing_mask = .~var_missing.(v; val)
625
return t[not_missing_mask], v[not_missing_mask]
726
elseif ndims(v) == 2
8-
not_missing = .~var_missing.(v)
27+
not_missing = .~var_missing.(v; val)
928
not_missing_mask = all(not_missing, dims = 2)[:]
1029
return t[not_missing_mask], v[not_missing_mask, :]
1130
else
@@ -14,74 +33,157 @@ function mask_data(t, v; val = -9999)
1433
end
1534

1635
"""
36+
time_varying_input_from_data(
37+
driver_data,
38+
varname::String,
39+
column_name_map,
40+
time_in_seconds::Vector;
41+
preprocess_func = identity,
42+
val=-9999,
43+
)
44+
45+
Returns the TimeVaryingInput object corresponding
46+
to `varname` in the driver_data matrix, the `column_name_map`
47+
should map between varname and column id, and the `time_in_seconds`
48+
should be the timestamp in seconds relative to the start_date of the
49+
simulation corresponding to each row of driver_data.
50+
51+
If you need to preprocess the data (e.g., unit conversion), you must pass
52+
a pointwise function preprocess_func(var) as a keyword argument.
53+
54+
Note that this function handles missing data by removing it (assuming it is marked as missing by a value equal to `val`), because the
55+
TimeVaryingInput object is an interpolating object (in time).
1756
"""
1857
function time_varying_input_from_data(
1958
driver_data,
2059
varname::String,
2160
column_name_map,
2261
time_in_seconds::Vector;
23-
skip_header = true,
2462
preprocess_func = identity,
63+
val = -9999
2564
)
26-
first_row = skip_header ? 2 : 1
27-
var_data = driver_data[first_row:end, column_name_map[varname]]
65+
var_data = driver_data[:, column_name_map[varname]]
2866
# The time varying input object interpolates over gaps
2967
# as needed, so we remove data that is marked as missing here
30-
t, v = mask_data(time_in_seconds, var_data)
31-
return TimeVaryingInput(Float64.(t), Float64.(preprocess_func.(v)))
68+
t, v = mask_data(time_in_seconds, var_data; val)
69+
return TimeVaryingInput(t, preprocess_func.(v))
3270
end
3371

3472
"""
73+
time_varying_input_from_data(
74+
driver_data,
75+
varnames::Vector{String},
76+
column_name_map,
77+
time_in_seconds::Vector;
78+
preprocess_func = identity,
79+
val=-9999,
80+
)
81+
82+
Returns a TimeVaryingInput object which is computing using
83+
`preprocess_func` as a pointwise function of -in order - the columns in
84+
`driver_data` specified by `varnames`.
85+
86+
For example, if you wish to compute specific humidity from temperature,
87+
pressure, and vpd, you would do:
88+
varnames = ["TA_F", "PA_F", "VPD_F"] along with a preprocess function of
89+
the form
90+
function preprocess_func(T,P,VPD)
91+
# carries out unit conversion
92+
# computes q from T, P, VPD
93+
#returns q
94+
end
95+
96+
The `column_name_map`
97+
should map between varname and column id, and the `time_in_seconds`
98+
should be the timestamp in seconds relative to the start_date of the
99+
simulation corresponding to each row of driver_data.
100+
101+
Note that this function handles missing data by removing it (assuming it is marked as missing by a value equal to `val`), because the
102+
TimeVaryingInput object is an interpolating object (in time).
35103
"""
36104
function time_varying_input_from_data(
37105
driver_data,
38106
varnames::Vector{String},
39107
column_name_map,
40108
time_in_seconds::Vector;
41-
skip_header = true,
42109
preprocess_func = identity,
43-
)
44-
first_row = skip_header ? 2 : 1
110+
val = -9999
111+
)
45112
var_ids = [column_name_map[varname] for varname in varnames]
46-
var_data = driver_data[first_row:end, var_ids]
113+
var_data = driver_data[:, var_ids]
47114
# The time varying input object interpolates over gaps
48115
# as needed, so we remove data that is marked as missing here
49-
t, v = mask_data(time_in_seconds, var_data)
116+
t, v = mask_data(time_in_seconds, var_data; val)
50117
return TimeVaryingInput(
51-
Float64.(t),
52-
Float64.(preprocess_func.(eachcol(v)...)),
118+
t,
119+
preprocess_func.(eachcol(v)...),
53120
)
54121
end
55122

56123
"""
124+
get_data_at_start_date(
125+
v::Vector,
126+
Δ_date::Vector;
127+
preprocess_func = identity,
128+
val = -9999,
129+
)
130+
131+
Returns the value in the raw data `v` closest to where
132+
|Δ_date| = 0, after preprocessing the data using preprocess_func
133+
(a pointwise function) and removing missing values.
134+
135+
If Δ_date corresponds to a vector of dates relative to the start_date
136+
of the simulation, the returned value can be used as an initial condition.
57137
"""
58138
function get_data_at_start_date(
59139
v::Vector,
60140
Δ_date::Vector;
61-
skip_header = true,
62141
preprocess_func = identity,
142+
val = -9999
63143
)
64-
Δ_date, v = mask_data(Δ_date, v)
144+
Δ_date, v = mask_data(Δ_date, v; val)
65145
idx_start = argmin(abs.(Δ_date))
66146
return preprocess_func(v[idx_start])
67147
end
68148

149+
"""
150+
get_comparison_data(
151+
driver_data,
152+
varname,
153+
column_name_map;
154+
preprocess_func = identity,
155+
156+
Gets and returns the a NamedTuple with the data identified
157+
by `varname` in the `driver_data` matrix by looking up the
158+
column index of varname
159+
using the column_name_map, replacing missing data with the mean
160+
of the non-missing data, and preprocessing the data using the
161+
`preprocess_func`, which should be a pointwise function.
162+
163+
The NamedTuple has two keys: absent, and values. If the data column
164+
is missing completely, the value of absent is true, and no values
165+
are returned. If the data column is present, absent is set to false,
166+
and the data is returned with the key values.
167+
168+
In the future, we can explore dropping the missing values to be
169+
consistent with what we do above, but this is consistent with the
170+
current Fluxnet runs.
171+
)
172+
"""
69173
function get_comparison_data(
70174
driver_data,
71175
varname,
72176
column_name_map;
73-
skip_header = true,
74177
preprocess_func = identity,
75178
)
76-
first_row = skip_header ? 2 : 1
77179
idx = column_name_map[varname]
78-
if idx isa Nothing
180+
if isnothing(idx)
79181
return (; absent = true)
80182
else
81-
v = driver_data[first_row:end, idx]
183+
v = driver_data[:, idx]
82184
missing_mask = var_missing.(v)
83185
not_missing_mask = .~missing_mask
84186
v[missing_mask] .= sum(v[not_missing_mask]) / sum(not_missing_mask)
85-
return (; absent = false, values = Float64.(preprocess_func.(v)))
187+
return (; absent = false, values = preprocess_func.(v))
86188
end
87189
end

0 commit comments

Comments
 (0)