Skip to content

Commit 1f6f5ce

Browse files
authored
Increase test coverage (#68)
- addition of a module to test util/xarray_ops.py - increased test coverage for the util/dataframe_ops.py module
1 parent d91acb2 commit 1f6f5ce

File tree

2 files changed

+350
-1
lines changed

2 files changed

+350
-1
lines changed

tests/util/test_dataframe_ops.py

Lines changed: 247 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,18 @@
44

55
from unittest.mock import patch
66

7+
import numpy as np
78
import pandas as pd
9+
import pytest
810

9-
from util.dataframe_ops import parse_check
11+
from util.dataframe_ops import (
12+
compute_division,
13+
compute_rel_diff_dataframe,
14+
force_monotonic,
15+
parse_check,
16+
parse_probtest_csv,
17+
unify_time_index,
18+
)
1019

1120

1221
@patch("util.dataframe_ops.parse_probtest_csv")
@@ -52,3 +61,240 @@ def test_parse_check(mock_parse_probtest_csv, setup_csv_files):
5261

5362
pd.testing.assert_frame_equal(df_ref, expected_ref)
5463
pd.testing.assert_frame_equal(df_cur, expected_cur)
64+
65+
66+
def test_force_monotonic():
    """
    Check that force_monotonic modifies the dataframe in place so that the
    values of every row become non-decreasing monotonic along the columns.
    """
    # Two variables, each carrying a "mean" and a "max" statistic column
    header = pd.MultiIndex.from_arrays(
        [
            ["var1", "var1", "var2", "var2"],
            ["mean", "max", "mean", "max"],
        ]
    )
    frame = pd.DataFrame(
        [
            [1, 5, 2, 7],
            [3, 2, 1, 9],
            [2, 8, 5, 4],
        ],
        columns=header,
    )

    force_monotonic(frame)

    # Every per-statistic slice must now be non-decreasing along the columns
    for statistic in frame.columns.levels[1]:
        stat_slice = frame.loc[:, (slice(None), statistic)]
        assert (stat_slice.diff(axis=1).fillna(0) >= 0).all().all()

    # And the concrete values must match the expected dataframe
    wanted = pd.DataFrame(
        [[1, 5, 2, 7], [3, 2, 3, 9], [2, 8, 5, 8]], columns=header
    )
    pd.testing.assert_frame_equal(frame, wanted, check_exact=True)
94+
95+
96+
def test_compute_rel_diff_basic():
    """
    Check that compute_rel_diff_dataframe produces the expected relative
    differences for simple positive inputs.
    """
    left = pd.DataFrame({"A": [1.0, 2.0], "B": [3.0, 4.0]})
    right = pd.DataFrame({"A": [1.0, 1.0], "B": [3.0, 5.0]})

    diff = compute_rel_diff_dataframe(left, right)

    wanted = pd.DataFrame({"A": [0.0, 1.0 / 3.0], "B": [0.0, 0.2]})
    pd.testing.assert_frame_equal(diff, wanted, check_exact=False)
107+
108+
109+
def test_compute_rel_diff_with_negatives():
    """
    Check that compute_rel_diff_dataframe also handles negative values.
    """
    left = pd.DataFrame({"A": [-1.0, -2.0], "B": [3.0, -4.0]})
    right = pd.DataFrame({"A": [-2.0, -1.0], "B": [3.0, -5.0]})

    diff = compute_rel_diff_dataframe(left, right)

    wanted = pd.DataFrame({"A": [0.5, 1.0 / 3.0], "B": [0.0, 0.2]})
    pd.testing.assert_frame_equal(diff, wanted, check_exact=False)
120+
121+
122+
def test_compute_rel_diff_with_zeros():
    """
    Check that compute_rel_diff_dataframe also handles zeros in the
    numerator.
    """
    left = pd.DataFrame({"A": [0.0, 0.0], "B": [0.0, 0.0]})
    right = pd.DataFrame({"A": [1.0, -1.0], "B": [2.0, -2.0]})

    diff = compute_rel_diff_dataframe(left, right)

    wanted = pd.DataFrame({"A": [1.0, 1.0], "B": [2.0, 2.0]})
    pd.testing.assert_frame_equal(diff, wanted, check_exact=False)
133+
134+
135+
def test_compute_rel_diff_identical():
    """
    Check that the relative difference is zero everywhere when the two
    dataframes are identical.
    """
    left = pd.DataFrame({"A": [1.0, 3.0], "B": [2.0, 4.0]})
    right = left.copy()

    diff = compute_rel_diff_dataframe(left, right)

    assert (diff == 0).all().all()
145+
146+
147+
def test_compute_division_basic():
    """
    Check that compute_division returns the expected element-wise quotient
    for simple inputs.
    """
    numerator = pd.DataFrame({"A": [10.0, 30.0], "B": [20.0, 40.0]})
    denominator = pd.DataFrame({"A": [2.0, 5.0], "B": [4.0, 10.0]})

    quotient = compute_division(numerator, denominator)

    wanted = pd.DataFrame({"A": [5.0, 6.0], "B": [5.0, 4.0]})
    pd.testing.assert_frame_equal(quotient, wanted, check_exact=False)
158+
159+
160+
def test_compute_division_with_zero_in_denominator():
    """
    Check that compute_division yields NaN wherever the denominator is zero.
    """
    numerator = pd.DataFrame({"A": [10.0, 30.0], "B": [20.0, 40.0]})
    denominator = pd.DataFrame({"A": [0.0, 5.0], "B": [4.0, 0.0]})

    quotient = compute_division(numerator, denominator)

    wanted = pd.DataFrame({"A": [np.nan, 6.0], "B": [5.0, np.nan]})
    pd.testing.assert_frame_equal(quotient, wanted, check_exact=False)
171+
172+
173+
def test_division_with_zero_in_numerator():
    """
    Check that compute_division yields zero wherever the numerator is zero.
    """
    numerator = pd.DataFrame({"A": [0.0, 0.0], "B": [20.0, 40.0]})
    denominator = pd.DataFrame({"A": [2.0, 5.0], "B": [4.0, 10.0]})

    quotient = compute_division(numerator, denominator)

    wanted = pd.DataFrame({"A": [0.0, 0.0], "B": [5.0, 4.0]})
    pd.testing.assert_frame_equal(quotient, wanted, check_exact=False)
184+
185+
186+
def test_division_both_zero():
    """
    Check compute_division when every numerator value is zero, including
    positions where the denominator is zero as well.
    """
    numerator = pd.DataFrame([[0.0, 0.0], [0.0, 0.0]], columns=["A", "B"])
    denominator = pd.DataFrame([[0.0, 2.0], [1.0, 0.0]], columns=["A", "B"])

    quotient = compute_division(numerator, denominator)

    wanted = pd.DataFrame([[0.0, 0.0], [0.0, 0.0]], columns=["A", "B"])
    pd.testing.assert_frame_equal(quotient, wanted, check_exact=False)
197+
198+
199+
# Temporary CSV file with a two-level column header and a two-column row
# index, used to exercise parse_probtest_csv below.
@pytest.fixture(name="sample_csv", scope="function")
def fixture_sample_csv(tmp_path):
    rows = [
        "col1,col2,3,3,2,2",
        "sub1,sub2,A,B,A,B",
        "a,b,1,3,5,7",
        "d,e,2,4,6,8",
    ]
    target = tmp_path / "multi.csv"
    target.write_text("\n".join(rows) + "\n")
    return target
210+
211+
212+
def test_parse_probtest_csv(sample_csv):
    """
    Check that the first level of the column MultiIndex is reversed by the
    parser because it is not in ascending order in the file.
    """
    parsed = parse_probtest_csv(sample_csv, index_col=[0, 1])

    # Column level 0 is sorted ascending (2 before 3), so the data columns
    # appear in the opposite order to the file.
    expected = pd.DataFrame(
        [[5, 7, 1, 3], [6, 8, 2, 4]],
        index=pd.MultiIndex.from_tuples(
            [("a", "b"), ("d", "e")],
            names=["col1", "col2"],
        ),
        columns=pd.MultiIndex.from_tuples(
            [(2, "A"), (2, "B"), (3, "A"), (3, "B")]
        ),
    )
    # Axis names are taken from the parsed frame; only labels/values matter here
    expected.index.names = parsed.index.names
    expected.columns.names = parsed.columns.names

    pd.testing.assert_frame_equal(parsed, expected)
235+
236+
237+
# Input dataframes for the unify_time_index test: two frames sharing a
# (feature, time) column MultiIndex whose time level is in descending order.
@pytest.fixture(name="sample_unify_time", scope="module")
def fixture_sample_unify_time():
    columns = pd.MultiIndex.from_product(
        [["A", "B"], [6, 4, 2]], names=["feature", "time"]
    )

    # Row i of the first frame holds 6*i+1 .. 6*i+6 (five rows)
    first = pd.DataFrame(
        [[6 * i + j for j in range(1, 7)] for i in range(5)],
        columns=columns,
    )
    # Same pattern shifted by 100 (three rows)
    second = pd.DataFrame(
        [[100 + 6 * i + j for j in range(1, 7)] for i in range(3)],
        columns=columns,
    )

    return [first, second]
265+
266+
267+
def test_unify_time_index(sample_unify_time):
    """
    Check that the function unifies the time level of the column index so
    that it is ascending and starts at 0, reordering the data accordingly.
    """
    results = unify_time_index(sample_unify_time)

    # The original times 6, 4, 2 map onto 0, 1, 2
    columns = pd.MultiIndex.from_product(
        [["A", "B"], [0, 1, 2]], names=["feature", "time"]
    )

    # With times reversed, each per-feature block of three values is mirrored:
    # row i of the input [6i+1..6i+6] becomes [6i+3, 6i+2, 6i+1, 6i+6, 6i+5, 6i+4]
    mirrored = (3, 2, 1, 6, 5, 4)
    first_expected = pd.DataFrame(
        [[base + offset for offset in mirrored] for base in (0, 6, 12, 18, 24)],
        columns=columns,
    )
    second_expected = pd.DataFrame(
        [[100 + base + offset for offset in mirrored] for base in (0, 6, 12)],
        columns=columns,
    )

    for got, wanted in zip(results, [first_expected, second_expected]):
        pd.testing.assert_frame_equal(got, wanted)

tests/util/test_xarray_ops.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
"""
2+
This module contains unit tests for the "xarray_ops.py" module
3+
"""
4+
5+
import sys
6+
7+
import numpy as np
8+
import pytest
9+
import xarray as xr
10+
11+
from util.xarray_ops import statistics_over_horizontal_dim
12+
13+
14+
@pytest.fixture(name="sample_data", scope="module")
def fixture_sample_data():
    # Small 2x3 float DataArray with labelled x/y coordinates for the tests
    values = np.arange(1.0, 7.0).reshape(2, 3)
    return xr.DataArray(
        values,
        dims=("x", "y"),
        coords={"x": [10, 20], "y": ["a", "b", "c"]},
        name="test_var",
    )
25+
26+
27+
ALL_STATS = ["mean", "max", "min", "sum", "std", "var", "median", "prod"]
ALL_DIMS = ["x", "y", "x:y"]


@pytest.mark.parametrize("stat", ALL_STATS)
@pytest.mark.parametrize("dims", ALL_DIMS)
def test_statistics_against_manual(sample_data, stat, dims):
    # Compare every supported statistic over every dimension spec against
    # manually computed reference values for the 2x3 fixture.
    stats = statistics_over_horizontal_dim(sample_data, [dims], [stat])
    computed = stats[0].values.tolist()

    array = sample_data

    manual = {
        "mean": {"x": [2.5, 3.5, 4.5], "y": [2, 5], "x:y": [3.5]},
        "max": {"x": [4, 5, 6], "y": [3, 6], "x:y": [6]},
        "min": {"x": [1, 2, 3], "y": [1, 4], "x:y": [1]},
        "sum": {"x": [5, 7, 9], "y": [6, 15], "x:y": [21]},
        "std": {
            "x": [1.5, 1.5, 1.5],
            "y": [0.816496580927726, 0.816496580927726],
            "x:y": [np.std(array)],
        },
        "var": {
            "x": [2.25, 2.25, 2.25],
            "y": [0.6666666666666666, 0.6666666666666666],
            "x:y": [np.var(array)],
        },
        "median": {"x": [2.5, 3.5, 4.5], "y": [2, 5], "x:y": [3.5]},
        "prod": {"x": [4, 10, 18], "y": [6, 120], "x:y": [720]},
    }

    np.testing.assert_allclose(computed, manual[stat][dims])
62+
63+
64+
def test_statistics_with_fill(sample_data):
    """
    Test that values equal to the _FillValue attribute are masked out
    before the statistics are computed.

    The previous version replaced only NaNs with -999, but the fixture
    contains no NaNs, so the mask path was never exercised. Here an actual
    fill value is injected into the array.
    """
    data = sample_data.copy()
    data.attrs["_FillValue"] = -999
    # Inject a real fill value so the masking logic has something to remove
    data[0, 0] = -999

    result = statistics_over_horizontal_dim(
        data, [data.dims[0]], ["mean", "sum"], fill_value_key="_FillValue"
    )

    # Reference: mask the fill value manually, then reduce with skipna
    masked = data.where(data != -999)
    expected_mean = masked.mean(dim=data.dims[0], skipna=True)
    expected_sum = masked.sum(dim=data.dims[0], skipna=True)

    assert np.allclose(result[0], expected_mean)
    assert np.allclose(result[1], expected_sum)
82+
83+
84+
def test_no_matching_dimension(sample_data):
    """
    Test that the function exits (sys.exit(1) -> SystemExit) when the
    requested dimension name does not exist on the array.

    No monkeypatching is required: sys.exit already raises SystemExit,
    so pytest.raises can catch it directly.
    """
    with pytest.raises(SystemExit):
        statistics_over_horizontal_dim(sample_data, ["nonexistent_dim"], ["mean"])
94+
95+
96+
def test_invalid_statistic(sample_data):
    """
    Test that a statistic name the DataArray does not implement raises
    an AttributeError.
    """
    first_dim = sample_data.dims[0]
    with pytest.raises(AttributeError):
        statistics_over_horizontal_dim(sample_data, [first_dim], ["not_a_stat"])

0 commit comments

Comments
 (0)