4 | 4 |
5 | 5 | from unittest.mock import patch |
6 | 6 |
| 7 | +import numpy as np |
7 | 8 | import pandas as pd |
| 9 | +import pytest |
8 | 10 |
9 | | -from util.dataframe_ops import parse_check |
| 11 | +from util.dataframe_ops import ( |
| 12 | + compute_division, |
| 13 | + compute_rel_diff_dataframe, |
| 14 | + force_monotonic, |
| 15 | + parse_check, |
| 16 | + parse_probtest_csv, |
| 17 | + unify_time_index, |
| 18 | +) |
10 | 19 |
11 | 20 |
12 | 21 | @patch("util.dataframe_ops.parse_probtest_csv") |
@@ -52,3 +61,240 @@ def test_parse_check(mock_parse_probtest_csv, setup_csv_files): |
52 | 61 |
53 | 62 | pd.testing.assert_frame_equal(df_ref, expected_ref) |
54 | 63 | pd.testing.assert_frame_equal(df_cur, expected_cur) |
| 64 | + |
| 65 | + |
| 66 | +def test_force_monotonic(): |
| 67 | + """ |
| 68 | + Test that the function modifies the dataframe in place, forcing the values |
| 69 | + of every row to become monotonically non-decreasing along the columns |
| 70 | + """ |
| 71 | + # Creation of a DataFrame with MultiIndex on the columns |
| 72 | + arrays = [ |
| 73 | + ["var1", "var1", "var2", "var2"], |
| 74 | + ["mean", "max", "mean", "max"], |
| 75 | + ] |
| 76 | + columns = pd.MultiIndex.from_arrays(arrays) |
| 77 | + data = [ |
| 78 | + [1, 5, 2, 7], |
| 79 | + [3, 2, 1, 9], |
| 80 | + [2, 8, 5, 4], |
| 81 | + ] |
| 82 | + df = pd.DataFrame(data, columns=columns) |
| 83 | + |
| 84 | + force_monotonic(df) |
| 85 | + |
| 86 | + # Verify that, for each statistic, every row is non-decreasing along the columns |
| 87 | + for stat in df.columns.levels[1]: |
| 88 | + sub_df = df.loc[:, (slice(None), stat)] |
| 89 | + assert (sub_df.diff(axis=1).fillna(0) >= 0).all().all() |
| 90 | + |
| 91 | + # Comparison with expected dataframe |
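| | + # e.g. for 'mean', row 1 changes from [3, 1] to [3, 3]; for 'max', row 2 from [8, 4] to [8, 8] |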
| 92 | + expected = pd.DataFrame([[1, 5, 2, 7], [3, 2, 3, 9], [2, 8, 5, 8]], columns=columns) |
| 93 | + pd.testing.assert_frame_equal(df, expected, check_exact=True) |
| 94 | + |
| 95 | + |
| 96 | +def test_compute_rel_diff_basic(): |
| 97 | + """ |
| 98 | + Test that the function returns the expected values for basic input values |
| 99 | + """ |
| 100 | + df1 = pd.DataFrame([[1.0, 3.0], [2.0, 4.0]], columns=["A", "B"]) |
| 101 | + df2 = pd.DataFrame([[1.0, 3.0], [1.0, 5.0]], columns=["A", "B"]) |
| 102 | + |
| 103 | + result = compute_rel_diff_dataframe(df1, df2) |
| 104 | + expected = pd.DataFrame([[0.0, 0.0], [1.0 / 3.0, 0.2]], columns=["A", "B"]) |
| 105 | + |
| 106 | + pd.testing.assert_frame_equal(result, expected, check_exact=False) |
| 107 | + |
| 108 | + |
| 109 | +def test_compute_rel_diff_with_negatives(): |
| 110 | + """ |
| 111 | + Test that the function returns the expected values for negative inputs |
| 112 | + """ |
| 113 | + df1 = pd.DataFrame([[-1.0, 3.0], [-2.0, -4.0]], columns=["A", "B"]) |
| 114 | + df2 = pd.DataFrame([[-2.0, 3.0], [-1.0, -5.0]], columns=["A", "B"]) |
| 115 | + |
| 116 | + result = compute_rel_diff_dataframe(df1, df2) |
| 117 | + expected = pd.DataFrame([[0.5, 0.0], [1.0 / 3.0, 0.2]], columns=["A", "B"]) |
| 118 | + |
| 119 | + pd.testing.assert_frame_equal(result, expected, check_exact=False) |
| 120 | + |
| 121 | + |
| 122 | +def test_compute_rel_diff_with_zeros(): |
| 123 | + """ |
| 124 | + Test that the function returns the expected values when the numerator contains zeros |
| 125 | + """ |
| 126 | + df1 = pd.DataFrame([[0.0, 0.0], [0.0, 0.0]], columns=["A", "B"]) |
| 127 | + df2 = pd.DataFrame([[1.0, 2.0], [-1.0, -2.0]], columns=["A", "B"]) |
| 128 | + |
| 129 | + result = compute_rel_diff_dataframe(df1, df2) |
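| | + # with df1 all zeros, the expected relative difference here matches abs(df2) |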
| 130 | + expected = pd.DataFrame([[1.0, 2.0], [1.0, 2.0]], columns=["A", "B"]) |
| 131 | + |
| 132 | + pd.testing.assert_frame_equal(result, expected, check_exact=False) |
| 133 | + |
| 134 | + |
| 135 | +def test_compute_rel_diff_identical(): |
| 136 | + """ |
| 137 | + Test that the function returns zero everywhere when the two dataframes are identical |
| 138 | + """ |
| 139 | + df1 = pd.DataFrame([[1.0, 2.0], [3.0, 4.0]], columns=["A", "B"]) |
| 140 | + df2 = df1.copy() |
| 141 | + |
| 142 | + result = compute_rel_diff_dataframe(df1, df2) |
| 143 | + |
| 144 | + assert (result == 0).all().all() |
| 145 | + |
| 146 | + |
| 147 | +def test_compute_division_basic(): |
| 148 | + """ |
| 149 | + Test that the function returns the expected values for basic input values |
| 150 | + """ |
| 151 | + df1 = pd.DataFrame([[10.0, 20.0], [30.0, 40.0]], columns=["A", "B"]) |
| 152 | + df2 = pd.DataFrame([[2.0, 4.0], [5.0, 10.0]], columns=["A", "B"]) |
| 153 | + |
| 154 | + result = compute_division(df1, df2) |
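| | + # expected is the elementwise ratio: 10/2 = 5, 20/4 = 5, 30/5 = 6, 40/10 = 4 |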
| 155 | + expected = pd.DataFrame([[5.0, 5.0], [6.0, 4.0]], columns=["A", "B"]) |
| 156 | + |
| 157 | + pd.testing.assert_frame_equal(result, expected, check_exact=False) |
| 158 | + |
| 159 | + |
| 160 | +def test_compute_division_with_zero_in_denominator(): |
| 161 | + """ |
| 162 | + Test that the function returns the expected values when the denominator contains zeros |
| 163 | + """ |
| 164 | + df1 = pd.DataFrame([[10.0, 20.0], [30.0, 40.0]], columns=["A", "B"]) |
| 165 | + df2 = pd.DataFrame([[0.0, 4.0], [5.0, 0.0]], columns=["A", "B"]) |
| 166 | + |
| 167 | + result = compute_division(df1, df2) |
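| | + # entries with a zero denominator are expected to come back as NaN |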
| 168 | + expected = pd.DataFrame([[np.nan, 5.0], [6.0, np.nan]], columns=["A", "B"]) |
| 169 | + |
| 170 | + pd.testing.assert_frame_equal(result, expected, check_exact=False) |
| 171 | + |
| 172 | + |
| 173 | +def test_division_with_zero_in_numerator(): |
| 174 | + """ |
| 175 | + Test that the function returns the expected values when the numerator contains zeros |
| 176 | + """ |
| 177 | + df1 = pd.DataFrame([[0.0, 20.0], [0.0, 40.0]], columns=["A", "B"]) |
| 178 | + df2 = pd.DataFrame([[2.0, 4.0], [5.0, 10.0]], columns=["A", "B"]) |
| 179 | + |
| 180 | + result = compute_division(df1, df2) |
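| | + # a zero numerator over a non-zero denominator is expected to give 0.0 |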
| 181 | + expected = pd.DataFrame([[0.0, 5.0], [0.0, 4.0]], columns=["A", "B"]) |
| 182 | + |
| 183 | + pd.testing.assert_frame_equal(result, expected, check_exact=False) |
| 184 | + |
| 185 | + |
| 186 | +def test_division_both_zero(): |
| 187 | + """ |
| 188 | + Check the function when the numerator is all zeros, including 0/0 entries |
| 189 | + """ |
| 190 | + df1 = pd.DataFrame({"A": [0.0, 0.0], "B": [0.0, 0.0]}) |
| 191 | + df2 = pd.DataFrame({"A": [0.0, 1.0], "B": [2.0, 0.0]}) |
| 192 | + |
| 193 | + result = compute_division(df1, df2) |
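| | + # 0/0 entries are expected to give 0.0 rather than NaN |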
| 194 | + expected = pd.DataFrame({"A": [0.0, 0.0], "B": [0.0, 0.0]}) |
| 195 | + |
| 196 | + pd.testing.assert_frame_equal(result, expected, check_exact=False) |
| 197 | + |
| 198 | + |
| 199 | +# Fixture creating a temporary CSV with a two-level header and a two-column row index |
| 200 | +@pytest.fixture(name="sample_csv", scope="function") |
| 201 | +def fixture_sample_csv(tmp_path): |
| 202 | + csv_content = """col1,col2,3,3,2,2 |
| 203 | +sub1,sub2,A,B,A,B |
| 204 | +a,b,1,3,5,7 |
| 205 | +d,e,2,4,6,8 |
| 206 | +""" |
| 207 | + file_path = tmp_path / "multi.csv" |
| 208 | + file_path.write_text(csv_content) |
| 209 | + return file_path |
| 210 | + |
| 211 | + |
| 212 | +def test_parse_probtest_csv(sample_csv): |
| 213 | + """ |
| 214 | + Check that the first level of the column MultiIndex, which is not in |
| 215 | + ascending order in the CSV, is sorted into ascending order when parsed |
| 216 | + """ |
| 217 | + df = parse_probtest_csv(sample_csv, index_col=[0, 1]) |
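| | + # the first column level (3, 3, 2, 2 in the CSV) is expected to come back sorted as (2, 2, 3, 3) |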
| 218 | + |
| 219 | + expected = pd.DataFrame( |
| 220 | + { |
| 221 | + (2, "A"): [5, 6], |
| 222 | + (2, "B"): [7, 8], |
| 223 | + (3, "A"): [1, 2], |
| 224 | + (3, "B"): [3, 4], |
| 225 | + }, |
| 226 | + index=pd.MultiIndex.from_tuples( |
| 227 | + [("a", "b"), ("d", "e")], |
| 228 | + names=["col1", "col2"], |
| 229 | + ), |
| 230 | + ) |
| 231 | + expected.index.names = df.index.names |
| 232 | + expected.columns.names = df.columns.names |
| 233 | + |
| 234 | + pd.testing.assert_frame_equal(df, expected) |
| 235 | + |
| 236 | + |
| 237 | +# Create two dataframes with the same (feature, time) column MultiIndex to test unify_time_index |
| 238 | +@pytest.fixture(name="sample_unify_time", scope="module") |
| 239 | +def fixture_sample_unify_time(): |
| 240 | + features = ["A", "B"] |
| 241 | + times = [6, 4, 2] |
| 242 | + |
| 243 | + multi_index = pd.MultiIndex.from_product( |
| 244 | + [features, times], names=["feature", "time"] |
| 245 | + ) |
| 246 | + |
| 247 | + data1 = [ |
| 248 | + [1, 2, 3, 4, 5, 6], |
| 249 | + [7, 8, 9, 10, 11, 12], |
| 250 | + [13, 14, 15, 16, 17, 18], |
| 251 | + [19, 20, 21, 22, 23, 24], |
| 252 | + [25, 26, 27, 28, 29, 30], |
| 253 | + ] |
| 254 | + df1 = pd.DataFrame(data1, columns=multi_index) |
| 255 | + |
| 256 | + data2 = [ |
| 257 | + [101, 102, 103, 104, 105, 106], |
| 258 | + [107, 108, 109, 110, 111, 112], |
| 259 | + [113, 114, 115, 116, 117, 118], |
| 260 | + ] |
| 261 | + df2 = pd.DataFrame(data2, columns=multi_index) |
| 262 | + |
| 263 | + fid_dfs = [df1, df2] |
| 264 | + return fid_dfs |
| 265 | + |
| 266 | + |
| 267 | +def test_unify_time_index(sample_unify_time): |
| 268 | + """ |
| 269 | + Test that the function unifies the time level of the column index, |
| 270 | + sorting it into ascending order and renumbering it from 0 |
| 271 | + """ |
| 272 | + result_dfs = unify_time_index(sample_unify_time) |
| 273 | + |
| 274 | + features = ["A", "B"] |
| 275 | + times = [0, 1, 2] # original times [6, 4, 2] remapped to consecutive steps starting from 0 |
| 276 | + |
| 277 | + multi_index = pd.MultiIndex.from_product( |
| 278 | + [features, times], names=["feature", "time"] |
| 279 | + ) |
| 280 | + |
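| | + # with the time level remapped from [6, 4, 2] to [0, 1, 2], each feature's block of values appears reversed |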
| 281 | + data1 = [ |
| 282 | + [3, 2, 1, 6, 5, 4], |
| 283 | + [9, 8, 7, 12, 11, 10], |
| 284 | + [15, 14, 13, 18, 17, 16], |
| 285 | + [21, 20, 19, 24, 23, 22], |
| 286 | + [27, 26, 25, 30, 29, 28], |
| 287 | + ] |
| 288 | + df1 = pd.DataFrame(data1, columns=multi_index) |
| 289 | + |
| 290 | + data2 = [ |
| 291 | + [103, 102, 101, 106, 105, 104], |
| 292 | + [109, 108, 107, 112, 111, 110], |
| 293 | + [115, 114, 113, 118, 117, 116], |
| 294 | + ] |
| 295 | + df2 = pd.DataFrame(data2, columns=multi_index) |
| 296 | + |
| 297 | + expected = [df1, df2] |
| 298 | + |
| 299 | + for res, exp in zip(result_dfs, expected): |
| 300 | + pd.testing.assert_frame_equal(res, exp) |