4 | 4 |
5 | 5 | from unittest.mock import patch |
6 | 6 |
| 7 | +import numpy as np |
7 | 8 | import pandas as pd |
| 9 | +import pytest |
8 | 10 |
9 | | -from util.dataframe_ops import parse_check |
| 11 | +from util.dataframe_ops import ( |
| 12 | + compute_division, |
| 13 | + compute_rel_diff_dataframe, |
| 14 | + force_monotonic, |
| 15 | + parse_check, |
| 16 | + parse_probtest_csv, |
| 17 | + unify_time_index, |
| 18 | +) |
10 | 19 |
11 | 20 |
12 | 21 | @patch("util.dataframe_ops.parse_probtest_csv") |
@@ -52,3 +61,240 @@ def test_parse_check(mock_parse_probtest_csv, setup_csv_files): |
52 | 61 |
53 | 62 | pd.testing.assert_frame_equal(df_ref, expected_ref) |
54 | 63 | pd.testing.assert_frame_equal(df_cur, expected_cur) |
| 64 | + |
| 65 | + |
| 66 | +def test_force_monotonic(): |
| 67 | + """ |
| 68 | + Test that the function modifies the dataframe in place, forcing the values |
| 69 | + of every row to become monotonically non-decreasing along the columns |
| 70 | + """ |
| 71 | + # Creation of a DataFrame with MultiIndex on the columns |
| 72 | + arrays = [ |
| 73 | + ["var1", "var1", "var2", "var2"], |
| 74 | + ["mean", "max", "mean", "max"], |
| 75 | + ] |
| 76 | + columns = pd.MultiIndex.from_arrays(arrays) |
| 77 | + data = [ |
| 78 | + [1, 5, 2, 7], |
| 79 | + [3, 2, 1, 9], |
| 80 | + [2, 8, 5, 4], |
| 81 | + ] |
| 82 | + df = pd.DataFrame(data, columns=columns) |
| 83 | + |
| 84 | + force_monotonic(df) |
| 85 | + |
| 86 | + # Verify that, for each statistic, every row is non-decreasing along the columns |
| 87 | + for stat in df.columns.levels[1]: |
| 88 | + sub_df = df.loc[:, (slice(None), stat)] |
| 89 | + assert (sub_df.diff(axis=1).fillna(0) >= 0).all().all() |
| 90 | + |
| 91 | + # Comparison with expected dataframe |
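| | + # e.g. for 'mean', row 1 changes from [3, 1] to [3, 3]; for 'max', row 2 from [8, 4] to [8, 8] |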
| 92 | + expected = pd.DataFrame([[1, 5, 2, 7], [3, 2, 3, 9], [2, 8, 5, 8]], columns=columns) |
| 93 | + pd.testing.assert_frame_equal(df, expected, check_exact=True) |
| 94 | + |
| 95 | + |
| 96 | +def test_compute_rel_diff_basic(): |
| 97 | + """ |
| 98 | + Test that the function returns the expected values for basic input values |
| 99 | + """ |
| 100 | + df1 = pd.DataFrame([[1.0, 3.0], [2.0, 4.0]], columns=["A", "B"]) |
| 101 | + df2 = pd.DataFrame([[1.0, 3.0], [1.0, 5.0]], columns=["A", "B"]) |
| 102 | + |
| 103 | + result = compute_rel_diff_dataframe(df1, df2) |
| 104 | + expected = pd.DataFrame([[0.0, 0.0], [1.0 / 3.0, 0.2]], columns=["A", "B"]) |
| 105 | + |
| 106 | + pd.testing.assert_frame_equal(result, expected, check_exact=False) |
| 107 | + |
| 108 | + |
| 109 | +def test_compute_rel_diff_with_negatives(): |
| 110 | + """ |
| 111 | + Test that the function returns the expected values for negative inputs |
| 112 | + """ |
| 113 | + df1 = pd.DataFrame([[-1.0, 3.0], [-2.0, -4.0]], columns=["A", "B"]) |
| 114 | + df2 = pd.DataFrame([[-2.0, 3.0], [-1.0, -5.0]], columns=["A", "B"]) |
| 115 | + |
| 116 | + result = compute_rel_diff_dataframe(df1, df2) |
| 117 | + expected = pd.DataFrame([[0.5, 0.0], [1.0 / 3.0, 0.2]], columns=["A", "B"]) |
| 118 | + |
| 119 | + pd.testing.assert_frame_equal(result, expected, check_exact=False) |
| 120 | + |
| 121 | + |
| 122 | +def test_compute_rel_diff_with_zeros(): |
| 123 | + """ |
| 124 | + Test that the function returns the expected values when the numerator contains zeros |
| 125 | + """ |
| 126 | + df1 = pd.DataFrame([[0.0, 0.0], [0.0, 0.0]], columns=["A", "B"]) |
| 127 | + df2 = pd.DataFrame([[1.0, 2.0], [-1.0, -2.0]], columns=["A", "B"]) |
| 128 | + |
| 129 | + result = compute_rel_diff_dataframe(df1, df2) |
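| | + # with df1 all zeros, the expected relative difference here matches abs(df2) |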
| 130 | + expected = pd.DataFrame([[1.0, 2.0], [1.0, 2.0]], columns=["A", "B"]) |
| 131 | + |
| 132 | + pd.testing.assert_frame_equal(result, expected, check_exact=False) |
| 133 | + |
| 134 | + |
| 135 | +def test_compute_rel_diff_identical(): |
| 136 | + """ |
| 137 | + Test that the function returns zero everywhere when the two dataframes are identical |
| 138 | + """ |
| 139 | + df1 = pd.DataFrame([[1.0, 2.0], [3.0, 4.0]], columns=["A", "B"]) |
| 140 | + df2 = df1.copy() |
| 141 | + |
| 142 | + result = compute_rel_diff_dataframe(df1, df2) |
| 143 | + |
| 144 | + assert (result == 0).all().all() |
| 145 | + |
| 146 | + |
| 147 | +def test_compute_division_basic(): |
| 148 | + """ |
| 149 | + Test that the function returns the expected values for basic input values |
| 150 | + """ |
| 151 | + df1 = pd.DataFrame([[10.0, 20.0], [30.0, 40.0]], columns=["A", "B"]) |
| 152 | + df2 = pd.DataFrame([[2.0, 4.0], [5.0, 10.0]], columns=["A", "B"]) |
| 153 | + |
| 154 | + result = compute_division(df1, df2) |
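| | + # expected is the elementwise ratio: 10/2 = 5, 20/4 = 5, 30/5 = 6, 40/10 = 4 |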
| 155 | + expected = pd.DataFrame([[5.0, 5.0], [6.0, 4.0]], columns=["A", "B"]) |
| 156 | + |
| 157 | + pd.testing.assert_frame_equal(result, expected, check_exact=False) |
| 158 | + |
| 159 | + |
| 160 | +def test_compute_division_with_zero_in_denominator(): |
| 161 | + """ |
| 162 | + Test that the function returns the expected values when the denominator contains zeros |
| 163 | + """ |
| 164 | + df1 = pd.DataFrame([[10.0, 20.0], [30.0, 40.0]], columns=["A", "B"]) |
| 165 | + df2 = pd.DataFrame([[0.0, 4.0], [5.0, 0.0]], columns=["A", "B"]) |
| 166 | + |
| 167 | + result = compute_division(df1, df2) |
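| | + # entries with a zero denominator are expected to come back as NaN |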
| 168 | + expected = pd.DataFrame([[np.nan, 5.0], [6.0, np.nan]], columns=["A", "B"]) |
| 169 | + |
| 170 | + pd.testing.assert_frame_equal(result, expected, check_exact=False) |
| 171 | + |
| 172 | + |
| 173 | +def test_division_with_zero_in_numerator(): |
| 174 | + """ |
| 175 | + Test that the function returns the expected values when the numerator contains zeros |
| 176 | + """ |
| 177 | + df1 = pd.DataFrame([[0.0, 20.0], [0.0, 40.0]], columns=["A", "B"]) |
| 178 | + df2 = pd.DataFrame([[2.0, 4.0], [5.0, 10.0]], columns=["A", "B"]) |
| 179 | + |
| 180 | + result = compute_division(df1, df2) |
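| | + # a zero numerator over a non-zero denominator is expected to give 0.0 |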
| 181 | + expected = pd.DataFrame([[0.0, 5.0], [0.0, 4.0]], columns=["A", "B"]) |
| 182 | + |
| 183 | + pd.testing.assert_frame_equal(result, expected, check_exact=False) |
| 184 | + |
| 185 | + |
| 186 | +def test_division_both_zero(): |
| 187 | + """ |
| 188 | + Check the function when the numerator is all zeros, including 0/0 entries |
| 189 | + """ |
| 190 | + df1 = pd.DataFrame({"A": [0.0, 0.0], "B": [0.0, 0.0]}) |
| 191 | + df2 = pd.DataFrame({"A": [0.0, 1.0], "B": [2.0, 0.0]}) |
| 192 | + |
| 193 | + result = compute_division(df1, df2) |
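| | + # 0/0 entries are expected to give 0.0 rather than NaN |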
| 194 | + expected = pd.DataFrame({"A": [0.0, 0.0], "B": [0.0, 0.0]}) |
| 195 | + |
| 196 | + pd.testing.assert_frame_equal(result, expected, check_exact=False) |
| 197 | + |
| 198 | + |
| 199 | +# Fixture creating a temporary CSV with a two-level header and a two-column row index |
| 200 | +@pytest.fixture(name="sample_csv", scope="function") |
| 201 | +def fixture_sample_csv(tmp_path): |
| 202 | + csv_content = """col1,col2,3,3,2,2 |
| 203 | +sub1,sub2,A,B,A,B |
| 204 | +a,b,1,3,5,7 |
| 205 | +d,e,2,4,6,8 |
| 206 | +""" |
| 207 | + file_path = tmp_path / "multi.csv" |
| 208 | + file_path.write_text(csv_content) |
| 209 | + return file_path |
| 210 | + |
| 211 | + |
| 212 | +def test_parse_probtest_csv(sample_csv): |
| 213 | + """ |
| 214 | + Check that the first level of the column MultiIndex, which is not in |
| 215 | + ascending order in the CSV, is sorted into ascending order when parsed |
| 216 | + """ |
| 217 | + df = parse_probtest_csv(sample_csv, index_col=[0, 1]) |
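| | + # the first column level (3, 3, 2, 2 in the CSV) is expected to come back sorted as (2, 2, 3, 3) |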
| 218 | + |
| 219 | + expected = pd.DataFrame( |
| 220 | + { |
| 221 | + (2, "A"): [5, 6], |
| 222 | + (2, "B"): [7, 8], |
| 223 | + (3, "A"): [1, 2], |
| 224 | + (3, "B"): [3, 4], |
| 225 | + }, |
| 226 | + index=pd.MultiIndex.from_tuples( |
| 227 | + [("a", "b"), ("d", "e")], |
| 228 | + names=["col1", "col2"], |
| 229 | + ), |
| 230 | + ) |
| 231 | + expected.index.names = df.index.names |
| 232 | + expected.columns.names = df.columns.names |
| 233 | + |
| 234 | + pd.testing.assert_frame_equal(df, expected) |
| 235 | + |
| 236 | + |
| 237 | +# Create two dataframes with the same (feature, time) column MultiIndex to test unify_time_index |
| 238 | +@pytest.fixture(name="sample_unify_time", scope="module") |
| 239 | +def fixture_sample_unify_time(): |
| 240 | + features = ["A", "B"] |
| 241 | + times = [6, 4, 2] |
| 242 | + |
| 243 | + multi_index = pd.MultiIndex.from_product( |
| 244 | + [features, times], names=["feature", "time"] |
| 245 | + ) |
| 246 | + |
| 247 | + data1 = [ |
| 248 | + [1, 2, 3, 4, 5, 6], |
| 249 | + [7, 8, 9, 10, 11, 12], |
| 250 | + [13, 14, 15, 16, 17, 18], |
| 251 | + [19, 20, 21, 22, 23, 24], |
| 252 | + [25, 26, 27, 28, 29, 30], |
| 253 | + ] |
| 254 | + df1 = pd.DataFrame(data1, columns=multi_index) |
| 255 | + |
| 256 | + data2 = [ |
| 257 | + [101, 102, 103, 104, 105, 106], |
| 258 | + [107, 108, 109, 110, 111, 112], |
| 259 | + [113, 114, 115, 116, 117, 118], |
| 260 | + ] |
| 261 | + df2 = pd.DataFrame(data2, columns=multi_index) |
| 262 | + |
| 263 | + fid_dfs = [df1, df2] |
| 264 | + return fid_dfs |
| 265 | + |
| 266 | + |
| 267 | +def test_unify_time_index(sample_unify_time): |
| 268 | + """ |
| 269 | + Test that the function unifies the time level of the column index, |
| 270 | + sorting it into ascending order and renumbering it from 0 |
| 271 | + """ |
| 272 | + result_dfs = unify_time_index(sample_unify_time) |
| 273 | + |
| 274 | + features = ["A", "B"] |
| 275 | + times = [0, 1, 2] # original times [6, 4, 2] remapped to consecutive steps starting from 0 |
| 276 | + |
| 277 | + multi_index = pd.MultiIndex.from_product( |
| 278 | + [features, times], names=["feature", "time"] |
| 279 | + ) |
| 280 | + |
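| | + # with the time level remapped from [6, 4, 2] to [0, 1, 2], each feature's block of values appears reversed |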
| 281 | + data1 = [ |
| 282 | + [3, 2, 1, 6, 5, 4], |
| 283 | + [9, 8, 7, 12, 11, 10], |
| 284 | + [15, 14, 13, 18, 17, 16], |
| 285 | + [21, 20, 19, 24, 23, 22], |
| 286 | + [27, 26, 25, 30, 29, 28], |
| 287 | + ] |
| 288 | + df1 = pd.DataFrame(data1, columns=multi_index) |
| 289 | + |
| 290 | + data2 = [ |
| 291 | + [103, 102, 101, 106, 105, 104], |
| 292 | + [109, 108, 107, 112, 111, 110], |
| 293 | + [115, 114, 113, 118, 117, 116], |
| 294 | + ] |
| 295 | + df2 = pd.DataFrame(data2, columns=multi_index) |
| 296 | + |
| 297 | + expected = [df1, df2] |
| 298 | + |
| 299 | + for res, exp in zip(result_dfs, expected): |
| 300 | + pd.testing.assert_frame_equal(res, exp) |