unumpy.average: init

doronbehar · doronbehar · commit 31141770abb6 · 2024-10-25T10:02:46.000+03:00
Ref: #38
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -8,6 +8,7 @@ Fixes:
 
 - fix `readthedocs` configuration so that the build passes (#254)
 - Add `unumpy.covariance_matrix`: A vectorized variant of the pure Python function `covariance_matrix` (#265)
+- Add `unumpy.average` to calculate uncertainties aware average (#264)
 
 3.2.2   2024-July-08
 -----------------------
diff --git a/doc/numpy_guide.rst b/doc/numpy_guide.rst
@@ -144,6 +144,29 @@ functions is available in the documentation for  :mod:`uncertainties.umath`.
 .. index::
    pair: testing and operations (in arrays); NaN
 
+Uncertainties aware average
+---------------------------
+
+If you have measured a certain value multiple times, with a different
+uncertainty every measurement. Averaging over the results in a manner aware of
+the different uncertainties, is not trivial. The function ``unumpy.average()``
+does that:
+
+>>> measurements = numpy.array([2.1, 2.0, 2.05, 2.08, 2.02])
+>>> stds = numpy.array([0.05, 0.03, 0.04, 0.06, 0.05])
+>>> arr = unumpy.uarray(measurements, stds)
+>>> unumpy.average(arr)
+2.03606+/-0.019
+
+Note how that function gives a value different from numpy's ``mean`` function:
+
+>>> numpy.mean(arr)
+2.050+/-0.019
+
+If you have an array with correlated values, the covariances will be considered
+as well. You can also specify an ``axes`` argument, to specify a certain axis
+or a tuple of axes upon which to average the result.
+
 NaN testing and NaN-aware operations
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/tests/test_unumpy.py b/tests/test_unumpy.py
@@ -1,3 +1,5 @@
+import pytest
+
 try:
     import numpy
 except ImportError:
@@ -300,3 +302,70 @@ def test_array_comparisons():
     # For matrices, 1D arrays are converted to 2D arrays:
     mat = unumpy.umatrix([1, 2], [1, 4])
     assert numpy.all((mat == [mat[0, 0], 4]) == [True, False])
+
+
+class TestAverage:
+    arr1d = unumpy.uarray(
+        [2.1, 2.0, 2.05, 2.08, 2.02],
+        [0.05, 0.03, 0.04, 0.06, 0.05],
+    )
+
+    def __init__(self):
+        sigma2d = 0.3
+        means2d = np.linspace(4, 20, num=50).reshape(5, 10)
+        self.arr2d = unumpy.uarray(
+            np.random.normal(loc=means, scale=sigma2d),
+            np.random.uniform(low=0, high=sigma2d),
+        )
+        meansNd = np.random.rand(4, 7, 5, 2, 10, 14) + 10
+        self.arrNd = unumpy.uarray(
+            meansNd, np.random.uniform(low=0, high=0.2, size=meansNd.shape)
+        )
+
+    def test_average_type_check():
+        with pytest.raises(ValueError):
+            unumpy.average(numpy.array(["bla"]))
+
+    def test_average_example():
+        """Tests the example from the docs."""
+        avg = unumpy.average(self.arr1d)
+        assert np.isclose(avg.n, 2.0360612043435338)
+        assert np.isclose(avg.s, 0.018851526708200846)
+
+    @pytest.mark.parametrize("invalid_axis", [1, 2, 3])
+    def test_average1d_invalid_axes(invalid_axis):
+        with pytest.raises(ValueError):
+            unumpy.average(self.arr1d, axes=invalid_axis)
+
+    @pytest.mark.parametrize("invalid_axis", [2, 3])
+    def test_average2d_invalid_axes(invalid_axis):
+        with pytest.raises(ValueError):
+            unumpy.average(self.arr1d, axes=invalid_axis)
+
+    @pytest.mark.parametrize(
+        "expected_shape, axis_argument",
+        [
+            ((), None),
+            # According to the linspace reshape in __init__
+            ((5,), 1),
+            ((5,), (1,)),
+            ((10,), 0),
+            ((10,), (0,)),
+        ],
+    )
+    def test_average2d_shape(expected_shape, axis_argument):
+        assert unumpy.average(self.arr2d, axes=axis_argument).shape == expected_shape
+
+    @pytest.mark.parametrize(
+        "expected_shape, axis_argument",
+        [
+            ((), None),
+            # According to random.rand() argument in __init__
+            ((4, 5, 10), (1, 3, 5)),
+            ((10,), (0, 1, 2, 3, 4, 6)),
+            ((14,), (0, 1, 2, 3, 4, 5)),
+            ((7, 2), (0, 2, 4, 5, 6)),
+        ],
+    )
+    def test_averageNd_shape(expected_shape, axis_argument):
+        assert unumpy.average(self.arrNd, axes=axis_argument).shape == expected_shape
diff --git a/uncertainties/unumpy/core.py b/uncertainties/unumpy/core.py
@@ -31,10 +31,79 @@
     "nominal_values",
     "covariance_matrix",
     "std_devs",
+    "average",
     # Classes:
     "matrix",
 ]
 
+
+def _average(arr):
+    """
+    The real implementation of average, for 1D arrays.
+    """
+    assert arr.ndim == 1
+    cov_matrix = covariance_matrix(arr)
+    weights = numpy.diagonal(cov_matrix) ** -1
+    weights_sum = weights.sum()
+    return uarray(
+        (nominal_values(arr) * weights).sum() / weights_sum,
+        numpy.sqrt(
+            numpy.einsum(
+                "i,ij,j",
+                weights,
+                cov_matrix,
+                weights,
+            )
+        ) / weights_sum,
+    )
+
+def average(arr, axes=None):
+    """
+    Return a weighted averaged along with a weighted mean over a certain axis
+    or a axes. The formulas implemented by this are:
+
+    $$ \\mu = \frac{\\sum_i (x_i/\\sigma_i^2)}{\\sum_i \\sigma_i^{-2}}$$
+
+    $$\\sigma_\\mu = \\frac{\\sqrt{\\sum_{i,j} \\sigma_i^{-2} \\sigma_j^{-2} \\cdot Cov(x_i, x_j)}}{\\sum_i \\sigma_i^{-2}}$$
+
+    Where of course $Cov(x_i, x_i) = \\sigma_i^2$.
+
+    By default, (when ``axes=None``), it flattens the given array first and
+    then applies the above equations. When axes is a list or tuple, it applies
+    the same formula over each axis in a loop, and hence correlations between
+    values in different axes are not taken into account.
+    """
+    # NOTE regarding the above implementation disclaimer: Ideally we could have
+    # taken the (2N)-D shaped covariance_matrix(arr) and worked with that, but
+    # it is not very clear how to do that truely correctly.
+    arr = np.asanyarray(arr)
+    if axes is None:
+        axes = [0]
+        arr = arr.flatten()
+    # The following sanity checks on axes similar to what Numpy 2.x's
+    # lib.array_utils.normalize_axis_tuple do.
+    elif not isinstance(axes, [tuple, list]):
+        axes = [operator.index(axes)]
+    else:
+        axes = tuple(axes)
+    if len(set(axes)) != len(axes):
+        raise ValueError("repeated axis found in axes argument")
+    # This one is not checked by np.lib.array_utils.normalize_axis_tuple
+    if max(axes) >= arr.ndim:
+        raise ValueError(
+            f"Cannot average over an inexistent axis {max(axes)} >= arr.ndim = "
+            f"{arr.ndim}"
+        )
+    if not isinstance(arr.flat[0], core.Variable):
+        raise ValueError(
+            "unumpy.average is meant to operate upon numpy arrays of ufloats, "
+            "not pure numpy arrays"
+        )
+    for axis in sorted(axes, reverse=True):
+        arr = numpy.apply_over_axis(_average, axis, arr)
+    return arr
+
+
 ###############################################################################
 # Utilities: