Skip to content

Commit 75cb7c3

Browse files
glevv and lesteve authored
FIX Fix adjusted_mutual_info_score numerical issue (scikit-learn#31065)
Co-authored-by: Loïc Estève <loic.esteve@ymail.com>
1 parent 434010c commit 75cb7c3

File tree

3 files changed

+71
-50
lines changed

3 files changed

+71
-50
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
- Fix :func:`metrics.adjusted_mutual_info_score` numerical issue when number of
2+
classes and samples is low.
3+
By :user:`Hleb Levitski <glevv>`

sklearn/metrics/cluster/_supervised.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1033,6 +1033,9 @@ def adjusted_mutual_info_score(
10331033
or classes.shape[0] == clusters.shape[0] == 0
10341034
):
10351035
return 1.0
1036+
# if there is only one class or one cluster return 0.0.
1037+
elif classes.shape[0] == 1 or clusters.shape[0] == 1:
1038+
return 0.0
10361039

10371040
contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
10381041
# Calculate the MI for the two clusterings
@@ -1051,8 +1054,13 @@ def adjusted_mutual_info_score(
10511054
denominator = min(denominator, -np.finfo("float64").eps)
10521055
else:
10531056
denominator = max(denominator, np.finfo("float64").eps)
1054-
ami = (mi - emi) / denominator
1055-
return float(ami)
1057+
# The same applies analogously to mi and emi.
1058+
numerator = mi - emi
1059+
if numerator < 0:
1060+
numerator = min(numerator, -np.finfo("float64").eps)
1061+
else:
1062+
numerator = max(numerator, np.finfo("float64").eps)
1063+
return float(numerator / denominator)
10561064

10571065

10581066
@validate_params(

sklearn/metrics/cluster/tests/test_supervised.py

Lines changed: 58 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -40,21 +40,19 @@
4040
]
4141

4242

43-
def test_error_messages_on_wrong_input():
44-
for score_func in score_funcs:
45-
expected = (
46-
r"Found input variables with inconsistent numbers of samples: \[2, 3\]"
47-
)
48-
with pytest.raises(ValueError, match=expected):
49-
score_func([0, 1], [1, 1, 1])
43+
@pytest.mark.parametrize("score_func", score_funcs)
44+
def test_error_messages_on_wrong_input(score_func):
45+
expected = r"Found input variables with inconsistent numbers of samples: \[2, 3\]"
46+
with pytest.raises(ValueError, match=expected):
47+
score_func([0, 1], [1, 1, 1])
5048

51-
expected = r"labels_true must be 1D: shape is \(2"
52-
with pytest.raises(ValueError, match=expected):
53-
score_func([[0, 1], [1, 0]], [1, 1, 1])
49+
expected = r"labels_true must be 1D: shape is \(2"
50+
with pytest.raises(ValueError, match=expected):
51+
score_func([[0, 1], [1, 0]], [1, 1, 1])
5452

55-
expected = r"labels_pred must be 1D: shape is \(2"
56-
with pytest.raises(ValueError, match=expected):
57-
score_func([0, 1, 0], [[1, 1], [0, 0]])
53+
expected = r"labels_pred must be 1D: shape is \(2"
54+
with pytest.raises(ValueError, match=expected):
55+
score_func([0, 1, 0], [[1, 1], [0, 0]])
5856

5957

6058
def test_generalized_average():
@@ -67,39 +65,50 @@ def test_generalized_average():
6765
assert means[0] == means[1] == means[2] == means[3]
6866

6967

70-
def test_perfect_matches():
71-
for score_func in score_funcs:
72-
assert score_func([], []) == pytest.approx(1.0)
73-
assert score_func([0], [1]) == pytest.approx(1.0)
74-
assert score_func([0, 0, 0], [0, 0, 0]) == pytest.approx(1.0)
75-
assert score_func([0, 1, 0], [42, 7, 42]) == pytest.approx(1.0)
76-
assert score_func([0.0, 1.0, 0.0], [42.0, 7.0, 42.0]) == pytest.approx(1.0)
77-
assert score_func([0.0, 1.0, 2.0], [42.0, 7.0, 2.0]) == pytest.approx(1.0)
78-
assert score_func([0, 1, 2], [42, 7, 2]) == pytest.approx(1.0)
79-
score_funcs_with_changing_means = [
68+
@pytest.mark.parametrize("score_func", score_funcs)
69+
def test_perfect_matches(score_func):
70+
assert score_func([], []) == pytest.approx(1.0)
71+
assert score_func([0], [1]) == pytest.approx(1.0)
72+
assert score_func([0, 0, 0], [0, 0, 0]) == pytest.approx(1.0)
73+
assert score_func([0, 1, 0], [42, 7, 42]) == pytest.approx(1.0)
74+
assert score_func([0.0, 1.0, 0.0], [42.0, 7.0, 42.0]) == pytest.approx(1.0)
75+
assert score_func([0.0, 1.0, 2.0], [42.0, 7.0, 2.0]) == pytest.approx(1.0)
76+
assert score_func([0, 1, 2], [42, 7, 2]) == pytest.approx(1.0)
77+
78+
79+
@pytest.mark.parametrize(
80+
"score_func",
81+
[
8082
normalized_mutual_info_score,
8183
adjusted_mutual_info_score,
82-
]
83-
means = {"min", "geometric", "arithmetic", "max"}
84-
for score_func in score_funcs_with_changing_means:
85-
for mean in means:
86-
assert score_func([], [], average_method=mean) == pytest.approx(1.0)
87-
assert score_func([0], [1], average_method=mean) == pytest.approx(1.0)
88-
assert score_func(
89-
[0, 0, 0], [0, 0, 0], average_method=mean
90-
) == pytest.approx(1.0)
91-
assert score_func(
92-
[0, 1, 0], [42, 7, 42], average_method=mean
93-
) == pytest.approx(1.0)
94-
assert score_func(
95-
[0.0, 1.0, 0.0], [42.0, 7.0, 42.0], average_method=mean
96-
) == pytest.approx(1.0)
97-
assert score_func(
98-
[0.0, 1.0, 2.0], [42.0, 7.0, 2.0], average_method=mean
99-
) == pytest.approx(1.0)
100-
assert score_func(
101-
[0, 1, 2], [42, 7, 2], average_method=mean
102-
) == pytest.approx(1.0)
84+
],
85+
)
86+
@pytest.mark.parametrize("average_method", ["min", "geometric", "arithmetic", "max"])
87+
def test_perfect_matches_with_changing_means(score_func, average_method):
88+
assert score_func([], [], average_method=average_method) == pytest.approx(1.0)
89+
assert score_func([0], [1], average_method=average_method) == pytest.approx(1.0)
90+
assert score_func(
91+
[0, 0, 0], [0, 0, 0], average_method=average_method
92+
) == pytest.approx(1.0)
93+
assert score_func(
94+
[0, 1, 0], [42, 7, 42], average_method=average_method
95+
) == pytest.approx(1.0)
96+
assert score_func(
97+
[0.0, 1.0, 0.0], [42.0, 7.0, 42.0], average_method=average_method
98+
) == pytest.approx(1.0)
99+
assert score_func(
100+
[0.0, 1.0, 2.0], [42.0, 7.0, 2.0], average_method=average_method
101+
) == pytest.approx(1.0)
102+
assert score_func(
103+
[0, 1, 2], [42, 7, 2], average_method=average_method
104+
) == pytest.approx(1.0)
105+
# Non-regression tests for: https://github.com/scikit-learn/scikit-learn/issues/30950
106+
assert score_func([0, 1], [0, 1], average_method=average_method) == pytest.approx(
107+
1.0
108+
)
109+
assert score_func(
110+
[0, 1, 2, 3], [0, 1, 2, 3], average_method=average_method
111+
) == pytest.approx(1.0)
103112

104113

105114
def test_homogeneous_but_not_complete_labeling():
@@ -306,12 +315,13 @@ def test_exactly_zero_info_score():
306315
labels_a, labels_b = (np.ones(i, dtype=int), np.arange(i, dtype=int))
307316
assert normalized_mutual_info_score(labels_a, labels_b) == pytest.approx(0.0)
308317
assert v_measure_score(labels_a, labels_b) == pytest.approx(0.0)
309-
assert adjusted_mutual_info_score(labels_a, labels_b) == pytest.approx(0.0)
318+
assert adjusted_mutual_info_score(labels_a, labels_b) == 0.0
310319
assert normalized_mutual_info_score(labels_a, labels_b) == pytest.approx(0.0)
311320
for method in ["min", "geometric", "arithmetic", "max"]:
312-
assert adjusted_mutual_info_score(
313-
labels_a, labels_b, average_method=method
314-
) == pytest.approx(0.0)
321+
assert (
322+
adjusted_mutual_info_score(labels_a, labels_b, average_method=method)
323+
== 0.0
324+
)
315325
assert normalized_mutual_info_score(
316326
labels_a, labels_b, average_method=method
317327
) == pytest.approx(0.0)

0 commit comments

Comments
 (0)