
Commit 1e9844b

Added HitRate metric (#124)
1 parent 8eccee5 commit 1e9844b

File tree

5 files changed: +54 -2 lines changed

CHANGELOG.md
rectools/metrics/__init__.py
rectools/metrics/classification.py
tests/metrics/test_classification.py
tests/metrics/test_scoring.py

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Warm and cold users/items support in `cross_validate` ([#77](https://github.com/MobileTeleSystems/RecTools/pull/77))
 - [Breaking] Default value for train dataset type and params for user and item dataset types in `DSSMModel` ([#122](https://github.com/MobileTeleSystems/RecTools/pull/122))
 - [Breaking] `n_factors` and `deterministic` params to `DSSMModel` ([#122](https://github.com/MobileTeleSystems/RecTools/pull/122))
+- Hit Rate metric ([#124](https://github.com/MobileTeleSystems/RecTools/pull/124))

 ### Changed
 - Changed the logic of choosing random sampler for `RandomModel` and increased the sampling speed ([#120](https://github.com/MobileTeleSystems/RecTools/pull/120))

rectools/metrics/__init__.py

Lines changed: 3 additions & 1 deletion
@@ -33,6 +33,7 @@
 `metrics.IntraListDiversity`
 `metrics.AvgRecPopularity`
 `metrics.Serendipity`
+`metrics.HitRate`

 Tools
 -----
@@ -42,7 +43,7 @@
 `metrics.SparsePairwiseHammingDistanceCalculator`
 """

-from .classification import MCC, Accuracy, F1Beta, Precision, Recall
+from .classification import MCC, Accuracy, F1Beta, HitRate, Precision, Recall
 from .distances import (
     PairwiseDistanceCalculator,
     PairwiseHammingDistanceCalculator,
@@ -61,6 +62,7 @@
     "F1Beta",
     "Accuracy",
     "MCC",
+    "HitRate",
     "MAP",
     "NDCG",
     "MRR",

rectools/metrics/classification.py

Lines changed: 20 additions & 0 deletions
@@ -371,6 +371,26 @@ def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame, catalog:
         return mcc


+@attr.s
+class HitRate(SimpleClassificationMetric):
+    """
+    HitRate calculates the fraction of users for which the correct answer is included in the recommendation list.
+
+    HitRate equals ``1 if tp > 0, otherwise 0``, where
+    - ``tp`` is the number of relevant recommendations
+      among the first ``k`` items in the recommendation list.
+
+    Parameters
+    ----------
+    k : int
+        Number of items at the top of the recommendations list that will be used to calculate the metric.
+    """
+
+    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
+        hit_rate = (confusion_df[TP] > 0).astype(float)
+        return hit_rate
+
+
 def calc_classification_metrics(
     metrics: tp.Dict[str, tp.Union[ClassificationMetric, SimpleClassificationMetric]],
     merged: pd.DataFrame,
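
For context, a minimal usage sketch of the new metric, not part of the commit. Only `HitRate(k=...)`, `calc_per_user` and `calc` are confirmed by this diff and the tests below; the reco/interactions DataFrame layout with `Columns.User`, `Columns.Item` and `Columns.Rank` is assumed from the rest of the library:

# Hypothetical sketch: HitRate@2 on a toy recommendation table.
# Assumed: reco carries Columns.User / Columns.Item / Columns.Rank,
# interactions carries Columns.User / Columns.Item (ground truth).
import pandas as pd

from rectools import Columns
from rectools.metrics import HitRate

reco = pd.DataFrame(
    {
        Columns.User: [1, 1, 2, 2],
        Columns.Item: [10, 11, 20, 21],
        Columns.Rank: [1, 2, 1, 2],
    }
)
interactions = pd.DataFrame(
    {
        Columns.User: [1, 2],
        Columns.Item: [11, 99],  # user 1 has a relevant item in its top-2, user 2 does not
    }
)

metric = HitRate(k=2)
per_user = metric.calc_per_user(reco, interactions)  # expected: user 1 -> 1.0, user 2 -> 0.0
total = metric.calc(reco, interactions)              # expected: mean over users = 0.5

Per the docstring above, a user counts as a hit as soon as at least one relevant item appears in its top-``k`` recommendations, so the aggregate value is simply the share of such users.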

tests/metrics/test_classification.py

Lines changed: 27 additions & 1 deletion
@@ -19,7 +19,7 @@
 import pytest

 from rectools import Columns
-from rectools.metrics import MCC, Accuracy, F1Beta, Precision, Recall
+from rectools.metrics import MCC, Accuracy, F1Beta, HitRate, Precision, Recall
 from rectools.metrics.base import MetricAtK
 from rectools.metrics.classification import ClassificationMetric, calc_classification_metrics

@@ -155,3 +155,29 @@ def test_when_no_interactions(self) -> None:
             expected_metric_per_user,
         )
         assert np.isnan(self.metric.calc(RECO, EMPTY_INTERACTIONS, CATALOG))
+
+
+class TestHitRate:
+    def setup(self) -> None:
+        self.metric = HitRate(k=2)
+
+    def test_calc(self) -> None:
+
+        # tp = pd.Series([1, 1, 0, 0])
+        # tn = pd.Series([6, 8, 7, 7])
+        # fp = pd.Series([1, 1, 2, 2])
+        # fn = pd.Series([2, 0, 1, 1])
+
+        expected_metric_per_user = pd.Series(
+            [1, 1, 0, 0], index=pd.Series([1, 3, 4, 5], name=Columns.User, dtype=int), dtype=float
+        )
+        pd.testing.assert_series_equal(self.metric.calc_per_user(RECO, INTERACTIONS), expected_metric_per_user)
+        assert self.metric.calc(RECO, INTERACTIONS) == expected_metric_per_user.mean()
+
+    def test_when_no_interactions(self) -> None:
+        expected_metric_per_user = pd.Series(index=pd.Series(name=Columns.User, dtype=int), dtype=np.float64)
+        pd.testing.assert_series_equal(
+            self.metric.calc_per_user(RECO, EMPTY_INTERACTIONS),
+            expected_metric_per_user,
+        )
+        assert np.isnan(self.metric.calc(RECO, EMPTY_INTERACTIONS))

tests/metrics/test_scoring.py

Lines changed: 3 additions & 0 deletions
@@ -24,6 +24,7 @@
     NDCG,
     Accuracy,
     AvgRecPopularity,
+    HitRate,
     IntraListDiversity,
     MeanInvUserFreq,
     PairwiseHammingDistanceCalculator,
@@ -72,6 +73,7 @@ def test_success(self) -> None:
             "prec@2": Precision(k=2),
             "recall@1": Recall(k=1),
             "accuracy@1": Accuracy(k=1),
+            "hitrate@1": HitRate(k=1),
             "map@1": MAP(k=1),
             "map@2": MAP(k=2),
             "ndcg@1": NDCG(k=1, log_base=3),
@@ -89,6 +91,7 @@ def test_success(self) -> None:
             "prec@2": 0.375,
             "recall@1": 0.125,
             "accuracy@1": 0.825,
+            "hitrate@1": 0.25,
             "map@1": 0.125,
             "map@2": 0.375,
             "ndcg@1": 0.25,
