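Address review comments and fix lint errors: make _get_ngrams a module-level function, reuse the test_bleu fixtures in test_chrf, and fix PEP 8 spacing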

varisd · varisd · commit 98f3b3b55f91 · 2019-01-02T17:23:16.000+01:00
diff --git a/neuralmonkey/evaluators/chrf.py b/neuralmonkey/evaluators/chrf.py
@@ -37,11 +37,11 @@ def score_instance(self,
                        reference: List[str]) -> float:
         hyp_joined = " ".join(hypothesis)
         hyp_chars = [x for x in list(hyp_joined) if x not in self.ignored]
-        hyp_ngrams = self._get_ngrams(hyp_chars, self.n)
+        hyp_ngrams = _get_ngrams(hyp_chars, self.n)
 
         ref_joined = " ".join(reference)
         ref_chars = [x for x in list(ref_joined) if x not in self.ignored]
-        ref_ngrams = self._get_ngrams(ref_chars, self.n)
+        ref_ngrams = _get_ngrams(ref_chars, self.n)
 
         if not hyp_chars or not ref_chars:
             if "".join(hyp_chars) == "".join(ref_chars):
@@ -69,7 +69,7 @@ def chr_r(self, hyp_ngrams: NGramDicts, ref_ngrams: NGramDicts) -> float:
                         ref_count, hyp_ngrams[m - 1][ngr])
         return np.mean(np.divide(
             count_matched, count_all, out=np.ones_like(count_all),
-            where=(count_all!=0)))
+            where=(count_all != 0)))
 
     def chr_p(self, hyp_ngrams: NGramDicts, ref_ngrams: NGramDicts) -> float:
         count_all = np.zeros(self.n)
@@ -83,18 +83,18 @@ def chr_p(self, hyp_ngrams: NGramDicts, ref_ngrams: NGramDicts) -> float:
                         hyp_count, ref_ngrams[m - 1][ngr])
         return np.mean(np.divide(
             count_matched, count_all, out=np.ones_like(count_all),
-            where=(count_all!=0)))
-
-    def _get_ngrams(self, tokens: List[str], n: int) -> NGramDicts:
-        ngr_dicts = []
-        for m in range(1, n + 1):
-            ngr_dict = {}  # type: Dict[str, int]
-            # if m > len(tokens), return an empty dict
-            for i in range(m, len(tokens) + 1):
-                ngr = "".join(tokens[i - m:i])
-                ngr_dict[ngr] = ngr_dict.setdefault(ngr, 0) + 1
-            ngr_dicts.append(ngr_dict)
-        return ngr_dicts
+            where=(count_all != 0)))
+
+
+def _get_ngrams(tokens: List[str], n: int) -> NGramDicts:
+    ngr_dicts = []
+    for m in range(1, n + 1):
+        ngr_dict = {}  # type: Dict[str, int]
+        for i in range(m, len(tokens) + 1):
+            ngr = "".join(tokens[i - m:i])
+            ngr_dict[ngr] = ngr_dict.setdefault(ngr, 0) + 1
+        ngr_dicts.append(ngr_dict)
+    return ngr_dicts
 
 
 # pylint: disable=invalid-name
diff --git a/neuralmonkey/tests/test_chrf.py b/neuralmonkey/tests/test_chrf.py
@@ -3,40 +3,21 @@
 
 import unittest
 
-from neuralmonkey.evaluators.chrf import ChrFEvaluator
+from neuralmonkey.evaluators.chrf import ChrFEvaluator, _get_ngrams
+from neuralmonkey.tests.test_bleu import DECODED, REFERENCE
 
 
-CORPUS_DECODED = [
-    "colorful thoughts furiously sleep",
-    "little piglet slept all night",
-    "working working working working working be be be be be be be",
-    "ich bin walrus",
-    "walrus for präsident"
-]
-
-CORPUS_REFERENCE = [
-    "the colorless ideas slept furiously",
-    "pooh slept all night",
-    "working class hero is something to be",
-    "I am the working class walrus",
-    "walrus for president"
-]
-
 TOKENS = ["a", "b", "a"]
 NGRAMS = [
-    {"a": 2, "b" : 1},
-    {"ab": 1, "ba" : 1},
-    {"aba" : 1},
+    {"a": 2, "b": 1},
+    {"ab": 1, "ba": 1},
+    {"aba": 1},
     {}]
-            
-
-DECODED = [d.split() for d in CORPUS_DECODED]
-REFERENCE = [r.split() for r in CORPUS_REFERENCE]
 
 FUNC = ChrFEvaluator()
 FUNC_P = FUNC.chr_p
 FUNC_R = FUNC.chr_r
-FUNC_NGRAMS = FUNC._get_ngrams
+
 
 class TestChrF(unittest.TestCase):
 
@@ -63,10 +44,11 @@ def test_chrf(self):
 
     def test_get_ngrams(self):
         tokens = ["a", "b", "a"]
-        ngrams_out = FUNC_NGRAMS(tokens, 4)
+        ngrams_out = _get_ngrams(tokens, 4)
         self.assertEqual(len(ngrams_out), 4)
         for i, _ in enumerate(NGRAMS):
             self.assertDictEqual(ngrams_out[i], NGRAMS[i])
 
+
 if __name__ == "__main__":
     unittest.main()