1
1
from typing import List, Dict
from typeguard import check_argument_types
import numpy as np
from neuralmonkey.evaluators.evaluator import Evaluator
4
5
5
6
# pylint: disable=invalid-name
@@ -25,7 +26,6 @@ def __init__(self,
25
26
super ().__init__ (name )
26
27
27
28
self .n = n
28
- self .max_ord = n
29
29
self .beta_2 = beta ** 2
30
30
31
31
self .ignored = [] # type: List[str]
@@ -37,11 +37,11 @@ def score_instance(self,
37
37
reference : List [str ]) -> float :
38
38
hyp_joined = " " .join (hypothesis )
39
39
hyp_chars = [x for x in list (hyp_joined ) if x not in self .ignored ]
40
- hyp_ngrams = self . _get_ngrams (hyp_chars , self .n )
40
+ hyp_ngrams = _get_ngrams (hyp_chars , self .n )
41
41
42
42
ref_joined = " " .join (reference )
43
43
ref_chars = [x for x in list (ref_joined ) if x not in self .ignored ]
44
- ref_ngrams = self . _get_ngrams (ref_chars , self .n )
44
+ ref_ngrams = _get_ngrams (ref_chars , self .n )
45
45
46
46
if not hyp_chars or not ref_chars :
47
47
if "" .join (hyp_chars ) == "" .join (ref_chars ):
@@ -58,48 +58,43 @@ def score_instance(self,
58
58
/ ((self .beta_2 * precision ) + recall ))
59
59
60
60
def chr_r(self, hyp_ngrams: NGramDicts, ref_ngrams: NGramDicts) -> float:
    """Compute character n-gram recall averaged over orders 1..``self.n``.

    For each order, every reference n-gram adds its count to the total,
    and adds the smaller of its reference and hypothesis counts to the
    matched total (clipped matching).

    Arguments:
        hyp_ngrams: Per-order n-gram count dictionaries of the hypothesis;
            index ``m - 1`` holds the counts for order ``m``.
        ref_ngrams: Per-order n-gram count dictionaries of the reference,
            indexed the same way.

    Returns:
        The mean over all orders of (matched / total) reference n-gram
        counts. An order with no reference n-grams contributes 1.0.
    """
    count_all = np.zeros(self.n)
    count_matched = np.zeros(self.n)

    for m in range(1, self.n + 1):
        hyp_counts = hyp_ngrams[m - 1]
        for ngr, ref_count in ref_ngrams[m - 1].items():
            count_all[m - 1] += ref_count
            if ngr in hyp_counts:
                count_matched[m - 1] += min(ref_count, hyp_counts[ngr])

    # Orders whose denominator is zero are skipped by `where` and keep the
    # 1.0 pre-filled in `out`, so no division by zero can occur.
    per_order = np.divide(
        count_matched, count_all, out=np.ones_like(count_all),
        where=(count_all != 0))
    # Convert the numpy scalar to a built-in float to match the annotation.
    return float(np.mean(per_order))
74
73
75
74
def chr_p(self, hyp_ngrams: NGramDicts, ref_ngrams: NGramDicts) -> float:
    """Compute character n-gram precision averaged over orders 1..``self.n``.

    For each order, every hypothesis n-gram adds its count to the total,
    and adds the smaller of its hypothesis and reference counts to the
    matched total (clipped matching).

    Arguments:
        hyp_ngrams: Per-order n-gram count dictionaries of the hypothesis;
            index ``m - 1`` holds the counts for order ``m``.
        ref_ngrams: Per-order n-gram count dictionaries of the reference,
            indexed the same way.

    Returns:
        The mean over all orders of (matched / total) hypothesis n-gram
        counts. An order with no hypothesis n-grams contributes 1.0.
    """
    count_all = np.zeros(self.n)
    count_matched = np.zeros(self.n)

    for m in range(1, self.n + 1):
        ref_counts = ref_ngrams[m - 1]
        for ngr, hyp_count in hyp_ngrams[m - 1].items():
            count_all[m - 1] += hyp_count
            if ngr in ref_counts:
                count_matched[m - 1] += min(hyp_count, ref_counts[ngr])

    # Orders whose denominator is zero are skipped by `where` and keep the
    # 1.0 pre-filled in `out`, so no division by zero can occur.
    per_order = np.divide(
        count_matched, count_all, out=np.ones_like(count_all),
        where=(count_all != 0))
    # Convert the numpy scalar to a built-in float to match the annotation.
    return float(np.mean(per_order))
87
+
88
+
89
def _get_ngrams(tokens: List[str], n: int) -> NGramDicts:
    """Count the n-grams of orders 1..``n`` in a token sequence.

    Arguments:
        tokens: The sequence of string tokens to extract n-grams from.
        n: The highest n-gram order to collect.

    Returns:
        A list of ``n`` dictionaries; the dictionary at index ``m - 1``
        maps each n-gram of order ``m`` (its tokens joined into one string)
        to its number of occurrences. Orders longer than the sequence
        yield empty dictionaries.
    """
    ngr_dicts = []
    for m in range(1, n + 1):
        ngr_dict = {}  # type: Dict[str, int]
        # `i` is the exclusive end index of the window; the upper bound
        # len(tokens) + 1 makes sure the final n-gram is included.
        for i in range(m, len(tokens) + 1):
            ngr = "".join(tokens[i - m:i])
            ngr_dict[ngr] = ngr_dict.get(ngr, 0) + 1
        ngr_dicts.append(ngr_dict)
    return ngr_dicts
103
98
104
99
105
100
# pylint: disable=invalid-name
0 commit comments