1
1
from typing import List , Dict
2
2
from typeguard import check_argument_types
3
+ import numpy as np
3
4
from neuralmonkey .evaluators .evaluator import Evaluator
4
5
5
6
# pylint: disable=invalid-name
@@ -25,7 +26,6 @@ def __init__(self,
25
26
super ().__init__ (name )
26
27
27
28
self .n = n
28
- self .max_ord = n
29
29
self .beta_2 = beta ** 2
30
30
31
31
self .ignored = [] # type: List[str]
@@ -58,44 +58,39 @@ def score_instance(self,
58
58
/ ((self .beta_2 * precision ) + recall ))
59
59
60
60
def chr_r(self, hyp_ngrams: NGramDicts, ref_ngrams: NGramDicts) -> float:
    """Compute the n-gram recall averaged over orders 1..``self.n``.

    For every order, the recall is the clipped number of reference
    n-grams that also occur in the hypothesis, divided by the total
    number of reference n-grams of that order.

    Arguments:
        hyp_ngrams: Per-order n-gram counts of the hypothesis; the item
            at index ``m - 1`` maps n-grams of order ``m`` to counts.
        ref_ngrams: Per-order n-gram counts of the reference, laid out
            the same way.

    Returns:
        The arithmetic mean of the per-order recalls. An order for which
        the reference contains no n-grams contributes 1.0 to the mean.
    """
    count_all = np.zeros(self.n)
    count_matched = np.zeros(self.n)

    for order in range(self.n):
        hyp_counts = hyp_ngrams[order]
        for ngr, ref_count in ref_ngrams[order].items():
            count_all[order] += ref_count
            if ngr in hyp_counts:
                # Clip the match so that an n-gram repeated in the
                # hypothesis is not credited more often than it occurs
                # in the reference.
                count_matched[order] += min(ref_count, hyp_counts[ngr])

    # ``where`` skips the division for orders with a zero denominator;
    # those positions keep the 1.0 pre-filled through ``out``.
    per_order = np.divide(
        count_matched, count_all, out=np.ones_like(count_all),
        where=(count_all != 0))
    # Convert the NumPy scalar so the annotated return type holds.
    return float(np.mean(per_order))
74
73
75
74
def chr_p(self, hyp_ngrams: NGramDicts, ref_ngrams: NGramDicts) -> float:
    """Compute the n-gram precision averaged over orders 1..``self.n``.

    For every order, the precision is the clipped number of hypothesis
    n-grams that also occur in the reference, divided by the total
    number of hypothesis n-grams of that order.

    Arguments:
        hyp_ngrams: Per-order n-gram counts of the hypothesis; the item
            at index ``m - 1`` maps n-grams of order ``m`` to counts.
        ref_ngrams: Per-order n-gram counts of the reference, laid out
            the same way.

    Returns:
        The arithmetic mean of the per-order precisions. An order for
        which the hypothesis contains no n-grams contributes 1.0 to the
        mean.
    """
    count_all = np.zeros(self.n)
    count_matched = np.zeros(self.n)

    for order in range(self.n):
        ref_counts = ref_ngrams[order]
        for ngr, hyp_count in hyp_ngrams[order].items():
            count_all[order] += hyp_count
            if ngr in ref_counts:
                # Clip the match so that an n-gram repeated in the
                # hypothesis is not credited more often than it occurs
                # in the reference.
                count_matched[order] += min(hyp_count, ref_counts[ngr])

    # ``where`` skips the division for orders with a zero denominator;
    # those positions keep the 1.0 pre-filled through ``out``.
    per_order = np.divide(
        count_matched, count_all, out=np.ones_like(count_all),
        where=(count_all != 0))
    # Convert the NumPy scalar so the annotated return type holds.
    return float(np.mean(per_order))
90
87
91
88
def _get_ngrams (self , tokens : List [str ], n : int ) -> NGramDicts :
92
- if len (tokens ) < n :
93
- self .max_ord = len (tokens )
94
-
95
89
ngr_dicts = []
96
90
for m in range (1 , n + 1 ):
97
91
ngr_dict = {} # type: Dict[str, int]
98
- for i in range (m , len (tokens )):
92
+ # if m > len(tokens), return an empty dict
93
+ for i in range (m , len (tokens ) + 1 ):
99
94
ngr = "" .join (tokens [i - m :i ])
100
95
ngr_dict [ngr ] = ngr_dict .setdefault (ngr , 0 ) + 1
101
96
ngr_dicts .append (ngr_dict )
0 commit comments