@@ -91,7 +91,8 @@ def __init__(self, summary_file_exist=True, summary=None, reference=None,
         self.summary_file_exist = summary_file_exist
         self.delete_xml = delete_xml
         self.xml_dir = xml_dir
-        # evaluation parameter
+        # evaluation parameters - you can check the details of the options
+        # below in the ROUGE directory pythonrouge/RELEASE-1.5.5/README.txt
         self.n_gram = n_gram
         self.ROUGE_SU4 = ROUGE_SU4
         self.ROUGE_L = ROUGE_L
@@ -112,15 +113,24 @@ def __init__(self, summary_file_exist=True, summary=None, reference=None,
         # evaluation outputs
         self.recall_only = recall_only
         self.f_measure_only = f_measure_only
+        # check that the system and reference summary lists are the same size
         if not summary_file_exist and len(self.summary) != len(self.reference):
             raise ValueError('sizes of summary and reference are different.')
+
+        # check output ROUGE score types
         if self.recall_only and self.f_measure_only:
             raise ValueError("set only one of recall_only and f_measure_only "
                              "to True, or set both to False")
+
+        # check n-gram of ROUGE
         if self.n_gram == 0:
             raise ValueError('n-gram should not be less than 1.')
+
+        # check the length limit value
         if self.length_limit and self.length == 0:
             raise ValueError('Length limit should not be less than 1.')
+
+        # check scoring formula: best/average
         if self.scoring_formula != 'best' and self.scoring_formula != 'average':
             raise ValueError('Choose scoring formula "average" or "best"')

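For orientation, the nested loops in make_xml below imply the input shapes this class expects when summary_file_exist is False: summary is a list of documents, each a list of sentence strings, and reference additionally nests multiple human references per document. A minimal sketch with illustrative data:

    # summary[i] is the system summary for document i, as a list of sentences
    summary = [['This is a system summary.', 'It has two sentences.']]
    # reference[j][k] is the k-th human reference summary for document j
    reference = [[['This is one reference summary.'],
                  ['This is a second reference summary.']]]
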
@@ -129,28 +139,37 @@ def make_xml(self):
             temp_dir = mkdtemp()
         else:
             temp_dir = mkdtemp(dir=self.xml_dir)
-        # save input lists in temp_dir
+
+        # save summaries in temp_dir
         if not self.summary_file_exist:
             self.peer_path = os.path.join(temp_dir, 'system')
             self.model_path = os.path.join(temp_dir, 'reference')
             os.mkdir(self.peer_path)
             os.mkdir(self.model_path)
+
+            # save system summaries in temp_dir
             for i, doc in enumerate(self.summary):
                 path = os.path.join(self.peer_path, '{}.txt'.format(i))
                 with open(path, 'w') as f:
                     for sent in doc:
                         f.write('{}\n'.format(sent))
+
+            # save reference summaries in temp_dir
             for j, ref in enumerate(self.reference):
                 for k, doc in enumerate(ref):
                     path = os.path.join(self.model_path,
                                         '{}_{}.txt'.format(j, k))
                     with open(path, 'w') as f:
                         for sent in doc:
                             f.write('{}\n'.format(sent))
+
+        # set xml setting file path
         if self.delete_xml:
             xml_path = os.path.join(temp_dir, 'setting.xml')
         else:
             xml_path = 'setting.xml'
+
+        # write the system/reference summary paths into the setting xml
         xml = open('{}'.format(xml_path), 'w')
         xml.write('<ROUGE-EVAL version="1.0">\n')
         for n, peer in enumerate(glob('{}/*'.format(self.peer_path))):
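Only the opening <ROUGE-EVAL> tag is visible in this hunk. For context, a ROUGE-1.5.5 setting file for one system summary and two references ends up looking roughly like the sketch below; the temp paths and IDs are illustrative, and the exact schema is documented in RELEASE-1.5.5/README.txt:

    <ROUGE-EVAL version="1.0">
    <EVAL ID="0">
    <PEER-ROOT>/tmp/tmpXXXX/system</PEER-ROOT>
    <MODEL-ROOT>/tmp/tmpXXXX/reference</MODEL-ROOT>
    <INPUT-FORMAT TYPE="SPL"></INPUT-FORMAT>
    <PEERS>
    <P ID="1">0.txt</P>
    </PEERS>
    <MODELS>
    <M ID="A">0_0.txt</M>
    <M ID="B">0_1.txt</M>
    </MODELS>
    </EVAL>
    </ROUGE-EVAL>
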
@@ -178,89 +197,130 @@ def set_command(self):
         self.make_xml()
         rouge_cmd = ['perl', self.ROUGE_path, '-e', self.data_path, '-a']
         rouge_cmd += '-n {}'.format(self.n_gram).split()
+        # ROUGE-SU4
         if self.ROUGE_SU4:
             rouge_cmd += '-2 4 -u'.split()
+
+        # ROUGE-L
         if not self.ROUGE_L:
             rouge_cmd.append('-x')
+
+        # ROUGE-W
         if self.ROUGE_W:
             rouge_cmd.append('-w')
             rouge_cmd.append(str(self.W_Weight))
+
+        # set length limit
         if self.length_limit:
+            # word-level length limit
             if self.word_level:
                 rouge_cmd += '-l {}'.format(self.length).split()
+
+            # byte-level length limit
             else:
                 rouge_cmd += '-b {}'.format(self.length).split()
+
+        # stemming
         if self.stemming:
             rouge_cmd.append('-m')
+
+        # stopwords
         if self.stopwords:
             rouge_cmd.append('-s')
+
+        # confidence interval
         if self.use_cf:
             rouge_cmd += '-c {}'.format(self.cf).split()
+
+        # scoring based on average scores
         if self.scoring_formula == 'average':
             rouge_cmd += '-f A'.split()
+
+        # scoring based on best scores
         elif self.scoring_formula == 'best':
             rouge_cmd += '-f B'.split()
+
+        # the number of sampling points in bootstrap resampling
         if self.resampling:
             rouge_cmd += '-r {}'.format(self.samples).split()
+
+        # relative importance of recall and precision in the ROUGE scores
         if self.favor:
             rouge_cmd += '-p {}'.format(self.p).split()
+
         rouge_cmd.append(self.setting_file)
         return rouge_cmd
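As a concrete example of what set_command returns: assuming n_gram=2, ROUGE_SU4=True, ROUGE_L=True, stemming and stopword removal enabled, a 95% confidence interval, average scoring, 1000 resampling points, and p=0.5 (all values illustrative), the assembled command would be roughly:

    ['perl', self.ROUGE_path, '-e', self.data_path, '-a', '-n', '2',
     '-2', '4', '-u', '-m', '-s', '-c', '95', '-f', 'A',
     '-r', '1000', '-p', '0.5', self.setting_file]
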

     def parse_output(self, lines):
         result = dict()
         n = 1
         for l in lines:
+            # find ROUGE-N
             r_match = findall('A ROUGE-{} Average_R: ([0-9.]+)'.format(n), l)
             f_match = findall('A ROUGE-{} Average_F: ([0-9.]+)'.format(n), l)
+
             # ROUGE-N recall
             if self.recall_only and r_match:
                 result['ROUGE-{}'.format(n)] = float(r_match[0])
             elif r_match and not self.f_measure_only:
                 result['ROUGE-{}-R'.format(n)] = float(r_match[0])
+
             # ROUGE-N F-measure
             if self.f_measure_only and f_match:
                 result['ROUGE-{}'.format(n)] = float(f_match[0])
             elif f_match and not self.recall_only:
                 result['ROUGE-{}-F'.format(n)] = float(f_match[0])
+
             # count up ROUGE-N
             if f_match:
                 n += 1
+
+            # find ROUGE-SU4
             su_r_match = findall('A ROUGE-SU4 Average_R: ([0-9.]+)', l)
             su_f_match = findall('A ROUGE-SU4 Average_F: ([0-9.]+)', l)
+
             # ROUGE-SU4 Recall
             if self.recall_only and su_r_match:
                 result['ROUGE-SU4'] = float(su_r_match[0])
             elif su_r_match and not self.f_measure_only:
                 result['ROUGE-SU4-R'] = float(su_r_match[0])
+
             # ROUGE-SU4 F-measure
             if self.f_measure_only and su_f_match:
                 result['ROUGE-SU4'] = float(su_f_match[0])
             elif su_f_match and not self.recall_only:
                 result['ROUGE-SU4-F'] = float(su_f_match[0])
+
+            # find ROUGE-L
             l_r_match = findall('A ROUGE-L Average_R: ([0-9.]+)', l)
             l_f_match = findall('A ROUGE-L Average_F: ([0-9.]+)', l)
+
             # ROUGE-L Recall
             if self.recall_only and l_r_match:
                 result['ROUGE-L'] = float(l_r_match[0])
             elif l_r_match and not self.f_measure_only:
                 result['ROUGE-L-R'] = float(l_r_match[0])
+
             # ROUGE-L F-measure
             if self.f_measure_only and l_f_match:
                 result['ROUGE-L'] = float(l_f_match[0])
             elif l_f_match and not self.recall_only:
                 result['ROUGE-L-F'] = float(l_f_match[0])
+
+            # find ROUGE-W
             w_r_match = findall(
                 'A ROUGE-W-{} Average_R: ([0-9.]+)'.format(self.W_Weight), l)
             w_f_match = findall(
                 'A ROUGE-W-{} Average_F: ([0-9.]+)'.format(self.W_Weight), l)
+
             # ROUGE-W recall
             if self.recall_only and w_r_match:
                 result['ROUGE-W-{}'.format(self.W_Weight)
                        ] = float(w_r_match[0])
             elif w_r_match and not self.f_measure_only:
                 result['ROUGE-W-{}-R'.format(self.W_Weight)
                        ] = float(w_r_match[0])
+
             # ROUGE-W F-measure
             if self.f_measure_only and w_f_match:
                 result['ROUGE-W-{}'.format(self.W_Weight)
                        ] = float(w_f_match[0])
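For a sense of what parse_output consumes, the ROUGE-1.5.5 script prints result lines of the form matched by the findall patterns above. With recall_only and f_measure_only both False, two illustrative output lines such as

    A ROUGE-1 Average_R: 0.50000 (95%-conf.int. 0.47000 - 0.53000)
    A ROUGE-1 Average_F: 0.48000 (95%-conf.int. 0.45000 - 0.51000)

would be parsed into {'ROUGE-1-R': 0.5, 'ROUGE-1-F': 0.48} (scores illustrative).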