Commit 71ae995

Merge pull request #18 from tagucci/refactoring
Refactoring codes
2 parents 653f717 + fd6a31d commit 71ae995

12 files changed: +404 −307 lines

README.md

Lines changed: 32 additions & 30 deletions
@@ -1,7 +1,7 @@
 # pythonrouge
-This is the python script to use ROUGE, summarization evaluation toolkit.
+This is a python wrapper to use ROUGE, a summarization evaluation toolkit.
 
-In this implementation, you can evaluate various types of ROUGE metrics. You can evaluate your system summaries with reference summaries right now. It's not necessary to make an xml file as in the general ROUGE package. However, you can evaluate ROUGE scores in a standard way if you saved system summaries and reference summaries in specific directories. In document summarization research, recall or F-measure of ROUGE metrics is used in most cases. So you can choose only recall or F-measure of ROUGE evaluation result for convenience.
+In this implementation, you can evaluate various types of ROUGE metrics. You can evaluate your system summaries against reference summaries directly; it is not necessary to make an XML file as in the general ROUGE package. However, you can also evaluate ROUGE scores in the standard way if you have saved system and reference summaries in specific directories. In document summarization research, the recall or F-measure of ROUGE metrics is used in most cases, so you can choose either recall or F-measure (or both) of the ROUGE evaluation results for convenience.
 
 Any feedback or comments are welcome.
 
@@ -10,12 +10,13 @@ You can install pythonrouge in both ways
 
 ```
 # not using pip
+git clone https://github.com/tagucci/pythonrouge.git
 python setup.py install
 
 # using pip
 pip install git+https://github.com/tagucci/pythonrouge.git
 ```
-Then, you can use pythonrouge. If you don't have ROUGE package, I recommend you clone this repository to your local, and do "python setup.py install".
+Then, you can use pythonrouge.
 
 # Usage
 
@@ -24,28 +25,28 @@ The only things you need to evaluate ROUGE score is to specify the paths of ROUG
 ```
 from pythonrouge.pythonrouge import Pythonrouge
 
-ROUGE_path = sys.argv[1] #ROUGE-1.5.5.pl
-data_path = sys.argv[2] #data folder in RELEASE-1.5.5
-
-# initialize setting of ROUGE, eval ROUGE-1, 2, SU4, L
-rouge = Pythonrouge(n_gram=2, ROUGE_SU4=True, ROUGE_L=True, stemming=True, stopwords=True, word_level=True, length_limit=True, length=50, use_cf=False, cf=95, scoring_formula="average", resampling=True, samples=1000, favor=True, p=0.5)
-
 # system summary & reference summary
 summary = [[" Tokyo is the one of the biggest city in the world."]]
 reference = [[["The capital of Japan, Tokyo, is the center of Japanese economy."]]]
 
-# If you evaluate ROUGE by sentence list as above, set files=False
-setting_file = rouge.setting(files=False, summary=summary, reference=reference)
-
-# If you need only recall of ROUGE metrics, set recall_only=True
-result = rouge.eval_rouge(setting_file, recall_only=True, ROUGE_path=ROUGE_path, data_path=data_path)
-print(result)
+# initialize setting of ROUGE to eval ROUGE-1, 2 and SU4
+# if you evaluate ROUGE from sentence lists as above, set summary_file_exist=False
+# if recall_only=True, you get only the recall scores of ROUGE
+rouge = Pythonrouge(summary_file_exist=False,
+                    summary=summary, reference=reference,
+                    n_gram=2, ROUGE_SU4=True, ROUGE_L=False,
+                    recall_only=True, stemming=True, stopwords=True,
+                    word_level=True, length_limit=True, length=50,
+                    use_cf=False, cf=95, scoring_formula='average',
+                    resampling=True, samples=1000, favor=True, p=0.5)
+score = rouge.calc_score()
+print(score)
 ```
 
 The output will be as below; in this case, only the recall metrics of ROUGE are printed.
 
 ```
-{'ROUGE-1': 0.16667, 'ROUGE-2': 0.0, 'ROUGE-L': 0.16667, 'ROUGE-SU4': 0.05}
+{'ROUGE-1': 0.16667, 'ROUGE-2': 0.0, 'ROUGE-SU4': 0.05}
 ```
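For intuition about what these recall numbers measure: ROUGE-N recall is the fraction of reference n-grams that also occur in the system summary. The following is a minimal, hypothetical sketch in plain Python (whitespace tokenization only; the real ROUGE-1.5.5 script adds stemming, stopword removal, length limits and bootstrap resampling, so its scores will differ):

```python
from collections import Counter

def rouge_n_recall(peer, model, n=1):
    """Simplified ROUGE-N recall: clipped n-gram overlap / reference n-gram count."""
    def ngram_counts(text):
        tokens = text.lower().split()
        return Counter(tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1))

    peer_counts = ngram_counts(peer)
    model_counts = ngram_counts(model)
    # each reference n-gram is matched at most as often as it appears in the peer
    overlap = sum(min(count, peer_counts[gram])
                  for gram, count in model_counts.items())
    return overlap / sum(model_counts.values())

print(rouge_n_recall("the cat sat on the mat", "the cat lay on the mat"))  # 5 of 6 reference unigrams matched
```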
 
 You can also evaluate ROUGE scripts in a standard way.
@@ -72,23 +73,19 @@ After putting system/reference files as above, you can evaluate ROUGE metrics as
 ```
 from pythonrouge.pythonrouge import Pythonrouge
 
-ROUGE_path = sys.argv[1] #ROUGE-1.5.5.pl
-data_path = sys.argv[2] #data folder in RELEASE-1.5.5
-
-# initialize setting of ROUGE, eval ROUGE-1~4, SU4
-rouge = Pythonrouge(n_gram=4, ROUGE_SU4=True, ROUGE_L=True, stemming=True, stopwords=True, word_level=True, length_limit=True, length=50, use_cf=False, cf=95, scoring_formula="average", resampling=True, samples=1000, favor=True, p=0.5)
-
-# make a setting file, set files=True because you've already save files in specific directories
-setting_file = rouge.setting(files=True, summary_path=summary_dir, reference_path=reference_dir)
-
-# If you need only F-measure of ROUGE metrics, set f_measure_only=True
-result = rouge.eval_rouge(setting_file, ROUGE_path=ROUGE_path, data_path=data_path)
-print(result)
-> {ROUGE-1': 0.29836, 'ROUGE-2': 0.07059, 'ROUGE-3': 0.03896, ', 'ROUGE-4': 0.02899, 'ROUGE-SU4': 0.12444}
+# initialize setting of ROUGE, eval ROUGE-1, 2 and SU4
+# if summary_file_exist=True, specify the system summary (peer_path) and reference summary (model_path) directories
+rouge = Pythonrouge(summary_file_exist=True,
+                    peer_path=summary, model_path=reference,
+                    n_gram=2, ROUGE_SU4=True, ROUGE_L=False,
+                    recall_only=True,
+                    stemming=True, stopwords=True,
+                    word_level=True, length_limit=True, length=50,
+                    use_cf=False, cf=95, scoring_formula='average',
+                    resampling=True, samples=1000, favor=True, p=0.5)
 ```
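The exact directory layout referenced above is not shown in this hunk. As a rough, hypothetical sketch of the expected setup (the file names and the one-plain-text-file-per-document convention are assumptions; check the sample/ directory of this repository for the actual convention):

```shell
# hypothetical layout sketch: one plain-text summary per document,
# with matching names between the system and reference directories
mkdir -p sample/summary sample/reference
echo "Tokyo is the one of the biggest city in the world." > sample/summary/doc1.txt
echo "The capital of Japan, Tokyo, is the center of Japanese economy." > sample/reference/doc1.txt
```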
 
 
-
 # Error Handling
 If you encounter following error message when you use pythonrouge
 
@@ -103,3 +100,8 @@ cd pythonrouge/RELEASE-1.5.5/data/
 rm WordNet-2.0.exc.db
 ./WordNet-2.0-Exceptions/buildExeptionDB.pl ./WordNet-2.0-Exceptions ./smart_common_words.txt ./WordNet-2.0.exc.db
 ```
+
+# TODO
+
+- [ ] support non-alphabetic languages such as Japanese and Chinese
+- [ ] add automated testing

example.py

Lines changed: 64 additions & 46 deletions
@@ -1,55 +1,73 @@
 # -*- coding: utf-8 -*-
 from __future__ import print_function
-import sys
 from pythonrouge.pythonrouge import Pythonrouge
-
+from pprint import pprint
 
 if __name__ == '__main__':
-    ROUGE_path = "./pythonrouge/RELEASE-1.5.5/ROUGE-1.5.5.pl"
-    data_path = "./pythonrouge/RELEASE-1.5.5/data"
-    summary_dir = "./sample/summary/"
-    reference_dir = "./sample/reference/"
-    # setting rouge options
-    rouge = Pythonrouge(n_gram=2, ROUGE_SU4=True, ROUGE_L=True, stemming=True, stopwords=True, word_level=True, length_limit=True, length=50, use_cf=False, cf=95, scoring_formula="average", resampling=True, samples=1000, favor=True, p=0.5)
-    print("evaluate sumamry & reference in these dir\nsummary: {}\nreference: {}".format(summary_dir, reference_dir))
-    print("\nAll metric")
-    setting_file = rouge.setting(files=True, summary_path=summary_dir, reference_path=reference_dir)
-    print(rouge.eval_rouge(setting_file, ROUGE_path=ROUGE_path, data_path=data_path))
-    print("\nRecall Only and save setting.xml")
-    setting_file = rouge.setting(files=True, summary_path=summary_dir, reference_path=reference_dir, delete=False)
-    print(rouge.eval_rouge(setting_file, recall_only=True, ROUGE_path=ROUGE_path, data_path=data_path))
-    print("\nEvaluate ROUGE based on sentecnce lists")
-    summary = [["Great location, very good selection of food for breakfast buffet.",
+    summary = './sample/summary/'
+    reference = './sample/reference/'
+    ROUGE_dir = './pythonrouge/RELEASE-1.5.5/ROUGE-1.5.5.pl'
+    data_dir = './pythonrouge/RELEASE-1.5.5/data/'
+    print('evaluate summary & reference in these dirs')
+    print('summary:\t{}\nreference:\t{}'.format(summary, reference))
+    rouge = Pythonrouge(summary_file_exist=True,
+                        peer_path=summary, model_path=reference,
+                        n_gram=2, ROUGE_SU4=True, ROUGE_L=False,
+                        recall_only=True,
+                        stemming=True, stopwords=True,
+                        word_level=True, length_limit=True, length=50,
+                        use_cf=False, cf=95, scoring_formula='average',
+                        resampling=True, samples=1000, favor=True, p=0.5)
+    score = rouge.calc_score()
+    print('ROUGE-N(1-2) & SU4 recall only')
+    pprint(score)
+    print('Evaluate ROUGE based on sentence lists')
+    """
+    ROUGE evaluates all system summaries and their corresponding
+    reference summaries at once.
+    A system summary must be a double list: each inner list holds
+    the sentences of one summary.
+    A reference must be a triple list, because a document may have
+    multiple gold summaries.
+    """
+    summary = [["Great location, very good selection of food for "
+                "breakfast buffet.",
                 "Stunning food, amazing service.",
                 "The food is excellent and the service great."],
-               ["The keyboard, more than 90% standard size, is just large enough .",
+               ["The keyboard, more than 90% standard size, is just "
+                "large enough .",
                 "Surprisingly readable screen for the size .",
                 "Smaller size videos play even smoother ."]]
-    reference = [[["Food was excellent with a wide range of choices and good services.", "It was a bit expensive though."],
-                  ["Food can be a little bit overpriced, but is good for a hotel."],
-                  ["The food in the hotel was a little over priced but excellent in taste and choice.",
-                   "There were also many choices to eat in the near vicinity of the hotel."],
-                  ["The food is good, the service great.",
-                   "Very good selection of food for breakfast buffet."]
-                  ],
-                 [
-                  ["The size is great and allows for excellent portability.",
-                   "Makes it exceptionally easy to tote around, and the keyboard is fairly big considering the size of this netbook."],
-                  ["Size is small and manageable.",
-                   "Perfect size and weight.",
-                   "Great size for travel."],
-                  ["The keyboard is a decent size, a bit smaller then average but good.",
-                   "The laptop itself is small but big enough do do things on it."],
-                  ["In spite of being small it is still comfortable.",
-                   "The screen and keyboard are well sized for use"]
-                  ]
-                 ]
-    doc_id = 1
-    for s, r in zip(summary, reference):
-        print("sytem summary_{}: {}".format(doc_id, " ".join(s)))
-        for i, doc in enumerate(r):
-            print("reference summary_{}_{}: {}".format(doc_id, i+1, " ".join(doc)))
-        doc_id += 1
-    setting_file2 = rouge.setting(files=False, summary=summary, reference=reference)
-    print("\nF-measure Only")
-    print(rouge.eval_rouge(setting_file2, f_measure_only=True, ROUGE_path=ROUGE_path, data_path=data_path))
+    reference = [
+        [["Food was excellent with a wide range of choices and "
+          "good services.", "It was a bit expensive though."],
+         ["Food can be a little bit overpriced, but is good for "
+          "a hotel."],
+         ["The food in the hotel was a little over priced but "
+          "excellent in taste and choice.",
+          "There were also many choices to eat in the near "
+          "vicinity of the hotel."]],
+        [["The size is great and allows for excellent "
+          "portability.",
+          "Makes it exceptionally easy to tote around, and the "
+          "keyboard is fairly big considering the size of this "
+          "netbook."],
+         ["Size is small and manageable.",
+          "Perfect size and weight.",
+          "Great size for travel."],
+         ["The keyboard is a decent size, a bit smaller then "
+          "average but good.",
+          "The laptop itself is small but big enough do do "
+          "things on it."],
+         ["In spite of being small it is still comfortable.",
+          "The screen and keyboard are well sized for use"]]
+    ]
+    rouge = Pythonrouge(summary_file_exist=False,
+                        summary=summary, reference=reference,
+                        n_gram=2, ROUGE_SU4=True, ROUGE_L=False,
+                        recall_only=True, stemming=True, stopwords=True,
+                        word_level=True, length_limit=True, length=50,
+                        use_cf=False, cf=95, scoring_formula='average',
+                        resampling=True, samples=1000, favor=True, p=0.5)
+    score = rouge.calc_score()
+    print('ROUGE-N(1-2) & SU4 recall only')
+    pprint(score)
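As a side note, the double/triple nesting convention used above can be sanity-checked with plain Python before handing the lists to pythonrouge (a hypothetical shape check, independent of the library):

```python
# summary: documents -> sentences (double list)
summary = [["Tokyo is the one of the biggest city in the world."]]
# reference: documents -> gold summaries -> sentences (triple list)
reference = [[["The capital of Japan, Tokyo, is the center of Japanese economy."]]]

assert len(summary) == len(reference), "one entry per document on both sides"
for doc_sents, gold_summaries in zip(summary, reference):
    assert all(isinstance(sent, str) for sent in doc_sents)
    for gold in gold_summaries:  # a document may have several gold summaries
        assert all(isinstance(sent, str) for sent in gold)
print("nesting ok")
```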
