Commit b3bee86

Merge pull request #1084 from stanfordnlp/angel-coref
Angel's coref branch
2 parents 5b62b0d + be96b61 commit b3bee86

27 files changed (+2632, -787 lines)

src/edu/stanford/nlp/coref/CorefAlgorithm.java

Lines changed: 13 additions & 0 deletions
@@ -10,6 +10,7 @@
 import edu.stanford.nlp.coref.neural.NeuralCorefAlgorithm;
 import edu.stanford.nlp.coref.statistical.ClusteringCorefAlgorithm;
 import edu.stanford.nlp.coref.statistical.StatisticalCorefAlgorithm;
+import edu.stanford.nlp.util.PropertiesUtils;
 
 /**
  * A CorefAlgorithms make coreference decisions on the provided {@link Document} after
@@ -31,6 +32,18 @@ static CorefAlgorithm fromProps(Properties props, Dictionaries dictionaries) {
       return new NeuralCorefAlgorithm(props, dictionaries);
     } else if (algorithm == CorefAlgorithmType.FASTNEURAL) {
       return new FastNeuralCorefAlgorithm(props, dictionaries);
+    } else if (algorithm == CorefAlgorithmType.CUSTOM) {
+      String classname = PropertiesUtils.getString(props, "coref.algorithm.class", null);
+      try {
+        if (classname != null) {
+          Class clazz = Class.forName(classname);
+          return (CorefAlgorithm) clazz.getConstructor(Properties.class, Dictionaries.class).newInstance(props, dictionaries);
+        } else {
+          throw new RuntimeException("Please specify coref.algorithm.class");
+        }
+      } catch (Exception e) {
+        throw new RuntimeException("Error creating custom coref system", e);
+      }
     } else {
       try {
         return new HybridCorefSystem(props, dictionaries);

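Note on usage: the new CUSTOM branch lets a coreference algorithm be plugged in by fully-qualified class name. A minimal sketch of such a plug-in, assuming the CorefAlgorithm interface exposes a single runCoref(Document) method (the package and class name below are hypothetical):

// Hypothetical plug-in; fromProps looks up the (Properties, Dictionaries) constructor reflectively.
package com.example.coref;

import java.util.Properties;
import edu.stanford.nlp.coref.CorefAlgorithm;
import edu.stanford.nlp.coref.data.Dictionaries;
import edu.stanford.nlp.coref.data.Document;

public class NoOpCorefAlgorithm implements CorefAlgorithm {

  public NoOpCorefAlgorithm(Properties props, Dictionaries dictionaries) {
    // read any custom properties here
  }

  @Override
  public void runCoref(Document document) {
    // make no coreference decisions; document.corefClusters is left untouched
  }
}

Selecting it should then only require two properties: coref.algorithm = custom and coref.algorithm.class = com.example.coref.NoOpCorefAlgorithm.
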
src/edu/stanford/nlp/coref/CorefPrinter.java

Lines changed: 18 additions & 9 deletions
@@ -6,6 +6,7 @@
 import java.util.Set;
 import java.util.stream.Collectors;
 
+import edu.stanford.nlp.coref.data.CorefCluster;
 import edu.stanford.nlp.coref.data.Document;
 import edu.stanford.nlp.coref.data.Mention;
 import edu.stanford.nlp.ling.CoreAnnotations;
@@ -24,12 +25,18 @@ public static String printConllOutput(Document document, boolean gold) {
     return printConllOutput(document, gold, false);
   }
 
-  public static String printConllOutput(Document document, boolean gold, boolean filterSingletons) {
+  public static String printConllOutput(Document document, boolean gold, boolean filterSingletons)
+  {
+    return printConllOutput(document, gold, filterSingletons, document.corefClusters);
+  }
+
+  public static String printConllOutput(Document document, boolean gold, boolean filterSingletons,
+                                        Map<Integer, CorefCluster> corefClusters) {
     List<List<Mention>> orderedMentions = gold ? document.goldMentions : document.predictedMentions;
     if (filterSingletons) {
       orderedMentions = orderedMentions.stream().map(
-          ml -> ml.stream().filter(m -> document.corefClusters.get(m.corefClusterID) != null &&
-              document.corefClusters.get(m.corefClusterID).size() > 1)
+          ml -> ml.stream().filter(m -> corefClusters.get(m.corefClusterID) != null &&
+              corefClusters.get(m.corefClusterID).size() > 1)
          .collect(Collectors.toList()))
          .collect(Collectors.toList());
     }
@@ -39,14 +46,14 @@ public static String printConllOutput(Document document, boolean gold, boolean f
   public static String printConllOutput(Document document,
       List<List<Mention>> orderedMentions, boolean gold) {
     Annotation anno = document.annotation;
-    List<List<String[]>> conllDocSentences = document.conllDoc.sentenceWordLists;
+    List<List<String[]>> conllDocSentences = document.getSentenceWordLists();
     String docID = anno.get(CoreAnnotations.DocIDAnnotation.class);
     StringBuilder sb = new StringBuilder();
     sb.append("#begin document ").append(docID).append("\n");
     List<CoreMap> sentences = anno.get(CoreAnnotations.SentencesAnnotation.class);
     for(int sentNum = 0 ; sentNum < sentences.size() ; sentNum++){
       List<CoreLabel> sentence = sentences.get(sentNum).get(CoreAnnotations.TokensAnnotation.class);
-      List<String[]> conllSentence = conllDocSentences.get(sentNum);
+      List<String[]> conllSentence = (conllDocSentences != null)? conllDocSentences.get(sentNum) : null;
       Map<Integer,Set<Mention>> mentionBeginOnly = Generics.newHashMap();
       Map<Integer,Set<Mention>> mentionEndOnly = Generics.newHashMap();
       Map<Integer,Set<Mention>> mentionBeginEnd = Generics.newHashMap();
@@ -91,10 +98,12 @@ public static String printConllOutput(Document document,
       }
       if(sb2.length() == 0) sb2.append("-");
 
-      String[] columns = conllSentence.get(i);
-      for(int j = 0 ; j < columns.length-1 ; j++){
-        String column = columns[j];
-        sb.append(column).append("\t");
+      if (conllSentence != null) {
+        String[] columns = conllSentence.get(i);
+        for (int j = 0; j < columns.length - 1; j++) {
+          String column = columns[j];
+          sb.append(column).append("\t");
+        }
       }
       sb.append(sb2).append("\n");
     }

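Because printConllOutput can now take an explicit cluster map, output can be restricted to a subset of a document's clusters. A small sketch, assuming an already-annotated Document (the helper class below is hypothetical; field and method names are the ones used in the diff above):

import java.util.Map;
import java.util.stream.Collectors;

import edu.stanford.nlp.coref.CorefPrinter;
import edu.stanford.nlp.coref.data.CorefCluster;
import edu.stanford.nlp.coref.data.Document;

final class FilteredConllPrinter {
  /** CoNLL output limited to clusters with at least minSize mentions. */
  static String printLargeClusters(Document document, int minSize) {
    Map<Integer, CorefCluster> kept = document.corefClusters.entrySet().stream()
        .filter(e -> e.getValue().size() >= minSize)
        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
    return CorefPrinter.printConllOutput(document, false, true, kept);
  }
}
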
src/edu/stanford/nlp/coref/CorefProperties.java

Lines changed: 30 additions & 2 deletions
@@ -1,11 +1,23 @@
 package edu.stanford.nlp.coref;
 
+import java.util.List;
 import java.util.Locale;
 import java.util.Properties;
-
+import java.util.Set;
+import java.util.function.Predicate;
+
+import edu.stanford.nlp.coref.data.CorefChain;
+import edu.stanford.nlp.coref.data.CorefCluster;
+import edu.stanford.nlp.coref.data.Mention;
+import edu.stanford.nlp.ling.CoreAnnotations;
+import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.pipeline.Annotation;
 import edu.stanford.nlp.trees.HeadFinder;
 import edu.stanford.nlp.trees.SemanticHeadFinder;
 import edu.stanford.nlp.trees.international.pennchinese.ChineseSemanticHeadFinder;
+import edu.stanford.nlp.util.CollectionUtils;
+import edu.stanford.nlp.util.CoreMap;
+import edu.stanford.nlp.util.Pair;
 import edu.stanford.nlp.util.PropertiesUtils;
 
 /**
@@ -20,7 +32,7 @@ private CorefProperties() {} // static methods
 
   //---------- Coreference Algorithms ----------
 
-  public enum CorefAlgorithmType {CLUSTERING, STATISTICAL, NEURAL, FASTNEURAL, HYBRID}
+  public enum CorefAlgorithmType {CLUSTERING, STATISTICAL, NEURAL, FASTNEURAL, HYBRID, CUSTOM}
 
   public static CorefAlgorithmType algorithm(Properties props) {
     String type = PropertiesUtils.getString(props, "coref.algorithm",
@@ -56,6 +68,10 @@ public static boolean removeSingletonClusters(Properties props) {
     return PropertiesUtils.getBool(props, "coref.removeSingletonClusters", true);
   }
 
+  public static boolean removeXmlMentions(Properties props) {
+    return PropertiesUtils.getBool(props, "coref.removeXmlMentions", false);
+  }
+
   // ---------- Heuristic Mention Filtering ----------
 
   public static int maxMentionDistance(Properties props) {
@@ -183,4 +199,16 @@ public static HeadFinder getHeadFinder(Properties props) {
     }
   }
 
+  public static Predicate<Pair<CorefChain.CorefMention, List<CoreLabel>>> getCorefMentionFilter(Properties props) {
+    String filterCorefChain = props.getProperty("coref.evaluate.filter");
+    if (filterCorefChain != null) {
+      if ("filterCustomerAbstractPronouns".equals(filterCorefChain)) {
+        return CorefUtils.filterCustomerAbstractPronouns;
+      } else {
+        throw new RuntimeException("Cannot create coref.evaluate.filter " + filterCorefChain);
+      }
+    } else {
+      return null;
+    }
+  }
 }

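A short sketch of how the two new knobs might be exercised (CorefUtils.filterCustomerAbstractPronouns is presumably added to CorefUtils elsewhere in this changeset; the demo class itself is hypothetical):

import java.util.List;
import java.util.Properties;
import java.util.function.Predicate;

import edu.stanford.nlp.coref.CorefProperties;
import edu.stanford.nlp.coref.data.CorefChain;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.util.Pair;

public class CorefPropsDemo {
  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("coref.removeXmlMentions", "true");
    props.setProperty("coref.evaluate.filter", "filterCustomerAbstractPronouns");

    // true here; defaults to false when the property is absent
    System.out.println(CorefProperties.removeXmlMentions(props));

    // null when coref.evaluate.filter is unset; throws on an unrecognized filter name
    Predicate<Pair<CorefChain.CorefMention, List<CoreLabel>>> filter =
        CorefProperties.getCorefMentionFilter(props);
    System.out.println(filter != null);
  }
}
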
src/edu/stanford/nlp/coref/CorefRules.java

Lines changed: 67 additions & 2 deletions
@@ -103,6 +103,19 @@ public static boolean entityPersonDisagree(Document document, CorefCluster menti
     if(disagree) return true;
     else return false;
   }
+  public static boolean entityPersonCompatible(Document document, CorefCluster mentionCluster, CorefCluster potentialAntecedent, Dictionaries dict){
+    boolean disagree = false;
+    for(Mention m : mentionCluster.getCorefMentions()) {
+      for(Mention ant : potentialAntecedent.getCorefMentions()) {
+        if(!entityPersonCompatible(document, m, ant, dict)) {
+          disagree = true;
+          break;
+        }
+      }
+    }
+    if(disagree) return false;
+    else return true;
+  }
 
   private static final List<String> entityWordsToExclude =
       Arrays.asList(new String[]{ "the","this", "mr.", "miss", "mrs.", "dr.", "ms.", "inc.", "ltd.", "corp.", "'s"});
@@ -744,8 +757,12 @@ public static boolean entityPersonDisagree(Document document, Mention m, Mention
       if ((m.person == Person.IT && ant.person == Person.THEY)
          || (m.person == Person.THEY && ant.person == Person.IT) || (m.person == Person.THEY && ant.person == Person.THEY)) {
         return false;
-      } else if (m.person != Person.UNKNOWN && ant.person != Person.UNKNOWN)
+      } else if (m.person != Person.UNKNOWN && ant.person != Person.UNKNOWN) {
         return true;
+      } else if (((m.person == Person.I || m.person == Person.YOU) && (dict.determiners.contains(ant.spanToString()))) ||
+          ((ant.person == Person.I || ant.person == Person.YOU) && (dict.determiners.contains(m.spanToString())))) {
+        return true;
+      }
     }
     if(sameSpeaker) {
       if(!ant.isPronominal()) {
@@ -788,6 +805,42 @@ public static boolean entityPersonDisagree(Document document, Mention m, Mention
     return false;
   }
 
+  public static boolean entityPersonCompatible(Document document, Mention m, Mention ant, Dictionaries dict) {
+    // Returns if the entity person is compatible based on the speaker
+    boolean sameSpeaker = entitySameSpeaker(document, m, ant);
+
+    if (sameSpeaker && m.person!=ant.person) {
+      if ((m.person == Person.IT && ant.person == Person.THEY)
+         || (m.person == Person.THEY && ant.person == Person.IT) || (m.person == Person.THEY && ant.person == Person.THEY)) {
+        return true;
+      } else if (m.person != Person.UNKNOWN && ant.person != Person.UNKNOWN) {
+        return false;
+      } else if (((m.person == Person.I || m.person == Person.YOU) && (dict.determiners.contains(ant.spanToString().toLowerCase()))) ||
+          ((ant.person == Person.I || ant.person == Person.YOU) && (dict.determiners.contains(m.spanToString().toLowerCase())))) {
+        return false;
+      }
+    }
+    if(sameSpeaker) {
+      if(!ant.isPronominal()) {
+        if(m.person==Person.I || m.person==Person.WE || m.person==Person.YOU) return false;
+      } else if(!m.isPronominal()) {
+        if(ant.person==Person.I || ant.person==Person.WE || ant.person==Person.YOU) return false;
+      }
+    }
+    boolean differentSpeaker = entityDifferentSpeaker(document, m, ant);
+    if (differentSpeaker) {
+      if (ant.person == Person.I && m.person == Person.I) {
+        return false;
+      }
+      if (document.numberOfSpeakers() == 2) {
+        if (ant.person == Person.YOU && m.person == Person.YOU) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
   /** Do the mentions share the same speaker? */
   public static boolean entitySameSpeaker(Document document, Mention m, Mention ant) {
     String mSpeakerStr = m.headWord.get(CoreAnnotations.SpeakerAnnotation.class);
@@ -814,6 +867,19 @@ public static boolean entitySameSpeaker(Document document, Mention m, Mention an
     }
   }
 
+  public static boolean entityDifferentSpeaker(Document document, Mention m, Mention ant) {
+    String mSpeakerStr = m.headWord.get(CoreAnnotations.SpeakerAnnotation.class);
+    if (mSpeakerStr == null) {
+      return false;
+    }
+    String antSpeakerStr = ant.headWord.get(CoreAnnotations.SpeakerAnnotation.class);
+    if (antSpeakerStr == null) {
+      return false;
+    }
+
+    return !entitySameSpeaker(document, m, ant);
+  }
+
   /**
    * Given the name of a speaker, returns the coref cluster id it belongs to (-1 if no cluster)
    * @param document The document to search in
@@ -981,5 +1047,4 @@ private static boolean isContextOverlapping(Mention m1, Mention m2) {
     return Sets.intersects(context1, context2);
   }
 
-
 }

src/edu/stanford/nlp/coref/CorefScorer.java

Lines changed: 4 additions & 1 deletion
@@ -49,6 +49,7 @@ public static void printScoreSummary(String summary, Logger logger, boolean afte
       for(String line : lines) {
         if(line.startsWith("Identification of Mentions")) {
           Redwood.log(line);
+          logger.info(line);
           return;
         }
       }
@@ -61,6 +62,7 @@ public static void printScoreSummary(String summary, Logger logger, boolean afte
         }
       }
       Redwood.log(sb.toString());
+      logger.info(sb.toString());
     }
   }
 
@@ -76,10 +78,11 @@ public static double getFinalConllScore(String summary) {
     return finalScore;
   }
 
-  public static void printFinalConllScore(String summary) {
+  public static void printFinalConllScore(String summary, Logger logger) {
    double finalScore = getFinalConllScore(summary);
    Redwood.log(
        "Final conll score ((muc+bcub+ceafe)/3) = " + (new DecimalFormat("#.##")).format(finalScore));
+   logger.info("Final conll score ((muc+bcub+ceafe)/3) = " + (new DecimalFormat("#.##")).format(finalScore));
   }
 
   public static double getFinalConllScoreFromOutputDir(String corefOutputDir, String scorerPath) {

src/edu/stanford/nlp/coref/CorefSystem.java

Lines changed: 44 additions & 5 deletions
@@ -1,11 +1,16 @@
 package edu.stanford.nlp.coref;
 
+import java.io.File;
 import java.io.FileOutputStream;
+import java.io.IOException;
 import java.io.PrintWriter;
 import java.util.Calendar;
 import java.util.Map;
 import java.util.Properties;
+import java.util.logging.FileHandler;
+import java.util.logging.Level;
 import java.util.logging.Logger;
+import java.util.stream.Collectors;
 
 import edu.stanford.nlp.coref.data.CorefChain;
 import edu.stanford.nlp.coref.data.CorefCluster;
@@ -15,6 +20,7 @@
 import edu.stanford.nlp.pipeline.Annotation;
 import edu.stanford.nlp.util.Generics;
 import edu.stanford.nlp.util.StringUtils;
+import edu.stanford.nlp.util.logging.NewlineLogFormatter;
 import edu.stanford.nlp.util.logging.Redwood;
 
 /**
@@ -69,16 +75,36 @@ public void annotate(Annotation ann) {
     ann.set(CorefCoreAnnotations.CorefChainAnnotation.class, result);
   }
 
+  public void initLogger(Logger logger, String logFileName) {
+    try {
+      FileHandler fh = new FileHandler(logFileName, false);
+      logger.addHandler(fh);
+      logger.setLevel(Level.FINE);
+      fh.setFormatter(new NewlineLogFormatter());
+    } catch (SecurityException | IOException e) {
+      throw new RuntimeException("Cannot initialize logger!", e);
+    }
+  }
+
   public void runOnConll(Properties props) throws Exception {
-    String baseName = CorefProperties.conllOutputPath(props) +
-        Calendar.getInstance().getTime().toString().replaceAll("\\s", "-").replaceAll(":", "-");
+    File f = new File(CorefProperties.conllOutputPath(props));
+    if (! f.exists()) {
+      f.mkdirs();
+    }
+    String timestamp = Calendar.getInstance().getTime().toString().replaceAll("\\s", "-").replaceAll(":", "-");
+    String baseName = CorefProperties.conllOutputPath(props) + timestamp;
     String goldOutput = baseName + ".gold.txt";
     String beforeCorefOutput = baseName + ".predicted.txt";
     String afterCorefOutput = baseName + ".coref.predicted.txt";
     PrintWriter writerGold = new PrintWriter(new FileOutputStream(goldOutput));
     PrintWriter writerBeforeCoref = new PrintWriter(new FileOutputStream(beforeCorefOutput));
     PrintWriter writerAfterCoref = new PrintWriter(new FileOutputStream(afterCorefOutput));
 
+    Logger logger = Logger.getLogger(CorefSystem.class.getName());
+    initLogger(logger,baseName + ".log");
+    logger.info(timestamp);
+    logger.info(props.toString());
+
     (new CorefDocumentProcessor() {
       @Override
       public void process(int id, Document document) {
@@ -94,7 +120,14 @@ public void process(int id, Document document) {
         if (verbose) {
           CorefUtils.printHumanReadableCoref(document);
         }
-        writerAfterCoref.print(CorefPrinter.printConllOutput(document, false, true));
+        if (document.filterMentionSet != null) {
+          Map<Integer,CorefCluster> filteredClusters = document.corefClusters
+              .values().stream().filter(x -> CorefUtils.filterClustersWithMentionSpans(x, document.filterMentionSet) )
+              .collect(Collectors.toMap(x -> x.clusterID, x -> x));
+          writerAfterCoref.print(CorefPrinter.printConllOutput(document, false, true, filteredClusters));
+        } else {
+          writerAfterCoref.print(CorefPrinter.printConllOutput(document, false, true));
+        }
       }
 
       @Override
@@ -106,14 +139,20 @@ public String getName() {
       }
     }).run(docMaker);
 
-    Logger logger = Logger.getLogger(CorefSystem.class.getName());
     String summary = CorefScorer.getEvalSummary(CorefProperties.getScorerPath(props),
        goldOutput, beforeCorefOutput);
+
+    logger.info("Before Coref");
    CorefScorer.printScoreSummary(summary, logger, false);
+    CorefScorer.printScoreSummary(summary, logger, true);
+    CorefScorer.printFinalConllScore(summary, logger);
+
    summary = CorefScorer.getEvalSummary(CorefProperties.getScorerPath(props), goldOutput,
        afterCorefOutput);
+    logger.info("After Coref");
+    CorefScorer.printScoreSummary(summary, logger, false);
    CorefScorer.printScoreSummary(summary, logger, true);
-    CorefScorer.printFinalConllScore(summary);
+    CorefScorer.printFinalConllScore(summary, logger);
 
    writerGold.close();
    writerBeforeCoref.close();

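For context, a sketch of how the reworked runOnConll might be driven. The CorefSystem(Properties) constructor is not shown in this diff, and the property keys below are guesses from the method names (conllOutputPath, getScorerPath); check CorefProperties for the exact keys:

import java.util.Properties;

import edu.stanford.nlp.coref.CorefSystem;

public class RunConllEval {
  public static void main(String[] args) throws Exception {
    Properties props = new Properties();
    props.setProperty("coref.conllOutputPath", "/tmp/coref-out/");  // assumed key; output dir is now created if missing
    props.setProperty("coref.scorer", "/path/to/scorer.pl");        // assumed key for the CoNLL scorer script
    CorefSystem coref = new CorefSystem(props);  // assumes a Properties constructor
    // Writes gold/predicted CoNLL files under a timestamped base name and logs
    // the before/after score summaries to <baseName>.log as well as to Redwood.
    coref.runOnConll(props);
  }
}
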