Switch AddDep to use AnnotationLookup to allow for generic keys. TODO: generic keys which can't be translated still need to be reported as an SsurgeonParseException, and AddDep will also need a position added to it.

AngledLuffa · AngledLuffa · commit 01a61cc933e1 · 2023-03-06T23:05:18.000-08:00
diff --git a/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/AddDep.java b/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/AddDep.java
@@ -3,13 +3,13 @@
 import java.io.StringWriter;
 import java.util.*;
 
+import edu.stanford.nlp.ling.CoreLabel;
 import edu.stanford.nlp.ling.IndexedWord;
+import edu.stanford.nlp.semgraph.SemanticGraph;
+import edu.stanford.nlp.semgraph.SemanticGraphUtils;
 import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
 import edu.stanford.nlp.trees.EnglishGrammaticalRelations;
 import edu.stanford.nlp.trees.GrammaticalRelation;
-import edu.stanford.nlp.semgraph.SemanticGraph;
-import edu.stanford.nlp.semgraph.SemanticGraphUtils;
-import edu.stanford.nlp.util.Generics;
 
 /**
  * Adds a new dependent node, based off of a prototype IndexedWord, with the given relation.
@@ -26,34 +26,34 @@
  */
 public class AddDep extends SsurgeonEdit {
   public static final String LABEL = "addDep";
-  IndexedWord newNodePrototype;
-  GrammaticalRelation relation;
-  String govNodeName;
-  double weight;
+  final Map<String, String> attributes;
+  final GrammaticalRelation relation;
+  final String govNodeName;
+  final double weight;
 
   /**
    * Creates an EnglishGrammaticalRelation AddDep edit.
    * @param newNode String representation of new dependent IndexedFeatureNode map.
    */
-  public static AddDep createEngAddDep(String govNodeName, String engRelation,  String newNode) {
+  public static AddDep createEngAddDep(String govNodeName, String engRelation,  Map<String, String> attributes) {
     GrammaticalRelation relation = EnglishGrammaticalRelations.valueOf(engRelation);
-//  IndexedWord newNodeObj = new IndexedWord(CoreLabel.fromAbstractMapLabel(IndexedFeatureLabel.valueOf(newNode, MapFactory.HASH_MAP_FACTORY)));
-    IndexedWord newNodeObj = fromCheapString(newNode);
-    return new AddDep(govNodeName, relation, newNodeObj);
+    return new AddDep(govNodeName, relation, attributes);
   }
 
-  public AddDep(String govNodeName, GrammaticalRelation relation, IndexedWord newNodePrototype) {
-    this.newNodePrototype = newNodePrototype;
+  public AddDep(String govNodeName, GrammaticalRelation relation, Map<String, String> attributes) {
+    this(govNodeName, relation, attributes, 0.0);
+  }
+
+  public AddDep(String govNodeName, GrammaticalRelation relation, Map<String, String> attributes, double weight) {
+    // Validate eagerly: build a throwaway CoreLabel so a bad attribute fails here rather than in evaluate()
+    fromCheapStrings(attributes);
+
+    this.attributes = new TreeMap<>(attributes);
     this.relation = relation;
     this.govNodeName = govNodeName;
-    this.weight = 0;
+    this.weight = weight;
   }
 
-  public AddDep(String govNodeName, GrammaticalRelation relation, IndexedWord newNodePrototype, double weight) {
-    this(govNodeName, relation, newNodePrototype);
-    this.weight = weight;
-  }
-
   /**
    * Emits a parseable instruction string.
    */
@@ -67,9 +67,13 @@ public String toEditString() {
     buf.write(relation.toString()); buf.write("\t");
     buf.write(Ssurgeon.NODE_PROTO_ARG);buf.write(" ");
     buf.write("\"");
-//  buf.write(newNodePrototype.toString("map")); buf.write("\"\t")
-    buf.write(cheapWordToString(newNodePrototype));
-    buf.write("\"\t");
+    for (String key : attributes.keySet()) {
+      buf.write("-");
+      buf.write(key);
+      buf.write(" ");
+      buf.write(attributes.get(key));
+      buf.write("\"\t");
+    }
 
     buf.write(Ssurgeon.WEIGHT_ARG);buf.write(" ");
     buf.write(String.valueOf(weight));
@@ -86,84 +90,43 @@ public String toEditString() {
   @Override
   public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
     IndexedWord govNode = sm.getNode(govNodeName);
-    IndexedWord newNode = new IndexedWord(newNodePrototype);
-    int newIndex = SemanticGraphUtils.leftMostChildVertice(govNode, sg).index(); // cheap En-specific hack for placing copula (beginning of governing phrase)
+    // must make new copy of CoreLabel - if the same word is added
+    // multiple times by the same operation, we don't want to have the
+    // same backing CoreLabel in each instance
+    CoreLabel newWord = fromCheapStrings(attributes);
+    IndexedWord newNode = new IndexedWord(newWord);
+    int newIndex = 0;
+    for (IndexedWord node : sg.vertexSet()) {
+      if (node.index() >= newIndex) {
+        newIndex = node.index() + 1;
+      }
+    }
     newNode.setDocID(govNode.docID());
     newNode.setIndex(newIndex);
     newNode.setSentIndex(govNode.sentIndex());
     sg.addVertex(newNode);
-    sg.addEdge(govNode, newNode, relation, weight,false);
+    sg.addEdge(govNode, newNode, relation, weight, false);
     return true;
   }
 
-  public static final String WORD_KEY = "word";
-  public static final String LEMMA_KEY = "lemma";
-  public static final String VALUE_KEY = "value";
-  public static final String CURRENT_KEY = "current";
-  public static final String POS_KEY = "POS";
-  public static final String TUPLE_DELIMITER="=";
-  public static final String ATOM_DELIMITER = " ";
-
-  // Simple mapping of all the stuff we care about (until IndexedFeatureLabel --> CoreLabel map pain is fixed)
   /**
-   * This converts the node into a simple string based representation.
-   * NOTE: this is extremely brittle, and presumes values do not contain delimiters
+   * Given the keys and values of the CoreAnnotation attributes,
+   * build a CoreLabel to use as the new word
    */
-  public static String cheapWordToString(IndexedWord node) {
-    StringWriter buf = new StringWriter();
-    buf.write("{");
-    buf.write(WORD_KEY);
-    buf.write(TUPLE_DELIMITER);
-    buf.write(nullShield(node.word()));
-    buf.write(ATOM_DELIMITER);
-
-    buf.write(LEMMA_KEY);
-    buf.write(TUPLE_DELIMITER);
-    buf.write(nullShield(node.lemma()));
-    buf.write(ATOM_DELIMITER);
-
-    buf.write(POS_KEY);
-    buf.write(TUPLE_DELIMITER);
-    buf.write(nullShield(node.tag()));
-    buf.write(ATOM_DELIMITER);
-
-    buf.write(VALUE_KEY);
-    buf.write(TUPLE_DELIMITER);
-    buf.write(nullShield(node.value()));
-    buf.write(ATOM_DELIMITER);
-
-    buf.write(CURRENT_KEY);
-    buf.write(TUPLE_DELIMITER);
-    buf.write(nullShield(node.originalText()));
-    buf.write("}");
-    return buf.toString();
-  }
-
-  /**
-   * Given the node arg string, converts it into an IndexedWord.
-   */
-  public static IndexedWord fromCheapString(String rawArg) {
-    String arg = rawArg.substring(1, rawArg.length()-1);
-    String[] tuples=arg.split(ATOM_DELIMITER);
-    Map<String,String> args = Generics.newHashMap();
-    for (String tuple : tuples) {
-      String[] vals = tuple.split(TUPLE_DELIMITER);
-      String key = vals[0];
-      String value = "";
-      if (vals.length == 2)
-        value = vals[1];
-      args.put(key, value);
+  public static CoreLabel fromCheapStrings(Map<String, String> attributes) {
+    String[] keys = new String[attributes.size()];
+    String[] values = new String[attributes.size()];
+    int idx = 0;
+    for (String key : attributes.keySet()) {
+      String value = attributes.get(key);
+      keys[idx] = key;
+      values[idx] = value;
+      ++idx;
+    }
+    CoreLabel newWord = new CoreLabel(keys, values);
+    if (newWord.value() == null && newWord.word() != null) {
+      newWord.setValue(newWord.word());
     }
-    IndexedWord newWord = new IndexedWord();
-    newWord.setWord(args.get(WORD_KEY));
-    newWord.setLemma(args.get(LEMMA_KEY));
-    newWord.setTag(args.get(POS_KEY));
-    newWord.setValue(args.get(VALUE_KEY));
-    newWord.setOriginalText(args.get(CURRENT_KEY));
     return newWord;
   }
-
-  public static String nullShield(String str) {
-    return str == null ? "" : str;
-  }
 }
diff --git a/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/AddNode.java b/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/AddNode.java
@@ -1,10 +1,12 @@
 package edu.stanford.nlp.semgraph.semgrex.ssurgeon;
 
 import java.io.*;
+import java.util.Map;
 
 import edu.stanford.nlp.ling.IndexedWord;
 import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
 import edu.stanford.nlp.semgraph.SemanticGraph;
+import edu.stanford.nlp.util.Generics;
 
 public class AddNode extends SsurgeonEdit {
   public static final String LABEL="addNode";
@@ -21,7 +23,7 @@ public static AddNode createAddNode(String nodeString, String nodeName) {
   }
   
   public static AddNode createAddNode(IndexedWord node, String nodeName) {
-    String nodeString = AddDep.cheapWordToString(node);
+    String nodeString = cheapWordToString(node);
     return new AddNode(nodeString, nodeName);
   }
 
@@ -33,7 +35,7 @@ public static AddNode createAddNode(IndexedWord node, String nodeName) {
   // This one is actually used in its current form in RTE
   @Override
   public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
-    IndexedWord newNode = AddDep.fromCheapString(nodeString);
+    IndexedWord newNode = fromCheapString(nodeString);
     sg.addVertex(newNode);
     addNamedNode(newNode, nodeName);
     return true;
@@ -53,4 +55,73 @@ public String toEditString() {
     return buf.toString();
   }
 
+  public static final String WORD_KEY = "word";
+  public static final String LEMMA_KEY = "lemma";
+  public static final String VALUE_KEY = "value";
+  public static final String CURRENT_KEY = "current";
+  public static final String POS_KEY = "POS";
+  public static final String TUPLE_DELIMITER="=";
+  public static final String ATOM_DELIMITER = " ";
+
+  /**
+   * This converts the node into a simple string based representation.
+   * NOTE: this is extremely brittle, and presumes values do not contain delimiters
+   */
+  public static String cheapWordToString(IndexedWord node) {
+    StringWriter buf = new StringWriter();
+    buf.write("{");
+    buf.write(WORD_KEY);
+    buf.write(TUPLE_DELIMITER);
+    buf.write(nullShield(node.word()));
+    buf.write(ATOM_DELIMITER);
+
+    buf.write(LEMMA_KEY);
+    buf.write(TUPLE_DELIMITER);
+    buf.write(nullShield(node.lemma()));
+    buf.write(ATOM_DELIMITER);
+
+    buf.write(POS_KEY);
+    buf.write(TUPLE_DELIMITER);
+    buf.write(nullShield(node.tag()));
+    buf.write(ATOM_DELIMITER);
+
+    buf.write(VALUE_KEY);
+    buf.write(TUPLE_DELIMITER);
+    buf.write(nullShield(node.value()));
+    buf.write(ATOM_DELIMITER);
+
+    buf.write(CURRENT_KEY);
+    buf.write(TUPLE_DELIMITER);
+    buf.write(nullShield(node.originalText()));
+    buf.write("}");
+    return buf.toString();
+  }
+
+  public static String nullShield(String str) {
+    return str == null ? "" : str;
+  }
+
+  /**
+   * Given the node arg string, converts it into an IndexedWord.
+   */
+  public static IndexedWord fromCheapString(String rawArg) {
+    String arg = rawArg.substring(1, rawArg.length()-1);
+    String[] tuples=arg.split(ATOM_DELIMITER);
+    Map<String,String> args = Generics.newHashMap();
+    for (String tuple : tuples) {
+      String[] vals = tuple.split(TUPLE_DELIMITER);
+      String key = vals[0];
+      String value = "";
+      if (vals.length == 2)
+        value = vals[1];
+      args.put(key, value);
+    }
+    IndexedWord newWord = new IndexedWord();
+    newWord.setWord(args.get(WORD_KEY));
+    newWord.setLemma(args.get(LEMMA_KEY));
+    newWord.setTag(args.get(POS_KEY));
+    newWord.setValue(args.get(VALUE_KEY));
+    newWord.setOriginalText(args.get(CURRENT_KEY));
+    return newWord;
+  }
 }
diff --git a/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/Ssurgeon.java b/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/Ssurgeon.java
@@ -24,11 +24,13 @@
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 
-import edu.stanford.nlp.trees.GrammaticalRelation;
+import edu.stanford.nlp.ling.AnnotationLookup;
+import edu.stanford.nlp.ling.CoreAnnotation;
 import edu.stanford.nlp.semgraph.SemanticGraph;
 import edu.stanford.nlp.semgraph.SemanticGraphUtils;
 import edu.stanford.nlp.semgraph.semgrex.ssurgeon.pred.*;
 import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern;
+import edu.stanford.nlp.trees.GrammaticalRelation;
 import edu.stanford.nlp.util.Generics;
 import edu.stanford.nlp.util.XMLUtils;
 import edu.stanford.nlp.util.logging.Redwood;
@@ -268,6 +270,8 @@ protected static class SsurgeonArgs {
     public double weight = 0.0;
 
     public String name = null;
+
+    public Map<String, String> annotations = new TreeMap<>();
   }
 
   /**
@@ -333,7 +337,13 @@ private static SsurgeonArgs parseArgsBox(String args) {
           argIndex += 1;
           break;
         default:
-          throw new SsurgeonParseException("Parsing Ssurgeon args: unknown flag " + argsArray[argIndex]);
+          String key = argsArray[argIndex].substring(1);
+          Class<? extends CoreAnnotation<?>> annotation = AnnotationLookup.toCoreKey(key);
+          if (annotation == null) {
+            throw new SsurgeonParseException("Parsing Ssurgeon args: unknown flag " + argsArray[argIndex]);
+          }
+          argsBox.annotations.put(key, argsArray[argIndex + 1]);
+          argIndex += 1;
       }
     }
     return argsBox;
@@ -365,7 +375,13 @@ public static SsurgeonEdit parseEditLine(String editLine) {
     // mappings should also be stored in more appropriate data structure.
     SsurgeonEdit retEdit;
     if (command.equalsIgnoreCase(AddDep.LABEL)) {
-      retEdit = AddDep.createEngAddDep(argsBox.govNodeName, argsBox.reln, argsBox.nodeString);
+      if (argsBox.govNodeName == null) {
+        throw new SsurgeonParseException("No governor given for an AddDep edit: " + editLine);
+      }
+      if (argsBox.reln == null) {
+        throw new SsurgeonParseException("No relation given for an AddDep edit: " + editLine);
+      }
+      retEdit = AddDep.createEngAddDep(argsBox.govNodeName, argsBox.reln, argsBox.annotations);
     } else if (command.equalsIgnoreCase(AddNode.LABEL)) {
       retEdit = AddNode.createAddNode(argsBox.nodeString, argsBox.name);
     } else if (command.equalsIgnoreCase(AddEdge.LABEL)) {
diff --git a/test/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/SsurgeonTest.java b/test/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/SsurgeonTest.java