Skip to content

Commit 01a61cc

Browse files
committed
Switch AddDep to use AnnotationLookup, which allows the attributes of the new node to be given with generic annotation keys. Still to do: raise an SsurgeonParseException for generic keys which can't be translated, and add a position to the AddDep.
1 parent 4d4056c commit 01a61cc

File tree

4 files changed

+241
-101
lines changed

4 files changed

+241
-101
lines changed

src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/AddDep.java

Lines changed: 52 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@
33
import java.io.StringWriter;
44
import java.util.*;
55

6+
import edu.stanford.nlp.ling.CoreLabel;
67
import edu.stanford.nlp.ling.IndexedWord;
8+
import edu.stanford.nlp.semgraph.SemanticGraph;
9+
import edu.stanford.nlp.semgraph.SemanticGraphUtils;
710
import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
811
import edu.stanford.nlp.trees.EnglishGrammaticalRelations;
912
import edu.stanford.nlp.trees.GrammaticalRelation;
10-
import edu.stanford.nlp.semgraph.SemanticGraph;
11-
import edu.stanford.nlp.semgraph.SemanticGraphUtils;
12-
import edu.stanford.nlp.util.Generics;
1313

1414
/**
1515
* Adds a new dependent node, based off of a prototype IndexedWord, with the given relation.
@@ -26,34 +26,34 @@
2626
*/
2727
public class AddDep extends SsurgeonEdit {
2828
public static final String LABEL = "addDep";
29-
IndexedWord newNodePrototype;
30-
GrammaticalRelation relation;
31-
String govNodeName;
32-
double weight;
29+
final Map<String, String> attributes;
30+
final GrammaticalRelation relation;
31+
final String govNodeName;
32+
final double weight;
3333

3434
/**
3535
* Creates an EnglishGrammaticalRelation AddDep edit.
3636
* @param newNode String representation of new dependent IndexedFeatureNode map.
3737
*/
38-
public static AddDep createEngAddDep(String govNodeName, String engRelation, String newNode) {
38+
public static AddDep createEngAddDep(String govNodeName, String engRelation, Map<String, String> attributes) {
3939
GrammaticalRelation relation = EnglishGrammaticalRelations.valueOf(engRelation);
40-
// IndexedWord newNodeObj = new IndexedWord(CoreLabel.fromAbstractMapLabel(IndexedFeatureLabel.valueOf(newNode, MapFactory.HASH_MAP_FACTORY)));
41-
IndexedWord newNodeObj = fromCheapString(newNode);
42-
return new AddDep(govNodeName, relation, newNodeObj);
40+
return new AddDep(govNodeName, relation, attributes);
4341
}
4442

45-
public AddDep(String govNodeName, GrammaticalRelation relation, IndexedWord newNodePrototype) {
46-
this.newNodePrototype = newNodePrototype;
43+
/**
 * Creates an AddDep edit whose new edge has a weight of 0.0.
 *
 * @param govNodeName name under which the governor of the new node is looked up in the matcher
 * @param relation relation used for the edge from the governor to the new node
 * @param attributes annotation key / value pairs from which the new node is built
 */
public AddDep(String govNodeName, GrammaticalRelation relation, Map<String, String> attributes) {
  this(govNodeName, relation, attributes, 0.0);
}

/**
 * Creates an AddDep edit.
 *
 * @param govNodeName name under which the governor of the new node is looked up in the matcher
 * @param relation relation used for the edge from the governor to the new node
 * @param attributes annotation key / value pairs from which the new node is built;
 *        a sorted copy is stored, so later changes to the argument do not affect this edit
 * @param weight weight given to the new edge
 */
public AddDep(String govNodeName, GrammaticalRelation relation, Map<String, String> attributes, double weight) {
  // Build (and discard) a label now: if the attributes cannot be turned into
  // a CoreLabel, we barf here rather than when the edit is evaluated.
  fromCheapStrings(attributes);

  this.attributes = new TreeMap<>(attributes);
  this.relation = relation;
  this.govNodeName = govNodeName;
  // Previously this was hard-coded to 0, which silently discarded the weight argument.
  this.weight = weight;
}
5156

52-
public AddDep(String govNodeName, GrammaticalRelation relation, IndexedWord newNodePrototype, double weight) {
53-
this(govNodeName, relation, newNodePrototype);
54-
this.weight = weight;
55-
}
56-
5757
/**
5858
* Emits a parseable instruction string.
5959
*/
@@ -67,9 +67,13 @@ public String toEditString() {
6767
buf.write(relation.toString()); buf.write("\t");
6868
buf.write(Ssurgeon.NODE_PROTO_ARG);buf.write(" ");
6969
buf.write("\"");
70-
// buf.write(newNodePrototype.toString("map")); buf.write("\"\t")
71-
buf.write(cheapWordToString(newNodePrototype));
72-
buf.write("\"\t");
70+
for (String key : attributes.keySet()) {
71+
buf.write("-");
72+
buf.write(key);
73+
buf.write(" ");
74+
buf.write(attributes.get(key));
75+
buf.write("\"\t");
76+
}
7377

7478
buf.write(Ssurgeon.WEIGHT_ARG);buf.write(" ");
7579
buf.write(String.valueOf(weight));
@@ -86,84 +90,43 @@ public String toEditString() {
8690
@Override
8791
public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
8892
IndexedWord govNode = sm.getNode(govNodeName);
89-
IndexedWord newNode = new IndexedWord(newNodePrototype);
90-
int newIndex = SemanticGraphUtils.leftMostChildVertice(govNode, sg).index(); // cheap En-specific hack for placing copula (beginning of governing phrase)
93+
// must make new copy of CoreLabel - if the same word is added
94+
// multiple times by the same operation, we don't want to have the
95+
// same backing CoreLabel in each instance
96+
CoreLabel newWord = fromCheapStrings(attributes);
97+
IndexedWord newNode = new IndexedWord(newWord);
98+
int newIndex = 0;
99+
for (IndexedWord node : sg.vertexSet()) {
100+
if (node.index() >= newIndex) {
101+
newIndex = node.index() + 1;
102+
}
103+
}
91104
newNode.setDocID(govNode.docID());
92105
newNode.setIndex(newIndex);
93106
newNode.setSentIndex(govNode.sentIndex());
94107
sg.addVertex(newNode);
95-
sg.addEdge(govNode, newNode, relation, weight,false);
108+
sg.addEdge(govNode, newNode, relation, weight, false);
96109
return true;
97110
}
98111

99-
public static final String WORD_KEY = "word";
100-
public static final String LEMMA_KEY = "lemma";
101-
public static final String VALUE_KEY = "value";
102-
public static final String CURRENT_KEY = "current";
103-
public static final String POS_KEY = "POS";
104-
public static final String TUPLE_DELIMITER="=";
105-
public static final String ATOM_DELIMITER = " ";
106-
107-
// Simple mapping of all the stuff we care about (until IndexedFeatureLabel --> CoreLabel map pain is fixed)
108112
/**
109-
* This converts the node into a simple string based representation.
110-
* NOTE: this is extremely brittle, and presumes values do not contain delimiters
113+
* Given the keys and values of the CoreAnnotation attributes,
114+
* build a CoreLabel to use as the new word
111115
*/
112-
public static String cheapWordToString(IndexedWord node) {
113-
StringWriter buf = new StringWriter();
114-
buf.write("{");
115-
buf.write(WORD_KEY);
116-
buf.write(TUPLE_DELIMITER);
117-
buf.write(nullShield(node.word()));
118-
buf.write(ATOM_DELIMITER);
119-
120-
buf.write(LEMMA_KEY);
121-
buf.write(TUPLE_DELIMITER);
122-
buf.write(nullShield(node.lemma()));
123-
buf.write(ATOM_DELIMITER);
124-
125-
buf.write(POS_KEY);
126-
buf.write(TUPLE_DELIMITER);
127-
buf.write(nullShield(node.tag()));
128-
buf.write(ATOM_DELIMITER);
129-
130-
buf.write(VALUE_KEY);
131-
buf.write(TUPLE_DELIMITER);
132-
buf.write(nullShield(node.value()));
133-
buf.write(ATOM_DELIMITER);
134-
135-
buf.write(CURRENT_KEY);
136-
buf.write(TUPLE_DELIMITER);
137-
buf.write(nullShield(node.originalText()));
138-
buf.write("}");
139-
return buf.toString();
140-
}
141-
142-
/**
143-
* Given the node arg string, converts it into an IndexedWord.
144-
*/
145-
public static IndexedWord fromCheapString(String rawArg) {
146-
String arg = rawArg.substring(1, rawArg.length()-1);
147-
String[] tuples=arg.split(ATOM_DELIMITER);
148-
Map<String,String> args = Generics.newHashMap();
149-
for (String tuple : tuples) {
150-
String[] vals = tuple.split(TUPLE_DELIMITER);
151-
String key = vals[0];
152-
String value = "";
153-
if (vals.length == 2)
154-
value = vals[1];
155-
args.put(key, value);
116+
public static CoreLabel fromCheapStrings(Map<String, String> attributes) {
117+
String[] keys = new String[attributes.size()];
118+
String[] values = new String[attributes.size()];
119+
int idx = 0;
120+
for (String key : attributes.keySet()) {
121+
String value = attributes.get(key);
122+
keys[idx] = key;
123+
values[idx] = value;
124+
++idx;
125+
}
126+
CoreLabel newWord = new CoreLabel(keys, values);
127+
if (newWord.value() == null && newWord.word() != null) {
128+
newWord.setValue(newWord.word());
156129
}
157-
IndexedWord newWord = new IndexedWord();
158-
newWord.setWord(args.get(WORD_KEY));
159-
newWord.setLemma(args.get(LEMMA_KEY));
160-
newWord.setTag(args.get(POS_KEY));
161-
newWord.setValue(args.get(VALUE_KEY));
162-
newWord.setOriginalText(args.get(CURRENT_KEY));
163130
return newWord;
164131
}
165-
166-
public static String nullShield(String str) {
167-
return str == null ? "" : str;
168-
}
169132
}

src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/AddNode.java

Lines changed: 73 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
package edu.stanford.nlp.semgraph.semgrex.ssurgeon;
22

33
import java.io.*;
4+
import java.util.Map;
45

56
import edu.stanford.nlp.ling.IndexedWord;
67
import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
78
import edu.stanford.nlp.semgraph.SemanticGraph;
9+
import edu.stanford.nlp.util.Generics;
810

911
public class AddNode extends SsurgeonEdit {
1012
public static final String LABEL="addNode";
@@ -21,7 +23,7 @@ public static AddNode createAddNode(String nodeString, String nodeName) {
2123
}
2224

2325
public static AddNode createAddNode(IndexedWord node, String nodeName) {
24-
String nodeString = AddDep.cheapWordToString(node);
26+
String nodeString = cheapWordToString(node);
2527
return new AddNode(nodeString, nodeName);
2628
}
2729

@@ -33,7 +35,7 @@ public static AddNode createAddNode(IndexedWord node, String nodeName) {
3335
// This one is actually used in its current form in RTE
3436
@Override
3537
public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
36-
IndexedWord newNode = AddDep.fromCheapString(nodeString);
38+
IndexedWord newNode = fromCheapString(nodeString);
3739
sg.addVertex(newNode);
3840
addNamedNode(newNode, nodeName);
3941
return true;
@@ -53,4 +55,73 @@ public String toEditString() {
5355
return buf.toString();
5456
}
5557

58+
public static final String WORD_KEY = "word";
59+
public static final String LEMMA_KEY = "lemma";
60+
public static final String VALUE_KEY = "value";
61+
public static final String CURRENT_KEY = "current";
62+
public static final String POS_KEY = "POS";
63+
public static final String TUPLE_DELIMITER="=";
64+
public static final String ATOM_DELIMITER = " ";
65+
66+
/**
67+
* This converts the node into a simple string based representation.
68+
* NOTE: this is extremely brittle, and presumes values do not contain delimiters
69+
*/
70+
public static String cheapWordToString(IndexedWord node) {
71+
StringWriter buf = new StringWriter();
72+
buf.write("{");
73+
buf.write(WORD_KEY);
74+
buf.write(TUPLE_DELIMITER);
75+
buf.write(nullShield(node.word()));
76+
buf.write(ATOM_DELIMITER);
77+
78+
buf.write(LEMMA_KEY);
79+
buf.write(TUPLE_DELIMITER);
80+
buf.write(nullShield(node.lemma()));
81+
buf.write(ATOM_DELIMITER);
82+
83+
buf.write(POS_KEY);
84+
buf.write(TUPLE_DELIMITER);
85+
buf.write(nullShield(node.tag()));
86+
buf.write(ATOM_DELIMITER);
87+
88+
buf.write(VALUE_KEY);
89+
buf.write(TUPLE_DELIMITER);
90+
buf.write(nullShield(node.value()));
91+
buf.write(ATOM_DELIMITER);
92+
93+
buf.write(CURRENT_KEY);
94+
buf.write(TUPLE_DELIMITER);
95+
buf.write(nullShield(node.originalText()));
96+
buf.write("}");
97+
return buf.toString();
98+
}
99+
100+
public static String nullShield(String str) {
101+
return str == null ? "" : str;
102+
}
103+
104+
/**
105+
* Given the node arg string, converts it into an IndexedWord.
106+
*/
107+
public static IndexedWord fromCheapString(String rawArg) {
108+
String arg = rawArg.substring(1, rawArg.length()-1);
109+
String[] tuples=arg.split(ATOM_DELIMITER);
110+
Map<String,String> args = Generics.newHashMap();
111+
for (String tuple : tuples) {
112+
String[] vals = tuple.split(TUPLE_DELIMITER);
113+
String key = vals[0];
114+
String value = "";
115+
if (vals.length == 2)
116+
value = vals[1];
117+
args.put(key, value);
118+
}
119+
IndexedWord newWord = new IndexedWord();
120+
newWord.setWord(args.get(WORD_KEY));
121+
newWord.setLemma(args.get(LEMMA_KEY));
122+
newWord.setTag(args.get(POS_KEY));
123+
newWord.setValue(args.get(VALUE_KEY));
124+
newWord.setOriginalText(args.get(CURRENT_KEY));
125+
return newWord;
126+
}
56127
}

src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/Ssurgeon.java

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,13 @@
2424
import org.w3c.dom.Node;
2525
import org.w3c.dom.NodeList;
2626

27-
import edu.stanford.nlp.trees.GrammaticalRelation;
27+
import edu.stanford.nlp.ling.AnnotationLookup;
28+
import edu.stanford.nlp.ling.CoreAnnotation;
2829
import edu.stanford.nlp.semgraph.SemanticGraph;
2930
import edu.stanford.nlp.semgraph.SemanticGraphUtils;
3031
import edu.stanford.nlp.semgraph.semgrex.ssurgeon.pred.*;
3132
import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern;
33+
import edu.stanford.nlp.trees.GrammaticalRelation;
3234
import edu.stanford.nlp.util.Generics;
3335
import edu.stanford.nlp.util.XMLUtils;
3436
import edu.stanford.nlp.util.logging.Redwood;
@@ -268,6 +270,8 @@ protected static class SsurgeonArgs {
268270
public double weight = 0.0;
269271

270272
public String name = null;
273+
274+
public Map<String, String> annotations = new TreeMap<>();
271275
}
272276

273277
/**
@@ -333,7 +337,13 @@ private static SsurgeonArgs parseArgsBox(String args) {
333337
argIndex += 1;
334338
break;
335339
default:
336-
throw new SsurgeonParseException("Parsing Ssurgeon args: unknown flag " + argsArray[argIndex]);
340+
String key = argsArray[argIndex].substring(1);
341+
Class<? extends CoreAnnotation<?>> annotation = AnnotationLookup.toCoreKey(key);
342+
if (annotation == null) {
343+
throw new SsurgeonParseException("Parsing Ssurgeon args: unknown flag " + argsArray[argIndex]);
344+
}
345+
argsBox.annotations.put(key, argsArray[argIndex + 1]);
346+
argIndex += 1;
337347
}
338348
}
339349
return argsBox;
@@ -365,7 +375,13 @@ public static SsurgeonEdit parseEditLine(String editLine) {
365375
// mappings should also be stored in more appropriate data structure.
366376
SsurgeonEdit retEdit;
367377
if (command.equalsIgnoreCase(AddDep.LABEL)) {
368-
retEdit = AddDep.createEngAddDep(argsBox.govNodeName, argsBox.reln, argsBox.nodeString);
378+
if (argsBox.govNodeName == null) {
379+
throw new SsurgeonParseException("No governor given for an AddDep edit: " + editLine);
380+
}
381+
if (argsBox.reln == null) {
382+
throw new SsurgeonParseException("No relation given for an AddDep edit: " + editLine);
383+
}
384+
retEdit = AddDep.createEngAddDep(argsBox.govNodeName, argsBox.reln, argsBox.annotations);
369385
} else if (command.equalsIgnoreCase(AddNode.LABEL)) {
370386
retEdit = AddNode.createAddNode(argsBox.nodeString, argsBox.name);
371387
} else if (command.equalsIgnoreCase(AddEdge.LABEL)) {

0 commit comments

Comments
 (0)