Skip to content

Commit 9140384

Browse files
committed
Add a -position argument for AddDep. Currently works only for - (start of sentence) and + (end of sentence, also the default). Need to process -word and +word as well. Will also want to update the SemgrexMatcher so that potentially further operations are possible.
1 parent 15e6186 commit 9140384

File tree

3 files changed

+144
-21
lines changed

3 files changed

+144
-21
lines changed

src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/AddDep.java

Lines changed: 91 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@
22

33
import java.io.StringWriter;
44
import java.util.*;
5+
import java.util.function.Function;
6+
import java.util.stream.Collectors;
57

68
import edu.stanford.nlp.ling.CoreLabel;
79
import edu.stanford.nlp.ling.IndexedWord;
810
import edu.stanford.nlp.semgraph.SemanticGraph;
11+
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
912
import edu.stanford.nlp.semgraph.SemanticGraphUtils;
1013
import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
1114
import edu.stanford.nlp.trees.EnglishGrammaticalRelations;
@@ -16,11 +19,6 @@
1619
* The new node's sentence index is inherited from the governing node. Currently a cheap heuristic
1720
* is made, placing the new node as the leftmost child of the governing node.
1821
*
19-
* TODO: add position (a la Tregex)
20-
* TODO: determine consistent and intuitive arguments
21-
* TODO: because word position is important for certain features (such as bigram lexical overlap), need
22-
* ability to specify in which position the new node is inserted.
23-
*
2422
* @author Eric Yeh
2523
*
2624
*/
@@ -29,32 +27,40 @@ public class AddDep extends SsurgeonEdit {
2927
final Map<String, String> attributes;
3028
final GrammaticalRelation relation;
3129
final String govNodeName;
30+
final String position;
3231
final double weight;
3332

3433
/**
3534
* Creates an EnglishGrammaticalRelation AddDep edit.
3635
* @param newNode String representation of new dependent IndexedFeatureNode map.
3736
*/
38-
public static AddDep createEngAddDep(String govNodeName, String engRelation, Map<String, String> attributes) {
37+
public static AddDep createEngAddDep(String govNodeName, String engRelation, Map<String, String> attributes, String position) {
3938
GrammaticalRelation relation = EnglishGrammaticalRelations.valueOf(engRelation);
40-
return new AddDep(govNodeName, relation, attributes);
39+
return new AddDep(govNodeName, relation, attributes, position);
4140
}
4241

43-
public AddDep(String govNodeName, GrammaticalRelation relation, Map<String, String> attributes) {
44-
this(govNodeName, relation, attributes, 0.0);
42+
public AddDep(String govNodeName, GrammaticalRelation relation, Map<String, String> attributes, String position) {
43+
this(govNodeName, relation, attributes, position, 0.0);
4544
}
4645

47-
public AddDep(String govNodeName, GrammaticalRelation relation, Map<String, String> attributes, double weight) {
46+
public AddDep(String govNodeName, GrammaticalRelation relation, Map<String, String> attributes, String position, double weight) {
4847
// if there's an exception, we'll barf here rather than at runtime
4948
try {
5049
CoreLabel newNodeObj = fromCheapStrings(attributes);
5150
} catch (UnsupportedOperationException e) {
5251
throw new SsurgeonParseException("Unable to process keys for AddDep operation", e);
5352
}
5453

54+
if (position != null) {
55+
if (!position.equals("-") && !position.equals("+")) {
56+
throw new SsurgeonParseException("Unknown position " + position + " in AddDep operation");
57+
}
58+
}
59+
5560
this.attributes = new TreeMap<>(attributes);
5661
this.relation = relation;
5762
this.govNodeName = govNodeName;
63+
this.position = position;
5864
this.weight = 0;
5965
}
6066

@@ -70,6 +76,11 @@ public String toEditString() {
7076
buf.write(Ssurgeon.RELN_ARG);buf.write(" ");
7177
buf.write(relation.toString()); buf.write("\t");
7278

79+
if (position != null) {
80+
buf.write(Ssurgeon.POSITION_ARG);buf.write(" ");
81+
buf.write(position);buf.write("\t");
82+
}
83+
7384
for (String key : attributes.keySet()) {
7485
buf.write("-");
7586
buf.write(key);
@@ -83,12 +94,53 @@ public String toEditString() {
8394
return buf.toString();
8495
}
8596

97+
// TODO: update the SemgrexMatcher
98+
// currently the Ssurgeon will not be able to proceed after this edit
99+
// since all of the node and edge pointers will be rewritten
100+
public static void moveNode(SemanticGraph sg, IndexedWord word, int newIndex) {
101+
List<SemanticGraphEdge> outgoing = sg.outgoingEdgeList(word);
102+
List<SemanticGraphEdge> incoming = sg.incomingEdgeList(word);
103+
boolean isRoot = sg.isRoot(word);
104+
sg.removeVertex(word);
105+
106+
IndexedWord newWord = new IndexedWord(word.backingLabel());
107+
newWord.setIndex(newIndex);
108+
109+
// could be more expensive than necessary if we move multiple roots,
110+
// but the expectation is there is usually only the 1 root
111+
if (isRoot) {
112+
Set<IndexedWord> newRoots = new HashSet<>(sg.getRoots());
113+
newRoots.remove(word);
114+
newRoots.add(newWord);
115+
sg.setRoots(newRoots);
116+
}
117+
118+
for (SemanticGraphEdge oldEdge : outgoing) {
119+
SemanticGraphEdge newEdge = new SemanticGraphEdge(newWord, oldEdge.getTarget(), oldEdge.getRelation(), oldEdge.getWeight(), oldEdge.isExtra());
120+
sg.addEdge(newEdge);
121+
}
122+
123+
for (SemanticGraphEdge oldEdge : incoming) {
124+
SemanticGraphEdge newEdge = new SemanticGraphEdge(oldEdge.getSource(), newWord, oldEdge.getRelation(), oldEdge.getWeight(), oldEdge.isExtra());
125+
sg.addEdge(newEdge);
126+
}
127+
}
128+
129+
public static void moveNodes(SemanticGraph sg, Function<Integer, Boolean> shouldMove, Function<Integer, Integer> destination) {
130+
// iterate first, then move, so that we don't screw up the graph while iterating
131+
List<IndexedWord> toMove = sg.vertexSet().stream().filter(x -> shouldMove.apply(x.index())).collect(Collectors.toList());
132+
Collections.sort(toMove);
133+
Collections.reverse(toMove);
134+
for (IndexedWord word : toMove) {
135+
moveNode(sg, word, destination.apply(word.index()));
136+
}
137+
}
138+
86139
/**
87140
* TODO: figure out how to specify where in the sentence this node goes.
88-
* TODO: determine if we should be copying an IndexedWord, or working just with a FeatureLabel.
141+
* currently allows - and + for start and end. before and after
142+
* matched node would be good
89143
* TODO: bombproof if this gov, dep, and reln already exist.
90-
* TODO: This is not used anywhere, even in the old RTE code, so we can redo it however we want.
91-
* Perhaps it could reorder the indices of the new nodes, for example
92144
*/
93145
@Override
94146
public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
@@ -98,17 +150,37 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
98150
// same backing CoreLabel in each instance
99151
CoreLabel newWord = fromCheapStrings(attributes);
100152
IndexedWord newNode = new IndexedWord(newWord);
101-
int newIndex = 0;
102-
for (IndexedWord node : sg.vertexSet()) {
103-
if (node.index() >= newIndex) {
104-
newIndex = node.index() + 1;
105-
}
153+
final int tempIndex;
154+
if (position != null && !position.equals("+")) {
155+
// +2 to leave room: we will increase all other nodes with the
156+
// proper index, so we need +1 of room, then another +1 for
157+
// a temp place to put this node
158+
// TODO: when we implement updating the SemgrexMatcher,
159+
// this won't be necessary
160+
tempIndex = SemanticGraphUtils.maxIndex(sg) + 2;
161+
} else {
162+
tempIndex = SemanticGraphUtils.maxIndex(sg) + 1;
106163
}
107164
newNode.setDocID(govNode.docID());
108-
newNode.setIndex(newIndex);
165+
newNode.setIndex(tempIndex);
109166
newNode.setSentIndex(govNode.sentIndex());
167+
110168
sg.addVertex(newNode);
111169
sg.addEdge(govNode, newNode, relation, weight, false);
170+
171+
if (position != null && !position.equals("+")) {
172+
final int newIndex;
173+
if (position.equals("-")) {
174+
newIndex = SemanticGraphUtils.minIndex(sg);
175+
} else {
176+
throw new UnsupportedOperationException("Unknown position in AddDep: |" + position + "|");
177+
}
178+
// the payoff for tempIndex == maxIndex + 2:
179+
// everything will be moved one higher, unless it's the new node
180+
moveNodes(sg, x -> (x >= newIndex && x != tempIndex), x -> x+1);
181+
moveNode(sg, newNode, newIndex);
182+
}
183+
112184
return true;
113185
}
114186

src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/Ssurgeon.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,7 @@ public Collection<SsurgeonWordlist> getResources() {
248248
public static final String NODE_PROTO_ARG = "-nodearg";
249249
public static final String WEIGHT_ARG = "-weight";
250250
public static final String NAME_ARG = "-name";
251+
public static final String POSITION_ARG = "-position";
251252

252253

253254
// args for Ssurgeon edits, allowing us to not
@@ -271,6 +272,8 @@ protected static class SsurgeonArgs {
271272

272273
public String name = null;
273274

275+
public String position = null;
276+
274277
public Map<String, String> annotations = new TreeMap<>();
275278
}
276279

@@ -336,6 +339,10 @@ private static SsurgeonArgs parseArgsBox(String args) {
336339
argsBox.name = argsArray[argIndex + 1];
337340
argIndex += 1;
338341
break;
342+
case POSITION_ARG:
343+
argsBox.position = argsArray[argIndex + 1];
344+
argIndex += 1;
345+
break;
339346
default:
340347
String key = argsArray[argIndex].substring(1);
341348
Class<? extends CoreAnnotation<?>> annotation = AnnotationLookup.toCoreKey(key);
@@ -381,7 +388,7 @@ public static SsurgeonEdit parseEditLine(String editLine) {
381388
if (argsBox.reln == null) {
382389
throw new SsurgeonParseException("No relation given for an AddDep edit: " + editLine);
383390
}
384-
retEdit = AddDep.createEngAddDep(argsBox.govNodeName, argsBox.reln, argsBox.annotations);
391+
retEdit = AddDep.createEngAddDep(argsBox.govNodeName, argsBox.reln, argsBox.annotations, argsBox.position);
385392
} else if (command.equalsIgnoreCase(AddNode.LABEL)) {
386393
retEdit = AddNode.createAddNode(argsBox.nodeString, argsBox.name);
387394
} else if (command.equalsIgnoreCase(AddEdge.LABEL)) {

test/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/SsurgeonTest.java

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -616,6 +616,8 @@ public void readXMLAddDep() {
616616
blueVertex = newSG.getNodeByIndexSafe(4);
617617
assertNotNull(blueVertex);
618618
assertNull(blueVertex.tag());
619+
// the 4th word should be "blue" if position was not set
620+
assertEquals("blue", blueVertex.value());
619621

620622
// This time, we expect that there will be a tag
621623
add = String.join(newline,
@@ -640,9 +642,47 @@ public void readXMLAddDep() {
640642
blueVertex = newSG.getNodeByIndexSafe(4);
641643
assertNotNull(blueVertex);
642644
assertEquals("JJ", blueVertex.tag());
645+
// the 4th word should be "blue" if position was not set
646+
assertEquals("blue", blueVertex.value());
643647
}
644648

645649

650+
/**
651+
* Check that adding a word to the start of a sentence works as expected
652+
*/
653+
@Test
654+
public void readXMLAddDepStartPosition() {
655+
Ssurgeon inst = Ssurgeon.inst();
656+
657+
// use "dep" as the dependency so as to be language-agnostic in this test
658+
String add = String.join(newline,
659+
"<ssurgeon-pattern-list>",
660+
" <ssurgeon-pattern>",
661+
" <uid>38</uid>",
662+
" <notes>Remove all incoming edges for a node</notes>",
663+
// have to bomb-proof the pattern
664+
" <semgrex>" + XMLUtils.escapeXML("{word:antennae}=antennae !> {word:blue}") + "</semgrex>",
665+
" <edit-list>addDep -gov antennae -reln dep -word blue -position -</edit-list>",
666+
" </ssurgeon-pattern>",
667+
"</ssurgeon-pattern-list>");
668+
List<SsurgeonPattern> patterns = inst.readFromString(add);
669+
assertEquals(patterns.size(), 1);
670+
SsurgeonPattern addSsurgeon = patterns.get(0);
671+
672+
SemanticGraph sg = SemanticGraph.valueOf("[has-2 nsubj> Jennifer-1 obj> antennae-3]");
673+
IndexedWord blueVertex = sg.getNodeByIndexSafe(4);
674+
assertNull(blueVertex);
675+
SemanticGraph newSG = addSsurgeon.iterate(sg);
676+
SemanticGraph expected = SemanticGraph.valueOf("[has-3 nsubj> Jennifer-2 obj> [antennae-4 dep> blue-1]]");
677+
assertEquals(expected, newSG);
678+
// the Ssurgeon we just created should not put a tag on the word
679+
// but it SHOULD put blue at the start of the sentence
680+
blueVertex = newSG.getNodeByIndexSafe(1);
681+
assertNotNull(blueVertex);
682+
assertNull(blueVertex.tag());
683+
assertEquals("blue", blueVertex.value());
684+
}
685+
646686
/**
647687
* There should be an exception for an annotation key that does not exist
648688
*/
@@ -671,6 +711,10 @@ public void readXMLAddDepBrokenAnnotation() {
671711
}
672712
}
673713

714+
/**
715+
* Test that types which can't be converted from String
716+
* are detected when making an AddDep
717+
*/
674718
@Test
675719
public void checkAnnotationConversionErrors() {
676720
Ssurgeon inst = Ssurgeon.inst();
@@ -748,7 +792,7 @@ public void simpleTest() {
748792
attributes.put("lemma", "is");
749793
attributes.put("current", "is");
750794
attributes.put("pos", "VBN");
751-
SsurgeonEdit addCopula = new AddDep("a2", EnglishGrammaticalRelations.COPULA, attributes);
795+
SsurgeonEdit addCopula = new AddDep("a2", EnglishGrammaticalRelations.COPULA, attributes, null);
752796
pattern.addEdit(addCopula);
753797

754798
// Destroy subgraph

0 commit comments

Comments
 (0)