2
2
3
3
import java .io .StringWriter ;
4
4
import java .util .*;
5
+ import java .util .function .Function ;
6
+ import java .util .stream .Collectors ;
5
7
6
8
import edu .stanford .nlp .ling .CoreLabel ;
7
9
import edu .stanford .nlp .ling .IndexedWord ;
8
10
import edu .stanford .nlp .semgraph .SemanticGraph ;
11
+ import edu .stanford .nlp .semgraph .SemanticGraphEdge ;
9
12
import edu .stanford .nlp .semgraph .SemanticGraphUtils ;
10
13
import edu .stanford .nlp .semgraph .semgrex .SemgrexMatcher ;
11
14
import edu .stanford .nlp .trees .EnglishGrammaticalRelations ;
16
19
* The new node's sentence index is inherited from the governing node. Currently a cheap heuristic
17
20
* is made, placing the new node as the leftmost child of the governing node.
18
21
*
19
- * TODO: add position (a la Tregex)
20
- * TODO: determine consistent and intuitive arguments
21
- * TODO: because word position is important for certain features (such as bigram lexical overlap), need
22
- * ability to specify in which position the new node is inserted.
23
- *
24
22
* @author Eric Yeh
25
23
*
26
24
*/
@@ -29,32 +27,40 @@ public class AddDep extends SsurgeonEdit {
29
27
final Map <String , String > attributes ;
30
28
final GrammaticalRelation relation ;
31
29
final String govNodeName ;
30
+ final String position ;
32
31
final double weight ;
33
32
34
33
/**
35
34
* Creates an EnglishGrammaticalRelation AddDep edit.
36
35
* @param newNode String representation of new dependent IndexedFeatureNode map.
37
36
*/
38
- public static AddDep createEngAddDep (String govNodeName , String engRelation , Map <String , String > attributes ) {
37
+ public static AddDep createEngAddDep (String govNodeName , String engRelation , Map <String , String > attributes , String position ) {
39
38
GrammaticalRelation relation = EnglishGrammaticalRelations .valueOf (engRelation );
40
- return new AddDep (govNodeName , relation , attributes );
39
+ return new AddDep (govNodeName , relation , attributes , position );
41
40
}
42
41
43
/**
 * Creates an AddDep edit with an edge weight of 0.0.
 *
 * @param govNodeName name of the governor node the new dependent attaches to
 * @param relation    relation for the edge from the governor to the new node
 * @param attributes  attribute map describing the word for the new node
 * @param position    {@code "-"}, {@code "+"}, or {@code null} for no explicit position
 * @throws SsurgeonParseException if the attributes cannot be processed or
 *                                the position is not recognized
 */
public AddDep(String govNodeName, GrammaticalRelation relation, Map<String, String> attributes, String position) {
  this(govNodeName, relation, attributes, position, 0.0);
}

/**
 * Creates an AddDep edit.
 *
 * @param govNodeName name of the governor node the new dependent attaches to
 * @param relation    relation for the edge from the governor to the new node
 * @param attributes  attribute map describing the word for the new node;
 *                    copied into a sorted map
 * @param position    {@code "-"}, {@code "+"}, or {@code null} for no explicit position
 * @param weight      weight given to the edge that is added
 * @throws SsurgeonParseException if the attributes cannot be processed or
 *                                the position is not recognized
 */
public AddDep(String govNodeName, GrammaticalRelation relation, Map<String, String> attributes, String position, double weight) {
  // if there's an exception, we'll barf here rather than at runtime;
  // the resulting label is only built to validate the keys and is discarded
  try {
    fromCheapStrings(attributes);
  } catch (UnsupportedOperationException e) {
    throw new SsurgeonParseException("Unable to process keys for AddDep operation", e);
  }

  // only "-" and "+" are understood; null means no position was given
  if (position != null && !position.equals("-") && !position.equals("+")) {
    throw new SsurgeonParseException("Unknown position " + position + " in AddDep operation");
  }

  this.attributes = new TreeMap<>(attributes);
  this.relation = relation;
  this.govNodeName = govNodeName;
  this.position = position;
  // honor the caller's weight (this was previously hard-coded to 0)
  this.weight = weight;
}
60
66
@@ -70,6 +76,11 @@ public String toEditString() {
70
76
buf .write (Ssurgeon .RELN_ARG );buf .write (" " );
71
77
buf .write (relation .toString ()); buf .write ("\t " );
72
78
79
+ if (position != null ) {
80
+ buf .write (Ssurgeon .POSITION_ARG );buf .write (" " );
81
+ buf .write (position );buf .write ("\t " );
82
+ }
83
+
73
84
for (String key : attributes .keySet ()) {
74
85
buf .write ("-" );
75
86
buf .write (key );
@@ -83,12 +94,53 @@ public String toEditString() {
83
94
return buf .toString ();
84
95
}
85
96
97
+ // TODO: update the SemgrexMatcher
98
+ // currently the Ssurgeon will not be able to proceed after this edit
99
+ // since all of the node and edge pointers will be rewritten
100
+ public static void moveNode (SemanticGraph sg , IndexedWord word , int newIndex ) {
101
+ List <SemanticGraphEdge > outgoing = sg .outgoingEdgeList (word );
102
+ List <SemanticGraphEdge > incoming = sg .incomingEdgeList (word );
103
+ boolean isRoot = sg .isRoot (word );
104
+ sg .removeVertex (word );
105
+
106
+ IndexedWord newWord = new IndexedWord (word .backingLabel ());
107
+ newWord .setIndex (newIndex );
108
+
109
+ // could be more expensive than necessary if we move multiple roots,
110
+ // but the expectation is there is usually only the 1 root
111
+ if (isRoot ) {
112
+ Set <IndexedWord > newRoots = new HashSet <>(sg .getRoots ());
113
+ newRoots .remove (word );
114
+ newRoots .add (newWord );
115
+ sg .setRoots (newRoots );
116
+ }
117
+
118
+ for (SemanticGraphEdge oldEdge : outgoing ) {
119
+ SemanticGraphEdge newEdge = new SemanticGraphEdge (newWord , oldEdge .getTarget (), oldEdge .getRelation (), oldEdge .getWeight (), oldEdge .isExtra ());
120
+ sg .addEdge (newEdge );
121
+ }
122
+
123
+ for (SemanticGraphEdge oldEdge : incoming ) {
124
+ SemanticGraphEdge newEdge = new SemanticGraphEdge (oldEdge .getSource (), newWord , oldEdge .getRelation (), oldEdge .getWeight (), oldEdge .isExtra ());
125
+ sg .addEdge (newEdge );
126
+ }
127
+ }
128
+
129
+ public static void moveNodes (SemanticGraph sg , Function <Integer , Boolean > shouldMove , Function <Integer , Integer > destination ) {
130
+ // iterate first, then move, so that we don't screw up the graph while iterating
131
+ List <IndexedWord > toMove = sg .vertexSet ().stream ().filter (x -> shouldMove .apply (x .index ())).collect (Collectors .toList ());
132
+ Collections .sort (toMove );
133
+ Collections .reverse (toMove );
134
+ for (IndexedWord word : toMove ) {
135
+ moveNode (sg , word , destination .apply (word .index ()));
136
+ }
137
+ }
138
+
86
139
/**
87
140
* TODO: figure out how to specify where in the sentence this node goes.
88
- * TODO: determine if we should be copying an IndexedWord, or working just with a FeatureLabel.
141
+ * currently allows - and + for start and end. before and after
142
+ * matched node would be good
89
143
* TODO: bombproof if this gov, dep, and reln already exist.
90
- * TODO: This is not used anywhere, even in the old RTE code, so we can redo it however we want.
91
- * Perhaps it could reorder the indices of the new nodes, for example
92
144
*/
93
145
@ Override
94
146
public boolean evaluate (SemanticGraph sg , SemgrexMatcher sm ) {
@@ -98,17 +150,37 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
98
150
// same backing CoreLabel in each instance
99
151
CoreLabel newWord = fromCheapStrings (attributes );
100
152
IndexedWord newNode = new IndexedWord (newWord );
101
- int newIndex = 0 ;
102
- for (IndexedWord node : sg .vertexSet ()) {
103
- if (node .index () >= newIndex ) {
104
- newIndex = node .index () + 1 ;
105
- }
153
+ final int tempIndex ;
154
+ if (position != null && !position .equals ("+" )) {
155
+ // +2 to leave room: we will increase all other nodes with the
156
+ // proper index, so we need +1 of room, then another +1 for
157
+ // a temp place to put this node
158
+ // TODO: when we implement updating the SemgrexMatcher,
159
+ // this won't be necessary
160
+ tempIndex = SemanticGraphUtils .maxIndex (sg ) + 2 ;
161
+ } else {
162
+ tempIndex = SemanticGraphUtils .maxIndex (sg ) + 1 ;
106
163
}
107
164
newNode .setDocID (govNode .docID ());
108
- newNode .setIndex (newIndex );
165
+ newNode .setIndex (tempIndex );
109
166
newNode .setSentIndex (govNode .sentIndex ());
167
+
110
168
sg .addVertex (newNode );
111
169
sg .addEdge (govNode , newNode , relation , weight , false );
170
+
171
+ if (position != null && !position .equals ("+" )) {
172
+ final int newIndex ;
173
+ if (position .equals ("-" )) {
174
+ newIndex = SemanticGraphUtils .minIndex (sg );
175
+ } else {
176
+ throw new UnsupportedOperationException ("Unknown position in AddDep: |" + position + "|" );
177
+ }
178
+ // the payoff for tempIndex == maxIndex + 2:
179
+ // everything will be moved one higher, unless it's the new node
180
+ moveNodes (sg , x -> (x >= newIndex && x != tempIndex ), x -> x +1 );
181
+ moveNode (sg , newNode , newIndex );
182
+ }
183
+
112
184
return true ;
113
185
}
114
186
0 commit comments