2
2
3
3
import java .io .StringWriter ;
4
4
import java .util .ArrayList ;
5
+ import java .util .Collections ;
6
+ import java .util .HashSet ;
5
7
import java .util .List ;
6
8
import java .util .Map ;
9
+ import java .util .Set ;
7
10
import java .util .TreeMap ;
8
11
9
12
import edu .stanford .nlp .ling .CoreAnnotations ;
26
29
*/
27
30
public class MergeNodes extends SsurgeonEdit {
28
31
public static final String LABEL = "mergeNodes" ;
29
- final List <String > nodes ;
32
+ final List <String > names ;
30
33
final Map <String , String > attributes ;
31
34
32
- public MergeNodes (List <String > nodes , Map <String , String > attributes ) {
33
- if (nodes .size () > 2 ) {
34
- throw new SsurgeonParseException ("Cannot support MergeNodes of size " + nodes .size () + " yet... please file an issue on github if you need this feature" );
35
- }
36
- this .nodes = new ArrayList <>(nodes );
35
/**
 * Creates a merge edit over the nodes bound to the given names.
 *
 * Both arguments are defensively copied: the names into an ArrayList
 * (keeping the caller's order) and the attributes into a TreeMap
 * (so they are kept sorted by key).
 *
 * An empty list of names is rejected here.  Otherwise evaluate() would
 * have no nodes to pick a head from and would fail when it looks up the
 * first / last node to merge.
 *
 * @param names the names of the matched nodes to merge; must not be empty
 * @param attributes attributes to apply to the merged node
 * @throws SsurgeonParseException if names is empty
 */
public MergeNodes(List<String> names, Map<String, String> attributes) {
  if (names.isEmpty()) {
    throw new SsurgeonParseException("Cannot create a MergeNodes operation with no node names");
  }
  this.names = new ArrayList<>(names);
  this.attributes = new TreeMap<>(attributes);
}
39
39
@@ -44,7 +44,7 @@ public MergeNodes(List<String> nodes, Map<String, String> attributes) {
44
44
public String toEditString () {
45
45
StringWriter buf = new StringWriter ();
46
46
buf .write (LABEL );
47
- for (String name : nodes ) {
47
+ for (String name : names ) {
48
48
buf .write ("\t " );
49
49
buf .write (Ssurgeon .NODENAME_ARG + " " + name );
50
50
}
@@ -61,90 +61,97 @@ public String toEditString() {
61
61
}
62
62
63
63
/**
64
- * If the two named nodes are next to each other, and the edges of
65
- * the graph allow for it, squish the two words into one word
64
+ * If the named nodes are next to each other, and the edges of
65
+ * the graph allow for it, squish those words into one word
66
66
*/
67
67
@ Override
68
68
public boolean evaluate (SemanticGraph sg , SemgrexMatcher sm ) {
69
- String name1 = nodes .get (0 );
70
- String name2 = nodes .get (1 );
71
-
72
- IndexedWord node1 = sm .getNode (name1 );
73
- IndexedWord node2 = sm .getNode (name2 );
74
-
75
- if (node1 == null || node2 == null ) {
76
- return false ;
77
- }
78
-
79
- List <SemanticGraphEdge > n1_to_n2 = sg .getAllEdges (node1 , node2 );
80
- List <SemanticGraphEdge > n2_to_n1 = sg .getAllEdges (node2 , node1 );
81
- if (n1_to_n2 .size () == 0 && n2_to_n1 .size () == 0 ) {
82
- return false ;
83
- }
84
-
85
- // TODO: what about the case where the dep is or has copies?
86
- final IndexedWord head ;
87
- final IndexedWord dep ;
88
-
89
- if (n1_to_n2 .size () > 0 ) {
90
- head = node1 ;
91
- dep = node2 ;
92
- } else {
93
- head = node2 ;
94
- dep = node1 ;
95
- }
96
-
97
- // If the dep has any edges that aren't between dep & head, abort
98
- // TODO: we could probably make it adjust edges with "dep" as source, instead
99
- for (SemanticGraphEdge e : sg .outgoingEdgeIterable (dep )) {
100
- if (e .getTarget () != head ) {
69
+ List <IndexedWord > nodes = new ArrayList <>();
70
+ for (String name : names ) {
71
+ IndexedWord node = sm .getNode (name );
72
+ if (node == null ) {
101
73
return false ;
102
74
}
103
- }
104
- for (SemanticGraphEdge e : sg .incomingEdgeIterable (dep )) {
105
- if (e .getSource () != head ) {
106
- return false ;
75
+ nodes .add (node );
76
+ }
77
+ Collections .sort (nodes );
78
+
79
+ IndexedWord head = null ;
80
+ for (IndexedWord candidate : nodes ) {
81
+ if (sg .hasChildren (candidate )) {
82
+ // if multiple nodes have children inside the graph,
83
+ // perhaps we could merge them all,
84
+ // but the easiest thing to do is just abort
85
+ // TODO: an alternate approach would be to look for nodes with a head
86
+ // outside the nodes in the phrase to merge
87
+ if (head != null ) {
88
+ return false ;
89
+ }
90
+ head = candidate ;
107
91
}
108
92
}
109
93
110
- IndexedWord left ;
111
- IndexedWord right ;
112
- if (node1 .index () < node2 .index ()) {
113
- left = node1 ;
114
- right = node2 ;
115
- } else {
116
- left = node2 ;
117
- right = node1 ;
94
+ Set <Integer > depIndices = new HashSet <Integer >();
95
+ for (IndexedWord other : nodes ) {
96
+ if (other == head ) {
97
+ continue ;
98
+ }
99
+ Set <IndexedWord > parents = sg .getParents (other );
100
+ // this shouldn't happen
101
+ if (parents .size () == 0 ) {
102
+ return false ;
103
+ }
104
+ // iterate instead of just do the first in case
105
+ // this is one day run on a graph with extra dependencies
106
+ for (IndexedWord parent : parents ) {
107
+ if (parent != head ) {
108
+ return false ;
109
+ }
110
+ }
111
+ depIndices .add (other .index ());
118
112
}
119
113
120
114
CoreLabel newLabel = AddDep .fromCheapStrings (attributes );
121
115
// CoreLabel.setWord wipes out the lemma for some reason
122
116
// we may eventually change that, but for now, we compensate for that here
123
117
String lemma = newLabel .lemma ();
118
+
124
119
if (newLabel .word () == null ) {
125
- String newWord = left .word () + right .word ();
126
- newLabel .setWord (newWord );
120
+ StringBuilder newWord = new StringBuilder ();
121
+ for (IndexedWord node : nodes ) {
122
+ newWord .append (node .word ());
123
+ }
124
+ newLabel .setWord (newWord .toString ());
127
125
}
128
126
if (newLabel .value () == null ) {
129
127
newLabel .setValue (newLabel .word ());
130
128
}
131
129
132
130
newLabel .setLemma (lemma );
133
131
if (newLabel .lemma () == null ) {
134
- String newLemma = left .lemma () != null && right .lemma () != null ? left .lemma () + right .lemma () : null ;
135
- newLabel .setLemma (newLemma );
132
+ StringBuilder newLemma = new StringBuilder ();
133
+ for (IndexedWord node : nodes ) {
134
+ if (node .lemma () != null ) {
135
+ newLemma .append (node .lemma ());
136
+ }
137
+ }
138
+ lemma = newLemma .length () > 0 ? newLemma .toString () : null ;
139
+ newLabel .setLemma (lemma );
136
140
}
141
+
137
142
// after() and before() return "" if null, so we need to use the CoreAnnotations directly
138
143
if (newLabel .get (CoreAnnotations .AfterAnnotation .class ) == null ) {
139
- newLabel .setAfter (right .after ());
144
+ newLabel .setAfter (nodes . get ( nodes . size () - 1 ) .after ());
140
145
}
141
146
if (newLabel .get (CoreAnnotations .BeforeAnnotation .class ) == null ) {
142
- newLabel .setBefore (right .before ());
147
+ newLabel .setBefore (nodes . get ( 0 ) .before ());
143
148
}
144
149
145
150
// find the head, and replace all the existing annotations on the head
146
151
// with the new annotations (including word and lemma)
147
152
// from the newly built CoreLabel
153
+ // TODO: should avoid messing with empty nodes
154
+ // doing extra nodes would be good
148
155
for (IndexedWord vertex : sg .vertexSet ()) {
149
156
if (vertex .index () == head .index ()) {
150
157
for (Class key : newLabel .keySet ()) {
@@ -159,13 +166,19 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
159
166
// TODO: super fancy would be implementing iterator.remove()
160
167
// on the Set returned by the SemanticGraph
161
168
for (IndexedWord vertex : sg .vertexListSorted ()) {
162
- if (vertex .index () == dep .index ()) {
169
+ // TODO: again, don't delete empty nodes
170
+ if (depIndices .contains (vertex .index ())) {
163
171
sg .removeVertex (vertex );
164
172
}
165
173
}
166
174
167
175
// reindex everyone
168
- SsurgeonUtils .moveNodes (sg , sm , x -> (x >= dep .index ()), x -> x -1 , false );
176
+ List <Integer > sortedIndices = new ArrayList <>(depIndices );
177
+ Collections .sort (sortedIndices );
178
+ Collections .reverse (sortedIndices );
179
+ for (Integer depIndex : sortedIndices ) {
180
+ SsurgeonUtils .moveNodes (sg , sm , x -> (x >= depIndex ), x -> x -1 , false );
181
+ }
169
182
170
183
return true ;
171
184
}
0 commit comments