Skip to content

Commit e870f2a

Browse files
committed
Add an 'iterate' method to Ssurgeon which repeats an operation until it no longer does anything new
Can now use valueOf() and assertEquals to make sure the graph is as expected in the unit test - much simpler than taking apart the graph. Leave a comment on something we need to figure out regarding the iteration.
1 parent 6392194 commit e870f2a

File tree

2 files changed

+75
-5
lines changed

2 files changed

+75
-5
lines changed

src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/SsurgeonPattern.java

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,47 @@ public Collection<SemanticGraph> execute(SemanticGraph sg) {
154154
return generated;
155155
}
156156

157+
/**
158+
* This alternative processing style repeatedly matches the graph
159+
* and executes patterns until all of the matches are exhausted and
160+
* there are no more edits performed.
161+
*<br>
162+
* Note that this means "bomb" patterns will go infinite. In
163+
* particular, adding a node without a check that the node already
164+
* exists is a problem. Most other patterns are self-limiting, in
165+
* that the change will not be repeated more than once.
166+
*<br>
167+
* The graph is always copied, although operations which change the
168+
* text or otherwise edit a word node will affect the original graph.
169+
*<br>
170+
* It's not clear what to do with a multiple edit pattern.
171+
* Currently we just operate until one edit occurs, then break.
172+
* This makes it harder to do consecutive operations using the
173+
* same match, but there are a couple issues to easily doing that:
174+
* <ul>
175+
* <li> what do we do when an edit doesn't fire? keep going or break?
176+
* <li> what node names do the later edits get? rearranging nodes
177+
* may change the indices, affecting the match
178+
* </ul>
179+
*/
180+
public SemanticGraph iterate(SemanticGraph sg) {
181+
SemanticGraph copied = new SemanticGraph(sg);
182+
183+
SemgrexMatcher matcher = semgrexPattern.matcher(copied);
184+
while (matcher.find()) {
185+
// We reset the named node map with each edit set, since these edits
186+
// should exist in a separate graph for each unique Semgrex match.
187+
nodeMap = Generics.newHashMap();
188+
for (SsurgeonEdit edit : editScript) {
189+
if (edit.evaluate(copied, matcher)) {
190+
matcher = semgrexPattern.matcher(copied);
191+
break;
192+
}
193+
}
194+
}
195+
return copied;
196+
}
197+
157198
/**
158199
* Executes the Ssurgeon edit, but with the given Semgrex Pattern, instead of the one attached to this
159200
* pattern.

test/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/SsurgeonTest.java

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@
1010
import edu.stanford.nlp.ling.CoreAnnotations;
1111
import edu.stanford.nlp.ling.IndexedWord;
1212
import edu.stanford.nlp.semgraph.SemanticGraph;
13+
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
1314
import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern;
1415
import edu.stanford.nlp.trees.EnglishGrammaticalRelations;
16+
import edu.stanford.nlp.util.XMLUtils;
1517

1618
public class SsurgeonTest {
1719

@@ -31,15 +33,16 @@ public void readXMLEmptyPattern() {
3133
assertEquals(pattern.size(), 1);
3234
}
3335

36+
// Platform line separator, shared by the tests which assemble their XML pattern documents with String.join
static final String newline = System.getProperty("line.separator");
37+
3438
@Test
35-
public void readXMLEdit() {
36-
String newline = System.getProperty("line.separator");
39+
public void readXMLAddEdgeExecute() {
3740
String doc = String.join(newline,
3841
"<ssurgeon-pattern-list>",
3942
" <ssurgeon-pattern>",
4043
" <uid>38</uid>",
4144
" <notes>This is a simple test of addEdge</notes>",
42-
" <semgrex>{}=a1 &gt; {}=a2</semgrex>",
45+
" <semgrex>" + XMLUtils.escapeXML("{}=a1 > {}=a2") + "</semgrex>",
4346
" <edit-list>addEdge -gov a1 -dep a2 -reln dep -weight 0.5</edit-list>",
4447
" </ssurgeon-pattern>",
4548
"</ssurgeon-pattern-list>");
@@ -50,11 +53,37 @@ public void readXMLEdit() {
5053

5154
SemanticGraph sg = SemanticGraph.valueOf("[A obj> B obj> C]");
5255
Collection<SemanticGraph> newSgs = pattern.execute(sg);
53-
// TODO: perhaps it would be better to have an execution scheme
54-
// where one graph has all possible modifications applied
5556
assertEquals(newSgs.size(), 2);
5657
}
5758

59+
60+
/**
61+
* Test that AddEdge, when iterated, adds exactly one more edge
62+
* between each parent/child pair if they matched the target relation
63+
*/
64+
@Test
65+
public void readXMLAddEdgeIterate() {
66+
String doc = String.join(newline,
67+
"<ssurgeon-pattern-list>",
68+
" <ssurgeon-pattern>",
69+
" <uid>38</uid>",
70+
" <notes>This is a simple test of addEdge</notes>",
71+
" <semgrex>" + XMLUtils.escapeXML("{}=a1 >obj {}=a2") + "</semgrex>",
72+
" <edit-list>addEdge -gov a1 -dep a2 -reln dep -weight 0.5</edit-list>",
73+
" </ssurgeon-pattern>",
74+
"</ssurgeon-pattern-list>");
75+
Ssurgeon inst = Ssurgeon.inst();
76+
List<SsurgeonPattern> patterns = inst.readFromString(doc);
77+
assertEquals(patterns.size(), 1);
78+
SsurgeonPattern pattern = patterns.get(0);
79+
80+
SemanticGraph sg = SemanticGraph.valueOf("[A obj> B obj> C nsubj> [D obj> E]]");
81+
SemanticGraph newSg = pattern.iterate(sg);
82+
SemanticGraph expected = SemanticGraph.valueOf("[A-0 obj> B-1 dep> B-1 obj> C-2 dep> C-2 nsubj> [D-3 obj> E-4 dep> E-4]]");
83+
84+
assertEquals(newSg, expected);
85+
}
86+
5887
/**
5988
* Simple test of an Ssurgeon edit script. This instances a simple semantic graph,
6089
* a semgrex pattern, and then the resulting actions over the named nodes in the

0 commit comments

Comments
 (0)