Skip to content

Commit cf540f9

Browse files
committed
Add a Semgrex feature to test for a connection in either direction, i.e. the other node may be either the gov or the dep. This greatly simplifies expressions which previously needed to look in both directions.
1 parent 77f7291 commit cf540f9

File tree

5 files changed

+100
-6
lines changed

5 files changed

+100
-6
lines changed

src/edu/stanford/nlp/semgraph/semgrex/GraphRelation.java

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1012,6 +1012,84 @@ void advance() {
10121012
private static final long serialVersionUID = 1L;
10131013
};
10141014

1015+
/**
1016+
* Iterates over nodes which are either gov or dep of the current node.
1017+
*<br>
1018+
* In other words, this can take the place of a disjunction over &lt; and &gt;
1019+
*/
1020+
static private class CONNECTED extends GraphRelation {
1021+
CONNECTED(String reln, String name, String edgeName) {
1022+
super("<>", reln, name);
1023+
}
1024+
1025+
@Override
1026+
boolean satisfies(IndexedWord l1, IndexedWord l2, SemanticGraph sg) {
1027+
if (l1.equals(IndexedWord.NO_WORD) || l2.equals(IndexedWord.NO_WORD) )
1028+
return false;
1029+
List<Pair<GrammaticalRelation, IndexedWord>> govs = sg.parentPairs(l1);
1030+
for (Pair<GrammaticalRelation, IndexedWord> gov : govs) {
1031+
if (this.type.test(gov.first().toString()) &&
1032+
gov.second().equals(l2)) return true;
1033+
}
1034+
List<Pair<GrammaticalRelation, IndexedWord>> deps = sg.childPairs(l1);
1035+
for (Pair<GrammaticalRelation, IndexedWord> dep : deps) {
1036+
if (this.type.test(dep.first().toString()) &&
1037+
dep.second().equals(l2)) {
1038+
return true;
1039+
}
1040+
}
1041+
return false;
1042+
}
1043+
1044+
1045+
@Override
1046+
Iterator<IndexedWord> searchNodeIterator(final IndexedWord node, final SemanticGraph sg) {
1047+
return new SearchNodeIterator() {
1048+
Iterator<SemanticGraphEdge> it1;
1049+
Iterator<SemanticGraphEdge> it2;
1050+
1051+
@Override
1052+
public void advance() {
1053+
if (node.equals(IndexedWord.NO_WORD)) {
1054+
next = null;
1055+
return;
1056+
}
1057+
if (it1 == null) {
1058+
it1 = sg.outgoingEdgeIterator(node);
1059+
}
1060+
while (it1.hasNext()) {
1061+
SemanticGraphEdge edge = it1.next();
1062+
relation = edge.getRelation().toString();
1063+
if (!type.test(relation)) {
1064+
continue;
1065+
}
1066+
this.next = edge.getTarget();
1067+
this.edge = edge;
1068+
return;
1069+
}
1070+
if (it2 == null) {
1071+
it2 = sg.incomingEdgeIterator(node);
1072+
}
1073+
while (it2.hasNext()) {
1074+
SemanticGraphEdge edge = it2.next();
1075+
relation = edge.getRelation().toString();
1076+
if (!type.test(relation)) {
1077+
continue;
1078+
}
1079+
this.next = edge.getSource();
1080+
this.edge = edge;
1081+
return;
1082+
}
1083+
this.next = null;
1084+
this.edge = null;
1085+
}
1086+
};
1087+
}
1088+
1089+
// automatically generated by button mashing
1090+
private static final long serialVersionUID = -413981378678L;
1091+
}
1092+
10151093
static private class EQUALS extends GraphRelation {
10161094
EQUALS(String reln, String name) {
10171095
super("==", reln, name);
@@ -1362,6 +1440,7 @@ public void advance() {
13621440
public static boolean isKnownRelation(String reln) {
13631441
return (reln.equals(">") || reln.equals("<") ||
13641442
reln.equals(">>") || reln.equals("<<") ||
1443+
reln.equals("<>") ||
13651444
reln.equals("@") || reln.equals("==") ||
13661445
reln.equals("$+") || reln.equals("$++") ||
13671446
reln.equals("$-") || reln.equals("$--") ||
@@ -1393,6 +1472,8 @@ public static GraphRelation getRelation(String reln,
13931472
return new DEPENDENT_RIGHT(type, name, edgeName);
13941473
case "<--":
13951474
return new DEPENDENT_LEFT(type, name, edgeName);
1475+
case "<>":
1476+
return new CONNECTED(type, name, edgeName);
13961477
}
13971478
if (edgeName != null) {
13981479
throw new ParseException("Relation " + reln + " does not allow for named edges");

src/edu/stanford/nlp/semgraph/semgrex/SemgrexParser.jj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ SPECIAL_TOKEN:
6262

6363
TOKEN:
6464
{
65-
< RELATION: "<" | ">" | ">>" | "<<" | "==" | "$+" | "$-" | "$++" | "$--" | "." | ".." | "-" | "--" | ">++" | ">--" | "<++" | "<--" >
65+
< RELATION: "<" | ">" | ">>" | "<<" | "<>" | "==" | "$+" | "$-" | "$++" | "$--" | "." | ".." | "-" | "--" | ">++" | ">--" | "<++" | "<--" >
6666
| < ALIGNRELN: "@" >
6767
| < IDENTIFIER: (~[" ", "\n", "\r", "(", "/", "|", "@", "!", "#", "%", "&", ")", "=", "?", "[", "]", ">", "<", "~", ".", ",", "$", ":", ";", "{", "}", "+", "-"])+ >
6868
| < NUMBER: ( ["0"-"9"] )+ >

src/edu/stanford/nlp/semgraph/semgrex/SemgrexParserTokenManager.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -171,9 +171,9 @@ else if ((0x100002200L & l) != 0L)
171171
{ jjCheckNAdd(0); }
172172
}
173173
else if (curChar == 60)
174-
{ jjAddStates(7, 9); }
174+
{ jjCheckNAddStates(7, 10); }
175175
else if (curChar == 62)
176-
{ jjAddStates(10, 12); }
176+
{ jjCheckNAddStates(11, 13); }
177177
else if (curChar == 45)
178178
{ jjCheckNAdd(6); }
179179
else if (curChar == 46)
@@ -257,7 +257,7 @@ else if (curChar == 43)
257257
break;
258258
case 15:
259259
if (curChar == 62)
260-
{ jjAddStates(10, 12); }
260+
{ jjCheckNAddStates(11, 13); }
261261
break;
262262
case 16:
263263
if (curChar == 62 && kind > 2)
@@ -274,7 +274,7 @@ else if (curChar == 43)
274274
break;
275275
case 20:
276276
if (curChar == 60)
277-
{ jjAddStates(7, 9); }
277+
{ jjCheckNAddStates(7, 10); }
278278
break;
279279
case 21:
280280
if (curChar == 60 && kind > 2)
@@ -388,7 +388,7 @@ protected Token jjFillToken()
388388
return t;
389389
}
390390
static final int[] jjnextStates = {
391-
17, 6, 25, 26, 12, 13, 14, 21, 22, 23, 16, 18, 19,
391+
17, 6, 25, 26, 12, 13, 14, 21, 16, 22, 23, 16, 18, 19,
392392
};
393393
private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2)
394394
{

src/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@
8080
* <tr><td>A &gt;&gt;reln B <td>A is the governor of a relation reln in a chain to B following {@code gov->dep} paths
8181
* <tr><td>{@code A x,y<<reln B} <td>A is the dependent of a relation reln in a chain to B following {@code dep->gov} paths between distances of x and y
8282
* <tr><td>{@code A x,y>>reln B} <td>A is the governor of a relation reln in a chain to B following {@code gov->dep} paths between distances of x and y
83+
* <tr><td>A &lt;&gt;reln B <td> A is connected (either dependent or governor) via relation reln with B
8384
* <tr><td>A == B <td>A and B are the same nodes in the same graph
8485
* <tr><td>A . B <td>A immediately precedes B, i.e. A.index() == B.index() - 1
8586
* <tr><td>A - B <td>A immediately succeeds B, i.e. A.index() == B.index() + 1

test/src/edu/stanford/nlp/semgraph/semgrex/SemgrexTest.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,18 @@ public void testSimpleDependency() {
118118
"ate", "ate", "muffins");
119119
}
120120

121+
public void testConnected() {
122+
// the root should connect to all its children
123+
runTest("{} <> {word:ate}", "[ate subj>Bill obj>[muffins compound>blueberry]]",
124+
"Bill", "muffins");
125+
// a node in the middle should connect to both its children and its parent
126+
runTest("{} <> {word:muffins}", "[ate subj>Bill obj>[muffins compound>blueberry]]",
127+
"ate", "blueberry");
128+
// a leaf should connect to its parent
129+
runTest("{} <> {word:blueberry}", "[ate subj>Bill obj>[muffins compound>blueberry]]",
130+
"muffins");
131+
}
132+
121133
public void testMultipleAttributes() {
122134
runTest("{} >> {word:Bill}",
123135
"[ate subj>Bill/NNP obj>[muffins compound>blueberry]]",

0 commit comments

Comments
 (0)