Skip to content

Commit 00c8d93

Browse files
committed
Add an ith leaf relation to tregex
1 parent f2b8bf5 commit 00c8d93

File tree

3 files changed

+101
-1
lines changed

3 files changed

+101
-1
lines changed

src/edu/stanford/nlp/trees/tregex/Relation.java

Lines changed: 87 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ static Relation getRelation(String s,
131131
/**
132132
* Static factory method for relations requiring an argument, including
133133
* HAS_ITH_CHILD, ITH_CHILD_OF, UNBROKEN_CATEGORY_DOMINATES,
134-
* UNBROKEN_CATEGORY_DOMINATED_BY.
134+
* UNBROKEN_CATEGORY_DOMINATED_BY, ANCESTOR_OF_ITH_LEAF.
135135
*
136136
* @param s The String representation of the relation
137137
* @param arg The argument to the relation, as a string; could be a node
@@ -168,6 +168,9 @@ static Relation getRelation(String s, String arg,
168168
case ",+":
169169
r = new UnbrokenCategoryFollows(arg, basicCatFunction);
170170
break;
171+
case "<<<":
172+
r = new AncestorOfIthLeaf(Integer.parseInt(arg));
173+
break;
171174
default:
172175
throw new ParseException("Unrecognized compound relation " + s + ' '
173176
+ arg);
@@ -362,6 +365,89 @@ void advance() {
362365
}
363366
};
364367

368+
/**
369+
* Looks for the ith leaf of the current node
370+
*/
371+
private static class AncestorOfIthLeaf extends Relation {
372+
373+
private static final long serialVersionUID = -6495191354526L;
374+
375+
private final int leafNum;
376+
377+
AncestorOfIthLeaf(int i) {
378+
super("<<<" + String.valueOf(i));
379+
if (i == 0) {
380+
throw new IllegalArgumentException("Error -- no such thing as zeroth leaf!");
381+
}
382+
leafNum = i;
383+
}
384+
385+
@Override
386+
boolean satisfies(Tree t1, Tree t2, Tree root, final TregexMatcher matcher) {
387+
if (t1 == t2)
388+
return false;
389+
if (!t2.isLeaf())
390+
return false;
391+
// this is kind of lazy
392+
// if it somehow became a performance limitation,
393+
// a recursive search would be faster
394+
List<Tree> leaves = t1.getLeaves();
395+
if (leaves.size() < Math.abs(leafNum))
396+
return false;
397+
398+
final int index;
399+
if (leafNum > 0) {
400+
index = leafNum - 1;
401+
} else {
402+
// eg, leafNum == -1 means we check leaves.size() - 1
403+
index = leaves.size() + leafNum;
404+
}
405+
return leaves.get(index) == t2;
406+
}
407+
408+
@Override
409+
Iterator<Tree> searchNodeIterator(final Tree t,
410+
final TregexMatcher matcher) {
411+
return new SearchNodeIterator() {
412+
@Override
413+
void initialize() {
414+
List<Tree> leaves = t.getLeaves();
415+
if (leaves.size() >= Math.abs(leafNum)) {
416+
final int index;
417+
if (leafNum > 0) {
418+
index = leafNum - 1;
419+
} else {
420+
index = leafNum + leaves.size();
421+
}
422+
next = leaves.get(index);
423+
}
424+
}
425+
};
426+
}
427+
428+
@Override
429+
public boolean equals(Object o) {
430+
if (this == o) {
431+
return true;
432+
}
433+
if (!(o instanceof AncestorOfIthLeaf)) {
434+
return false;
435+
}
436+
437+
final AncestorOfIthLeaf other = (AncestorOfIthLeaf) o;
438+
if (leafNum != other.leafNum) {
439+
return false;
440+
}
441+
442+
return true;
443+
}
444+
445+
@Override
446+
public int hashCode() {
447+
return leafNum + 20;
448+
}
449+
};
450+
365451
private static final Relation DOMINATES = new Relation("<<") {
366452

367453
private static final long serialVersionUID = -2580199434621268260L;

src/edu/stanford/nlp/trees/tregex/TregexPattern.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@
114114
* <tr><td>A &gt;i B <td>A is the ith child of B (i &gt; 0)
115115
* <tr><td>A &lt;-i B <td>B is the ith-to-last child of A (i &gt; 0)
116116
* <tr><td>A &gt;-i B <td>A is the ith-to-last child of B (i &gt; 0)
117+
 * <tr><td>A &lt;&lt;&lt;i B <td>B is the ith leaf of A (i &gt; 0)
118+
 * <tr><td>A &lt;&lt;&lt;-i B <td>B is the ith-to-last leaf of A (i &gt; 0)
117119
* <tr><td>A &lt;: B <td>B is the only child of A
118120
* <tr><td>A &gt;: B <td>A is the only child of B
119121
* <tr><td>A &lt;&lt;: B <td>A dominates B via an unbroken chain (length &gt; 0) of unary local trees.

test/src/edu/stanford/nlp/trees/tregex/TregexTest.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1081,6 +1081,18 @@ public void testAncestorOfLeaf() {
10811081
runTest("A <<< (b . c=foo) <<< =foo", "(ROOT (A (B b) (C c)))", "(A (B b) (C c))");
10821082
}
10831083

1084+
public void testAncestorOfIthLeaf() {
1085+
runTest("A <<<1 b", "(ROOT (A (B b)))", "(A (B b))");
1086+
runTest("A <<<2 b", "(ROOT (A (B b)))");
1087+
runTest("A <<<-1 b", "(ROOT (A (B b)))", "(A (B b))");
1088+
runTest("A <<<1 b", "(ROOT (A (B z) (C b)))");
1089+
runTest("A <<<2 b", "(ROOT (A (B z) (C b)))", "(A (B z) (C b))");
1090+
runTest("A <<<-1 b", "(ROOT (A (B z) (C b)))", "(A (B z) (C b))");
1091+
runTest("A <<<-2 b", "(ROOT (A (B z) (C b)))");
1092+
runTest("A <<<-1 z", "(ROOT (A (B z) (C b)))");
1093+
runTest("A <<<-2 z", "(ROOT (A (B z) (C b)))", "(A (B z) (C b))");
1094+
}
1095+
10841096
public void testHeadOfPhrase() {
10851097
runTest("NP <# NNS", "(NP (NN work) (NNS practices))", "(NP (NN work) (NNS practices))");
10861098
runTest("NP <# NN", "(NP (NN work) (NNS practices))"); // should have no results

0 commit comments

Comments
 (0)