Skip to content

Commit ef31e6b

Browse files
committed
Add a test of the icepahc operations Stanza will use to prepare the Icelandic treebank
1 parent bb4d17f commit ef31e6b

File tree

1 file changed

+20
-0
lines changed

1 file changed

+20
-0
lines changed

test/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonTest.java

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -430,6 +430,26 @@ public void testRelabel() {
430430
"(barfoo (curlew 0) (avocet 1))");
431431
}
432432

433+
/**
434+
* Test relabeling a tree from icepahc
435+
*
436+
* The goal is to check that removing the lemmas and combining detached words both work as expected
437+
*/
438+
public void testRelabelICE() {
439+
String treeText = "( (IP-MAT (NP-SBJ (PRO-N Það-það)) (BEPI er-vera) (ADVP (ADV eiginlega-eiginlega)) (ADJP (NEG ekki-ekki) (ADJ-N hægt-hægur)) (IP-INF (TO að-að) (VB lýsa-lýsa)) (NP-OB1 (N-D tilfinningu$-tilfinning) (D-D $nni-hinn)) (IP-INF (TO að-að) (VB fá-fá)) (IP-INF (TO að-að) (VB taka-taka)) (NP-OB1 (N-A þátt-þáttur)) (PP (P í-í) (NP (D-D þessu-þessi))) (, ,-,) (VBPI segir-segja) (NP-SBJ (NPR-N Sverrir-sverrir) (NPR-N Ingi-ingi)) (. .-.)))";
440+
441+
String relabeledTreeText = "( (IP-MAT (NP-SBJ (PRO-N Það)) (BEPI er) (ADVP (ADV eiginlega)) (ADJP (NEG ekki) (ADJ-N hægt)) (IP-INF (TO að) (VB lýsa)) (NP-OB1 (N-D tilfinningu$) (D-D $nni)) (IP-INF (TO að) (VB fá)) (IP-INF (TO að) (VB taka)) (NP-OB1 (N-A þátt)) (PP (P í) (NP (D-D þessu))) (, ,) (VBPI segir) (NP-SBJ (NPR-N Sverrir) (NPR-N Ingi)) (. .)))";
442+
443+
TregexPattern tregex = TregexPattern.compile("/^(.+)-.+$/#1%form=word !< __");
444+
TsurgeonPattern tsurgeon = Tsurgeon.parseOperation("relabel word /^(.+)-.+$/%{form}/");
445+
runTest(tregex, tsurgeon, treeText, relabeledTreeText);
446+
447+
tregex = TregexPattern.compile("/^N-/ < /^([^$]+)[$]$/#1%noun=noun $+ (/^D-/ < /^[$]([^$]+)$/#1%det=det)");
448+
tsurgeon = Tsurgeon.parseOperation("relabel noun /^.+$/%{noun}%{det}/");
449+
runTest(tregex, tsurgeon, relabeledTreeText,
450+
"( (IP-MAT (NP-SBJ (PRO-N Það)) (BEPI er) (ADVP (ADV eiginlega)) (ADJP (NEG ekki) (ADJ-N hægt)) (IP-INF (TO að) (VB lýsa)) (NP-OB1 (N-D tilfinningunni) (D-D $nni)) (IP-INF (TO að) (VB fá)) (IP-INF (TO að) (VB taka)) (NP-OB1 (N-A þátt)) (PP (P í) (NP (D-D þessu))) (, ,) (VBPI segir) (NP-SBJ (NPR-N Sverrir) (NPR-N Ingi)) (. .)))");
451+
}
452+
433453
public void testReplaceNode() {
434454
TsurgeonPattern tsurgeon = Tsurgeon.parseOperation("replace foo blah");
435455
TregexPattern tregex = TregexPattern.compile("B=foo : C=blah");

0 commit comments

Comments
 (0)