|
10 | 10 |
|
11 | 11 | import org.junit.Test;
|
12 | 12 |
|
| 13 | +import edu.stanford.nlp.international.Language; |
13 | 14 | import edu.stanford.nlp.ling.AnnotationLookup;
|
14 | 15 | import edu.stanford.nlp.ling.CoreAnnotations;
|
15 | 16 | import edu.stanford.nlp.ling.IndexedWord;
|
@@ -646,7 +647,6 @@ public void readXMLAddDep() {
|
646 | 647 | assertEquals("blue", blueVertex.value());
|
647 | 648 | }
|
648 | 649 |
|
649 |
| - |
650 | 650 | /**
|
651 | 651 | * Check that adding a word to the start of a sentence works as expected
|
652 | 652 | */
|
@@ -747,6 +747,90 @@ public void readXMLAddDepRelativePosition() {
|
747 | 747 | assertEquals("blue", blueVertex.value());
|
748 | 748 | }
|
749 | 749 |
|
| 750 | + /** |
| 751 | + * Set the language when adding a dep. The added dependency should use the pattern's language (UniversalEnglish, then English) |
| 752 | + */ |
| 753 | + @Test |
| 754 | + public void readXMLAddUniversalDep() { |
| 755 | + Ssurgeon inst = Ssurgeon.inst(); |
| 756 | + |
| 757 | + String add = String.join(newline, |
| 758 | + "<ssurgeon-pattern-list>", |
| 759 | + " <ssurgeon-pattern>", |
| 760 | + " <uid>38</uid>", |
| 761 | + " <notes>Add a word before antennae using the position using a UniversalEnglish dependency</notes>", |
| 762 | + " <language>UniversalEnglish</language>", |
| 763 | + // have to bomb-proof the pattern |
| 764 | + " <semgrex>" + XMLUtils.escapeXML("{word:antennae}=antennae !> {word:blue}") + "</semgrex>", |
| 765 | + " <edit-list>addDep -gov antennae -reln amod -word blue -position -antennae</edit-list>", |
| 766 | + " </ssurgeon-pattern>", |
| 767 | + "</ssurgeon-pattern-list>"); |
| 768 | + List<SsurgeonPattern> patterns = inst.readFromString(add); |
| 769 | + assertEquals(1, patterns.size()); |
| 770 | + SsurgeonPattern addSsurgeon = patterns.get(0); |
| 771 | + |
| 772 | + SemanticGraph sg = SemanticGraph.valueOf("[has-2 nsubj> Jennifer-1 obj> antennae-3]", Language.UniversalEnglish); |
| 773 | + IndexedWord blueVertex = sg.getNodeByIndexSafe(4); |
| 774 | + assertNull(blueVertex); |
| 775 | + SemanticGraph newSG = addSsurgeon.iterate(sg); |
| 776 | + SemanticGraph expected = SemanticGraph.valueOf("[has-2 nsubj> Jennifer-1 obj> [antennae-4 amod> blue-3]]", Language.UniversalEnglish); |
| 777 | + for (SemanticGraphEdge edge : expected.edgeIterable()) { |
| 778 | + assertEquals(Language.UniversalEnglish, edge.getRelation().getLanguage()); |
| 779 | + } |
| 780 | + for (SemanticGraphEdge edge : newSG.edgeIterable()) { |
| 781 | + assertEquals(Language.UniversalEnglish, edge.getRelation().getLanguage()); |
| 782 | + } |
| 783 | + assertEquals(expected, newSG); |
| 784 | + // the Ssurgeon we just created should not put a tag on the word |
| 785 | + // but it SHOULD put blue immediately before antennae |
| 786 | + blueVertex = newSG.getNodeByIndexSafe(3); |
| 787 | + assertNotNull(blueVertex); |
| 788 | + assertNull(blueVertex.tag()); |
| 789 | + assertEquals("blue", blueVertex.value()); |
| 790 | + |
| 791 | + |
| 792 | + // If we repeat the same test with an English (SD, not UD) pattern, the graphs should NOT be equal |
| 793 | + // this is because the input and expected graphs are still built with UniversalEnglish dependencies |
| 794 | + add = String.join(newline, |
| 795 | + "<ssurgeon-pattern-list>", |
| 796 | + " <ssurgeon-pattern>", |
| 797 | + " <uid>38</uid>", |
| 798 | + " <notes>Add a word before antennae using the position using an English dependency</notes>", |
| 799 | + " <language>English</language>", |
| 800 | + // have to bomb-proof the pattern |
| 801 | + " <semgrex>" + XMLUtils.escapeXML("{word:antennae}=antennae !> {word:blue}") + "</semgrex>", |
| 802 | + " <edit-list>addDep -gov antennae -reln amod -word blue -position -antennae</edit-list>", |
| 803 | + " </ssurgeon-pattern>", |
| 804 | + "</ssurgeon-pattern-list>"); |
| 805 | + patterns = inst.readFromString(add); |
| 806 | + assertEquals(1, patterns.size()); |
| 807 | + addSsurgeon = patterns.get(0); |
| 808 | + |
| 809 | + sg = SemanticGraph.valueOf("[has-2 nsubj> Jennifer-1 obj> antennae-3]", Language.UniversalEnglish); |
| 810 | + blueVertex = sg.getNodeByIndexSafe(4); |
| 811 | + assertNull(blueVertex); |
| 812 | + newSG = addSsurgeon.iterate(sg); |
| 813 | + expected = SemanticGraph.valueOf("[has-2 nsubj> Jennifer-1 obj> [antennae-4 amod> blue-3]]", Language.UniversalEnglish); |
| 814 | + for (SemanticGraphEdge edge : expected.edgeIterable()) { |
| 815 | + assertEquals(Language.UniversalEnglish, edge.getRelation().getLanguage()); |
| 816 | + } |
| 817 | + // they look the same, but they're really not |
| 818 | + assertEquals(expected.toString(), newSG.toString()); |
| 819 | + assertNotEquals(expected, newSG); |
| 820 | + |
| 821 | + // In this third version, now valueOf is creating an English graph, |
| 822 | + // not a UniversalEnglish graph, so it should work again |
| 823 | + sg = SemanticGraph.valueOf("[has-2 nsubj> Jennifer-1 obj> antennae-3]", Language.English); |
| 824 | + blueVertex = sg.getNodeByIndexSafe(4); |
| 825 | + assertNull(blueVertex); |
| 826 | + newSG = addSsurgeon.iterate(sg); |
| 827 | + expected = SemanticGraph.valueOf("[has-2 nsubj> Jennifer-1 obj> [antennae-4 amod> blue-3]]", Language.English); |
| 828 | + for (SemanticGraphEdge edge : expected.edgeIterable()) { |
| 829 | + assertEquals(Language.English, edge.getRelation().getLanguage()); |
| 830 | + } |
| 831 | + assertEquals(expected, newSG); |
| 832 | + } |
| 833 | + |
750 | 834 | /**
|
751 | 835 | * There should be an exception for an annotation key that does not exist
|
752 | 836 | */
|
@@ -1014,7 +1098,7 @@ public void simpleTest() {
|
1014 | 1098 | attributes.put("lemma", "is");
|
1015 | 1099 | attributes.put("current", "is");
|
1016 | 1100 | attributes.put("pos", "VBN");
|
1017 |
| - SsurgeonEdit addCopula = new AddDep("a2", EnglishGrammaticalRelations.COPULA, attributes, null); |
| 1101 | + SsurgeonEdit addCopula = new AddDep("a2", EnglishGrammaticalRelations.COPULA, attributes, null, 0.0); |
1018 | 1102 | pattern.addEdit(addCopula);
|
1019 | 1103 |
|
1020 | 1104 | // Destroy subgraph
|
|
0 commit comments