Skip to content

Commit db74e51

Browse files
committed
Add a Language field to SsurgeonPattern. This allows Ssurgeon to create edges using the proper dependency scheme. Also, use the version of GrammaticalRelation.valueOf which creates a relation if it doesn't already exist. This should allow unknown dependency schemes to work for the most part (although some bug may still crop up anyway)
1 parent ec8576b commit db74e51

File tree

5 files changed

+123
-25
lines changed

5 files changed

+123
-25
lines changed

src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/AddDep.java

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,6 @@ public class AddDep extends SsurgeonEdit {
3333
final String position;
3434
final double weight;
3535

36-
/**
37-
* Creates an EnglishGrammaticalRelation AddDep edit.
38-
* @param newNode String representation of new dependent IndexedFeatureNode map.
39-
*/
40-
public static AddDep createEngAddDep(String govNodeName, String engRelation, Map<String, String> attributes, String position) {
41-
GrammaticalRelation relation = EnglishGrammaticalRelations.valueOf(engRelation);
42-
return new AddDep(govNodeName, relation, attributes, position);
43-
}
44-
4536
public AddDep(String govNodeName, GrammaticalRelation relation, Map<String, String> attributes, String position) {
4637
this(govNodeName, relation, attributes, position, 0.0);
4738
}

src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/RelabelNamedEdge.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,6 @@ public RelabelNamedEdge(String edgeName, GrammaticalRelation relation) {
3939
this.relation = relation;
4040
}
4141

42-
public static RelabelNamedEdge createEngRelabel(String edgeName, String relation) {
43-
GrammaticalRelation reln = EnglishGrammaticalRelations.valueOf(relation);
44-
return new RelabelNamedEdge(edgeName, reln);
45-
}
46-
4742
@Override
4843
public String toEditString() {
4944
StringWriter buf = new StringWriter();

src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/Ssurgeon.java

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.w3c.dom.Node;
2525
import org.w3c.dom.NodeList;
2626

27+
import edu.stanford.nlp.international.Language;
2728
import edu.stanford.nlp.ling.AnnotationLookup;
2829
import edu.stanford.nlp.ling.CoreAnnotation;
2930
import edu.stanford.nlp.semgraph.SemanticGraph;
@@ -359,7 +360,7 @@ private static SsurgeonArgs parseArgsBox(String args) {
359360
/**
360361
* Given a string entry, converts it into a SsurgeonEdit object.
361362
*/
362-
public static SsurgeonEdit parseEditLine(String editLine) {
363+
public static SsurgeonEdit parseEditLine(String editLine, Language language) {
363364
try {
364365
// Extract the operation name first
365366
final String[] tuples1 = editLine.split("\\s+", 2);
@@ -380,20 +381,29 @@ public static SsurgeonEdit parseEditLine(String editLine) {
380381
final SsurgeonArgs argsBox = parseArgsBox(tuples1.length == 1 ? "" : tuples1[1]);
381382

382383
if (command.equalsIgnoreCase(AddDep.LABEL)) {
383-
return AddDep.createEngAddDep(argsBox.govNodeName, argsBox.reln, argsBox.annotations, argsBox.position);
384+
if (argsBox.reln == null) {
385+
throw new SsurgeonParseException("Relation not specified for AddDep");
386+
}
387+
GrammaticalRelation reln = GrammaticalRelation.valueOf(language, argsBox.reln);
388+
return new AddDep(argsBox.govNodeName, reln, argsBox.annotations, argsBox.position);
384389
} else if (command.equalsIgnoreCase(AddNode.LABEL)) {
385390
return AddNode.createAddNode(argsBox.nodeString, argsBox.name);
386391
} else if (command.equalsIgnoreCase(AddEdge.LABEL)) {
387-
return AddEdge.createEngAddEdge(argsBox.govNodeName, argsBox.dep, argsBox.reln, argsBox.weight);
392+
if (argsBox.reln == null) {
393+
throw new SsurgeonParseException("Relation not specified for AddEdge");
394+
}
395+
GrammaticalRelation reln = GrammaticalRelation.valueOf(language, argsBox.reln);
396+
return new AddEdge(argsBox.govNodeName, argsBox.dep, reln, argsBox.weight);
388397
} else if (command.equalsIgnoreCase(DeleteGraphFromNode.LABEL)) {
389398
return new DeleteGraphFromNode(argsBox.node);
390399
} else if (command.equalsIgnoreCase(EditNode.LABEL)) {
391400
return new EditNode(argsBox.node, argsBox.annotations);
392401
} else if (command.equalsIgnoreCase(RelabelNamedEdge.LABEL)) {
393-
// TODO: pass around a Language (perhaps via ssurgeon argument)
394-
// rather than hardcoding English, which is probably not even true
395-
// compared to UniversalEnglish these days
396-
return RelabelNamedEdge.createEngRelabel(argsBox.edge, argsBox.reln);
402+
if (argsBox.reln == null) {
403+
throw new SsurgeonParseException("Relation not specified for RelabelNamedEdge");
404+
}
405+
GrammaticalRelation reln = GrammaticalRelation.valueOf(language, argsBox.reln);
406+
return new RelabelNamedEdge(argsBox.edge, reln);
397407
} else if (command.equalsIgnoreCase(RemoveEdge.LABEL)) {
398408
GrammaticalRelation reln = null;
399409
if (argsBox.reln != null) {
@@ -592,13 +602,19 @@ public static SsurgeonPattern ssurgeonPatternFromXML(Element elt) {
592602
SemgrexPattern semgrexPattern = SemgrexPattern.compile(semgrexString);
593603
SsurgeonPattern retPattern = new SsurgeonPattern(uid, semgrexPattern);
594604
retPattern.setNotes(notes);
605+
606+
String language = getTagText(elt, SsurgeonPattern.LANGUAGE_TAG);
607+
if (!language.equals("")) {
608+
retPattern.setLanguage(language);
609+
}
610+
595611
NodeList editNodes = elt.getElementsByTagName(SsurgeonPattern.EDIT_LIST_ELEM_TAG);
596612
for (int i=0; i<editNodes.getLength(); i++) {
597613
Node node = editNodes.item(i);
598614
if (node.getNodeType() == Node.ELEMENT_NODE) {
599615
Element editElt = (Element) node;
600616
String editVal = getEltText(editElt);
601-
retPattern.addEdit(Ssurgeon.parseEditLine(editVal));
617+
retPattern.addEdit(Ssurgeon.parseEditLine(editVal, retPattern.getLanguage()));
602618
}
603619
}
604620

src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/SsurgeonPattern.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,10 @@
33
import java.io.*;
44
import java.util.*;
55

6+
import edu.stanford.nlp.international.Language;
7+
import edu.stanford.nlp.ling.IndexedWord;
68
import edu.stanford.nlp.semgraph.SemanticGraph;
79
import edu.stanford.nlp.semgraph.SemanticGraphFactory;
8-
import edu.stanford.nlp.ling.IndexedWord;
910
import edu.stanford.nlp.semgraph.semgrex.ssurgeon.pred.SsurgPred;
1011
import edu.stanford.nlp.semgraph.semgrex.*;
1112
import edu.stanford.nlp.util.Generics;
@@ -26,6 +27,7 @@
2627
public class SsurgeonPattern {
2728
protected String UID;
2829
protected String notes = "";
30+
protected Language language = Language.English;
2931
protected List<SsurgeonEdit> editScript;
3032
protected SemgrexPattern semgrexPattern;
3133
protected SemanticGraph semgrexGraph = null; // Source graph semgrex pattern was derived from (used for pattern learning)
@@ -236,6 +238,7 @@ public SemgrexPattern getSemgrexPattern() {
236238
* ------ */
237239
public static final String ELT_LIST_TAG = "ssurgeon-pattern-list";
238240
public static final String UID_ELEM_TAG = "uid";
241+
public static final String LANGUAGE_TAG = "language";
239242
public static final String RESOURCE_TAG = "resource";
240243
public static final String SSURGEON_ELEM_TAG = "ssurgeon-pattern";
241244
public static final String SEMGREX_ELEM_TAG = "semgrex";
@@ -274,6 +277,15 @@ public void setUID(String uid) {
274277
UID = uid;
275278
}
276279

280+
public Language getLanguage() {
281+
return language;
282+
}
283+
284+
public void setLanguage(String language) {
285+
// the stored language might end up null if the named language doesn't exist
286+
this.language = Language.valueOfSafe(language);
287+
}
288+
277289
/**
278290
* Simply reads the given Ssurgeon pattern from file (args[0]), parses it, and prints it out.
279291
* Use this for debugging the class and patterns.

test/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/SsurgeonTest.java

Lines changed: 86 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import org.junit.Test;
1212

13+
import edu.stanford.nlp.international.Language;
1314
import edu.stanford.nlp.ling.AnnotationLookup;
1415
import edu.stanford.nlp.ling.CoreAnnotations;
1516
import edu.stanford.nlp.ling.IndexedWord;
@@ -646,7 +647,6 @@ public void readXMLAddDep() {
646647
assertEquals("blue", blueVertex.value());
647648
}
648649

649-
650650
/**
651651
* Check that adding a word to the start of a sentence works as expected
652652
*/
@@ -747,6 +747,90 @@ public void readXMLAddDepRelativePosition() {
747747
assertEquals("blue", blueVertex.value());
748748
}
749749

750+
/**
751+
* Set the language when adding a dep. Should create a UniversalEnglish dependency
752+
*/
753+
@Test
754+
public void readXMLAddUniversalDep() {
755+
Ssurgeon inst = Ssurgeon.inst();
756+
757+
String add = String.join(newline,
758+
"<ssurgeon-pattern-list>",
759+
" <ssurgeon-pattern>",
760+
" <uid>38</uid>",
761+
" <notes>Add a word before antennae using the position using a UniversalEnglish dependency</notes>",
762+
" <language>UniversalEnglish</language>",
763+
// have to bomb-proof the pattern
764+
" <semgrex>" + XMLUtils.escapeXML("{word:antennae}=antennae !> {word:blue}") + "</semgrex>",
765+
" <edit-list>addDep -gov antennae -reln amod -word blue -position -antennae</edit-list>",
766+
" </ssurgeon-pattern>",
767+
"</ssurgeon-pattern-list>");
768+
List<SsurgeonPattern> patterns = inst.readFromString(add);
769+
assertEquals(patterns.size(), 1);
770+
SsurgeonPattern addSsurgeon = patterns.get(0);
771+
772+
SemanticGraph sg = SemanticGraph.valueOf("[has-2 nsubj> Jennifer-1 obj> antennae-3]", Language.UniversalEnglish);
773+
IndexedWord blueVertex = sg.getNodeByIndexSafe(4);
774+
assertNull(blueVertex);
775+
SemanticGraph newSG = addSsurgeon.iterate(sg);
776+
SemanticGraph expected = SemanticGraph.valueOf("[has-2 nsubj> Jennifer-1 obj> [antennae-4 amod> blue-3]]", Language.UniversalEnglish);
777+
for (SemanticGraphEdge edge : expected.edgeIterable()) {
778+
assertEquals(Language.UniversalEnglish, edge.getRelation().getLanguage());
779+
}
780+
for (SemanticGraphEdge edge : newSG.edgeIterable()) {
781+
assertEquals(Language.UniversalEnglish, edge.getRelation().getLanguage());
782+
}
783+
assertEquals(expected, newSG);
784+
// the Ssurgeon we just created should not put a tag on the word
785+
// but it SHOULD put blue immediately before antennae
786+
blueVertex = newSG.getNodeByIndexSafe(3);
787+
assertNotNull(blueVertex);
788+
assertNull(blueVertex.tag());
789+
assertEquals("blue", blueVertex.value());
790+
791+
792+
// If we repeat the same edit with English (SD, not UD) relations, the result should no longer equal the expected graph
793+
// this is because the graphs below are built with UniversalEnglish dependencies, whereas the edit adds an English relation
794+
add = String.join(newline,
795+
"<ssurgeon-pattern-list>",
796+
" <ssurgeon-pattern>",
797+
" <uid>38</uid>",
798+
" <notes>Add a word before antennae using the position using an English dependency</notes>",
799+
" <language>English</language>",
800+
// have to bomb-proof the pattern
801+
" <semgrex>" + XMLUtils.escapeXML("{word:antennae}=antennae !> {word:blue}") + "</semgrex>",
802+
" <edit-list>addDep -gov antennae -reln amod -word blue -position -antennae</edit-list>",
803+
" </ssurgeon-pattern>",
804+
"</ssurgeon-pattern-list>");
805+
patterns = inst.readFromString(add);
806+
assertEquals(patterns.size(), 1);
807+
addSsurgeon = patterns.get(0);
808+
809+
sg = SemanticGraph.valueOf("[has-2 nsubj> Jennifer-1 obj> antennae-3]", Language.UniversalEnglish);
810+
blueVertex = sg.getNodeByIndexSafe(4);
811+
assertNull(blueVertex);
812+
newSG = addSsurgeon.iterate(sg);
813+
expected = SemanticGraph.valueOf("[has-2 nsubj> Jennifer-1 obj> [antennae-4 amod> blue-3]]", Language.UniversalEnglish);
814+
for (SemanticGraphEdge edge : expected.edgeIterable()) {
815+
assertEquals(Language.UniversalEnglish, edge.getRelation().getLanguage());
816+
}
817+
// they look the same, but they're really not
818+
assertEquals(expected.toString(), newSG.toString());
819+
assertNotEquals(expected, newSG);
820+
821+
// In this third version, now valueOf is creating an English graph,
822+
// not a UniversalEnglish graph, so it should work again
823+
sg = SemanticGraph.valueOf("[has-2 nsubj> Jennifer-1 obj> antennae-3]", Language.English);
824+
blueVertex = sg.getNodeByIndexSafe(4);
825+
assertNull(blueVertex);
826+
newSG = addSsurgeon.iterate(sg);
827+
expected = SemanticGraph.valueOf("[has-2 nsubj> Jennifer-1 obj> [antennae-4 amod> blue-3]]", Language.English);
828+
for (SemanticGraphEdge edge : expected.edgeIterable()) {
829+
assertEquals(Language.English, edge.getRelation().getLanguage());
830+
}
831+
assertEquals(expected, newSG);
832+
}
833+
750834
/**
751835
* There should be an exception for an annotation key that does not exist
752836
*/
@@ -1014,7 +1098,7 @@ public void simpleTest() {
10141098
attributes.put("lemma", "is");
10151099
attributes.put("current", "is");
10161100
attributes.put("pos", "VBN");
1017-
SsurgeonEdit addCopula = new AddDep("a2", EnglishGrammaticalRelations.COPULA, attributes, null);
1101+
SsurgeonEdit addCopula = new AddDep("a2", EnglishGrammaticalRelations.COPULA, attributes, null, 0.0);
10181102
pattern.addEdit(addCopula);
10191103

10201104
// Destroy subgraph

0 commit comments

Comments
 (0)