Skip to content

Commit dc2847c

Browse files
committed
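Pass arguments to an EditLine via both the text of the edit-line node and its XML attributes. Passing an argument as an attribute allows its value to contain whitespace, which the whitespace-split text form cannot express.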
1 parent 0dc6e10 commit dc2847c

File tree

2 files changed

+140
-31
lines changed

2 files changed

+140
-31
lines changed

src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/Ssurgeon.java

Lines changed: 45 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import edu.stanford.nlp.util.StringUtils;
2121
import edu.stanford.nlp.util.logging.RedwoodConfiguration;
22+
import org.w3c.dom.Attr;
2223
import org.w3c.dom.Document;
2324
import org.w3c.dom.Element;
2425
import org.w3c.dom.Node;
@@ -397,7 +398,7 @@ protected static class SsurgeonArgs {
397398
* whitespace, but retain everything inside quotes, so we can pass
398399
* in hashmaps in String form.
399400
*/
400-
private static String[] parseArgs(String argsString) {
401+
private static Map<String, String> parseArgs(String argsString) {
401402
List<String> retList = new ArrayList<>();
402403
String patternString = "(?:[^\\s\\\"]++|\\\"[^\\\"]*+\\\"|(\\\"))++";
403404
Pattern pattern = Pattern.compile(patternString);
@@ -413,59 +414,58 @@ private static String[] parseArgs(String argsString) {
413414
} else
414415
throw new SsurgeonParseException("Unmatched quote in string to parse");
415416
}
416-
return retList.toArray(StringUtils.EMPTY_STRING_ARRAY);
417+
418+
Map<String, String> parsedArgs = new LinkedHashMap<>();
419+
for (int i = 0; i < retList.size() - 1; i += 2) {
420+
parsedArgs.put(retList.get(i), retList.get(i + 1));
421+
}
422+
return parsedArgs;
417423
}
418424

419-
private static SsurgeonArgs parseArgsBox(String args) {
425+
private static SsurgeonArgs parseArgsBox(String args, Map<String, String> additionalArgs) {
420426
SsurgeonArgs argsBox = new SsurgeonArgs();
421-
final String[] argsArray = parseArgs(args);
427+
Map<String, String> argsArray = parseArgs(args);
428+
for (String additional : additionalArgs.keySet()) {
429+
argsArray.put("-" + additional, additionalArgs.get(additional));
430+
}
422431

423-
for (int argIndex = 0; argIndex < argsArray.length; ++argIndex) {
424-
switch (argsArray[argIndex]) {
432+
for (String argsKey : argsArray.keySet()) {
433+
String argsValue = argsArray.get(argsKey);
434+
switch (argsKey) {
425435
case GOV_NODENAME_ARG:
426-
argsBox.govNodeName = argsArray[argIndex + 1];
427-
argIndex += 1;
436+
argsBox.govNodeName = argsValue;
428437
break;
429438
case DEP_NODENAME_ARG:
430-
argsBox.dep = argsArray[argIndex + 1];
431-
argIndex += 1;
439+
argsBox.dep = argsValue;
432440
break;
433441
case EDGE_NAME_ARG:
434-
argsBox.edge = argsArray[argIndex + 1];
435-
argIndex += 1;
442+
argsBox.edge = argsValue;
436443
break;
437444
case RELN_ARG:
438-
argsBox.reln = argsArray[argIndex + 1];
439-
argIndex += 1;
445+
argsBox.reln = argsValue;
440446
break;
441447
case NODENAME_ARG:
442-
argsBox.node = argsArray[argIndex + 1];
443-
argIndex += 1;
448+
argsBox.node = argsValue;
444449
break;
445450
case NODE_PROTO_ARG:
446-
argsBox.nodeString = argsArray[argIndex + 1];
447-
argIndex += 1;
451+
argsBox.nodeString = argsValue;
448452
break;
449453
case WEIGHT_ARG:
450-
argsBox.weight = Double.valueOf(argsArray[argIndex + 1]);
451-
argIndex += 1;
454+
argsBox.weight = Double.valueOf(argsValue);
452455
break;
453456
case NAME_ARG:
454-
argsBox.name = argsArray[argIndex + 1];
455-
argIndex += 1;
457+
argsBox.name = argsValue;
456458
break;
457459
case POSITION_ARG:
458-
argsBox.position = argsArray[argIndex + 1];
459-
argIndex += 1;
460+
argsBox.position = argsValue;
460461
break;
461462
default:
462-
String key = argsArray[argIndex].substring(1);
463+
String key = argsKey.substring(1);
463464
Class<? extends CoreAnnotation<?>> annotation = AnnotationLookup.toCoreKey(key);
464465
if (annotation == null) {
465-
throw new SsurgeonParseException("Parsing Ssurgeon args: unknown flag " + argsArray[argIndex]);
466+
throw new SsurgeonParseException("Parsing Ssurgeon args: unknown flag " + argsKey);
466467
}
467-
argsBox.annotations.put(key, argsArray[argIndex + 1]);
468-
argIndex += 1;
468+
argsBox.annotations.put(key, argsValue);
469469
}
470470
}
471471
return argsBox;
@@ -474,7 +474,7 @@ private static SsurgeonArgs parseArgsBox(String args) {
474474
/**
475475
* Given a string entry, converts it into a SsurgeonEdit object.
476476
*/
477-
public static SsurgeonEdit parseEditLine(String editLine, Language language) {
477+
public static SsurgeonEdit parseEditLine(String editLine, Map<String, String> attributeArgs, Language language) {
478478
try {
479479
// Extract the operation name first
480480
final String[] tuples1 = editLine.split("\\s+", 2);
@@ -492,7 +492,7 @@ public static SsurgeonEdit parseEditLine(String editLine, Language language) {
492492
}
493493

494494
// Parse the arguments based upon the type of command to execute.
495-
final SsurgeonArgs argsBox = parseArgsBox(tuples1.length == 1 ? "" : tuples1[1]);
495+
final SsurgeonArgs argsBox = parseArgsBox(tuples1.length == 1 ? "" : tuples1[1], attributeArgs);
496496

497497
if (command.equalsIgnoreCase(AddDep.LABEL)) {
498498
if (argsBox.reln == null) {
@@ -726,9 +726,23 @@ public static SsurgeonPattern ssurgeonPatternFromXML(Element elt) {
726726
for (int i=0; i<editNodes.getLength(); i++) {
727727
Node node = editNodes.item(i);
728728
if (node.getNodeType() == Node.ELEMENT_NODE) {
729+
// read all arguments such as `after=" "` off the node
730+
// this way, arguments which can't be parsed via whitespace
731+
// (especially arguments which actually contain whitespace)
732+
// can be passed to an EditLine
733+
// LinkedHashMap so we can preserve insertion order
734+
Map<String, String> attributeArgs = new LinkedHashMap<>();
735+
for (int j = 0; j < node.getAttributes().getLength(); ++j) {
736+
Node attrNode = node.getAttributes().item(j);
737+
if (attrNode.getNodeType() == Node.ATTRIBUTE_NODE) {
738+
Attr attr = (Attr) attrNode;
739+
attributeArgs.put(attr.getName(), attr.getValue());
740+
}
741+
}
742+
729743
Element editElt = (Element) node;
730744
String editVal = getEltText(editElt);
731-
retPattern.addEdit(Ssurgeon.parseEditLine(editVal, retPattern.getLanguage()));
745+
retPattern.addEdit(Ssurgeon.parseEditLine(editVal, attributeArgs, retPattern.getLanguage()));
732746
}
733747
}
734748

test/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/SsurgeonTest.java

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -748,6 +748,101 @@ public void readXMLAddDepRelativePosition() {
748748
}
749749

750750

751+
/**
752+
* Check that adding a word using the attributes of the edit-list works as expected
753+
*/
754+
@Test
755+
public void readXMLAddDepAttributes() {
756+
Ssurgeon inst = Ssurgeon.inst();
757+
758+
// use "dep" as the dependency so as to be language-agnostic in this test
759+
String add = String.join(newline,
760+
"<ssurgeon-pattern-list>",
761+
" <ssurgeon-pattern>",
762+
" <uid>38</uid>",
763+
" <notes>Add a word using the attributes of the edit-list node</notes>",
764+
// have to bomb-proof the pattern
765+
" <semgrex>" + XMLUtils.escapeXML("{word:antennae}=antennae !> {word:blue}") + "</semgrex>",
766+
" <edit-list word=\"blue\" reln=\"dep\">addDep -gov antennae -position -antennae</edit-list>",
767+
" </ssurgeon-pattern>",
768+
"</ssurgeon-pattern-list>");
769+
List<SsurgeonPattern> patterns = inst.readFromString(add);
770+
assertEquals(patterns.size(), 1);
771+
SsurgeonPattern addSsurgeon = patterns.get(0);
772+
773+
SemanticGraph sg = SemanticGraph.valueOf("[has-2 nsubj> Jennifer-1 obj> antennae-3]");
774+
IndexedWord blueVertex = sg.getNodeByIndexSafe(4);
775+
assertNull(blueVertex);
776+
SemanticGraph newSG = addSsurgeon.iterate(sg);
777+
SemanticGraph expected = SemanticGraph.valueOf("[has-2 nsubj> Jennifer-1 obj> [antennae-4 dep> blue-3]]");
778+
assertEquals(expected, newSG);
779+
// the Ssurgeon we just created should not put a tag on the word
780+
// but it SHOULD put blue immediately before antennae
781+
blueVertex = newSG.getNodeByIndexSafe(3);
782+
assertNotNull(blueVertex);
783+
assertNull(blueVertex.tag());
784+
assertEquals("blue", blueVertex.value());
785+
786+
// use "dep" as the dependency so as to be language-agnostic in this test
787+
add = String.join(newline,
788+
"<ssurgeon-pattern-list>",
789+
" <ssurgeon-pattern>",
790+
" <uid>38</uid>",
791+
" <notes>Add a word after the word before antennae (just to test the position)</notes>",
792+
// have to bomb-proof the pattern
793+
" <semgrex>" + XMLUtils.escapeXML("{word:antennae}=antennae - {}=prev !> {word:blue}") + "</semgrex>",
794+
" <edit-list>addDep -gov antennae -reln dep -word blue -position +prev</edit-list>",
795+
" </ssurgeon-pattern>",
796+
"</ssurgeon-pattern-list>");
797+
patterns = inst.readFromString(add);
798+
assertEquals(patterns.size(), 1);
799+
addSsurgeon = patterns.get(0);
800+
801+
sg = SemanticGraph.valueOf("[has-2 nsubj> Jennifer-1 obj> antennae-3]");
802+
blueVertex = sg.getNodeByIndexSafe(4);
803+
assertNull(blueVertex);
804+
newSG = addSsurgeon.iterate(sg);
805+
expected = SemanticGraph.valueOf("[has-2 nsubj> Jennifer-1 obj> [antennae-4 dep> blue-3]]");
806+
assertEquals(expected, newSG);
807+
// the Ssurgeon we just created should not put a tag on the word
808+
// but it SHOULD put blue immediately before antennae
809+
blueVertex = newSG.getNodeByIndexSafe(3);
810+
assertNotNull(blueVertex);
811+
assertNull(blueVertex.tag());
812+
assertEquals("blue", blueVertex.value());
813+
814+
815+
// use "dep" as the dependency so as to be language-agnostic in this test
816+
// this time, be cheeky and use some whitespace in the word
817+
add = String.join(newline,
818+
"<ssurgeon-pattern-list>",
819+
" <ssurgeon-pattern>",
820+
" <uid>38</uid>",
821+
" <notes>Add a word using the attributes of the edit-list node</notes>",
822+
// have to bomb-proof the pattern
823+
" <semgrex>" + XMLUtils.escapeXML("{word:antennae}=antennae !> {word:/bl ue/}") + "</semgrex>",
824+
" <edit-list word=\"bl ue\" reln=\"dep\">addDep -gov antennae -position -antennae</edit-list>",
825+
" </ssurgeon-pattern>",
826+
"</ssurgeon-pattern-list>");
827+
patterns = inst.readFromString(add);
828+
assertEquals(patterns.size(), 1);
829+
addSsurgeon = patterns.get(0);
830+
831+
sg = SemanticGraph.valueOf("[has-2 nsubj> Jennifer-1 obj> antennae-3]");
832+
blueVertex = sg.getNodeByIndexSafe(4);
833+
assertNull(blueVertex);
834+
newSG = addSsurgeon.iterate(sg);
835+
expected = SemanticGraph.valueOf("[has-2 nsubj> Jennifer-1 obj> [antennae-4 dep> blue-3]]");
836+
assertEquals(expected, newSG);
837+
// the Ssurgeon we just created should not put a tag on the word
838+
// but it SHOULD put blue immediately before antennae
839+
blueVertex = newSG.getNodeByIndexSafe(3);
840+
assertNotNull(blueVertex);
841+
assertNull(blueVertex.tag());
842+
assertEquals("bl ue", blueVertex.value());
843+
}
844+
845+
751846
/**
752847
* Add a word, this time setting the morphological features as well
753848
*/

0 commit comments

Comments
 (0)