19
19
20
20
import edu .stanford .nlp .util .StringUtils ;
21
21
import edu .stanford .nlp .util .logging .RedwoodConfiguration ;
22
+ import org .w3c .dom .Attr ;
22
23
import org .w3c .dom .Document ;
23
24
import org .w3c .dom .Element ;
24
25
import org .w3c .dom .Node ;
@@ -397,7 +398,7 @@ protected static class SsurgeonArgs {
397
398
* whitespace, but retain everything inside quotes, so we can pass
398
399
* in hashmaps in String form.
399
400
*/
400
- private static String [] parseArgs (String argsString ) {
401
+ private static Map < String , String > parseArgs (String argsString ) {
401
402
List <String > retList = new ArrayList <>();
402
403
String patternString = "(?:[^\\ s\\ \" ]++|\\ \" [^\\ \" ]*+\\ \" |(\\ \" ))++" ;
403
404
Pattern pattern = Pattern .compile (patternString );
@@ -413,59 +414,58 @@ private static String[] parseArgs(String argsString) {
413
414
} else
414
415
throw new SsurgeonParseException ("Unmatched quote in string to parse" );
415
416
}
416
- return retList .toArray (StringUtils .EMPTY_STRING_ARRAY );
417
+
418
+ Map <String , String > parsedArgs = new LinkedHashMap <>();
419
+ for (int i = 0 ; i < retList .size () - 1 ; i += 2 ) {
420
+ parsedArgs .put (retList .get (i ), retList .get (i + 1 ));
421
+ }
422
+ return parsedArgs ;
417
423
}
418
424
419
- private static SsurgeonArgs parseArgsBox (String args ) {
425
+ private static SsurgeonArgs parseArgsBox (String args , Map < String , String > additionalArgs ) {
420
426
SsurgeonArgs argsBox = new SsurgeonArgs ();
421
- final String [] argsArray = parseArgs (args );
427
+ Map <String , String > argsArray = parseArgs (args );
428
+ for (String additional : additionalArgs .keySet ()) {
429
+ argsArray .put ("-" + additional , additionalArgs .get (additional ));
430
+ }
422
431
423
- for (int argIndex = 0 ; argIndex < argsArray .length ; ++argIndex ) {
424
- switch (argsArray [argIndex ]) {
432
+ for (String argsKey : argsArray .keySet ()) {
433
+ String argsValue = argsArray .get (argsKey );
434
+ switch (argsKey ) {
425
435
case GOV_NODENAME_ARG :
426
- argsBox .govNodeName = argsArray [argIndex + 1 ];
427
- argIndex += 1 ;
436
+ argsBox .govNodeName = argsValue ;
428
437
break ;
429
438
case DEP_NODENAME_ARG :
430
- argsBox .dep = argsArray [argIndex + 1 ];
431
- argIndex += 1 ;
439
+ argsBox .dep = argsValue ;
432
440
break ;
433
441
case EDGE_NAME_ARG :
434
- argsBox .edge = argsArray [argIndex + 1 ];
435
- argIndex += 1 ;
442
+ argsBox .edge = argsValue ;
436
443
break ;
437
444
case RELN_ARG :
438
- argsBox .reln = argsArray [argIndex + 1 ];
439
- argIndex += 1 ;
445
+ argsBox .reln = argsValue ;
440
446
break ;
441
447
case NODENAME_ARG :
442
- argsBox .node = argsArray [argIndex + 1 ];
443
- argIndex += 1 ;
448
+ argsBox .node = argsValue ;
444
449
break ;
445
450
case NODE_PROTO_ARG :
446
- argsBox .nodeString = argsArray [argIndex + 1 ];
447
- argIndex += 1 ;
451
+ argsBox .nodeString = argsValue ;
448
452
break ;
449
453
case WEIGHT_ARG :
450
- argsBox .weight = Double .valueOf (argsArray [argIndex + 1 ]);
451
- argIndex += 1 ;
454
+ argsBox .weight = Double .valueOf (argsValue );
452
455
break ;
453
456
case NAME_ARG :
454
- argsBox .name = argsArray [argIndex + 1 ];
455
- argIndex += 1 ;
457
+ argsBox .name = argsValue ;
456
458
break ;
457
459
case POSITION_ARG :
458
- argsBox .position = argsArray [argIndex + 1 ];
459
- argIndex += 1 ;
460
+ argsBox .position = argsValue ;
460
461
break ;
461
462
default :
462
- String key = argsArray [ argIndex ] .substring (1 );
463
+ String key = argsKey .substring (1 );
463
464
Class <? extends CoreAnnotation <?>> annotation = AnnotationLookup .toCoreKey (key );
464
465
if (annotation == null ) {
465
- throw new SsurgeonParseException ("Parsing Ssurgeon args: unknown flag " + argsArray [ argIndex ] );
466
+ throw new SsurgeonParseException ("Parsing Ssurgeon args: unknown flag " + argsKey );
466
467
}
467
- argsBox .annotations .put (key , argsArray [argIndex + 1 ]);
468
- argIndex += 1 ;
468
+ argsBox .annotations .put (key , argsValue );
469
469
}
470
470
}
471
471
return argsBox ;
@@ -474,7 +474,7 @@ private static SsurgeonArgs parseArgsBox(String args) {
474
474
/**
475
475
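* Given a string entry, converts it into a SsurgeonEdit object. Each entry of {@code attributeArgs} is added as an extra argument with "-" prepended to its key, replacing the value of any identically named flag parsed from the edit line itself.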
476
476
*/
477
- public static SsurgeonEdit parseEditLine (String editLine , Language language ) {
477
+ public static SsurgeonEdit parseEditLine (String editLine , Map < String , String > attributeArgs , Language language ) {
478
478
try {
479
479
// Extract the operation name first
480
480
final String [] tuples1 = editLine .split ("\\ s+" , 2 );
@@ -492,7 +492,7 @@ public static SsurgeonEdit parseEditLine(String editLine, Language language) {
492
492
}
493
493
494
494
// Parse the arguments based upon the type of command to execute.
495
- final SsurgeonArgs argsBox = parseArgsBox (tuples1 .length == 1 ? "" : tuples1 [1 ]);
495
+ final SsurgeonArgs argsBox = parseArgsBox (tuples1 .length == 1 ? "" : tuples1 [1 ], attributeArgs );
496
496
497
497
if (command .equalsIgnoreCase (AddDep .LABEL )) {
498
498
if (argsBox .reln == null ) {
@@ -726,9 +726,23 @@ public static SsurgeonPattern ssurgeonPatternFromXML(Element elt) {
726
726
for (int i =0 ; i <editNodes .getLength (); i ++) {
727
727
Node node = editNodes .item (i );
728
728
if (node .getNodeType () == Node .ELEMENT_NODE ) {
729
+ // read all arguments such as `after=" "` off the node
730
+ // this way, arguments which can't be parsed via whitespace
731
+ // (especially arguments which actually contain whitespace)
732
+ // can be passed to an EditLine
733
+ // LinkedHashMap so we can preserve insertion order
734
+ Map <String , String > attributeArgs = new LinkedHashMap <>();
735
+ for (int j = 0 ; j < node .getAttributes ().getLength (); ++j ) {
736
+ Node attrNode = node .getAttributes ().item (j );
737
+ if (attrNode .getNodeType () == Node .ATTRIBUTE_NODE ) {
738
+ Attr attr = (Attr ) attrNode ;
739
+ attributeArgs .put (attr .getName (), attr .getValue ());
740
+ }
741
+ }
742
+
729
743
Element editElt = (Element ) node ;
730
744
String editVal = getEltText (editElt );
731
- retPattern .addEdit (Ssurgeon .parseEditLine (editVal , retPattern .getLanguage ()));
745
+ retPattern .addEdit (Ssurgeon .parseEditLine (editVal , attributeArgs , retPattern .getLanguage ()));
732
746
}
733
747
}
734
748
0 commit comments