1
1
package edu .stanford .nlp .pipeline ;
2
2
3
+ import edu .stanford .nlp .international .Language ;
3
4
import edu .stanford .nlp .io .IOUtils ;
4
5
import edu .stanford .nlp .ling .*;
5
6
import edu .stanford .nlp .semgraph .*;
@@ -432,7 +433,10 @@ public CoreLabel convertLineToCoreLabel(CoNLLUSentence sentence, String line) {
432
433
cl .set (extraColumns .get (extraColumnIdx ), fields .get (extraColumnIdx ));
433
434
}
434
435
435
- Map <String , String > miscKeyValues = new HashMap <>();
436
+ // LinkedHashMap because we care about trying to preserve the order of the keys
437
+ // for later if we output the document in conllu
438
+ // (although this doesn't put SpaceAfter in a canonical order)
439
+ Map <String , String > miscKeyValues = new LinkedHashMap <>();
436
440
if (!fields .get (CoNLLU_MiscField ).equals ("_" )) {
437
441
Arrays .stream (fields .get (CoNLLU_MiscField ).split ("\\ |" )).forEach (
438
442
kv -> miscKeyValues .put (kv .split ("=" , 2 )[0 ], kv .split ("=" )[1 ]));
@@ -454,7 +458,7 @@ public CoreLabel convertLineToCoreLabel(CoNLLUSentence sentence, String line) {
454
458
cl .setIsMWTFirst (false );
455
459
} else if (sentence .mwtData .containsKey (sentenceTokenIndex - 1 )) {
456
460
String miscInfo = sentence .mwtMiscs .get (sentence .mwtData .get (sentenceTokenIndex - 1 ));
457
- Map <String , String > mwtKeyValues = new HashMap <>();
461
+ Map <String , String > mwtKeyValues = new LinkedHashMap <>();
458
462
if (miscInfo != null && !miscInfo .equals ("_" )) {
459
463
Arrays .stream (miscInfo .split ("\\ |" )).forEach (
460
464
kv -> mwtKeyValues .put (kv .split ("=" , 2 )[0 ], kv .split ("=" )[1 ]));
@@ -606,7 +610,7 @@ public CoreMap convertCoNLLUSentenceToCoreMap(CoNLLUDocument doc, CoNLLUSentence
606
610
graphRoots .add (dependent );
607
611
} else {
608
612
IndexedWord gov = graphNodes .get (fields .get (CoNLLU_GovField ));
609
- GrammaticalRelation reln = GrammaticalRelation .valueOf (fields .get (CoNLLU_RelnField ));
613
+ GrammaticalRelation reln = GrammaticalRelation .valueOf (Language . UniversalEnglish , fields .get (CoNLLU_RelnField ));
610
614
graphEdges .add (new SemanticGraphEdge (gov , dependent , reln , 1.0 , false ));
611
615
}
612
616
}
@@ -632,7 +636,7 @@ public CoreMap convertCoNLLUSentenceToCoreMap(CoNLLUDocument doc, CoNLLUSentence
632
636
enhancedRoots .add (dependent );
633
637
} else {
634
638
IndexedWord gov = graphNodes .get (arcPieces [0 ]);
635
- GrammaticalRelation reln = GrammaticalRelation .valueOf (arcPieces [1 ]);
639
+ GrammaticalRelation reln = GrammaticalRelation .valueOf (Language . UniversalEnglish , arcPieces [1 ]);
636
640
enhancedEdges .add (new SemanticGraphEdge (gov , dependent , reln , 1.0 , false ));
637
641
}
638
642
}
0 commit comments