Skip to content

Commit b018344

Browse files
committed
Use a LinkedHashMap when building the misc key/value pairs so that the pieces stay in their original order. Use UniversalEnglish as the language for GrammaticalRelation so that the default separator is ":".
1 parent c7b15fd commit b018344

File tree

1 file changed

+8
-4
lines changed

1 file changed

+8
-4
lines changed

src/edu/stanford/nlp/pipeline/CoNLLUReader.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package edu.stanford.nlp.pipeline;
22

3+
import edu.stanford.nlp.international.Language;
34
import edu.stanford.nlp.io.IOUtils;
45
import edu.stanford.nlp.ling.*;
56
import edu.stanford.nlp.semgraph.*;
@@ -432,7 +433,10 @@ public CoreLabel convertLineToCoreLabel(CoNLLUSentence sentence, String line) {
432433
cl.set(extraColumns.get(extraColumnIdx), fields.get(extraColumnIdx));
433434
}
434435

435-
Map<String, String> miscKeyValues = new HashMap<>();
436+
// LinkedHashMap because we care about trying to preserve the order of the keys
437+
// for later if we output the document in conllu
438+
// (although this doesn't put SpaceAfter in a canonical order)
439+
Map<String, String> miscKeyValues = new LinkedHashMap<>();
436440
if (!fields.get(CoNLLU_MiscField).equals("_")) {
437441
Arrays.stream(fields.get(CoNLLU_MiscField).split("\\|")).forEach(
438442
kv -> miscKeyValues.put(kv.split("=", 2)[0], kv.split("=")[1]));
@@ -454,7 +458,7 @@ public CoreLabel convertLineToCoreLabel(CoNLLUSentence sentence, String line) {
454458
cl.setIsMWTFirst(false);
455459
} else if (sentence.mwtData.containsKey(sentenceTokenIndex - 1)) {
456460
String miscInfo = sentence.mwtMiscs.get(sentence.mwtData.get(sentenceTokenIndex - 1));
457-
Map<String, String> mwtKeyValues = new HashMap<>();
461+
Map<String, String> mwtKeyValues = new LinkedHashMap<>();
458462
if (miscInfo != null && !miscInfo.equals("_")) {
459463
Arrays.stream(miscInfo.split("\\|")).forEach(
460464
kv -> mwtKeyValues.put(kv.split("=", 2)[0], kv.split("=")[1]));
@@ -606,7 +610,7 @@ public CoreMap convertCoNLLUSentenceToCoreMap(CoNLLUDocument doc, CoNLLUSentence
606610
graphRoots.add(dependent);
607611
} else {
608612
IndexedWord gov = graphNodes.get(fields.get(CoNLLU_GovField));
609-
GrammaticalRelation reln = GrammaticalRelation.valueOf(fields.get(CoNLLU_RelnField));
613+
GrammaticalRelation reln = GrammaticalRelation.valueOf(Language.UniversalEnglish, fields.get(CoNLLU_RelnField));
610614
graphEdges.add(new SemanticGraphEdge(gov, dependent, reln, 1.0, false));
611615
}
612616
}
@@ -632,7 +636,7 @@ public CoreMap convertCoNLLUSentenceToCoreMap(CoNLLUDocument doc, CoNLLUSentence
632636
enhancedRoots.add(dependent);
633637
} else {
634638
IndexedWord gov = graphNodes.get(arcPieces[0]);
635-
GrammaticalRelation reln = GrammaticalRelation.valueOf(arcPieces[1]);
639+
GrammaticalRelation reln = GrammaticalRelation.valueOf(Language.UniversalEnglish, arcPieces[1]);
636640
enhancedEdges.add(new SemanticGraphEdge(gov, dependent, reln, 1.0, false));
637641
}
638642
}

0 commit comments

Comments
 (0)