Skip to content

Commit c59716b

Browse files
committed
Read the enhanced graph in the CoNLLUReader
1 parent 40cb460 commit c59716b

File tree

2 files changed

+109
-16
lines changed

2 files changed

+109
-16
lines changed

itest/src/edu/stanford/nlp/pipeline/CoNLLUReaderITest.java

Lines changed: 67 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import edu.stanford.nlp.ling.*;
44
import edu.stanford.nlp.semgraph.*;
5+
import edu.stanford.nlp.trees.GrammaticalRelation;
56
import edu.stanford.nlp.util.*;
67

78
import static org.junit.Assert.assertEquals;
@@ -11,7 +12,9 @@
1112

1213
import java.io.*;
1314
import java.util.ArrayList;
15+
import java.util.HashMap;
1416
import java.util.List;
17+
import java.util.Map;
1518
import java.util.Properties;
1619

1720
import org.junit.Before;
@@ -111,10 +114,14 @@ public void testReadingInCoNLLUFile() throws ClassNotFoundException, IOException
111114
}
112115

113116
// Compare sentence ids
117+
// Check that the enhanced dependencies exist
118+
// (these sentences should all have them, but we will check it
119+
// more thoroughly for the Empty test sentence)
114120
// Check number of keys on each sentence
115121
for (int i = 0; i < sentences.size(); ++i) {
116122
assertEquals(Integer.valueOf(i), sentences.get(i).get(CoreAnnotations.SentenceIndexAnnotation.class));
117-
assertEquals(4, sentences.get(i).keySet().size());
123+
assertTrue(sentences.get(i).containsKey(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class));
124+
assertEquals(5, sentences.get(i).keySet().size());
118125
}
119126

120127
// Check the document tokens and the sentence tokens lists are the same
@@ -319,12 +326,66 @@ public void testReadingInCoNLLUFile() throws ClassNotFoundException, IOException
319326
}
320327
}
321328

322-
public String emptiesPath = String.format("edu/stanford/nlp/pipeline/en-example.conllu");
329+
public static final String emptiesPath = String.format("edu/stanford/nlp/pipeline/en-example.conllu");
323330

324-
String[] EXPECTED_ENGLISH_WORDS = {
331+
static final String[] EXPECTED_ENGLISH_WORDS = {
325332
"Over", "300", "Iraqis", "are", "reported", "dead", "and", "500", "wounded", "in", "Fallujah", "alone", "."
326333
};
327334

335+
static final String[][] EXPECTED_ENHANCED_EDGES = {
336+
{"1", "2", "advmod"},
337+
{"2", "3", "nummod"},
338+
{"3", "5", "nsubj:pass"},
339+
{"3", "6", "nsubj:xsubj"},
340+
{"3", "8", "nsubj:pass"},
341+
{"4", "5", "aux:pass"},
342+
{"6", "5", "xcomp"},
343+
{"7", "8", "cc"},
344+
{"7", "8.1", "cc"},
345+
{"8", "5", "conj:and"},
346+
{"8", "8.1", "nsubj:pass"},
347+
{"8", "9", "nsubj:xsubj"},
348+
{"8.1", "5", "conj:and"},
349+
{"9", "8.1", "xcomp"},
350+
{"10", "11", "case"},
351+
{"11", "5", "obl:in"},
352+
{"12", "11", "advmod"},
353+
{"13", "5", "punct"},
354+
};
355+
static final SemanticGraph EXPECTED_ENHANCED = buildEnhancedTest();
356+
/**
 * Builds the enhanced dependency graph the reader is expected to produce
 * for the sentence in {@code emptiesPath}.
 *
 * Nodes are created from {@code EXPECTED_ENGLISH_WORDS} plus one extra
 * node keyed "8.1"; edges come from {@code EXPECTED_ENHANCED_EDGES}; the
 * node keyed "5" is set as the only root.
 *
 * @return the expected enhanced {@code SemanticGraph}
 */
static SemanticGraph buildEnhancedTest() {
357+
// nodes are looked up by their index written as a string ("1", "2", ..., "8.1")
Map<String, IndexedWord> graphNodes = new HashMap<>();
358+
// one node per expected word, with 1-based indices, all in sentence 0
for (int i = 0; i < EXPECTED_ENGLISH_WORDS.length; ++i) {
359+
String index = Integer.toString(i+1);
360+
CoreLabel cl = new CoreLabel();
361+
cl.setValue(EXPECTED_ENGLISH_WORDS[i]);
362+
cl.setIndex(i+1);
363+
cl.setSentIndex(0);
364+
graphNodes.put(index, new IndexedWord(cl));
365+
}
366+
// the extra node "8.1": value "reported", index 8, EmptyIndexAnnotation 1
{
367+
String index = "8.1";
368+
CoreLabel cl = new CoreLabel();
369+
cl.setValue("reported");
370+
cl.setIndex(8);
371+
cl.set(CoreAnnotations.EmptyIndexAnnotation.class, 1);
372+
cl.setSentIndex(0);
373+
graphNodes.put(index, new IndexedWord(cl));
374+
}
375+
List<SemanticGraphEdge> edges = new ArrayList<>();
376+
// each row of EXPECTED_ENHANCED_EDGES is {dependent, governor, relation}
for (String[] edge : EXPECTED_ENHANCED_EDGES) {
377+
IndexedWord dep = graphNodes.get(edge[0]);
378+
IndexedWord gov = graphNodes.get(edge[1]);
379+
GrammaticalRelation reln = GrammaticalRelation.valueOf(edge[2]);
380+
// SemanticGraphEdge takes the governor first, then the dependent
edges.add(new SemanticGraphEdge(gov, dep, reln, 1.0, false));
381+
}
382+
// the root is set explicitly to node "5" after the graph is built from edges
List<IndexedWord> roots = new ArrayList<>();
383+
roots.add(graphNodes.get("5"));
384+
SemanticGraph enhancedParse = SemanticGraphFactory.makeFromEdges(edges);
385+
enhancedParse.setRoots(roots);
386+
return enhancedParse;
387+
}
388+
328389
@Test
329390
/**
330391
* Here we run fewer tests. Just make sure the EmptyToken is properly handled,
@@ -355,6 +416,9 @@ public void testReadingInEmpties() throws ClassNotFoundException, IOException {
355416
assertEquals(Integer.valueOf(1), empty.get(CoreAnnotations.EmptyIndexAnnotation.class));
356417
assertEquals(0, empty.sentIndex());
357418
assertEquals("reported", empty.value());
419+
420+
SemanticGraph enhanced = sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
421+
assertEquals(EXPECTED_ENHANCED, enhanced);
358422
}
359423

360424
}

src/edu/stanford/nlp/pipeline/CoNLLUReader.java

Lines changed: 42 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ public class CoNLLUReader {
2323
/**
2424
* field constants
2525
**/
26-
// TODO: we should handle field 8, DEPS, for an enhanced dependencies
2726
// TODO: read sent_id?
2827
// TODO: read comments in general
2928
// TODO: SpacesBefore on the first token should be checked
@@ -36,6 +35,7 @@ public class CoNLLUReader {
3635
public static final int CoNLLU_FeaturesField = 5;
3736
public static final int CoNLLU_GovField = 6;
3837
public static final int CoNLLU_RelnField = 7;
38+
public static final int CoNLLU_EnhancedField = 8;
3939
public static final int CoNLLU_MiscField = 9;
4040

4141
public int columnCount = 10;
@@ -521,7 +521,17 @@ public CoreMap convertCoNLLUSentenceToCoreMap(CoNLLUDocument doc, CoNLLUSentence
521521
emptyLabels.add(cl);
522522
}
523523

524-
// first, prebuild the IndexedWords that will make up the basic graph
524+
// build sentence CoreMap with full text
525+
Annotation sentenceCoreMap = new Annotation(doc.docText.substring(sentenceCharBegin).trim());
526+
// add tokens
527+
sentenceCoreMap.set(CoreAnnotations.TokensAnnotation.class, coreLabels);
528+
// add empty tokens, if any exist
529+
if (emptyLabels.size() > 0) {
530+
sentenceCoreMap.set(CoreAnnotations.EmptyTokensAnnotation.class, emptyLabels);
531+
}
532+
533+
// to build the basic SemanticGraph, first, prebuild the
534+
// IndexedWords that will make up the basic graph
525535
// (and possibly the enhanced graph)
526536
Map<String, IndexedWord> graphNodes = new HashMap<>();
527537
for (CoreLabel label : coreLabels) {
@@ -533,10 +543,13 @@ public CoreMap convertCoNLLUSentenceToCoreMap(CoNLLUDocument doc, CoNLLUSentence
533543
graphNodes.put(index, new IndexedWord(empty));
534544
}
535545

546+
boolean hasEnhanced = false;
536547
// build SemanticGraphEdges for a basic graph
537548
List<SemanticGraphEdge> graphEdges = new ArrayList<>();
538549
for (int i = 0; i < lines.size(); i++) {
539550
List<String> fields = Arrays.asList(lines.get(i).split("\t"));
551+
// track whether any of these lines signify there is an enhanced graph
552+
// test the DEPS column itself: comparing the whole field list to "_" is never equal, which made hasEnhanced always true
hasEnhanced = hasEnhanced || !fields.get(CoNLLU_EnhancedField).equals("_");
540553
// skip the ROOT node
541554
if (fields.get(CoNLLU_GovField).equals("0"))
542555
continue;
@@ -547,20 +560,36 @@ public CoreMap convertCoNLLUSentenceToCoreMap(CoNLLUDocument doc, CoNLLUSentence
547560
}
548561
// build SemanticGraph
549562
SemanticGraph depParse = SemanticGraphFactory.makeFromEdges(graphEdges);
563+
// add dependency graph
564+
sentenceCoreMap.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, depParse);
550565

551-
// TODO: here we build the enhanced graph, if it exists
552-
553-
// build sentence CoreMap with full text
554-
Annotation sentenceCoreMap = new Annotation(doc.docText.substring(sentenceCharBegin).trim());
555-
// add tokens
556-
sentenceCoreMap.set(CoreAnnotations.TokensAnnotation.class, coreLabels);
557-
// add empty tokens, if any exist
558-
if (emptyLabels.size() > 0) {
559-
sentenceCoreMap.set(CoreAnnotations.EmptyTokensAnnotation.class, emptyLabels);
566+
if (hasEnhanced) {
567+
List<SemanticGraphEdge> enhancedEdges = new ArrayList<>();
568+
List<IndexedWord> roots = new ArrayList<>();
569+
570+
List<String> allLines = new ArrayList<>();
571+
allLines.addAll(lines);
572+
allLines.addAll(sentence.emptyLines);
573+
for (String line : allLines) {
574+
List<String> fields = Arrays.asList(line.split("\t"));
575+
IndexedWord dependent = graphNodes.get(fields.get(CoNLLU_IndexField));
576+
String[] arcs = fields.get(CoNLLU_EnhancedField).split("[|]");
577+
for (String arc : arcs) {
578+
String[] arcPieces = arc.split(":", 2);
579+
if (arcPieces[0].equals("0")) {
580+
roots.add(dependent);
581+
} else {
582+
IndexedWord gov = graphNodes.get(arcPieces[0]);
583+
GrammaticalRelation reln = GrammaticalRelation.valueOf(arcPieces[1]);
584+
enhancedEdges.add(new SemanticGraphEdge(gov, dependent, reln, 1.0, false));
585+
}
586+
}
587+
}
588+
SemanticGraph enhancedParse = SemanticGraphFactory.makeFromEdges(enhancedEdges);
589+
enhancedParse.setRoots(roots);
590+
sentenceCoreMap.set(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class, enhancedParse);
560591
}
561592

562-
// add dependency graph
563-
sentenceCoreMap.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, depParse);
564593
return sentenceCoreMap;
565594
}
566595

0 commit comments

Comments
 (0)