Skip to content

Commit 9caaec5

Browse files
committed
Switch the reader used by SemgrexPattern to pipeline.CoNLLUReader, which now supports reading more features from the SemanticGraphs
1 parent 78cd918 commit 9caaec5

File tree

1 file changed

+24
-12
lines changed

1 file changed

+24
-12
lines changed

src/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.java

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,22 @@
22

33
import java.io.*;
44
import java.util.*;
5+
import java.util.stream.Collectors;
56

67
import edu.stanford.nlp.semgraph.SemanticGraph;
8+
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
79
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
810
import edu.stanford.nlp.semgraph.SemanticGraphFactory;
911
import edu.stanford.nlp.io.IOUtils;
1012
import edu.stanford.nlp.ling.*;
11-
import edu.stanford.nlp.trees.ud.CoNLLUDocumentReader;
13+
import edu.stanford.nlp.pipeline.Annotation;
14+
import edu.stanford.nlp.pipeline.CoNLLUReader;
1215
import edu.stanford.nlp.trees.GrammaticalStructure;
1316
import edu.stanford.nlp.trees.MemoryTreebank;
1417
import edu.stanford.nlp.trees.Tree;
1518
import edu.stanford.nlp.trees.TreeNormalizer;
19+
import edu.stanford.nlp.util.ArrayCoreMap;
20+
import edu.stanford.nlp.util.CoreMap;
1621
import edu.stanford.nlp.util.Generics;
1722
import edu.stanford.nlp.util.Pair;
1823
import edu.stanford.nlp.util.StringUtils;
@@ -506,7 +511,7 @@ public static void main(String[] args) throws IOException {
506511
useExtras = Boolean.parseBoolean(argsMap.get(EXTRAS)[0]);
507512
}
508513

509-
List<SemanticGraph> graphs = Generics.newArrayList();
514+
List<CoreMap> sentences = new ArrayList<>();
510515
// TODO: allow other sources of graphs, such as dependency files
511516
if (argsMap.containsKey(TREE_FILE) && argsMap.get(TREE_FILE).length > 0) {
512517
for (String treeFile : argsMap.get(TREE_FILE)) {
@@ -517,25 +522,32 @@ public static void main(String[] args) throws IOException {
517522
// TODO: allow other languages... this defaults to English
518523
SemanticGraph graph = SemanticGraphFactory.makeFromTree(tree, mode, useExtras ?
519524
GrammaticalStructure.Extras.MAXIMAL : GrammaticalStructure.Extras.NONE);
520-
graphs.add(graph);
525+
CoreMap sentence = new ArrayCoreMap();
526+
sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, graph);
527+
List<CoreLabel> tokens = graph.vertexListSorted().stream().map(x -> x.backingLabel()).collect(Collectors.toList());
528+
sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
529+
sentences.add(sentence);
521530
}
522531
}
523532
}
524533

525534
if (argsMap.containsKey(CONLLU_FILE) && argsMap.get(CONLLU_FILE).length > 0) {
526-
CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
527-
for (String conlluFile : argsMap.get(CONLLU_FILE)) {
528-
log.info("Loading file " + conlluFile);
529-
Iterator<Pair<SemanticGraph,SemanticGraph>> it = reader.getIterator(IOUtils.readerFromString(conlluFile));
530-
531-
while (it.hasNext()) {
532-
SemanticGraph graph = it.next().first;
533-
graphs.add(graph);
535+
try {
536+
CoNLLUReader reader = new CoNLLUReader();
537+
for (String conlluFile : argsMap.get(CONLLU_FILE)) {
538+
log.info("Loading file " + conlluFile);
539+
List<Annotation> docs = reader.readCoNLLUFile(conlluFile);
540+
for (Annotation doc : docs) {
541+
sentences.addAll(doc.get(CoreAnnotations.SentencesAnnotation.class));
542+
}
534543
}
544+
} catch (ClassNotFoundException e) {
545+
throw new RuntimeException(e);
535546
}
536547
}
537548

538-
for (SemanticGraph graph : graphs) {
549+
for (CoreMap sentence : sentences) {
550+
SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
539551
SemgrexMatcher matcher = semgrex.matcher(graph);
540552
if ( ! matcher.find()) {
541553
continue;

0 commit comments

Comments
 (0)