Skip to content

Commit d9b61c4

Browse files
committed
Add CoNLLU as an output format to SemgrexPattern
1 parent c3d2dec commit d9b61c4

File tree

2 files changed

+34
-2
lines changed

2 files changed

+34
-2
lines changed

src/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.java

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import edu.stanford.nlp.trees.MemoryTreebank;
1717
import edu.stanford.nlp.trees.Tree;
1818
import edu.stanford.nlp.trees.TreeNormalizer;
19+
import edu.stanford.nlp.trees.ud.CoNLLUDocumentWriter;
1920
import edu.stanford.nlp.util.ArrayCoreMap;
2021
import edu.stanford.nlp.util.CoreMap;
2122
import edu.stanford.nlp.util.Generics;
@@ -438,7 +439,8 @@ public int hashCode() {
438439

439440
public enum OutputFormat {
440441
LIST,
441-
OFFSET
442+
OFFSET,
443+
CONLLU
442444
}
443445

444446

@@ -548,6 +550,7 @@ public static void main(String[] args) throws IOException {
548550

549551
for (CoreMap sentence : sentences) {
550552
SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
553+
SemanticGraph enhanced = sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
551554
SemgrexMatcher matcher = semgrex.matcher(graph);
552555
if ( ! matcher.find()) {
553556
continue;
@@ -574,6 +577,30 @@ public static void main(String[] args) throws IOException {
574577
}
575578
System.out.printf("+%d %s%n", graph.vertexListSorted().get(0).get(CoreAnnotations.LineNumberAnnotation.class),
576579
argsMap.get(CONLLU_FILE)[0]);
580+
} else if (outputFormat == OutputFormat.CONLLU) {
581+
CoNLLUDocumentWriter writer = new CoNLLUDocumentWriter();
582+
String semgrexName = semgrex.toString().trim();
583+
// TODO: comments should load from the CoNLLU document, if applicable
584+
List<String> comments = new ArrayList<>(graph.getComments());
585+
boolean found = true;
586+
while (found) {
587+
StringBuilder comment = new StringBuilder();
588+
comment.append("# semgrex pattern |" + semgrexName + "| matched at " + matcher.getMatch().toString(CoreLabel.OutputFormat.VALUE_INDEX));
589+
590+
List<String> nodeNames = new ArrayList<>();
591+
nodeNames.addAll(matcher.getNodeNames());
592+
Collections.sort(nodeNames);
593+
for (String name : nodeNames) {
594+
comment.append(" ");
595+
comment.append(name);
596+
comment.append(":");
597+
comment.append(matcher.getNode(name).toString(CoreLabel.OutputFormat.VALUE_INDEX));
598+
}
599+
comments.add(comment.toString());
600+
found = matcher.find();
601+
}
602+
String output = writer.printSemanticGraph(graph, enhanced, false, comments);
603+
System.out.print(output);
577604
}
578605
}
579606
}

src/edu/stanford/nlp/trees/ud/CoNLLUDocumentWriter.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import edu.stanford.nlp.util.CoreMap;
1212
import edu.stanford.nlp.util.IntPair;
1313

14+
import java.util.Collection;
1415
import java.util.HashMap;
1516

1617
/**
@@ -31,10 +32,14 @@ public String printSemanticGraph(SemanticGraph basicSg, SemanticGraph enhancedSg
3132
}
3233

3334
public String printSemanticGraph(SemanticGraph basicSg, SemanticGraph enhancedSg, boolean unescapeParenthesis) {
35+
return printSemanticGraph(basicSg, enhancedSg, true, basicSg.getComments());
36+
}
37+
38+
public String printSemanticGraph(SemanticGraph basicSg, SemanticGraph enhancedSg, boolean unescapeParenthesis, Collection<String> comments) {
3439
StringBuilder sb = new StringBuilder();
3540

3641
/* Print comments. */
37-
for (String comment : basicSg.getComments()) {
42+
for (String comment : comments) {
3843
sb.append(comment).append(System.lineSeparator());
3944
}
4045

0 commit comments

Comments
 (0)