Skip to content

Commit f61ca87

Browse files
committed
Separate out the finding of the matches in a Semgrex from the printing of the matches. This will make it easier to do further operations, such as sorting or de-duplicating the matches.
1 parent 9358205 commit f61ca87

File tree

3 files changed

+90
-7
lines changed

3 files changed

+90
-7
lines changed

src/edu/stanford/nlp/semgraph/semgrex/Alignment.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,13 @@ public Alignment(Map<IndexedWord, IndexedWord> map,
2828
this.justification = justification;
2929
}
3030

31+
public Alignment(Alignment other) {
32+
// note that we aren't copying the words
33+
map = new HashMap<>(other.map);
34+
score = other.score;
35+
justification = other.justification;
36+
}
37+
3138
/*
3239
* Returns the score for this <code>Alignment</code>.
3340
*/
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
package edu.stanford.nlp.semgraph.semgrex;
2+
3+
import java.io.Serializable;
4+
import java.util.HashMap;
5+
import java.util.Map;
6+
import java.util.Set;
7+
8+
import edu.stanford.nlp.ling.IndexedWord;
9+
import edu.stanford.nlp.semgraph.SemanticGraph;
10+
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
11+
import edu.stanford.nlp.util.VariableStrings;
12+
13+
/**
14+
* Stores the results of a single match.
15+
*<br>
16+
* This is useful for keeping track of a SemgrexMatcher after already processing its result.
17+
* In particular, it will be possible to post-process results after coordinating all of the results.
18+
* For example, results can be sorted or de-duplicated by the matching nodes, as long as all of the results
19+
* are already compiled.
20+
*/
21+
22+
public class SemgrexMatch implements Serializable {
23+
private static final long serialVersionUID = 978254376856L;
24+
25+
final SemgrexPattern matchedPattern;
26+
27+
final SemanticGraph sg;
28+
final Map<String, IndexedWord> namesToNodes;
29+
final Map<String, String> namesToRelations;
30+
final Map<String, SemanticGraphEdge> namesToEdges;
31+
final VariableStrings variableStrings;
32+
33+
final Alignment alignment;
34+
final SemanticGraph sg_aligned;
35+
final boolean hyp;
36+
37+
final IndexedWord match;
38+
39+
public SemgrexMatch(SemgrexPattern pattern, SemgrexMatcher matcher) {
40+
matchedPattern = pattern;
41+
sg = matcher.sg;
42+
namesToNodes = new HashMap<>(matcher.namesToNodes);
43+
namesToRelations = new HashMap<>(matcher.namesToRelations);
44+
namesToEdges = new HashMap<>(matcher.namesToEdges);
45+
variableStrings = new VariableStrings(matcher.variableStrings);
46+
if (matcher.alignment != null) {
47+
alignment = new Alignment(matcher.alignment);
48+
} else {
49+
alignment = null;
50+
}
51+
sg_aligned = matcher.sg_aligned;
52+
hyp = matcher.hyp;
53+
match = matcher.getMatch();
54+
}
55+
56+
public IndexedWord getMatch() {
57+
return match;
58+
}
59+
60+
public IndexedWord getNode(String name) {
61+
return namesToNodes.get(name);
62+
}
63+
64+
public Set<String> getNodeNames() {
65+
return namesToNodes.keySet();
66+
}
67+
}

src/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.java

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -549,27 +549,38 @@ public static void main(String[] args) throws IOException {
549549
}
550550
}
551551

552+
List<Pair<CoreMap, List<SemgrexMatch>>> matches = new ArrayList<>();
552553
for (CoreMap sentence : sentences) {
553554
SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
554555
SemanticGraph enhanced = sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
555556
SemgrexMatcher matcher = semgrex.matcher(graph);
556557
if ( ! matcher.find()) {
557558
continue;
558559
}
560+
matches.add(new Pair<>(sentence, new ArrayList<>()));
561+
boolean found = true;
562+
while (found) {
563+
matches.get(matches.size() - 1).second().add(new SemgrexMatch(semgrex, matcher));
564+
found = matcher.find();
565+
}
566+
}
559567

568+
for (Pair<CoreMap, List<SemgrexMatch>> sentenceMatches : matches) {
569+
CoreMap sentence = sentenceMatches.first();
570+
SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
571+
SemanticGraph enhanced = sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
560572
if (outputFormat == OutputFormat.LIST) {
561573
log.info("Matched graph:" + System.lineSeparator() + graph.toString(SemanticGraph.OutputFormat.LIST));
562-
int i = 1;
563-
boolean found = true;
564-
while (found) {
574+
int i = 0;
575+
for (SemgrexMatch matcher : sentenceMatches.second()) {
576+
i++;
565577
log.info("Match " + i + " at: " + matcher.getMatch().toString(CoreLabel.OutputFormat.VALUE_INDEX));
566578
List<String> nodeNames = Generics.newArrayList();
567579
nodeNames.addAll(matcher.getNodeNames());
568580
Collections.sort(nodeNames);
569581
for (String name : nodeNames) {
570582
log.info(" " + name + ": " + matcher.getNode(name).toString(CoreLabel.OutputFormat.VALUE_INDEX));
571583
}
572-
found = matcher.find();
573584
}
574585
} else if (outputFormat == OutputFormat.OFFSET) {
575586
if (graph.vertexListSorted().isEmpty()) {
@@ -585,8 +596,7 @@ public static void main(String[] args) throws IOException {
585596
if (comments.size() == 0) {
586597
comments.addAll(graph.getComments());
587598
}
588-
boolean found = true;
589-
while (found) {
599+
for (SemgrexMatch matcher : sentenceMatches.second()) {
590600
StringBuilder comment = new StringBuilder();
591601
comment.append("# semgrex pattern |" + semgrexName + "| matched at " + matcher.getMatch().toString(CoreLabel.OutputFormat.VALUE_INDEX));
592602

@@ -600,7 +610,6 @@ public static void main(String[] args) throws IOException {
600610
comment.append(matcher.getNode(name).toString(CoreLabel.OutputFormat.VALUE_INDEX));
601611
}
602612
comments.add(comment.toString());
603-
found = matcher.find();
604613
}
605614
String output = writer.printSemanticGraph(graph, enhanced, false, comments);
606615
System.out.print(output);

0 commit comments

Comments
 (0)