Skip to content

Commit b301397

Browse files
committed
Implement VariableStrings for the attributes.
Not yet implemented for the contained attributes; that could be another feature to add. Includes the capacity to check variable strings on an exact match or a __ match by treating the entire matched text as the value of any variable.
1 parent 8eba4a3 commit b301397

File tree

11 files changed

+295
-133
lines changed

11 files changed

+295
-133
lines changed

src/edu/stanford/nlp/semgraph/semgrex/Attribute.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,28 @@
11
package edu.stanford.nlp.semgraph.semgrex;
22

33
import java.io.Serializable;
4+
import java.util.ArrayList;
5+
import java.util.Collections;
6+
import java.util.List;
7+
8+
import edu.stanford.nlp.util.Pair;
49

510
public class Attribute implements Serializable {
611
final String key;
712
final Object cased;
813
final Object caseless;
914
final boolean negated;
1015

11-
Attribute(String key, Object cased, Object caseless, boolean negated) {
16+
// specifies the groups in a regex that are captured as
17+
// matcher-global string variables
18+
final List<Pair<Integer, String>> variableGroups;
19+
20+
Attribute(String key, Object cased, Object caseless, boolean negated, List<Pair<Integer, String>> varGroups) {
1221
this.key = key;
1322
this.cased = cased;
1423
this.caseless = caseless;
1524
this.negated = negated;
25+
this.variableGroups = Collections.unmodifiableList(new ArrayList<>(varGroups));
1626
}
1727

1828
private static final long serialVersionUID = 973567614155612487L;

src/edu/stanford/nlp/semgraph/semgrex/NodeAttributes.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
import java.util.List;
77
import java.util.Set;
88

9+
import edu.stanford.nlp.util.Pair;
910
import edu.stanford.nlp.util.Quadruple;
10-
import edu.stanford.nlp.util.Triple;
1111

1212
/**
1313
* Stores attributes for a Semgrex NodePattern.
@@ -25,7 +25,7 @@ public class NodeAttributes {
2525
private boolean root;
2626
private boolean empty;
2727
// String, String, Boolean, List<Pair<Integer, String>>: key, value, negated, variable groups
28-
private List<Triple<String, String, Boolean>> attributes;
28+
private List<Quadruple<String, String, Boolean, List<Pair<Integer, String>>>> attributes;
2929
private Set<String> positiveAttributes;
3030
// Some annotations, especially morpho features (CoreAnnotations.CoNLLUFeats)
3131
// are represented by Maps. In some cases it will be easier to search
@@ -59,21 +59,21 @@ public boolean empty() {
5959
return empty;
6060
}
6161

62-
public void setAttribute(String key, String value, boolean negated) {
62+
public void setAttribute(String key, String value, boolean negated, List<Pair<Integer, String>> varGroups) {
6363
if (!negated) {
6464
if (positiveAttributes.contains(key)) {
6565
throw new SemgrexParseException("Duplicate attribute " + key + " found in semgrex expression");
6666
}
6767
positiveAttributes.add(key);
6868
}
69-
attributes.add(new Triple(key, value, negated));
69+
attributes.add(new Quadruple<>(key, value, negated, varGroups));
7070
}
7171

7272
public void addContains(String annotation, String key, String value, Boolean negated) {
7373
contains.add(new Quadruple(annotation, key, value, negated));
7474
}
7575

76-
public List<Triple<String, String, Boolean>> attributes() {
76+
public List<Quadruple<String, String, Boolean, List<Pair<Integer, String>>>> attributes() {
7777
return Collections.unmodifiableList(attributes);
7878
}
7979

src/edu/stanford/nlp/semgraph/semgrex/NodePattern.java

Lines changed: 87 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -47,20 +47,9 @@ public class NodePattern extends SemgrexPattern {
4747
private final String name;
4848
private String descString;
4949
SemgrexPattern child;
50-
// specifies the groups in a regex that are captured as
51-
// matcher-global string variables
52-
private List<Pair<Integer, String>> variableGroups;
5350

5451
public NodePattern(GraphRelation r, boolean negDesc,
5552
NodeAttributes attrs, boolean isLink, String name) {
56-
this(r, negDesc, attrs, isLink, name,
57-
new ArrayList<>(0));
58-
}
59-
60-
// TODO: there is no capacity for named variable groups in the parser right now
61-
public NodePattern(GraphRelation r, boolean negDesc,
62-
NodeAttributes attrs, boolean isLink, String name,
63-
List<Pair<Integer, String>> variableGroups) {
6453
this.reln = r;
6554
this.negDesc = negDesc;
6655
this.isLink = isLink;
@@ -72,20 +61,21 @@ public NodePattern(GraphRelation r, boolean negDesc,
7261
this.regexPartialAttributes = new ArrayList<>();
7362

7463
descString = "{";
75-
for (Triple<String, String, Boolean> entry : attrs.attributes()) {
64+
for (Quadruple<String, String, Boolean, List<Pair<Integer, String>>> entry : attrs.attributes()) {
7665
if (!descString.equals("{"))
7766
descString += ";";
7867
String key = entry.first();
7968
String value = entry.second();
8069
boolean negated = entry.third();
70+
List<Pair<Integer, String>> varGroups = entry.fourth();
8171

8272
// Add the attributes for this key
8373
if (value.equals("__")) {
84-
attributes.add(new Attribute(key, true, true, negated));
74+
attributes.add(new Attribute(key, true, true, negated, varGroups));
8575
} else if (value.matches("/.*/")) {
86-
attributes.add(buildRegexAttribute(key, value, negated));
76+
attributes.add(buildRegexAttribute(key, value, negated, varGroups));
8777
} else { // raw description
88-
attributes.add(new Attribute(key, value, value, negated));
78+
attributes.add(new Attribute(key, value, value, negated, varGroups));
8979
}
9080

9181
if (negated) {
@@ -100,6 +90,8 @@ public NodePattern(GraphRelation r, boolean negDesc,
10090
String key = entry.second();
10191
String value = entry.third();
10292
boolean negated = entry.fourth();
93+
// TODO: can add varGroups, especially for the regex matches
94+
List<Pair<Integer, String>> varGroups = Collections.emptyList();
10395

10496
Class<?> clazz = AnnotationLookup.getValueType(AnnotationLookup.toCoreKey(annotation));
10597
boolean isMap = clazz != null && Map.class.isAssignableFrom(clazz);
@@ -115,11 +107,11 @@ public NodePattern(GraphRelation r, boolean negDesc,
115107
} else {
116108
// Add the attributes for this key
117109
if (value.equals("__")) {
118-
attr = new Attribute(key, true, true, negated);
110+
attr = new Attribute(key, true, true, negated, varGroups);
119111
} else if (value.matches("/.*/")) {
120-
attr = buildRegexAttribute(key, value, negated);
112+
attr = buildRegexAttribute(key, value, negated, varGroups);
121113
} else { // raw description
122-
attr = new Attribute(key, value, value, negated);
114+
attr = new Attribute(key, value, value, negated, varGroups);
123115
}
124116
partialAttributes.add(new Pair<>(annotation, attr));
125117
}
@@ -148,15 +140,13 @@ public NodePattern(GraphRelation r, boolean negDesc,
148140
this.child = null;
149141
this.isRoot = attrs.root();
150142
this.isEmpty = attrs.empty();
151-
152-
this.variableGroups = Collections.unmodifiableList(variableGroups);
153143
}
154144

155145
/**
156146
* Tests the value to see if it's really a regex, or just a string wrapped in regex.
157147
* Return an Attribute which matches this expression
158148
*/
159-
private Attribute buildRegexAttribute(String key, String value, boolean negated) {
149+
private Attribute buildRegexAttribute(String key, String value, boolean negated, List<Pair<Integer, String>> varGroups) {
160150
boolean isRegexp = false;
161151
for (int i = 1; i < value.length() - 1; ++i) {
162152
char chr = value.charAt(i);
@@ -170,13 +160,29 @@ private Attribute buildRegexAttribute(String key, String value, boolean negated)
170160
return new Attribute(key,
171161
Pattern.compile(patternContent),
172162
Pattern.compile(patternContent, Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE),
173-
negated);
163+
negated, varGroups);
174164
} else {
175-
return new Attribute(key, patternContent, patternContent, negated);
165+
return new Attribute(key, patternContent, patternContent, negated, varGroups);
166+
}
167+
}
168+
169+
private static boolean checkVarMatch(String key, String matchedString,
170+
VariableStrings variableStrings, VariableStrings tempVariableStrings) {
171+
String existingString = variableStrings.getString(key);
172+
if (existingString == null) {
173+
existingString = tempVariableStrings.getString(key);
174+
}
175+
if (existingString != null && !existingString.equals(matchedString)) {
176+
return false;
177+
}
178+
if (matchedString != null) {
179+
tempVariableStrings.setVar(key, matchedString);
176180
}
181+
return true;
177182
}
178183

179-
private boolean checkMatch(Attribute attr, boolean ignoreCase, String nodeValue) {
184+
private boolean checkMatch(Attribute attr, boolean ignoreCase, String nodeValue,
185+
VariableStrings variableStrings, VariableStrings tempVariableStrings) {
180186
if (nodeValue == null) {
181187
// treat non-existent attributes as having matched a negated expression
182188
// so for example, `cpos!:NUM` matches not having a cpos at all
@@ -188,14 +194,51 @@ private boolean checkMatch(Attribute attr, boolean ignoreCase, String nodeValue)
188194
boolean matches;
189195
if (toMatch instanceof Boolean) {
190196
matches = ((Boolean) toMatch);
197+
198+
if (matches) {
199+
for (Pair<Integer, String> varGroup : attr.variableGroups) {
200+
// TODO possibly a bug here - it is not honoring ignoreCase
201+
String matchedString = nodeValue;
202+
String key = varGroup.second();
203+
if (!checkVarMatch(key, matchedString, variableStrings, tempVariableStrings)) {
204+
matches = false;
205+
break;
206+
}
207+
}
208+
}
191209
} else if (toMatch instanceof String) {
192210
if (ignoreCase) {
193211
matches = nodeValue.equalsIgnoreCase(toMatch.toString());
194212
} else {
195213
matches = nodeValue.equals(toMatch.toString());
196214
}
215+
216+
if (matches) {
217+
for (Pair<Integer, String> varGroup : attr.variableGroups) {
218+
// TODO possibly a bug here - it is not honoring ignoreCase
219+
String matchedString = nodeValue;
220+
String key = varGroup.second();
221+
if (!checkVarMatch(key, matchedString, variableStrings, tempVariableStrings)) {
222+
matches = false;
223+
break;
224+
}
225+
}
226+
}
197227
} else if (toMatch instanceof Pattern) {
198-
matches = ((Pattern) toMatch).matcher(nodeValue).matches();
228+
Matcher matcher = ((Pattern) toMatch).matcher(nodeValue);
229+
if (matcher.matches()) {
230+
matches = true;
231+
for (Pair<Integer, String> varGroup : attr.variableGroups) {
232+
String matchedString = matcher.group(varGroup.first());
233+
String key = varGroup.second();
234+
if (!checkVarMatch(key, matchedString, variableStrings, tempVariableStrings)) {
235+
matches = false;
236+
break;
237+
}
238+
}
239+
} else {
240+
matches = false;
241+
}
199242
} else {
200243
throw new IllegalStateException("Unknown matcher type: " + toMatch + " (of class + " + toMatch.getClass() + ")");
201244
}
@@ -206,7 +249,8 @@ private boolean checkMatch(Attribute attr, boolean ignoreCase, String nodeValue)
206249
}
207250

208251
@SuppressWarnings("unchecked")
209-
public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean ignoreCase) {
252+
public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean ignoreCase,
253+
VariableStrings variableStrings, VariableStrings tempVariableStrings) {
210254
// System.out.println(node.word());
211255
if (isRoot) {
212256
// System.out.println("checking root");
@@ -240,7 +284,8 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i
240284
// }
241285
// System.out.println(nodeValue);
242286

243-
boolean matches = checkMatch(attr, ignoreCase, nodeValue);
287+
boolean matches = checkMatch(attr, ignoreCase, nodeValue, variableStrings, tempVariableStrings);
288+
244289
if (!matches) {
245290
// System.out.println("doesn't match");
246291
// System.out.println("");
@@ -266,7 +311,8 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i
266311
nodeValue = (value == null) ? null : value.toString();
267312
}
268313

269-
boolean matches = checkMatch(attr, ignoreCase, nodeValue);
314+
// TODO: not connected to varGroups yet
315+
boolean matches = checkMatch(attr, ignoreCase, nodeValue, variableStrings, tempVariableStrings);
270316
if (!matches) {
271317
return negDesc;
272318
}
@@ -282,6 +328,7 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i
282328
throw new RuntimeException("Can only use partial attributes with Maps... this should have been checked at creation time!");
283329
map = (Map) rawmap;
284330
}
331+
// TODO: check varGroups here
285332
boolean matches = partialAttribute.checkMatches(map, ignoreCase);
286333
if (!matches) {
287334
return negDesc;
@@ -411,6 +458,7 @@ private static class NodeMatcher extends SemgrexMatcher {
411458
private SemgrexMatcher childMatcher;
412459
private boolean matchedOnce = false;
413460
private boolean committedVariables = false;
461+
private VariableStrings localVariableStrings = null;
414462

415463
private String nextMatchReln = null;
416464
private SemanticGraphEdge nextMatchEdge = null;
@@ -420,7 +468,7 @@ private static class NodeMatcher extends SemgrexMatcher {
420468
private boolean relnNamedFirst = false;
421469
private boolean edgeNamedFirst = false;
422470

423-
private boolean ignoreCase = false;
471+
private final boolean ignoreCase;
424472

425473
// universal: childMatcher is null if and only if
426474
// myNode.child == null OR resetChild has never been called
@@ -477,7 +525,8 @@ private void goToNextNodeMatch() {
477525
decommitNamedNodes();
478526
decommitNamedRelations();
479527
finished = true;
480-
Matcher m = null;
528+
VariableStrings tempVariableStrings = new VariableStrings();
529+
481530
while (nodeMatchCandidateIterator.hasNext()) {
482531
if (myNode.reln.getName() != null) {
483532
String foundReln = namesToRelations.get(myNode.reln.getName());
@@ -517,21 +566,8 @@ private void goToNextNodeMatch() {
517566
} else {
518567
boolean found = myNode.nodeAttrMatch(nextMatch,
519568
hyp ? sg : sg_aligned,
520-
ignoreCase);
569+
ignoreCase, variableStrings, tempVariableStrings);
521570
if (found) {
522-
for (Pair<Integer, String> varGroup : myNode.variableGroups) {
523-
// if variables have been captured from a regex, they
524-
// must match any previous matchings
525-
String thisVariable = varGroup.second();
526-
String thisVarString = variableStrings.getString(thisVariable);
527-
if (thisVarString != null &&
528-
!thisVarString.equals(m.group(varGroup.first()))) {
529-
// failed to match a variable
530-
found = false;
531-
break;
532-
}
533-
}
534-
535571
// nodeAttrMatch already checks negDesc, so no need to
536572
// check for that here
537573
finished = false;
@@ -541,21 +577,8 @@ private void goToNextNodeMatch() {
541577
} else { // try to match the description pattern.
542578
boolean found = myNode.nodeAttrMatch(nextMatch,
543579
hyp ? sg : sg_aligned,
544-
ignoreCase);
580+
ignoreCase, variableStrings, tempVariableStrings);
545581
if (found) {
546-
for (Pair<Integer, String> varGroup : myNode.variableGroups) {
547-
// if variables have been captured from a regex, they
548-
// must match any previous matchings
549-
String thisVariable = varGroup.second();
550-
String thisVarString = variableStrings.getString(thisVariable);
551-
if (thisVarString != null &&
552-
!thisVarString.equals(m.group(varGroup.first()))) {
553-
// failed to match a variable
554-
found = false;
555-
break;
556-
}
557-
}
558-
559582
// nodeAttrMatch already checks negDesc, so no need to
560583
// check for that here
561584
finished = false;
@@ -586,26 +609,23 @@ private void goToNextNodeMatch() {
586609
edgeNamedFirst = true;
587610
namesToEdges.put(myNode.reln.getEdgeName(), nextMatchEdge);
588611
}
589-
commitVariableGroups(m); // commit my variable groups.
612+
commitVariableGroups(tempVariableStrings); // commit my variable groups.
590613
}
591614
// finished is false exiting this if and only if nextChild exists
592615
// and has a label or backreference that matches
593616
// (also it will just have been reset)
594617
}
595618

596-
private void commitVariableGroups(Matcher m) {
619+
private void commitVariableGroups(VariableStrings tempVariableStrings) {
597620
committedVariables = true; // commit all my variable groups.
598-
for (Pair<Integer, String> varGroup : myNode.variableGroups) {
599-
String thisVarString = m.group(varGroup.first());
600-
variableStrings.setVar(varGroup.second(), thisVarString);
601-
}
621+
localVariableStrings = tempVariableStrings;
622+
variableStrings.setVars(tempVariableStrings);
602623
}
603624

604625
private void decommitVariableGroups() {
605626
if (committedVariables) {
606-
for (Pair<Integer, String> varGroup : myNode.variableGroups) {
607-
variableStrings.unsetVar(varGroup.second());
608-
}
627+
variableStrings.unsetVars(localVariableStrings);
628+
localVariableStrings = null;
609629
}
610630
committedVariables = false;
611631
}

0 commit comments

Comments
 (0)