Skip to content

Commit bfb0f3d

Browse files
committed
Add the ability to check variable strings on an exact match or a global match by treating the variable as the entire matched text
1 parent baedc9a commit bfb0f3d

File tree

2 files changed

+61
-9
lines changed

2 files changed

+61
-9
lines changed

src/edu/stanford/nlp/semgraph/semgrex/NodePattern.java

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,21 @@ private Attribute buildRegexAttribute(String key, String value, boolean negated,
166166
}
167167
}
168168

169+
private static boolean checkVarMatch(String key, String matchedString,
170+
VariableStrings variableStrings, VariableStrings tempVariableStrings) {
171+
String existingString = variableStrings.getString(key);
172+
if (existingString == null) {
173+
existingString = tempVariableStrings.getString(key);
174+
}
175+
if (existingString != null && !existingString.equals(matchedString)) {
176+
return false;
177+
}
178+
if (matchedString != null) {
179+
tempVariableStrings.setVar(key, matchedString);
180+
}
181+
return true;
182+
}
183+
169184
private boolean checkMatch(Attribute attr, boolean ignoreCase, String nodeValue,
170185
VariableStrings variableStrings, VariableStrings tempVariableStrings) {
171186
if (nodeValue == null) {
@@ -179,29 +194,47 @@ private boolean checkMatch(Attribute attr, boolean ignoreCase, String nodeValue,
179194
boolean matches;
180195
if (toMatch instanceof Boolean) {
181196
matches = ((Boolean) toMatch);
197+
198+
if (matches) {
199+
for (Pair<Integer, String> varGroup : attr.variableGroups) {
200+
// TODO possibly a bug here - it is not honoring ignoreCase
201+
String matchedString = nodeValue;
202+
String key = varGroup.second();
203+
if (!checkVarMatch(key, matchedString, variableStrings, tempVariableStrings)) {
204+
matches = false;
205+
break;
206+
}
207+
}
208+
}
182209
} else if (toMatch instanceof String) {
183210
if (ignoreCase) {
184211
matches = nodeValue.equalsIgnoreCase(toMatch.toString());
185212
} else {
186213
matches = nodeValue.equals(toMatch.toString());
187214
}
215+
216+
if (matches) {
217+
for (Pair<Integer, String> varGroup : attr.variableGroups) {
218+
// TODO possibly a bug here - it is not honoring ignoreCase
219+
String matchedString = nodeValue;
220+
String key = varGroup.second();
221+
if (!checkVarMatch(key, matchedString, variableStrings, tempVariableStrings)) {
222+
matches = false;
223+
break;
224+
}
225+
}
226+
}
188227
} else if (toMatch instanceof Pattern) {
189228
Matcher matcher = ((Pattern) toMatch).matcher(nodeValue);
190229
if (matcher.matches()) {
191230
matches = true;
192231
for (Pair<Integer, String> varGroup : attr.variableGroups) {
193-
String existingString = variableStrings.getString(varGroup.second());
194-
if (existingString == null) {
195-
existingString = tempVariableStrings.getString(varGroup.second());
196-
}
197232
String matchedString = matcher.group(varGroup.first());
198-
if (existingString != null && !existingString.equals(matchedString)) {
233+
String key = varGroup.second();
234+
if (!checkVarMatch(key, matchedString, variableStrings, tempVariableStrings)) {
199235
matches = false;
200236
break;
201237
}
202-
if (matchedString != null) {
203-
tempVariableStrings.setVar(varGroup.second(), matchedString);
204-
}
205238
}
206239
} else {
207240
matches = false;

test/src/edu/stanford/nlp/semgraph/semgrex/SemgrexTest.java

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1572,7 +1572,7 @@ public void testBatchUniq() {
15721572
assertEquals(1, matches.get(2).second().size());
15731573
}
15741574

1575-
public void testVariableGroups() {
1575+
public void testRegexVariableGroups() {
15761576
// first, a basic test that it is capturing the variable groups correctly
15771577
SemgrexPattern pattern = SemgrexPattern.compile("{word:/(.*ill.*)/#1%name}");
15781578
SemanticGraph graph = SemanticGraph.valueOf("[ate-2 subj> Bill-1 obj>[muffins-6 compound> Blueberry-3 compound> Flueberry-4 compound> filled-5]]");
@@ -1612,6 +1612,25 @@ public void testVariableGroups() {
16121612
assertEquals(expectedMatches, matches);
16131613
}
16141614

1615+
public void testExactVariableGroups() {
1616+
SemgrexPattern pattern = SemgrexPattern.compile("{word:__#1%name} .. {word:__#1%name}");
1617+
SemanticGraph graph = SemanticGraph.valueOf("[ate-2 subj> Bill-1 obj>[muffins-6 compound> Blueberry-3 compound> Bill-4 compound> filled-5]]");
1618+
1619+
// This should match exactly once, for Bill & Bill
1620+
SemgrexMatcher matcher = pattern.matcher(graph);
1621+
assertTrue(matcher.find());
1622+
assertEquals("Bill", matcher.variableStrings.getString("name"));
1623+
assertFalse(matcher.find());
1624+
1625+
pattern = SemgrexPattern.compile("{word:Bill#1%name} .. {word:__#1%name}");
1626+
1627+
// This should match exactly once, for Bill & Bill
1628+
matcher = pattern.matcher(graph);
1629+
assertTrue(matcher.find());
1630+
assertEquals("Bill", matcher.variableStrings.getString("name"));
1631+
assertFalse(matcher.find());
1632+
}
1633+
16151634
public static void outputBatchResults(SemgrexPattern pattern, List<CoreMap> sentences) {
16161635
List<Pair<CoreMap, List<SemgrexMatch>>> matches = pattern.matchSentences(sentences, false);
16171636
for (Pair<CoreMap, List<SemgrexMatch>> sentenceMatch : matches) {

0 commit comments

Comments
 (0)