Skip to content

Commit 57a7e72

Browse files
committed
Builds, but doesn't actually do anything with the VariableStrings yet
1 parent 382055f commit 57a7e72

File tree

6 files changed

+94
-86
lines changed

6 files changed

+94
-86
lines changed

src/edu/stanford/nlp/semgraph/semgrex/Attribute.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,28 @@
11
package edu.stanford.nlp.semgraph.semgrex;
22

33
import java.io.Serializable;
4+
import java.util.ArrayList;
5+
import java.util.Collections;
6+
import java.util.List;
7+
8+
import edu.stanford.nlp.util.Pair;
49

510
public class Attribute implements Serializable {
611
final String key;
712
final Object cased;
813
final Object caseless;
914
final boolean negated;
1015

11-
Attribute(String key, Object cased, Object caseless, boolean negated) {
16+
// specifies the groups in a regex that are captured as
17+
// matcher-global string variables
18+
final List<Pair<Integer, String>> variableGroups;
19+
20+
Attribute(String key, Object cased, Object caseless, boolean negated, List<Pair<Integer, String>> varGroups) {
1221
this.key = key;
1322
this.cased = cased;
1423
this.caseless = caseless;
1524
this.negated = negated;
25+
this.variableGroups = Collections.unmodifiableList(new ArrayList<>(varGroups));
1626
}
1727

1828
private static final long serialVersionUID = 973567614155612487L;

src/edu/stanford/nlp/semgraph/semgrex/NodeAttributes.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
import java.util.List;
77
import java.util.Set;
88

9+
import edu.stanford.nlp.util.Pair;
910
import edu.stanford.nlp.util.Quadruple;
10-
import edu.stanford.nlp.util.Triple;
1111

1212
/**
1313
* Stores attributes for a Semgrex NodePattern.
@@ -25,7 +25,7 @@ public class NodeAttributes {
2525
private boolean root;
2626
private boolean empty;
2727
// String, String, Boolean, List<Pair<Integer, String>>: key, value, negated, variable groups
28-
private List<Triple<String, String, Boolean>> attributes;
28+
private List<Quadruple<String, String, Boolean, List<Pair<Integer, String>>>> attributes;
2929
private Set<String> positiveAttributes;
3030
// Some annotations, especially morpho features (CoreAnnotations.CoNLLUFeats)
3131
// are represented by Maps. In some cases it will be easier to search
@@ -59,21 +59,21 @@ public boolean empty() {
5959
return empty;
6060
}
6161

62-
public void setAttribute(String key, String value, boolean negated) {
62+
public void setAttribute(String key, String value, boolean negated, List<Pair<Integer, String>> varGroups) {
6363
if (!negated) {
6464
if (positiveAttributes.contains(key)) {
6565
throw new SemgrexParseException("Duplicate attribute " + key + " found in semgrex expression");
6666
}
6767
positiveAttributes.add(key);
6868
}
69-
attributes.add(new Triple(key, value, negated));
69+
attributes.add(new Quadruple<>(key, value, negated, varGroups));
7070
}
7171

7272
public void addContains(String annotation, String key, String value, Boolean negated) {
7373
contains.add(new Quadruple(annotation, key, value, negated));
7474
}
7575

76-
public List<Triple<String, String, Boolean>> attributes() {
76+
public List<Quadruple<String, String, Boolean, List<Pair<Integer, String>>>> attributes() {
7777
return Collections.unmodifiableList(attributes);
7878
}
7979

src/edu/stanford/nlp/semgraph/semgrex/NodePattern.java

Lines changed: 28 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,9 @@ public class NodePattern extends SemgrexPattern {
4747
private final String name;
4848
private String descString;
4949
SemgrexPattern child;
50-
// specifies the groups in a regex that are captured as
51-
// matcher-global string variables
52-
private final List<Pair<Integer, String>> variableGroups;
5350

5451
public NodePattern(GraphRelation r, boolean negDesc,
55-
NodeAttributes attrs, boolean isLink, String name,
56-
List<Pair<Integer, String>> variableGroups) {
52+
NodeAttributes attrs, boolean isLink, String name) {
5753
this.reln = r;
5854
this.negDesc = negDesc;
5955
this.isLink = isLink;
@@ -65,20 +61,21 @@ public NodePattern(GraphRelation r, boolean negDesc,
6561
this.regexPartialAttributes = new ArrayList<>();
6662

6763
descString = "{";
68-
for (Triple<String, String, Boolean> entry : attrs.attributes()) {
64+
for (Quadruple<String, String, Boolean, List<Pair<Integer, String>>> entry : attrs.attributes()) {
6965
if (!descString.equals("{"))
7066
descString += ";";
7167
String key = entry.first();
7268
String value = entry.second();
7369
boolean negated = entry.third();
70+
List<Pair<Integer, String>> varGroups = entry.fourth();
7471

7572
// Add the attributes for this key
7673
if (value.equals("__")) {
77-
attributes.add(new Attribute(key, true, true, negated));
74+
attributes.add(new Attribute(key, true, true, negated, varGroups));
7875
} else if (value.matches("/.*/")) {
79-
attributes.add(buildRegexAttribute(key, value, negated));
76+
attributes.add(buildRegexAttribute(key, value, negated, varGroups));
8077
} else { // raw description
81-
attributes.add(new Attribute(key, value, value, negated));
78+
attributes.add(new Attribute(key, value, value, negated, varGroups));
8279
}
8380

8481
if (negated) {
@@ -93,6 +90,8 @@ public NodePattern(GraphRelation r, boolean negDesc,
9390
String key = entry.second();
9491
String value = entry.third();
9592
boolean negated = entry.fourth();
93+
// TODO: can add varGroups, especially for the regex matches
94+
List<Pair<Integer, String>> varGroups = Collections.emptyList();
9695

9796
Class<?> clazz = AnnotationLookup.getValueType(AnnotationLookup.toCoreKey(annotation));
9897
boolean isMap = clazz != null && Map.class.isAssignableFrom(clazz);
@@ -108,11 +107,11 @@ public NodePattern(GraphRelation r, boolean negDesc,
108107
} else {
109108
// Add the attributes for this key
110109
if (value.equals("__")) {
111-
attr = new Attribute(key, true, true, negated);
110+
attr = new Attribute(key, true, true, negated, varGroups);
112111
} else if (value.matches("/.*/")) {
113-
attr = buildRegexAttribute(key, value, negated);
112+
attr = buildRegexAttribute(key, value, negated, varGroups);
114113
} else { // raw description
115-
attr = new Attribute(key, value, value, negated);
114+
attr = new Attribute(key, value, value, negated, varGroups);
116115
}
117116
partialAttributes.add(new Pair<>(annotation, attr));
118117
}
@@ -141,15 +140,13 @@ public NodePattern(GraphRelation r, boolean negDesc,
141140
this.child = null;
142141
this.isRoot = attrs.root();
143142
this.isEmpty = attrs.empty();
144-
145-
this.variableGroups = Collections.unmodifiableList(variableGroups);
146143
}
147144

148145
/**
149146
* Tests the value to see if it's really a regex, or just a plain string wrapped in regex slashes (/.../).
150147
* Return an Attribute which matches this expression
151148
*/
152-
private Attribute buildRegexAttribute(String key, String value, boolean negated) {
149+
private Attribute buildRegexAttribute(String key, String value, boolean negated, List<Pair<Integer, String>> varGroups) {
153150
boolean isRegexp = false;
154151
for (int i = 1; i < value.length() - 1; ++i) {
155152
char chr = value.charAt(i);
@@ -163,9 +160,9 @@ private Attribute buildRegexAttribute(String key, String value, boolean negated)
163160
return new Attribute(key,
164161
Pattern.compile(patternContent),
165162
Pattern.compile(patternContent, Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE),
166-
negated);
163+
negated, varGroups);
167164
} else {
168-
return new Attribute(key, patternContent, patternContent, negated);
165+
return new Attribute(key, patternContent, patternContent, negated, varGroups);
169166
}
170167
}
171168

@@ -233,7 +230,9 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i
233230
// }
234231
// System.out.println(nodeValue);
235232

233+
// TODO: check varGroups here
236234
boolean matches = checkMatch(attr, ignoreCase, nodeValue);
235+
237236
if (!matches) {
238237
// System.out.println("doesn't match");
239238
// System.out.println("");
@@ -404,6 +403,7 @@ private static class NodeMatcher extends SemgrexMatcher {
404403
private SemgrexMatcher childMatcher;
405404
private boolean matchedOnce = false;
406405
private boolean committedVariables = false;
406+
private VariableStrings localVariableStrings = null;
407407

408408
private String nextMatchReln = null;
409409
private SemanticGraphEdge nextMatchEdge = null;
@@ -413,7 +413,7 @@ private static class NodeMatcher extends SemgrexMatcher {
413413
private boolean relnNamedFirst = false;
414414
private boolean edgeNamedFirst = false;
415415

416-
private boolean ignoreCase = false;
416+
private final boolean ignoreCase;
417417

418418
// universal: childMatcher is null if and only if
419419
// myNode.child == null OR resetChild has never been called
@@ -470,7 +470,8 @@ private void goToNextNodeMatch() {
470470
decommitNamedNodes();
471471
decommitNamedRelations();
472472
finished = true;
473-
Matcher m = null;
473+
VariableStrings tempVariableStrings = new VariableStrings();
474+
474475
while (nodeMatchCandidateIterator.hasNext()) {
475476
if (myNode.reln.getName() != null) {
476477
String foundReln = namesToRelations.get(myNode.reln.getName());
@@ -508,47 +509,23 @@ private void goToNextNodeMatch() {
508509
}
509510
}
510511
} else {
512+
// TODO: pass in all varstrings and local varstrings
511513
boolean found = myNode.nodeAttrMatch(nextMatch,
512514
hyp ? sg : sg_aligned,
513515
ignoreCase);
514516
if (found) {
515-
for (Pair<Integer, String> varGroup : myNode.variableGroups) {
516-
// if variables have been captured from a regex, they
517-
// must match any previous matchings
518-
String thisVariable = varGroup.second();
519-
String thisVarString = variableStrings.getString(thisVariable);
520-
if (thisVarString != null &&
521-
!thisVarString.equals(m.group(varGroup.first()))) {
522-
// failed to match a variable
523-
found = false;
524-
break;
525-
}
526-
}
527-
528517
// nodeAttrMatch already checks negDesc, so no need to
529518
// check for that here
530519
finished = false;
531520
break;
532521
}
533522
}
534523
} else { // try to match the description pattern.
524+
// TODO: pass in all varstrings and local varstrings
535525
boolean found = myNode.nodeAttrMatch(nextMatch,
536526
hyp ? sg : sg_aligned,
537527
ignoreCase);
538528
if (found) {
539-
for (Pair<Integer, String> varGroup : myNode.variableGroups) {
540-
// if variables have been captured from a regex, they
541-
// must match any previous matchings
542-
String thisVariable = varGroup.second();
543-
String thisVarString = variableStrings.getString(thisVariable);
544-
if (thisVarString != null &&
545-
!thisVarString.equals(m.group(varGroup.first()))) {
546-
// failed to match a variable
547-
found = false;
548-
break;
549-
}
550-
}
551-
552529
// nodeAttrMatch already checks negDesc, so no need to
553530
// check for that here
554531
finished = false;
@@ -582,30 +559,23 @@ private void goToNextNodeMatch() {
582559
// TODO FIXME: this would need to read all of the matchers used
583560
// (eg, from the various attributes)
584561
// and commit all of them
585-
commitVariableGroups(m); // commit my variable groups.
562+
commitVariableGroups(tempVariableStrings); // commit my variable groups.
586563
}
587564
// finished is false exiting this if and only if nextChild exists
588565
// and has a label or backreference that matches
589566
// (also it will just have been reset)
590567
}
591568

592-
private void commitVariableGroups(Matcher m) {
569+
private void commitVariableGroups(VariableStrings tempVariableStrings) {
593570
committedVariables = true; // commit all my variable groups.
594-
if (myNode.variableGroups.size() > 0) {
595-
System.out.println(myNode.variableGroups);
596-
}
597-
for (Pair<Integer, String> varGroup : myNode.variableGroups) {
598-
System.out.println(varGroup);
599-
String thisVarString = m.group(varGroup.first());
600-
variableStrings.setVar(varGroup.second(), thisVarString);
601-
}
571+
localVariableStrings = tempVariableStrings;
572+
variableStrings.setVars(tempVariableStrings);
602573
}
603574

604575
private void decommitVariableGroups() {
605576
if (committedVariables) {
606-
for (Pair<Integer, String> varGroup : myNode.variableGroups) {
607-
variableStrings.unsetVar(varGroup.second());
608-
}
577+
variableStrings.unsetVars(localVariableStrings);
578+
localVariableStrings = null;
609579
}
610580
committedVariables = false;
611581
}

src/edu/stanford/nlp/semgraph/semgrex/SemgrexParser.java

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -572,13 +572,14 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
572572
throw new Error("Missing return statement in function");
573573
}
574574

575-
final public void AddAttribute(NodeAttributes attributes, List<Pair<Integer,String>> varGroups) throws ParseException {Token attr = null;
575+
final public void AddAttribute(NodeAttributes attributes) throws ParseException {Token attr = null;
576576
Token key = null;
577577
Token value = null;
578578
Token attrType = null;
579579
boolean negated = false;
580580
Token groupNum;
581581
Token groupVar;
582+
List<Pair<Integer,String>> varGroups = new ArrayList<Pair<Integer,String>>();
582583
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
583584
case UNIQ:
584585
case IDENTIFIER:{
@@ -615,10 +616,6 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
615616
jj_consume_token(-1);
616617
throw new ParseException();
617618
}
618-
if (attr != null && value != null) {
619-
negated = attrType.image.equals("!:");
620-
attributes.setAttribute(attr.image, value.image, negated);
621-
}
622619
label_7:
623620
while (true) {
624621
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
@@ -637,6 +634,10 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
637634
// TODO: this should have been NUMBER, but that doesn't seem to exist
638635
varGroups.add(new Pair<Integer,String>(Integer.parseInt(groupNum.image),groupVar.image));
639636
}
637+
if (attr != null && value != null) {
638+
negated = attrType.image.equals("!:");
639+
attributes.setAttribute(attr.image, value.image, negated, varGroups);
640+
}
640641
break;
641642
}
642643
case 26:{
@@ -785,14 +786,13 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
785786
boolean link = false;
786787
NodeAttributes attributes = new NodeAttributes();
787788
NodePattern pat;
788-
List<Pair<Integer,String>> varGroups = new ArrayList<Pair<Integer,String>>();
789789
jj_consume_token(29);
790790
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
791791
case UNIQ:
792792
case IDENTIFIER:
793793
case EMPTY:
794794
case ROOT:{
795-
AddAttribute(attributes, varGroups);
795+
AddAttribute(attributes);
796796
label_9:
797797
while (true) {
798798
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
@@ -805,7 +805,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
805805
break label_9;
806806
}
807807
jj_consume_token(27);
808-
AddAttribute(attributes, varGroups);
808+
AddAttribute(attributes);
809809
}
810810
break;
811811
}
@@ -833,7 +833,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
833833
jj_la1[39] = jj_gen;
834834
;
835835
}
836-
pat = new NodePattern(r, underNodeNegation, attributes, link, name != null ? name.image : null, varGroups);
836+
pat = new NodePattern(r, underNodeNegation, attributes, link, name != null ? name.image : null);
837837
{if ("" != null) return pat;}
838838
throw new Error("Missing return statement in function");
839839
}

0 commit comments

Comments
 (0)