@@ -47,20 +47,9 @@ public class NodePattern extends SemgrexPattern {
47
47
private final String name ;
48
48
private String descString ;
49
49
SemgrexPattern child ;
50
- // specifies the groups in a regex that are captured as
51
- // matcher-global string variables
52
- private List <Pair <Integer , String >> variableGroups ;
53
50
54
51
public NodePattern (GraphRelation r , boolean negDesc ,
55
52
NodeAttributes attrs , boolean isLink , String name ) {
56
- this (r , negDesc , attrs , isLink , name ,
57
- new ArrayList <>(0 ));
58
- }
59
-
60
- // TODO: there is no capacity for named variable groups in the parser right now
61
- public NodePattern (GraphRelation r , boolean negDesc ,
62
- NodeAttributes attrs , boolean isLink , String name ,
63
- List <Pair <Integer , String >> variableGroups ) {
64
53
this .reln = r ;
65
54
this .negDesc = negDesc ;
66
55
this .isLink = isLink ;
@@ -72,20 +61,21 @@ public NodePattern(GraphRelation r, boolean negDesc,
72
61
this .regexPartialAttributes = new ArrayList <>();
73
62
74
63
descString = "{" ;
75
- for (Triple <String , String , Boolean > entry : attrs .attributes ()) {
64
+ for (Quadruple <String , String , Boolean , List < Pair < Integer , String >> > entry : attrs .attributes ()) {
76
65
if (!descString .equals ("{" ))
77
66
descString += ";" ;
78
67
String key = entry .first ();
79
68
String value = entry .second ();
80
69
boolean negated = entry .third ();
70
+ List <Pair <Integer , String >> varGroups = entry .fourth ();
81
71
82
72
// Add the attributes for this key
83
73
if (value .equals ("__" )) {
84
- attributes .add (new Attribute (key , true , true , negated ));
74
+ attributes .add (new Attribute (key , true , true , negated , varGroups ));
85
75
} else if (value .matches ("/.*/" )) {
86
- attributes .add (buildRegexAttribute (key , value , negated ));
76
+ attributes .add (buildRegexAttribute (key , value , negated , varGroups ));
87
77
} else { // raw description
88
- attributes .add (new Attribute (key , value , value , negated ));
78
+ attributes .add (new Attribute (key , value , value , negated , varGroups ));
89
79
}
90
80
91
81
if (negated ) {
@@ -100,6 +90,8 @@ public NodePattern(GraphRelation r, boolean negDesc,
100
90
String key = entry .second ();
101
91
String value = entry .third ();
102
92
boolean negated = entry .fourth ();
93
+ // TODO: can add varGroups, especially for the regex matches
94
+ List <Pair <Integer , String >> varGroups = Collections .emptyList ();
103
95
104
96
Class <?> clazz = AnnotationLookup .getValueType (AnnotationLookup .toCoreKey (annotation ));
105
97
boolean isMap = clazz != null && Map .class .isAssignableFrom (clazz );
@@ -115,11 +107,11 @@ public NodePattern(GraphRelation r, boolean negDesc,
115
107
} else {
116
108
// Add the attributes for this key
117
109
if (value .equals ("__" )) {
118
- attr = new Attribute (key , true , true , negated );
110
+ attr = new Attribute (key , true , true , negated , varGroups );
119
111
} else if (value .matches ("/.*/" )) {
120
- attr = buildRegexAttribute (key , value , negated );
112
+ attr = buildRegexAttribute (key , value , negated , varGroups );
121
113
} else { // raw description
122
- attr = new Attribute (key , value , value , negated );
114
+ attr = new Attribute (key , value , value , negated , varGroups );
123
115
}
124
116
partialAttributes .add (new Pair <>(annotation , attr ));
125
117
}
@@ -148,15 +140,13 @@ public NodePattern(GraphRelation r, boolean negDesc,
148
140
this .child = null ;
149
141
this .isRoot = attrs .root ();
150
142
this .isEmpty = attrs .empty ();
151
-
152
- this .variableGroups = Collections .unmodifiableList (variableGroups );
153
143
}
154
144
155
145
/**
156
146
* Tests the value to see if it's really a regex, or just a string wrapped in regex.
157
147
* Return an Attribute which matches this expression
158
148
*/
159
- private Attribute buildRegexAttribute (String key , String value , boolean negated ) {
149
+ private Attribute buildRegexAttribute (String key , String value , boolean negated , List < Pair < Integer , String >> varGroups ) {
160
150
boolean isRegexp = false ;
161
151
for (int i = 1 ; i < value .length () - 1 ; ++i ) {
162
152
char chr = value .charAt (i );
@@ -170,13 +160,29 @@ private Attribute buildRegexAttribute(String key, String value, boolean negated)
170
160
return new Attribute (key ,
171
161
Pattern .compile (patternContent ),
172
162
Pattern .compile (patternContent , Pattern .CASE_INSENSITIVE |Pattern .UNICODE_CASE ),
173
- negated );
163
+ negated , varGroups );
174
164
} else {
175
- return new Attribute (key , patternContent , patternContent , negated );
165
+ return new Attribute (key , patternContent , patternContent , negated , varGroups );
166
+ }
167
+ }
168
+
169
+ private static boolean checkVarMatch (String key , String matchedString ,
170
+ VariableStrings variableStrings , VariableStrings tempVariableStrings ) {
171
+ String existingString = variableStrings .getString (key );
172
+ if (existingString == null ) {
173
+ existingString = tempVariableStrings .getString (key );
174
+ }
175
+ if (existingString != null && !existingString .equals (matchedString )) {
176
+ return false ;
177
+ }
178
+ if (matchedString != null ) {
179
+ tempVariableStrings .setVar (key , matchedString );
176
180
}
181
+ return true ;
177
182
}
178
183
179
- private boolean checkMatch (Attribute attr , boolean ignoreCase , String nodeValue ) {
184
+ private boolean checkMatch (Attribute attr , boolean ignoreCase , String nodeValue ,
185
+ VariableStrings variableStrings , VariableStrings tempVariableStrings ) {
180
186
if (nodeValue == null ) {
181
187
// treat non-existent attributes as having matched a negated expression
182
188
// so for example, `cpos!:NUM` matches not having a cpos at all
@@ -188,14 +194,51 @@ private boolean checkMatch(Attribute attr, boolean ignoreCase, String nodeValue)
188
194
boolean matches ;
189
195
if (toMatch instanceof Boolean ) {
190
196
matches = ((Boolean ) toMatch );
197
+
198
+ if (matches ) {
199
+ for (Pair <Integer , String > varGroup : attr .variableGroups ) {
200
+ // TODO possibly a bug here - it is not honoring ignoreCase
201
+ String matchedString = nodeValue ;
202
+ String key = varGroup .second ();
203
+ if (!checkVarMatch (key , matchedString , variableStrings , tempVariableStrings )) {
204
+ matches = false ;
205
+ break ;
206
+ }
207
+ }
208
+ }
191
209
} else if (toMatch instanceof String ) {
192
210
if (ignoreCase ) {
193
211
matches = nodeValue .equalsIgnoreCase (toMatch .toString ());
194
212
} else {
195
213
matches = nodeValue .equals (toMatch .toString ());
196
214
}
215
+
216
+ if (matches ) {
217
+ for (Pair <Integer , String > varGroup : attr .variableGroups ) {
218
+ // TODO possibly a bug here - it is not honoring ignoreCase
219
+ String matchedString = nodeValue ;
220
+ String key = varGroup .second ();
221
+ if (!checkVarMatch (key , matchedString , variableStrings , tempVariableStrings )) {
222
+ matches = false ;
223
+ break ;
224
+ }
225
+ }
226
+ }
197
227
} else if (toMatch instanceof Pattern ) {
198
- matches = ((Pattern ) toMatch ).matcher (nodeValue ).matches ();
228
+ Matcher matcher = ((Pattern ) toMatch ).matcher (nodeValue );
229
+ if (matcher .matches ()) {
230
+ matches = true ;
231
+ for (Pair <Integer , String > varGroup : attr .variableGroups ) {
232
+ String matchedString = matcher .group (varGroup .first ());
233
+ String key = varGroup .second ();
234
+ if (!checkVarMatch (key , matchedString , variableStrings , tempVariableStrings )) {
235
+ matches = false ;
236
+ break ;
237
+ }
238
+ }
239
+ } else {
240
+ matches = false ;
241
+ }
199
242
} else {
200
243
throw new IllegalStateException ("Unknown matcher type: " + toMatch + " (of class + " + toMatch .getClass () + ")" );
201
244
}
@@ -206,7 +249,8 @@ private boolean checkMatch(Attribute attr, boolean ignoreCase, String nodeValue)
206
249
}
207
250
208
251
@ SuppressWarnings ("unchecked" )
209
- public boolean nodeAttrMatch (IndexedWord node , final SemanticGraph sg , boolean ignoreCase ) {
252
+ public boolean nodeAttrMatch (IndexedWord node , final SemanticGraph sg , boolean ignoreCase ,
253
+ VariableStrings variableStrings , VariableStrings tempVariableStrings ) {
210
254
// System.out.println(node.word());
211
255
if (isRoot ) {
212
256
// System.out.println("checking root");
@@ -240,7 +284,8 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i
240
284
// }
241
285
// System.out.println(nodeValue);
242
286
243
- boolean matches = checkMatch (attr , ignoreCase , nodeValue );
287
+ boolean matches = checkMatch (attr , ignoreCase , nodeValue , variableStrings , tempVariableStrings );
288
+
244
289
if (!matches ) {
245
290
// System.out.println("doesn't match");
246
291
// System.out.println("");
@@ -266,7 +311,8 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i
266
311
nodeValue = (value == null ) ? null : value .toString ();
267
312
}
268
313
269
- boolean matches = checkMatch (attr , ignoreCase , nodeValue );
314
+ // TODO: not connected to varGroups yet
315
+ boolean matches = checkMatch (attr , ignoreCase , nodeValue , variableStrings , tempVariableStrings );
270
316
if (!matches ) {
271
317
return negDesc ;
272
318
}
@@ -282,6 +328,7 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i
282
328
throw new RuntimeException ("Can only use partial attributes with Maps... this should have been checked at creation time!" );
283
329
map = (Map ) rawmap ;
284
330
}
331
+ // TODO: check varGroups here
285
332
boolean matches = partialAttribute .checkMatches (map , ignoreCase );
286
333
if (!matches ) {
287
334
return negDesc ;
@@ -411,6 +458,7 @@ private static class NodeMatcher extends SemgrexMatcher {
411
458
private SemgrexMatcher childMatcher ;
412
459
private boolean matchedOnce = false ;
413
460
private boolean committedVariables = false ;
461
+ private VariableStrings localVariableStrings = null ;
414
462
415
463
private String nextMatchReln = null ;
416
464
private SemanticGraphEdge nextMatchEdge = null ;
@@ -420,7 +468,7 @@ private static class NodeMatcher extends SemgrexMatcher {
420
468
private boolean relnNamedFirst = false ;
421
469
private boolean edgeNamedFirst = false ;
422
470
423
- private boolean ignoreCase = false ;
471
+ private final boolean ignoreCase ;
424
472
425
473
// universal: childMatcher is null if and only if
426
474
// myNode.child == null OR resetChild has never been called
@@ -477,7 +525,8 @@ private void goToNextNodeMatch() {
477
525
decommitNamedNodes ();
478
526
decommitNamedRelations ();
479
527
finished = true ;
480
- Matcher m = null ;
528
+ VariableStrings tempVariableStrings = new VariableStrings ();
529
+
481
530
while (nodeMatchCandidateIterator .hasNext ()) {
482
531
if (myNode .reln .getName () != null ) {
483
532
String foundReln = namesToRelations .get (myNode .reln .getName ());
@@ -517,21 +566,8 @@ private void goToNextNodeMatch() {
517
566
} else {
518
567
boolean found = myNode .nodeAttrMatch (nextMatch ,
519
568
hyp ? sg : sg_aligned ,
520
- ignoreCase );
569
+ ignoreCase , variableStrings , tempVariableStrings );
521
570
if (found ) {
522
- for (Pair <Integer , String > varGroup : myNode .variableGroups ) {
523
- // if variables have been captured from a regex, they
524
- // must match any previous matchings
525
- String thisVariable = varGroup .second ();
526
- String thisVarString = variableStrings .getString (thisVariable );
527
- if (thisVarString != null &&
528
- !thisVarString .equals (m .group (varGroup .first ()))) {
529
- // failed to match a variable
530
- found = false ;
531
- break ;
532
- }
533
- }
534
-
535
571
// nodeAttrMatch already checks negDesc, so no need to
536
572
// check for that here
537
573
finished = false ;
@@ -541,21 +577,8 @@ private void goToNextNodeMatch() {
541
577
} else { // try to match the description pattern.
542
578
boolean found = myNode .nodeAttrMatch (nextMatch ,
543
579
hyp ? sg : sg_aligned ,
544
- ignoreCase );
580
+ ignoreCase , variableStrings , tempVariableStrings );
545
581
if (found ) {
546
- for (Pair <Integer , String > varGroup : myNode .variableGroups ) {
547
- // if variables have been captured from a regex, they
548
- // must match any previous matchings
549
- String thisVariable = varGroup .second ();
550
- String thisVarString = variableStrings .getString (thisVariable );
551
- if (thisVarString != null &&
552
- !thisVarString .equals (m .group (varGroup .first ()))) {
553
- // failed to match a variable
554
- found = false ;
555
- break ;
556
- }
557
- }
558
-
559
582
// nodeAttrMatch already checks negDesc, so no need to
560
583
// check for that here
561
584
finished = false ;
@@ -586,26 +609,23 @@ private void goToNextNodeMatch() {
586
609
edgeNamedFirst = true ;
587
610
namesToEdges .put (myNode .reln .getEdgeName (), nextMatchEdge );
588
611
}
589
- commitVariableGroups (m ); // commit my variable groups.
612
+ commitVariableGroups (tempVariableStrings ); // commit my variable groups.
590
613
}
591
614
// finished is false exiting this if and only if nextChild exists
592
615
// and has a label or backreference that matches
593
616
// (also it will just have been reset)
594
617
}
595
618
596
- private void commitVariableGroups (Matcher m ) {
619
+ private void commitVariableGroups (VariableStrings tempVariableStrings ) {
597
620
committedVariables = true ; // commit all my variable groups.
598
- for (Pair <Integer , String > varGroup : myNode .variableGroups ) {
599
- String thisVarString = m .group (varGroup .first ());
600
- variableStrings .setVar (varGroup .second (), thisVarString );
601
- }
621
+ localVariableStrings = tempVariableStrings ;
622
+ variableStrings .setVars (tempVariableStrings );
602
623
}
603
624
604
625
private void decommitVariableGroups () {
605
626
if (committedVariables ) {
606
- for (Pair <Integer , String > varGroup : myNode .variableGroups ) {
607
- variableStrings .unsetVar (varGroup .second ());
608
- }
627
+ variableStrings .unsetVars (localVariableStrings );
628
+ localVariableStrings = null ;
609
629
}
610
630
committedVariables = false ;
611
631
}
0 commit comments