@@ -40,25 +40,32 @@ public class SplitWord extends SsurgeonEdit {
40
40
41
41
final String node ;
42
42
final List <Pattern > nodeRegex ;
43
+ final List <String > exactPieces ;
43
44
final int headIndex ;
44
45
final GrammaticalRelation relation ;
45
46
final Map <Integer , String > nodeNames ;
46
47
47
- public SplitWord (String node , List <String > nodeRegex , Integer headIndex , GrammaticalRelation relation , String nodeNames ) {
48
+ public SplitWord (String node , List <String > nodePieces , Integer headIndex , GrammaticalRelation relation , String nodeNames , boolean exactSplit ) {
48
49
if (node == null ) {
49
50
throw new SsurgeonParseException ("SplitWord expected -node with the name of the matched node to split" );
50
51
}
51
52
this .node = node ;
52
53
53
- if (nodeRegex == null || nodeRegex .size () == 0 ) {
54
- throw new SsurgeonParseException ("SplitWord expected -regex with regex to determine which pieces to split the word into" );
54
+ if (nodePieces == null || nodePieces .size () == 0 ) {
55
+ throw new SsurgeonParseException ("SplitWord expected -exact or - regex with regex to determine which pieces to split the word into" );
55
56
}
56
- if (nodeRegex .size () == 1 ) {
57
- throw new SsurgeonParseException ("SplitWord expected at least two -regex" );
57
+ if (nodePieces .size () == 1 ) {
58
+ throw new SsurgeonParseException ("SplitWord expected at least two -exact or - regex" );
58
59
}
59
- this .nodeRegex = new ArrayList <>();
60
- for (int i = 0 ; i < nodeRegex .size (); ++i ) {
61
- this .nodeRegex .add (Pattern .compile (nodeRegex .get (i )));
60
+ if (exactSplit ) {
61
+ this .exactPieces = new ArrayList <>(nodePieces );
62
+ this .nodeRegex = null ;
63
+ } else {
64
+ this .nodeRegex = new ArrayList <>();
65
+ for (int i = 0 ; i < nodePieces .size (); ++i ) {
66
+ this .nodeRegex .add (Pattern .compile (nodePieces .get (i )));
67
+ }
68
+ this .exactPieces = null ;
62
69
}
63
70
64
71
if (headIndex == null ) {
@@ -80,7 +87,7 @@ public SplitWord(String node, List<String> nodeRegex, Integer headIndex, Grammat
80
87
throw new SsurgeonParseException ("SplitWord got a -name parameter which did not have a number for one of the names. Should look like 0=foo,1=bar" );
81
88
}
82
89
int idx = Integer .valueOf (pieces [0 ]);
83
- if (idx >= this . nodeRegex .size ()) {
90
+ if (idx >= nodePieces .size ()) {
84
91
throw new SsurgeonParseException ("SplitWord got an index in -name which was larger than the largest possible split piece, " + idx + " (this is 0-indexed)" );
85
92
}
86
93
this .nodeNames .put (idx , pieces [1 ]);
@@ -96,8 +103,14 @@ public String toEditString() {
96
103
buf .write (LABEL );
97
104
buf .write ("\t " );
98
105
buf .write ("-node " + node + "\t " );
99
- for (Pattern regex : nodeRegex ) {
100
- buf .write ("-regex " + regex + "\t " );
106
+ if (nodeRegex != null ) {
107
+ for (Pattern regex : nodeRegex ) {
108
+ buf .write ("-regex " + regex + "\t " );
109
+ }
110
+ } else {
111
+ for (String piece : exactPieces ) {
112
+ buf .write ("-exact " + piece + "\t " );
113
+ }
101
114
}
102
115
buf .write ("-reln " + relation .toString () + "\t " );
103
116
buf .write ("-headIndex " + headIndex );
@@ -113,22 +126,27 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
113
126
//
114
127
// each new word created will be the concatenation of all of the
115
128
// matching groups from this pattern
116
- List <String > words = new ArrayList <>();
117
- for (int i = 0 ; i < nodeRegex .size (); ++i ) {
118
- Matcher regexMatcher = nodeRegex .get (i ).matcher (origWord );
119
- if (!regexMatcher .matches ()) {
120
- return false ;
121
- }
129
+ List <String > words ;
130
+ if (exactPieces != null ) {
131
+ words = new ArrayList <>(exactPieces );
132
+ } else {
133
+ words = new ArrayList <>();
134
+ for (int i = 0 ; i < nodeRegex .size (); ++i ) {
135
+ Matcher regexMatcher = nodeRegex .get (i ).matcher (origWord );
136
+ if (!regexMatcher .matches ()) {
137
+ return false ;
138
+ }
122
139
123
- StringBuilder newWordBuilder = new StringBuilder ();
124
- for (int j = 0 ; j < regexMatcher .groupCount (); ++j ) {
125
- newWordBuilder .append (regexMatcher .group (j +1 ));
126
- }
127
- String newWord = newWordBuilder .toString ();
128
- if (newWord .length () == 0 ) {
129
- return false ;
140
+ StringBuilder newWordBuilder = new StringBuilder ();
141
+ for (int j = 0 ; j < regexMatcher .groupCount (); ++j ) {
142
+ newWordBuilder .append (regexMatcher .group (j +1 ));
143
+ }
144
+ String newWord = newWordBuilder .toString ();
145
+ if (newWord .length () == 0 ) {
146
+ return false ;
147
+ }
148
+ words .add (newWord );
130
149
}
131
- words .add (newWord );
132
150
}
133
151
134
152
int matchedIndex = matchedNode .index ();
@@ -137,7 +155,7 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
137
155
138
156
// move all words down by words.size() - 1 (the number of extra pieces created)
139
157
// then move the original word down by headIndex
140
- SsurgeonUtils .moveNodes (sg , sm , x -> (x > matchedIndex ), x -> x +nodeRegex .size () - 1 , true );
158
+ SsurgeonUtils .moveNodes (sg , sm , x -> (x > matchedIndex ), x -> x +words .size () - 1 , true );
141
159
// the head node has its word replaced, and its index & links need
142
160
// to be rearranged, but none of the links are added or removed
143
161
if (headIndex > 0 ) {
@@ -147,7 +165,8 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
147
165
matchedNode .setWord (words .get (headIndex ));
148
166
matchedNode .setValue (words .get (headIndex ));
149
167
150
- for (int i = 0 ; i < nodeRegex .size (); ++i ) {
168
+ // TODO: update SpaceAfter in a reasonable manner
169
+ for (int i = 0 ; i < words .size (); ++i ) {
151
170
if (i == headIndex ) {
152
171
if (nodeNames .containsKey (i )) {
153
172
sm .putNode (nodeNames .get (i ), matchedNode );
0 commit comments