12
12
13
13
14
14
public class Tokenizer {
15
- private String WHITESPACE_REGEX ;
16
- private String NUMBER_REGEX ;
17
- private String OPERATOR_REGEX ;
15
+ private final Pattern WHITESPACE_PATTERN ;
16
+ private final Pattern NUMBER_PATTERN ;
17
+ private final Pattern OPERATOR_PATTERN ;
18
18
19
- private String BLOCK_COMMENT_REGEX ;
20
- private String LINE_COMMENT_REGEX ;
19
+ private final Pattern BLOCK_COMMENT_PATTERN ;
20
+ private final Pattern LINE_COMMENT_PATTERN ;
21
21
22
- private String RESERVED_TOPLEVEL_REGEX ;
23
- private String RESERVED_NEWLINE_REGEX ;
24
- private String RESERVED_PLAIN_REGEX ;
22
+ private final Pattern RESERVED_TOPLEVEL_PATTERN ;
23
+ private final Pattern RESERVED_NEWLINE_PATTERN ;
24
+ private final Pattern RESERVED_PLAIN_PATTERN ;
25
25
26
- private String WORD_REGEX ;
27
- private String STRING_REGEX ;
26
+ private final Pattern WORD_PATTERN ;
27
+ private final Pattern STRING_PATTERN ;
28
28
29
- private String OPEN_PAREN_REGEX ;
30
- private String CLOSE_PAREN_REGEX ;
29
+ private final Pattern OPEN_PAREN_PATTERN ;
30
+ private final Pattern CLOSE_PAREN_PATTERN ;
31
31
32
- private String INDEXED_PLACEHOLDER_REGEX ;
33
- private String IDENT_NAMED_PLACEHOLDER_REGEX ;
34
- private String STRING_NAMED_PLACEHOLDER_REGEX ;
32
+ private final Pattern INDEXED_PLACEHOLDER_PATTERN ;
33
+ private final Pattern IDENT_NAMED_PLACEHOLDER_PATTERN ;
34
+ private final Pattern STRING_NAMED_PLACEHOLDER_PATTERN ;
35
35
36
36
37
37
/**
@@ -47,27 +47,28 @@ public class Tokenizer {
47
47
* {String[]} cfg.specialWordChars Special chars that can be found inside of words, like @ and #
48
48
*/
49
49
public Tokenizer (DialectConfig cfg ) {
50
- this .WHITESPACE_REGEX = "^(\\ s+)" ;
51
- this .NUMBER_REGEX = "^((-\\ s*)?[0-9]+(\\ .[0-9]+)?|0x[0-9a-fA-F]+|0b[01]+)\\ b" ;
52
- this .OPERATOR_REGEX = "^(!=|<>|==|<=|>=|!<|!>|\\ |\\ ||::|->>|=>|->|~~\\ *|~~|!~~\\ *|!~~|~\\ *|!~\\ *|!~|.)" ;
50
+ this .WHITESPACE_PATTERN = Pattern . compile ( "^(\\ s+)" ) ;
51
+ this .NUMBER_PATTERN = Pattern . compile ( "^((-\\ s*)?[0-9]+(\\ .[0-9]+)?|0x[0-9a-fA-F]+|0b[01]+)\\ b" ) ;
52
+ this .OPERATOR_PATTERN = Pattern . compile ( "^(!=|<>|==|<=|>=|!<|!>|\\ |\\ ||::|->>|=>|->|~~\\ *|~~|!~~\\ *|!~~|~\\ *|!~\\ *|!~|.)" ) ;
53
53
54
54
// this.BLOCK_COMMENT_REGEX = /^(\/\*[^]*?(?:\*\/|$))/;
55
- this .BLOCK_COMMENT_REGEX = "^(/\\ *(?s).*?(?:\\ */|$))" ;
56
- this .LINE_COMMENT_REGEX = this .createLineCommentRegex (new JSLikeList <>(cfg .lineCommentTypes ));
55
+ this .BLOCK_COMMENT_PATTERN = Pattern . compile ( "^(/\\ *(?s).*?(?:\\ */|$))" ) ;
56
+ this .LINE_COMMENT_PATTERN = Pattern . compile ( this .createLineCommentRegex (new JSLikeList <>(cfg .lineCommentTypes ) ));
57
57
58
- this .RESERVED_TOPLEVEL_REGEX = this .createReservedWordRegex (new JSLikeList <>(cfg .reservedToplevelWords ));
59
- this .RESERVED_NEWLINE_REGEX = this .createReservedWordRegex (new JSLikeList <>(cfg .reservedNewlineWords ));
60
- this .RESERVED_PLAIN_REGEX = this .createReservedWordRegex (new JSLikeList <>(cfg .reservedWords ));
58
+ this .RESERVED_TOPLEVEL_PATTERN = Pattern . compile ( this .createReservedWordRegex (new JSLikeList <>(cfg .reservedToplevelWords ) ));
59
+ this .RESERVED_NEWLINE_PATTERN = Pattern . compile ( this .createReservedWordRegex (new JSLikeList <>(cfg .reservedNewlineWords ) ));
60
+ this .RESERVED_PLAIN_PATTERN = Pattern . compile ( this .createReservedWordRegex (new JSLikeList <>(cfg .reservedWords ) ));
61
61
62
- this .WORD_REGEX = this .createWordRegex (new JSLikeList <>(cfg .specialWordChars ));
63
- this .STRING_REGEX = this .createStringRegex (new JSLikeList <>(cfg .stringTypes ));
62
+ this .WORD_PATTERN = Pattern . compile ( this .createWordRegex (new JSLikeList <>(cfg .specialWordChars ) ));
63
+ this .STRING_PATTERN = Pattern . compile ( this .createStringRegex (new JSLikeList <>(cfg .stringTypes ) ));
64
64
65
- this .OPEN_PAREN_REGEX = this .createParenRegex (new JSLikeList <>(cfg .openParens ));
66
- this .CLOSE_PAREN_REGEX = this .createParenRegex (new JSLikeList <>(cfg .closeParens ));
65
+ this .OPEN_PAREN_PATTERN = Pattern . compile ( this .createParenRegex (new JSLikeList <>(cfg .openParens ) ));
66
+ this .CLOSE_PAREN_PATTERN = Pattern . compile ( this .createParenRegex (new JSLikeList <>(cfg .closeParens ) ));
67
67
68
- this .INDEXED_PLACEHOLDER_REGEX = createPlaceholderRegex (new JSLikeList <>(cfg .indexedPlaceholderTypes ), "[0-9]*" );
69
- this .IDENT_NAMED_PLACEHOLDER_REGEX = createPlaceholderRegex (new JSLikeList <>(cfg .namedPlaceholderTypes ), "[a-zA-Z0-9._$]+" );
70
- this .STRING_NAMED_PLACEHOLDER_REGEX = createPlaceholderRegex (
68
+
69
+ this .INDEXED_PLACEHOLDER_PATTERN = createPlaceholderRegexPattern (new JSLikeList <>(cfg .indexedPlaceholderTypes ), "[0-9]*" );
70
+ this .IDENT_NAMED_PLACEHOLDER_PATTERN = createPlaceholderRegexPattern (new JSLikeList <>(cfg .namedPlaceholderTypes ), "[a-zA-Z0-9._$]+" );
71
+ this .STRING_NAMED_PLACEHOLDER_PATTERN = createPlaceholderRegexPattern (
71
72
new JSLikeList <>(cfg .namedPlaceholderTypes ),
72
73
this .createStringPattern (new JSLikeList <>(cfg .stringTypes ))
73
74
);
@@ -124,13 +125,13 @@ private static String escapeParen(String paren) {
124
125
}
125
126
}
126
127
127
- private static String createPlaceholderRegex (JSLikeList <String > types , String pattern ) {
128
+ private static Pattern createPlaceholderRegexPattern (JSLikeList <String > types , String pattern ) {
128
129
if (types .isEmpty ()) {
129
130
return null ;
130
131
}
131
132
String typesRegex = types .map (Util ::escapeRegExp ).join ("|" );
132
133
133
- return String .format ("^((?:%s)(?:%s))" , typesRegex , pattern );
134
+ return Pattern . compile ( String .format ("^((?:%s)(?:%s))" , typesRegex , pattern ) );
134
135
}
135
136
136
137
/**
@@ -174,7 +175,7 @@ private Token getWhitespaceToken(String input) {
174
175
return this .getTokenOnFirstMatch (
175
176
input ,
176
177
TokenTypes .WHITESPACE ,
177
- this .WHITESPACE_REGEX
178
+ this .WHITESPACE_PATTERN
178
179
);
179
180
}
180
181
@@ -188,39 +189,39 @@ private Token getLineCommentToken(String input) {
188
189
return this .getTokenOnFirstMatch (
189
190
input ,
190
191
TokenTypes .LINE_COMMENT ,
191
- this .LINE_COMMENT_REGEX
192
+ this .LINE_COMMENT_PATTERN
192
193
);
193
194
}
194
195
195
196
private Token getBlockCommentToken (String input ) {
196
197
return this .getTokenOnFirstMatch (
197
198
input ,
198
199
TokenTypes .BLOCK_COMMENT ,
199
- this .BLOCK_COMMENT_REGEX
200
+ this .BLOCK_COMMENT_PATTERN
200
201
);
201
202
}
202
203
203
204
private Token getStringToken (String input ) {
204
205
return this .getTokenOnFirstMatch (
205
206
input ,
206
207
TokenTypes .STRING ,
207
- this .STRING_REGEX
208
+ this .STRING_PATTERN
208
209
);
209
210
}
210
211
211
212
private Token getOpenParenToken (String input ) {
212
213
return this .getTokenOnFirstMatch (
213
214
input ,
214
215
TokenTypes .OPEN_PAREN ,
215
- this .OPEN_PAREN_REGEX
216
+ this .OPEN_PAREN_PATTERN
216
217
);
217
218
}
218
219
219
220
private Token getCloseParenToken (String input ) {
220
221
return this .getTokenOnFirstMatch (
221
222
input ,
222
223
TokenTypes .CLOSE_PAREN ,
223
- this .CLOSE_PAREN_REGEX
224
+ this .CLOSE_PAREN_PATTERN
224
225
);
225
226
}
226
227
@@ -234,28 +235,28 @@ private Token getPlaceholderToken(String input) {
234
235
private Token getIdentNamedPlaceholderToken (String input ) {
235
236
return this .getPlaceholderTokenWithKey (
236
237
input ,
237
- this .IDENT_NAMED_PLACEHOLDER_REGEX ,
238
+ this .IDENT_NAMED_PLACEHOLDER_PATTERN ,
238
239
v -> v .substring (1 )
239
240
);
240
241
}
241
242
242
243
private Token getStringNamedPlaceholderToken (String input ) {
243
244
return this .getPlaceholderTokenWithKey (
244
245
input ,
245
- this .STRING_NAMED_PLACEHOLDER_REGEX ,
246
+ this .STRING_NAMED_PLACEHOLDER_PATTERN ,
246
247
v -> this .getEscapedPlaceholderKey (v .substring (2 , v .length () - 1 ), v .substring (v .length () - 1 ))
247
248
);
248
249
}
249
250
250
251
private Token getIndexedPlaceholderToken (String input ) {
251
252
return this .getPlaceholderTokenWithKey (
252
253
input ,
253
- this .INDEXED_PLACEHOLDER_REGEX ,
254
+ this .INDEXED_PLACEHOLDER_PATTERN ,
254
255
v -> v .substring (1 )
255
256
);
256
257
}
257
258
258
- private Token getPlaceholderTokenWithKey (String input , String regex , java .util .function .Function <String , String > parseKey ) {
259
+ private Token getPlaceholderTokenWithKey (String input , Pattern regex , java .util .function .Function <String , String > parseKey ) {
259
260
Token token = this .getTokenOnFirstMatch (input , TokenTypes .PLACEHOLDER , regex );
260
261
if (token != null ) {
261
262
token .key = parseKey .apply (token .value );
@@ -272,7 +273,7 @@ private Token getNumberToken(String input) {
272
273
return this .getTokenOnFirstMatch (
273
274
input ,
274
275
TokenTypes .NUMBER ,
275
- this .NUMBER_REGEX
276
+ this .NUMBER_PATTERN
276
277
);
277
278
}
278
279
@@ -281,7 +282,7 @@ private Token getOperatorToken(String input) {
281
282
return this .getTokenOnFirstMatch (
282
283
input ,
283
284
TokenTypes .OPERATOR ,
284
- this .OPERATOR_REGEX
285
+ this .OPERATOR_PATTERN
285
286
);
286
287
}
287
288
@@ -301,48 +302,48 @@ private Token getToplevelReservedToken(String input) {
301
302
return this .getTokenOnFirstMatch (
302
303
input ,
303
304
TokenTypes .RESERVED_TOPLEVEL ,
304
- this .RESERVED_TOPLEVEL_REGEX
305
+ this .RESERVED_TOPLEVEL_PATTERN
305
306
);
306
307
}
307
308
308
309
private Token getNewlineReservedToken (String input ) {
309
310
return this .getTokenOnFirstMatch (
310
311
input ,
311
312
TokenTypes .RESERVED_NEWLINE ,
312
- this .RESERVED_NEWLINE_REGEX
313
+ this .RESERVED_NEWLINE_PATTERN
313
314
);
314
315
}
315
316
316
317
private Token getPlainReservedToken (String input ) {
317
318
return this .getTokenOnFirstMatch (
318
319
input ,
319
320
TokenTypes .RESERVED ,
320
- this .RESERVED_PLAIN_REGEX
321
+ this .RESERVED_PLAIN_PATTERN
321
322
);
322
323
}
323
324
324
325
private Token getWordToken (String input ) {
325
326
return this .getTokenOnFirstMatch (
326
327
input ,
327
328
TokenTypes .WORD ,
328
- this .WORD_REGEX
329
+ this .WORD_PATTERN
329
330
);
330
331
}
331
332
332
- private String getFirstMatch (String input , String regex ) {
333
+ private String getFirstMatch (String input , Pattern regex ) {
333
334
if (regex == null ) {
334
335
return null ;
335
336
}
336
337
337
- Matcher matcher = Pattern . compile ( regex ) .matcher (input );
338
+ Matcher matcher = regex .matcher (input );
338
339
if (matcher .find ()) {
339
340
return matcher .group ();
340
341
} else {
341
342
return null ;
342
343
}
343
344
}
344
345
345
- private Token getTokenOnFirstMatch (String input , TokenTypes type , String regex ) {
346
+ private Token getTokenOnFirstMatch (String input , TokenTypes type , Pattern regex ) {
346
347
String matches = getFirstMatch (input , regex );
347
348
348
349
if (matches != null ) {
0 commit comments