Skip to content

Commit f8a2ce6

Browse files
Merge pull request #25 from vatsalmevada/master
Reusing compiled regex patterns
2 parents 6228bce + f0183fc commit f8a2ce6

File tree

4 files changed

+96
-55
lines changed

4 files changed

+96
-55
lines changed

pom.xml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,13 @@
5858
<version>2.5.6</version>
5959
<scope>test</scope>
6060
</dependency>
61+
62+
<dependency>
63+
<groupId>org.openjdk.jmh</groupId>
64+
<artifactId>jmh-generator-annprocess</artifactId>
65+
<version>1.19</version>
66+
<scope>test</scope>
67+
</dependency>
6168
</dependencies>
6269

6370
<build>

src/main/java/com/github/vertical_blank/sqlformatter/core/Tokenizer.java

Lines changed: 52 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -12,26 +12,26 @@
1212

1313

1414
public class Tokenizer {
15-
private String WHITESPACE_REGEX;
16-
private String NUMBER_REGEX;
17-
private String OPERATOR_REGEX;
15+
private final Pattern WHITESPACE_PATTERN;
16+
private final Pattern NUMBER_PATTERN;
17+
private final Pattern OPERATOR_PATTERN;
1818

19-
private String BLOCK_COMMENT_REGEX;
20-
private String LINE_COMMENT_REGEX;
19+
private final Pattern BLOCK_COMMENT_PATTERN;
20+
private final Pattern LINE_COMMENT_PATTERN;
2121

22-
private String RESERVED_TOPLEVEL_REGEX;
23-
private String RESERVED_NEWLINE_REGEX;
24-
private String RESERVED_PLAIN_REGEX;
22+
private final Pattern RESERVED_TOPLEVEL_PATTERN;
23+
private final Pattern RESERVED_NEWLINE_PATTERN;
24+
private final Pattern RESERVED_PLAIN_PATTERN;
2525

26-
private String WORD_REGEX;
27-
private String STRING_REGEX;
26+
private final Pattern WORD_PATTERN;
27+
private final Pattern STRING_PATTERN;
2828

29-
private String OPEN_PAREN_REGEX;
30-
private String CLOSE_PAREN_REGEX;
29+
private final Pattern OPEN_PAREN_PATTERN;
30+
private final Pattern CLOSE_PAREN_PATTERN;
3131

32-
private String INDEXED_PLACEHOLDER_REGEX;
33-
private String IDENT_NAMED_PLACEHOLDER_REGEX;
34-
private String STRING_NAMED_PLACEHOLDER_REGEX;
32+
private final Pattern INDEXED_PLACEHOLDER_PATTERN;
33+
private final Pattern IDENT_NAMED_PLACEHOLDER_PATTERN;
34+
private final Pattern STRING_NAMED_PLACEHOLDER_PATTERN;
3535

3636

3737
/**
@@ -47,27 +47,28 @@ public class Tokenizer {
4747
* {String[]} cfg.specialWordChars Special chars that can be found inside of words, like @ and #
4848
*/
4949
public Tokenizer(DialectConfig cfg) {
50-
this.WHITESPACE_REGEX = "^(\\s+)";
51-
this.NUMBER_REGEX = "^((-\\s*)?[0-9]+(\\.[0-9]+)?|0x[0-9a-fA-F]+|0b[01]+)\\b";
52-
this.OPERATOR_REGEX = "^(!=|<>|==|<=|>=|!<|!>|\\|\\||::|->>|=>|->|~~\\*|~~|!~~\\*|!~~|~\\*|!~\\*|!~|.)";
50+
this.WHITESPACE_PATTERN = Pattern.compile("^(\\s+)");
51+
this.NUMBER_PATTERN = Pattern.compile("^((-\\s*)?[0-9]+(\\.[0-9]+)?|0x[0-9a-fA-F]+|0b[01]+)\\b");
52+
this.OPERATOR_PATTERN = Pattern.compile("^(!=|<>|==|<=|>=|!<|!>|\\|\\||::|->>|=>|->|~~\\*|~~|!~~\\*|!~~|~\\*|!~\\*|!~|.)");
5353

5454
// this.BLOCK_COMMENT_REGEX = /^(\/\*[^]*?(?:\*\/|$))/;
55-
this.BLOCK_COMMENT_REGEX = "^(/\\*(?s).*?(?:\\*/|$))";
56-
this.LINE_COMMENT_REGEX = this.createLineCommentRegex(new JSLikeList<>(cfg.lineCommentTypes));
55+
this.BLOCK_COMMENT_PATTERN = Pattern.compile("^(/\\*(?s).*?(?:\\*/|$))");
56+
this.LINE_COMMENT_PATTERN = Pattern.compile(this.createLineCommentRegex(new JSLikeList<>(cfg.lineCommentTypes)));
5757

58-
this.RESERVED_TOPLEVEL_REGEX = this.createReservedWordRegex(new JSLikeList<>(cfg.reservedToplevelWords));
59-
this.RESERVED_NEWLINE_REGEX = this.createReservedWordRegex(new JSLikeList<>(cfg.reservedNewlineWords));
60-
this.RESERVED_PLAIN_REGEX = this.createReservedWordRegex(new JSLikeList<>(cfg.reservedWords));
58+
this.RESERVED_TOPLEVEL_PATTERN = Pattern.compile(this.createReservedWordRegex(new JSLikeList<>(cfg.reservedToplevelWords)));
59+
this.RESERVED_NEWLINE_PATTERN = Pattern.compile(this.createReservedWordRegex(new JSLikeList<>(cfg.reservedNewlineWords)));
60+
this.RESERVED_PLAIN_PATTERN = Pattern.compile(this.createReservedWordRegex(new JSLikeList<>(cfg.reservedWords)));
6161

62-
this.WORD_REGEX = this.createWordRegex(new JSLikeList<>(cfg.specialWordChars));
63-
this.STRING_REGEX = this.createStringRegex(new JSLikeList<>(cfg.stringTypes));
62+
this.WORD_PATTERN = Pattern.compile(this.createWordRegex(new JSLikeList<>(cfg.specialWordChars)));
63+
this.STRING_PATTERN = Pattern.compile(this.createStringRegex(new JSLikeList<>(cfg.stringTypes)));
6464

65-
this.OPEN_PAREN_REGEX = this.createParenRegex(new JSLikeList<>(cfg.openParens));
66-
this.CLOSE_PAREN_REGEX = this.createParenRegex(new JSLikeList<>(cfg.closeParens));
65+
this.OPEN_PAREN_PATTERN = Pattern.compile(this.createParenRegex(new JSLikeList<>(cfg.openParens)));
66+
this.CLOSE_PAREN_PATTERN = Pattern.compile(this.createParenRegex(new JSLikeList<>(cfg.closeParens)));
6767

68-
this.INDEXED_PLACEHOLDER_REGEX = createPlaceholderRegex(new JSLikeList<>(cfg.indexedPlaceholderTypes), "[0-9]*");
69-
this.IDENT_NAMED_PLACEHOLDER_REGEX = createPlaceholderRegex(new JSLikeList<>(cfg.namedPlaceholderTypes), "[a-zA-Z0-9._$]+");
70-
this.STRING_NAMED_PLACEHOLDER_REGEX = createPlaceholderRegex(
68+
69+
this.INDEXED_PLACEHOLDER_PATTERN = createPlaceholderRegexPattern(new JSLikeList<>(cfg.indexedPlaceholderTypes), "[0-9]*");
70+
this.IDENT_NAMED_PLACEHOLDER_PATTERN = createPlaceholderRegexPattern(new JSLikeList<>(cfg.namedPlaceholderTypes), "[a-zA-Z0-9._$]+");
71+
this.STRING_NAMED_PLACEHOLDER_PATTERN = createPlaceholderRegexPattern(
7172
new JSLikeList<>(cfg.namedPlaceholderTypes),
7273
this.createStringPattern(new JSLikeList<>(cfg.stringTypes))
7374
);
@@ -124,13 +125,13 @@ private static String escapeParen(String paren) {
124125
}
125126
}
126127

127-
private static String createPlaceholderRegex(JSLikeList<String> types, String pattern) {
128+
private static Pattern createPlaceholderRegexPattern(JSLikeList<String> types, String pattern) {
128129
if (types.isEmpty()) {
129130
return null;
130131
}
131132
String typesRegex = types.map(Util::escapeRegExp).join("|");
132133

133-
return String.format("^((?:%s)(?:%s))", typesRegex, pattern);
134+
return Pattern.compile(String.format("^((?:%s)(?:%s))", typesRegex, pattern));
134135
}
135136

136137
/**
@@ -174,7 +175,7 @@ private Token getWhitespaceToken(String input) {
174175
return this.getTokenOnFirstMatch(
175176
input,
176177
TokenTypes.WHITESPACE,
177-
this.WHITESPACE_REGEX
178+
this.WHITESPACE_PATTERN
178179
);
179180
}
180181

@@ -188,39 +189,39 @@ private Token getLineCommentToken(String input) {
188189
return this.getTokenOnFirstMatch(
189190
input,
190191
TokenTypes.LINE_COMMENT,
191-
this.LINE_COMMENT_REGEX
192+
this.LINE_COMMENT_PATTERN
192193
);
193194
}
194195

195196
private Token getBlockCommentToken(String input) {
196197
return this.getTokenOnFirstMatch(
197198
input,
198199
TokenTypes.BLOCK_COMMENT,
199-
this.BLOCK_COMMENT_REGEX
200+
this.BLOCK_COMMENT_PATTERN
200201
);
201202
}
202203

203204
private Token getStringToken(String input) {
204205
return this.getTokenOnFirstMatch(
205206
input,
206207
TokenTypes.STRING,
207-
this.STRING_REGEX
208+
this.STRING_PATTERN
208209
);
209210
}
210211

211212
private Token getOpenParenToken(String input) {
212213
return this.getTokenOnFirstMatch(
213214
input,
214215
TokenTypes.OPEN_PAREN,
215-
this.OPEN_PAREN_REGEX
216+
this.OPEN_PAREN_PATTERN
216217
);
217218
}
218219

219220
private Token getCloseParenToken(String input) {
220221
return this.getTokenOnFirstMatch(
221222
input,
222223
TokenTypes.CLOSE_PAREN,
223-
this.CLOSE_PAREN_REGEX
224+
this.CLOSE_PAREN_PATTERN
224225
);
225226
}
226227

@@ -234,28 +235,28 @@ private Token getPlaceholderToken(String input) {
234235
private Token getIdentNamedPlaceholderToken(String input) {
235236
return this.getPlaceholderTokenWithKey(
236237
input,
237-
this.IDENT_NAMED_PLACEHOLDER_REGEX,
238+
this.IDENT_NAMED_PLACEHOLDER_PATTERN,
238239
v -> v.substring(1)
239240
);
240241
}
241242

242243
private Token getStringNamedPlaceholderToken(String input) {
243244
return this.getPlaceholderTokenWithKey(
244245
input,
245-
this.STRING_NAMED_PLACEHOLDER_REGEX,
246+
this.STRING_NAMED_PLACEHOLDER_PATTERN,
246247
v -> this.getEscapedPlaceholderKey(v.substring(2, v.length() - 1), v.substring(v.length() - 1))
247248
);
248249
}
249250

250251
private Token getIndexedPlaceholderToken(String input) {
251252
return this.getPlaceholderTokenWithKey(
252253
input,
253-
this.INDEXED_PLACEHOLDER_REGEX,
254+
this.INDEXED_PLACEHOLDER_PATTERN,
254255
v -> v.substring(1)
255256
);
256257
}
257258

258-
private Token getPlaceholderTokenWithKey(String input, String regex, java.util.function.Function<String, String> parseKey) {
259+
private Token getPlaceholderTokenWithKey(String input, Pattern regex, java.util.function.Function<String, String> parseKey) {
259260
Token token = this.getTokenOnFirstMatch(input, TokenTypes.PLACEHOLDER, regex);
260261
if (token != null) {
261262
token.key = parseKey.apply(token.value);
@@ -272,7 +273,7 @@ private Token getNumberToken(String input) {
272273
return this.getTokenOnFirstMatch(
273274
input,
274275
TokenTypes.NUMBER,
275-
this.NUMBER_REGEX
276+
this.NUMBER_PATTERN
276277
);
277278
}
278279

@@ -281,7 +282,7 @@ private Token getOperatorToken(String input) {
281282
return this.getTokenOnFirstMatch(
282283
input,
283284
TokenTypes.OPERATOR,
284-
this.OPERATOR_REGEX
285+
this.OPERATOR_PATTERN
285286
);
286287
}
287288

@@ -301,48 +302,48 @@ private Token getToplevelReservedToken(String input) {
301302
return this.getTokenOnFirstMatch(
302303
input,
303304
TokenTypes.RESERVED_TOPLEVEL,
304-
this.RESERVED_TOPLEVEL_REGEX
305+
this.RESERVED_TOPLEVEL_PATTERN
305306
);
306307
}
307308

308309
private Token getNewlineReservedToken(String input) {
309310
return this.getTokenOnFirstMatch(
310311
input,
311312
TokenTypes.RESERVED_NEWLINE,
312-
this.RESERVED_NEWLINE_REGEX
313+
this.RESERVED_NEWLINE_PATTERN
313314
);
314315
}
315316

316317
private Token getPlainReservedToken(String input) {
317318
return this.getTokenOnFirstMatch(
318319
input,
319320
TokenTypes.RESERVED,
320-
this.RESERVED_PLAIN_REGEX
321+
this.RESERVED_PLAIN_PATTERN
321322
);
322323
}
323324

324325
private Token getWordToken(String input) {
325326
return this.getTokenOnFirstMatch(
326327
input,
327328
TokenTypes.WORD,
328-
this.WORD_REGEX
329+
this.WORD_PATTERN
329330
);
330331
}
331332

332-
private String getFirstMatch(String input, String regex) {
333+
private String getFirstMatch(String input, Pattern regex) {
333334
if (regex == null) {
334335
return null;
335336
}
336337

337-
Matcher matcher = Pattern.compile(regex).matcher(input);
338+
Matcher matcher = regex.matcher(input);
338339
if (matcher.find()) {
339340
return matcher.group();
340341
} else {
341342
return null;
342343
}
343344
}
344345

345-
private Token getTokenOnFirstMatch(String input, TokenTypes type, String regex) {
346+
private Token getTokenOnFirstMatch(String input, TokenTypes type, Pattern regex) {
346347
String matches = getFirstMatch(input, regex);
347348

348349
if (matches != null) {

src/main/java/com/github/vertical_blank/sqlformatter/core/util/Util.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@
1010

1111
public class Util {
1212

13+
private static final String ESCAPE_REGEX = Stream.of("^", "$", "\\", ".", "*", "+", "*", "?", "(", ")", "[", "]", "{", "}", "|")
14+
.map(spChr -> "(\\" + spChr + ")").collect(Collectors.joining("|"));
15+
public static final Pattern ESCAPE_REGEX_PATTERN = Pattern.compile(ESCAPE_REGEX);
16+
17+
1318
public static <T> List<T> nullToEmpty(List<T> ts) {
1419
if (ts == null) {
1520
return Collections.emptyList();
@@ -28,10 +33,7 @@ public static String trimEnd(String s) {
2833
}
2934

3035
public static String escapeRegExp(String s) {
31-
String regexp = Stream.of("^", "$", "\\", ".", "*", "+", "*", "?", "(", ")", "[", "]", "{", "}", "|")
32-
.map(spChr -> "(\\" + spChr + ")").collect(Collectors.joining("|"));
33-
34-
return Pattern.compile(regexp).matcher(s).replaceAll("\\\\$0");
36+
return ESCAPE_REGEX_PATTERN.matcher(s).replaceAll("\\\\$0");
3537
}
3638

3739
@SafeVarargs
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
package com.github.vertical_blank.sqlformatter;
2+
3+
import org.openjdk.jmh.annotations.*;
4+
import org.openjdk.jmh.runner.Runner;
5+
import org.openjdk.jmh.runner.RunnerException;
6+
import org.openjdk.jmh.runner.options.Options;
7+
import org.openjdk.jmh.runner.options.OptionsBuilder;
8+
9+
import java.util.concurrent.TimeUnit;
10+
11+
@BenchmarkMode(Mode.AverageTime)
12+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
13+
@State(Scope.Benchmark)
14+
public class Benchmark {
15+
16+
public static final String SQL = "SELECT foo, bar, CASE baz WHEN 'one' THEN 1 WHEN 'two' THEN 2 ELSE 3 END FROM table";
17+
18+
public static void main(String[] args) throws RunnerException {
19+
Options opt = new OptionsBuilder()
20+
.include(Benchmark.class.getSimpleName())
21+
.forks(1)
22+
.build();
23+
24+
new Runner(opt).run();
25+
}
26+
27+
@org.openjdk.jmh.annotations.Benchmark
28+
public void format() {
29+
SqlFormatter.format(SQL);
30+
}
31+
}

0 commit comments

Comments
 (0)