Skip to content

Commit 6fb916d

Browse files
toverdijk authored and mp911de committed
Added BasicPostgresqlSqlLexer to tokenize statements
[#468]
1 parent b33548a commit 6fb916d

File tree

6 files changed

+743
-0
lines changed

6 files changed

+743
-0
lines changed
Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
/*
2+
* Copyright 2021 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package io.r2dbc.postgresql.util.sql;
18+
19+
import java.util.ArrayList;
20+
import java.util.Arrays;
21+
import java.util.List;
22+
23+
import static java.lang.Character.isDigit;
24+
import static java.lang.Character.isWhitespace;
25+
import static java.lang.Character.toLowerCase;
26+
27+
public class BasicPostgresqlSqlLexer {
28+
29+
private static final char[] SPECIAL_AND_OPERATOR_CHARS = {
30+
'+', '-', '*', '/', '<', '>', '=', '~', '!', '@', '#', '%', '^', '&', '|', '`', '?',
31+
'(', ')', '[', ']', ',', ';', ':', '*', '.', '\'', '"'
32+
};
33+
34+
static {
35+
Arrays.sort(SPECIAL_AND_OPERATOR_CHARS);
36+
}
37+
38+
public static TokenizedSql tokenize(String sql) {
39+
List<Token> tokens = new ArrayList<>();
40+
List<TokenizedStatement> statements = new ArrayList<>();
41+
42+
int statementStartIndex = 0;
43+
int i = 0;
44+
while (i < sql.length()) {
45+
char c = sql.charAt(i);
46+
Token token = null;
47+
48+
if (isWhitespace(c)) {
49+
i++;
50+
continue;
51+
}
52+
switch (c) {
53+
case '\'': // "Standard" string constant
54+
token = getStandardQuoteToken(sql, i);
55+
break;
56+
case '\"': // Quoted Identifier
57+
token = getQuotedIdentifierToken(sql, i);
58+
break;
59+
case '-': // Possible start of double-dash comment
60+
if ((i + 1) < sql.length() && sql.charAt(i + 1) == '-') {
61+
token = getCommentToLineEndToken(sql, i);
62+
}
63+
break;
64+
case '/': // Possible start of c-style comment
65+
if ((i + 1) < sql.length() && sql.charAt(i + 1) == '*') {
66+
token = getBlockCommentToken(sql, i);
67+
}
68+
break;
69+
case '$': // Dollar-quoted constant or parameter
70+
token = getParameterOrDollarQuoteToken(sql, i);
71+
break;
72+
case ';':
73+
token = new Token(TokenType.STATEMENT_END, ";");
74+
break;
75+
default:
76+
break;
77+
}
78+
if (token == null) {
79+
if (isSpecialOrOperatorChar(c)) {
80+
token = new Token(TokenType.SPECIAL_OR_OPERATOR, Character.toString(c));//getSpecialOrOperatorToken(sql, i);
81+
} else {
82+
token = getDefaultToken(sql, i);
83+
}
84+
}
85+
86+
i += token.getValue().length();
87+
88+
if (token.getType() == TokenType.STATEMENT_END) {
89+
90+
tokens.add(token);
91+
statements.add(new TokenizedStatement(sql.substring(statementStartIndex, i), tokens));
92+
93+
tokens = new ArrayList<>();
94+
statementStartIndex = i + 1;
95+
} else {
96+
tokens.add(token);
97+
}
98+
}
99+
// If tokens is not empty, implicit statement end
100+
if (!tokens.isEmpty()) {
101+
statements.add(new TokenizedStatement(sql.substring(statementStartIndex), tokens));
102+
}
103+
104+
return new TokenizedSql(sql, statements);
105+
}
106+
107+
private static Token getDefaultToken(String sql, int beginIndex) {
108+
for (int i = beginIndex + 1; i < sql.length(); i++) {
109+
char c = sql.charAt(i);
110+
if (Character.isWhitespace(c) || isSpecialOrOperatorChar(c)) {
111+
return new Token(TokenType.DEFAULT, sql.substring(beginIndex, i));
112+
}
113+
}
114+
return new Token(TokenType.DEFAULT, sql.substring(beginIndex));
115+
}
116+
117+
private static boolean isSpecialOrOperatorChar(char c) {
118+
return Arrays.binarySearch(SPECIAL_AND_OPERATOR_CHARS, c) >= 0;
119+
}
120+
121+
private static Token getBlockCommentToken(String sql, int beginIndex) {
122+
int depth = 1;
123+
for (int i = beginIndex + 2; i < (sql.length() - 1); i++) {
124+
String biGraph = sql.substring(i, i + 2);
125+
126+
if (biGraph.equals("/*")) {
127+
depth++;
128+
i++;
129+
} else if (biGraph.equals("*/")) {
130+
depth--;
131+
i++;
132+
}
133+
if (depth == 0) {
134+
return new Token(TokenType.COMMENT, sql.substring(beginIndex, i + 1));
135+
}
136+
}
137+
throw new IllegalArgumentException("Sql cannot be parsed: unclosed block comment (comment opened at index " + beginIndex + ") in statement: " + sql);
138+
}
139+
140+
private static Token getCommentToLineEndToken(String sql, int beginIndex) {
141+
int lineEnding = sql.indexOf('\n', beginIndex);
142+
if (lineEnding == -1) {
143+
return new Token(TokenType.COMMENT, sql.substring(beginIndex));
144+
} else {
145+
return new Token(TokenType.COMMENT, sql.substring(beginIndex, lineEnding));
146+
}
147+
}
148+
149+
private static Token getDollarQuoteToken(String sql, String tag, int beginIndex) {
150+
int nextQuote = sql.indexOf(tag, beginIndex + tag.length());
151+
if (nextQuote == -1) {
152+
throw new IllegalArgumentException("Sql cannot be parsed: unclosed quote (quote opened at index " + beginIndex + ") in statement: " + sql);
153+
} else {
154+
return new Token(TokenType.STRING_CONSTANT, sql.substring(beginIndex, nextQuote + tag.length()));
155+
}
156+
}
157+
158+
private static Token getParameterToken(String sql, int beginIndex) {
159+
for (int i = beginIndex + 1; i < sql.length(); i++) {
160+
char c = sql.charAt(i);
161+
if (isWhitespace(c) || isSpecialOrOperatorChar(c)) {
162+
return new Token(TokenType.PARAMETER, sql.substring(beginIndex, i));
163+
}
164+
if (!isAsciiDigit(c)) {
165+
throw new IllegalArgumentException("Sql cannot be parsed: illegal character in parameter or dollar-quote tag: " + c);
166+
}
167+
}
168+
return new Token(TokenType.PARAMETER, sql.substring(beginIndex));
169+
}
170+
171+
private static Token getParameterOrDollarQuoteToken(String sql, int beginIndex) {
172+
char firstChar = sql.charAt(beginIndex + 1);
173+
if (firstChar == '$') {
174+
return getDollarQuoteToken(sql, "$$", beginIndex);
175+
} else if (isAsciiDigit(firstChar)) {
176+
return getParameterToken(sql, beginIndex);
177+
} else {
178+
for (int i = beginIndex + 1; i < sql.length(); i++) {
179+
char c = sql.charAt(i);
180+
if (c == '$') {
181+
return getDollarQuoteToken(sql, sql.substring(beginIndex, i + 1), beginIndex);
182+
}
183+
if (!(isAsciiLetter(c) || c == '_' || isAsciiDigit(c))) {
184+
throw new IllegalArgumentException("Sql cannot be parsed: illegal character in dollar-quote tag (quote opened at index " + beginIndex + ") in statement: " + sql);
185+
}
186+
}
187+
throw new IllegalArgumentException("Sql cannot be parsed: unclosed dollar-quote tag(quote opened at index " + beginIndex + ") in statement: " + sql);
188+
}
189+
}
190+
191+
private static Token getStandardQuoteToken(String sql, int beginIndex) {
192+
int nextQuote = sql.indexOf('\'', beginIndex + 1);
193+
if (nextQuote == -1) {
194+
throw new IllegalArgumentException("Sql cannot be parsed: unclosed quote (quote opened at index " + beginIndex + ") in statement: " + sql);
195+
} else {
196+
return new Token(TokenType.STRING_CONSTANT, sql.substring(beginIndex, nextQuote + 1));
197+
}
198+
}
199+
200+
private static Token getQuotedIdentifierToken(String sql, int beginIndex) {
201+
int nextQuote = sql.indexOf('\"', beginIndex + 1);
202+
if (nextQuote == -1) {
203+
throw new IllegalArgumentException("Sql cannot be parsed: unclosed quoted identifier (identifier opened at index " + beginIndex + ") in statement: " + sql);
204+
} else {
205+
return new Token(TokenType.QUOTED_IDENTIFIER, sql.substring(beginIndex, nextQuote + 1));
206+
}
207+
}
208+
209+
private static boolean isAsciiLetter(char c){
210+
char lower = Character.toLowerCase(c);
211+
return lower >= 'a' && lower <= 'z';
212+
}
213+
214+
private static boolean isAsciiDigit(char c){
215+
return c >= '0' && c <= '9';
216+
}
217+
}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/*
2+
* Copyright 2021 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package io.r2dbc.postgresql.util.sql;
18+
19+
public class Token {
20+
21+
private TokenType type;
22+
23+
private String value;
24+
25+
public Token(TokenType type, String value) {
26+
this.type = type;
27+
this.value = value;
28+
}
29+
30+
public TokenType getType() {
31+
return type;
32+
}
33+
34+
public String getValue() {
35+
return value;
36+
}
37+
38+
@Override
39+
public boolean equals(Object o) {
40+
if (this == o) {
41+
return true;
42+
}
43+
if (o == null || getClass() != o.getClass()) {
44+
return false;
45+
}
46+
47+
Token token = (Token) o;
48+
49+
if (type != token.type) {
50+
return false;
51+
}
52+
return value.equals(token.value);
53+
}
54+
55+
@Override
56+
public int hashCode() {
57+
int result = type.hashCode();
58+
result = 31 * result + value.hashCode();
59+
return result;
60+
}
61+
62+
@Override
63+
public String toString() {
64+
return "Token{" +
65+
"type=" + type +
66+
", value=" + value +
67+
'}';
68+
}
69+
70+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* Copyright 2021 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package io.r2dbc.postgresql.util.sql;
18+
19+
/**
 * Kinds of token produced by {@code BasicPostgresqlSqlLexer}.
 */
public enum TokenType {

    /** A run of characters that is not whitespace, not a special/operator character and not any other token kind. */
    DEFAULT,

    /** A single-quoted or dollar-quoted string constant, including its delimiters. */
    STRING_CONSTANT,

    /** A {@code --} line comment or a block comment. */
    COMMENT,

    /** A {@code $} followed by digits. */
    PARAMETER,

    /** A double-quoted identifier, including the quotes. */
    QUOTED_IDENTIFIER,

    /** The statement terminator {@code ;}. */
    STATEMENT_END,

    /** A single special or operator character. */
    SPECIAL_OR_OPERATOR

}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*
2+
* Copyright 2021 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package io.r2dbc.postgresql.util.sql;
18+
19+
import java.util.List;
20+
21+
public class TokenizedSql {
22+
23+
private final String sql;
24+
25+
private final List<TokenizedStatement> statements;
26+
27+
private final int statementCount;
28+
29+
private final int parameterCount;
30+
31+
public TokenizedSql(String sql, List<TokenizedStatement> statements) {
32+
this.sql = sql;
33+
this.statements = statements;
34+
this.statementCount = statements.size();
35+
this.parameterCount = statements.stream().mapToInt(TokenizedStatement::getParameterCount).sum();
36+
}
37+
38+
public List<TokenizedStatement> getStatements() {
39+
return statements;
40+
}
41+
42+
public int getStatementCount() {
43+
return this.statementCount;
44+
}
45+
46+
public int getParameterCount() {
47+
return this.parameterCount;
48+
}
49+
50+
public String getSql() {
51+
return sql;
52+
}
53+
54+
}

0 commit comments

Comments
 (0)