
Commit fed0b5b

feat(spark): add spark expression column

1 parent 31719b9 · commit fed0b5b

File tree

11 files changed: +5456 -5026 lines

src/grammar/hive/HiveSqlParser.g4

Lines changed: 2 additions & 2 deletions
Whitespace-only change to the license header:

@@ -4,9 +4,9 @@
 ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License. You may obtain a copy of the
 License at
-
+
     http://www.apache.org/licenses/LICENSE-2.0
-
+
 Unless required by applicable law or agreed to in writing, software distributed under the License
 is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 implied. See the License for the specific language governing permissions and limitations under the

src/grammar/spark/SparkSqlParser.g4

Lines changed: 24 additions & 12 deletions
@@ -455,6 +455,10 @@ columnName
     | {this.shouldMatchEmpty()}?
     ;
 
+columnNamePath
+    : multipartIdentifier
+    ;
+
 columnNameSeq
     : columnName (COMMA columnName)*
     ;

@@ -469,11 +473,23 @@ identifierReference
     ;
 
 queryOrganization
-    : (KW_ORDER KW_BY order+=sortItem (COMMA order+=sortItem)*)? (
-        KW_CLUSTER KW_BY clusterBy+=expression (COMMA clusterBy+=expression)*
-    )? (KW_DISTRIBUTE KW_BY distributeBy+=expression (COMMA distributeBy+=expression)*)? (
-        KW_SORT KW_BY sort+=sortItem (COMMA sort+=sortItem)*
-    )? windowClause? (KW_LIMIT (KW_ALL | limit=expression))? (KW_OFFSET offset=expression)?
+    : (KW_ORDER KW_BY orderOrSortByClause)? (KW_CLUSTER KW_BY clusterOrDistributeBy)? (
+        KW_DISTRIBUTE KW_BY clusterOrDistributeBy
+    )? (KW_SORT KW_BY orderOrSortByClause)? windowClause? limitClause? (
+        KW_OFFSET offset=expression
+    )?
+    ;
+
+limitClause
+    : KW_LIMIT (KW_ALL | limit=expression)
+    ;
+
+orderOrSortByClause
+    : sortItem (COMMA sortItem)*
+    ;
+
+clusterOrDistributeBy
+    : expression (COMMA expression)*
     ;
 
 multiInsertQueryBody

@@ -825,11 +841,7 @@ tableArgumentPartitioning
         | partition+=expression
         )
     )
-    ) (
-        (KW_ORDER | KW_SORT) KW_BY (
-            ((LEFT_PAREN sortItem (COMMA sortItem)* RIGHT_PAREN) | sortItem)
-        )
-    )?
+    ) ((KW_ORDER | KW_SORT) KW_BY ( ((LEFT_PAREN orderOrSortByClause RIGHT_PAREN) | sortItem)))?
     ;
 
 functionTableNamedArgumentExpression

@@ -1013,7 +1025,7 @@ primaryExpression
     | identifier ARROW expression
     | LEFT_PAREN identifier (COMMA identifier)+ RIGHT_PAREN ARROW expression
     | value=primaryExpression LEFT_BRACKET index=valueExpression RIGHT_BRACKET
-    | identifier
+    | columnNamePath
     | base=primaryExpression DOT fieldName=identifier
     | LEFT_PAREN expression RIGHT_PAREN
     | KW_EXTRACT LEFT_PAREN field=identifier KW_FROM source=valueExpression RIGHT_PAREN

@@ -1286,7 +1298,7 @@ windowSpec
         (KW_PARTITION | KW_DISTRIBUTE) KW_BY partition+=expression (
             COMMA partition+=expression
         )*
-    )? ((KW_ORDER | KW_SORT) KW_BY sortItem (COMMA sortItem)*)?
+    )? ((KW_ORDER | KW_SORT) KW_BY orderOrSortByClause)?
     ) windowFrame? RIGHT_PAREN
     ;
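
Net effect of the grammar change: bare and qualified names in expression position (select list, WHERE, JOIN ... ON, ORDER BY, and so on) now parse through the dedicated columnNamePath rule rather than the generic identifier alternative, so downstream tooling can classify them as column entities. A minimal sketch of checking this through the library's validate API, using a query from the new test fixture (the dt-sql-parser import path is an assumption):

// Sketch only; assumes the SparkSQL class exported by this package.
import { SparkSQL } from 'dt-sql-parser';

const spark = new SparkSQL();

// employee.deptno and department.deptno are matched by columnNamePath
// (a multipartIdentifier) instead of identifier DOT identifier chains.
const sql = `SELECT id, name, employee.deptno, deptname
    FROM employee
    FULL JOIN department ON employee.deptno = department.deptno;`;

console.log(spark.validate(sql)); // expected: [] (no syntax errors)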

src/lib/spark/SparkSqlParser.interp

Lines changed: 5 additions & 1 deletion
Large diffs are not rendered by default.

src/lib/spark/SparkSqlParser.ts

Lines changed: 5164 additions & 5009 deletions
Large diffs are not rendered by default.

src/lib/spark/SparkSqlParserListener.ts

Lines changed: 44 additions & 0 deletions
@@ -149,10 +149,14 @@ import { TableNameContext } from "./SparkSqlParser.js";
 import { ViewNameCreateContext } from "./SparkSqlParser.js";
 import { ViewNameContext } from "./SparkSqlParser.js";
 import { ColumnNameContext } from "./SparkSqlParser.js";
+import { ColumnNamePathContext } from "./SparkSqlParser.js";
 import { ColumnNameSeqContext } from "./SparkSqlParser.js";
 import { ColumnNameCreateContext } from "./SparkSqlParser.js";
 import { IdentifierReferenceContext } from "./SparkSqlParser.js";
 import { QueryOrganizationContext } from "./SparkSqlParser.js";
+import { LimitClauseContext } from "./SparkSqlParser.js";
+import { OrderOrSortByClauseContext } from "./SparkSqlParser.js";
+import { ClusterOrDistributeByContext } from "./SparkSqlParser.js";
 import { MultiInsertQueryBodyContext } from "./SparkSqlParser.js";
 import { QueryTermContext } from "./SparkSqlParser.js";
 import { QueryPrimaryContext } from "./SparkSqlParser.js";

@@ -1913,6 +1917,16 @@ export class SparkSqlParserListener implements ParseTreeListener {
      * @param ctx the parse tree
      */
     exitColumnName?: (ctx: ColumnNameContext) => void;
+    /**
+     * Enter a parse tree produced by `SparkSqlParser.columnNamePath`.
+     * @param ctx the parse tree
+     */
+    enterColumnNamePath?: (ctx: ColumnNamePathContext) => void;
+    /**
+     * Exit a parse tree produced by `SparkSqlParser.columnNamePath`.
+     * @param ctx the parse tree
+     */
+    exitColumnNamePath?: (ctx: ColumnNamePathContext) => void;
     /**
      * Enter a parse tree produced by `SparkSqlParser.columnNameSeq`.
      * @param ctx the parse tree

@@ -1953,6 +1967,36 @@ export class SparkSqlParserListener implements ParseTreeListener {
      * @param ctx the parse tree
      */
     exitQueryOrganization?: (ctx: QueryOrganizationContext) => void;
+    /**
+     * Enter a parse tree produced by `SparkSqlParser.limitClause`.
+     * @param ctx the parse tree
+     */
+    enterLimitClause?: (ctx: LimitClauseContext) => void;
+    /**
+     * Exit a parse tree produced by `SparkSqlParser.limitClause`.
+     * @param ctx the parse tree
+     */
+    exitLimitClause?: (ctx: LimitClauseContext) => void;
+    /**
+     * Enter a parse tree produced by `SparkSqlParser.orderOrSortByClause`.
+     * @param ctx the parse tree
+     */
+    enterOrderOrSortByClause?: (ctx: OrderOrSortByClauseContext) => void;
+    /**
+     * Exit a parse tree produced by `SparkSqlParser.orderOrSortByClause`.
+     * @param ctx the parse tree
+     */
+    exitOrderOrSortByClause?: (ctx: OrderOrSortByClauseContext) => void;
+    /**
+     * Enter a parse tree produced by `SparkSqlParser.clusterOrDistributeBy`.
+     * @param ctx the parse tree
+     */
+    enterClusterOrDistributeBy?: (ctx: ClusterOrDistributeByContext) => void;
+    /**
+     * Exit a parse tree produced by `SparkSqlParser.clusterOrDistributeBy`.
+     * @param ctx the parse tree
+     */
+    exitClusterOrDistributeBy?: (ctx: ClusterOrDistributeByContext) => void;
     /**
      * Enter a parse tree produced by `SparkSqlParser.multiInsertQueryBody`.
      * @param ctx the parse tree
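
The new callbacks are optional properties on the generated listener, so a walker assigns only the ones it needs. Below is a minimal sketch that collects every column path in a statement; the import paths and the parse/listen helpers are assumptions based on the repo's layout, not verified exports:

// Sketch only; adjust import paths to the package build you consume.
import { SparkSQL } from 'dt-sql-parser';
import { SparkSqlParserListener } from 'dt-sql-parser/dist/lib/spark/SparkSqlParserListener';
import type { ColumnNamePathContext } from 'dt-sql-parser/dist/lib/spark/SparkSqlParser';

class ColumnCollector extends SparkSqlParserListener {
    columns: string[] = [];

    // Generated callbacks are optional function properties, so assign them
    // rather than overriding methods.
    enterColumnNamePath = (ctx: ColumnNamePathContext) => {
        this.columns.push(ctx.getText()); // e.g. "employee.deptno"
    };
}

const spark = new SparkSQL();
const tree = spark.parse('SELECT id, employee.deptno FROM employee;');
const collector = new ColumnCollector();
spark.listen(collector, tree);
console.log(collector.columns); // roughly ["id", "employee.deptno"]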

src/lib/spark/SparkSqlParserVisitor.ts

Lines changed: 28 additions & 0 deletions
@@ -149,10 +149,14 @@ import { TableNameContext } from "./SparkSqlParser.js";
 import { ViewNameCreateContext } from "./SparkSqlParser.js";
 import { ViewNameContext } from "./SparkSqlParser.js";
 import { ColumnNameContext } from "./SparkSqlParser.js";
+import { ColumnNamePathContext } from "./SparkSqlParser.js";
 import { ColumnNameSeqContext } from "./SparkSqlParser.js";
 import { ColumnNameCreateContext } from "./SparkSqlParser.js";
 import { IdentifierReferenceContext } from "./SparkSqlParser.js";
 import { QueryOrganizationContext } from "./SparkSqlParser.js";
+import { LimitClauseContext } from "./SparkSqlParser.js";
+import { OrderOrSortByClauseContext } from "./SparkSqlParser.js";
+import { ClusterOrDistributeByContext } from "./SparkSqlParser.js";
 import { MultiInsertQueryBodyContext } from "./SparkSqlParser.js";
 import { QueryTermContext } from "./SparkSqlParser.js";
 import { QueryPrimaryContext } from "./SparkSqlParser.js";

@@ -1255,6 +1259,12 @@ export class SparkSqlParserVisitor<Result> extends AbstractParseTreeVisitor<Result> {
      * @return the visitor result
      */
     visitColumnName?: (ctx: ColumnNameContext) => Result;
+    /**
+     * Visit a parse tree produced by `SparkSqlParser.columnNamePath`.
+     * @param ctx the parse tree
+     * @return the visitor result
+     */
+    visitColumnNamePath?: (ctx: ColumnNamePathContext) => Result;
     /**
      * Visit a parse tree produced by `SparkSqlParser.columnNameSeq`.
      * @param ctx the parse tree

@@ -1279,6 +1289,24 @@ export class SparkSqlParserVisitor<Result> extends AbstractParseTreeVisitor<Result> {
      * @return the visitor result
      */
     visitQueryOrganization?: (ctx: QueryOrganizationContext) => Result;
+    /**
+     * Visit a parse tree produced by `SparkSqlParser.limitClause`.
+     * @param ctx the parse tree
+     * @return the visitor result
+     */
+    visitLimitClause?: (ctx: LimitClauseContext) => Result;
+    /**
+     * Visit a parse tree produced by `SparkSqlParser.orderOrSortByClause`.
+     * @param ctx the parse tree
+     * @return the visitor result
+     */
+    visitOrderOrSortByClause?: (ctx: OrderOrSortByClauseContext) => Result;
+    /**
+     * Visit a parse tree produced by `SparkSqlParser.clusterOrDistributeBy`.
+     * @param ctx the parse tree
+     * @return the visitor result
+     */
+    visitClusterOrDistributeBy?: (ctx: ClusterOrDistributeByContext) => Result;
     /**
      * Visit a parse tree produced by `SparkSqlParser.multiInsertQueryBody`.
      * @param ctx the parse tree
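
The visitor gets the same hooks. A sketch that pulls the LIMIT expression out of a query through the new visitLimitClause, under the same export-location assumptions as the listener sketch above:

// Sketch only; import paths and the parse helper as assumed above.
import { SparkSQL } from 'dt-sql-parser';
import { SparkSqlParserVisitor } from 'dt-sql-parser/dist/lib/spark/SparkSqlParserVisitor';
import type { LimitClauseContext } from 'dt-sql-parser/dist/lib/spark/SparkSqlParser';

class LimitVisitor extends SparkSqlParserVisitor<string | null> {
    protected defaultResult(): string | null {
        return null;
    }
    // Keep the last non-null child result so a LIMIT found anywhere in the
    // tree bubbles up to the root.
    protected aggregateResult(aggregate: string | null, next: string | null): string | null {
        return next ?? aggregate;
    }
    visitLimitClause = (ctx: LimitClauseContext): string | null => {
        // limitClause : KW_LIMIT (KW_ALL | limit=expression)
        // getText() concatenates tokens without whitespace, e.g. "LIMITlength(name)".
        return ctx.getText().replace(/^LIMIT/i, '');
    };
}

const spark = new SparkSQL();
const tree = spark.parse('SELECT name FROM person ORDER BY age LIMIT length(name);');
console.log(new LimitVisitor().visit(tree)); // "length(name)"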

src/parser/spark/index.ts

Lines changed: 18 additions & 0 deletions
@@ -31,6 +31,7 @@ export class SparkSQL extends BasicSQL<SparkSqlLexer, ProgramContext, SparkSqlParser> {
         SparkSqlParser.RULE_functionName,
         SparkSqlParser.RULE_functionNameCreate,
         SparkSqlParser.RULE_columnName,
+        SparkSqlParser.RULE_columnNamePath,
         SparkSqlParser.RULE_columnNameCreate,
     ]);

@@ -105,6 +106,23 @@
                 syntaxContextType = EntityContextType.COLUMN_CREATE;
                 break;
             }
+            case SparkSqlParser.RULE_columnNamePath: {
+                if (
+                    candidateRule.ruleList.includes(SparkSqlParser.RULE_whenClause) ||
+                    candidateRule.ruleList.includes(SparkSqlParser.RULE_whereClause) ||
+                    candidateRule.ruleList.includes(SparkSqlParser.RULE_joinRelation) ||
+                    candidateRule.ruleList.includes(SparkSqlParser.RULE_orderOrSortByClause) ||
+                    candidateRule.ruleList.includes(SparkSqlParser.RULE_groupByClause) ||
+                    candidateRule.ruleList.includes(SparkSqlParser.RULE_aggregationClause) ||
+                    candidateRule.ruleList.includes(SparkSqlParser.RULE_havingClause) ||
+                    candidateRule.ruleList.includes(SparkSqlParser.RULE_windowClause) ||
+                    candidateRule.ruleList.includes(SparkSqlParser.RULE_selectClause) ||
+                    candidateRule.ruleList.includes(SparkSqlParser.RULE_limitClause) ||
+                    candidateRule.ruleList.includes(SparkSqlParser.RULE_clusterOrDistributeBy)
+                ) {
+                    syntaxContextType = EntityContextType.COLUMN;
+                }
+            }
             default:
                 break;
         }
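
The guard maps columnNamePath to EntityContextType.COLUMN only when the candidate rule stack passes through a clause where a column reference can actually appear (WHEN, WHERE, JOIN, ORDER/SORT BY, GROUP BY, HAVING, window, select list, LIMIT, CLUSTER/DISTRIBUTE BY), so completion does not offer columns in positions that merely share the identifier grammar. A sketch of what this buys at a caret, mirroring the new error-listener test; the caret API shape is assumed from the repo's other suggestion tests:

// Sketch only; caret API shape assumed.
import { SparkSQL, EntityContextType } from 'dt-sql-parser';

const spark = new SparkSQL();

// Caret right after "length(" inside ORDER BY, one of the whitelisted stacks.
const sql = 'SELECT name, age FROM person ORDER BY length( ';
const caret = { lineNumber: 1, column: sql.length + 1 };

const suggestions = spark.getSuggestionAtCaretPosition(sql, caret);
const offersColumns = suggestions?.syntax.some(
    (item) => item.syntaxContextType === EntityContextType.COLUMN
);
console.log(offersColumns); // expected: true, column entities can be suggested here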

src/parser/spark/sparkErrorListener.ts

Lines changed: 3 additions & 1 deletion
@@ -17,6 +17,7 @@ export class SparkErrorListener extends ParseErrorListener {
         [SparkSqlParser.RULE_functionName, 'function'],
         [SparkSqlParser.RULE_functionNameCreate, 'function'],
         [SparkSqlParser.RULE_columnName, 'column'],
+        [SparkSqlParser.RULE_columnNamePath, 'column'],
         [SparkSqlParser.RULE_columnNameCreate, 'column'],
     ]);

@@ -48,7 +49,8 @@
             case SparkSqlParser.RULE_tableName:
             case SparkSqlParser.RULE_viewName:
             case SparkSqlParser.RULE_functionName:
-            case SparkSqlParser.RULE_columnName: {
+            case SparkSqlParser.RULE_columnName:
+            case SparkSqlParser.RULE_columnNamePath: {
                 result.push(`{existing}${name}`);
                 break;
             }

test/parser/spark/errorListener.test.ts

Lines changed: 17 additions & 0 deletions
@@ -4,6 +4,7 @@ const randomText = `dhsdansdnkla ndjnsla ndnalks`;
 const sql1 = `ALTER VIEW`;
 const sql2 = `SELECT * FROM `;
 const sql3 = `DROP SCHEMA aaa aaa`;
+const sql4 = `SELECT name, age FROM person ORDER BY length( `;
 
 describe('SparkSQL validate invalid sql and test msg', () => {
     const spark = new SparkSQL();

@@ -38,6 +39,14 @@ describe('SparkSQL validate invalid sql and test msg', () => {
         );
     });
 
+    test('validate unComplete sql4', () => {
+        const errors = spark.validate(sql4);
+        expect(errors.length).toBe(1);
+        expect(errors[0].message).toBe(
+            `Statement is incomplete, expecting an existing function or an existing column or a keyword`
+        );
+    });
+
     test('validate random text cn', () => {
         spark.locale = 'zh_CN';
         const errors = spark.validate(randomText);

@@ -64,4 +73,12 @@ describe('SparkSQL validate invalid sql and test msg', () => {
         expect(errors.length).toBe(1);
         expect(errors[0].message).toBe(`'aaa' 在此位置无效,期望一个存在的namespace或者一个关键字`);
     });
+
+    test('validate unComplete sql4', () => {
+        const errors = spark.validate(sql4);
+        expect(errors.length).toBe(1);
+        expect(errors[0].message).toBe(
+            `语句不完整,期望一个存在的function或者一个存在的column或者一个关键字`
+        );
+    });
 });

test/parser/spark/suggestion/fixtures/syntaxSuggestion.sql

Lines changed: 11 additions & 1 deletion
@@ -64,4 +64,14 @@
 
 OPTIMIZE db.tb ZORDER BY ;
 
-OPTIMIZE db.tb ZORDER BY name, i;
+OPTIMIZE db.tb ZORDER BY name, i;
+
+SELECT name, age FROM person ORDER BY length(age) LIMIT length(name);
+
+SELECT id, CASE id WHEN 100 then 'bigger' WHEN id > 300 THEN '300' ELSE 'small' END FROM person;
+
+INSERT OVERWRITE students PARTITION (student_id = 222222) SELECT name, address FROM persons WHERE name = "Dora Williams";
+
+SELECT id, name, employee.deptno, deptname FROM employee FULL JOIN department ON employee.deptno = department.deptno;
+
+SELECT city, sum(quantity) AS sum FROM dealer GROUP BY sum(city) HAVING max(quantity) > 15;
