
Commit f82939c

feat(spark): add spark expression column

1 parent 87d58d7

File tree: 11 files changed, +5232 additions, -4803 deletions
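
In short: the Spark grammar gains a dedicated `columnNamePath` rule (a `multipartIdentifier`) that replaces the bare `identifier` alternative in `primaryExpression`, and `queryOrganization` is factored into reusable `limitClause`, `orderOrSortByClause`, and `clusterOrDistributeBy` sub-rules. The collected-rule set, error listener, and completion mapping under `src/parser/spark` are wired to the new rule, so column references inside expressions (WHERE, HAVING, JOIN conditions, ORDER BY, and so on) are reported and suggested as columns. The files under `src/lib/spark` are regenerated ANTLR artifacts; new tests and SQL fixtures cover the behavior.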

src/grammar/hive/HiveSqlParser.g4

Lines changed: 2 additions & 2 deletions
(A whitespace-only change to two blank lines in the license header.)

```diff
@@ -4,9 +4,9 @@
 ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License. You may obtain a copy of the
 License at
-
+
 http://www.apache.org/licenses/LICENSE-2.0
-
+
 Unless required by applicable law or agreed to in writing, software distributed under the License
 is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 implied. See the License for the specific language governing permissions and limitations under the
```

src/grammar/spark/SparkSqlParser.g4

Lines changed: 24 additions & 12 deletions
```diff
@@ -410,6 +410,10 @@ columnName
     | {this.shouldMatchEmpty()}?
     ;
 
+columnNamePath
+    : multipartIdentifier
+    ;
+
 columnNameSeq
     : columnName (COMMA columnName)*
     ;
@@ -424,11 +428,23 @@ identifierReference
     ;
 
 queryOrganization
-    : (KW_ORDER KW_BY order+=sortItem (COMMA order+=sortItem)*)? (
-        KW_CLUSTER KW_BY clusterBy+=expression (COMMA clusterBy+=expression)*
-    )? (KW_DISTRIBUTE KW_BY distributeBy+=expression (COMMA distributeBy+=expression)*)? (
-        KW_SORT KW_BY sort+=sortItem (COMMA sort+=sortItem)*
-    )? windowClause? (KW_LIMIT (KW_ALL | limit=expression))? (KW_OFFSET offset=expression)?
+    : (KW_ORDER KW_BY orderOrSortByClause)? (KW_CLUSTER KW_BY clusterOrDistributeBy)? (
+        KW_DISTRIBUTE KW_BY clusterOrDistributeBy
+    )? (KW_SORT KW_BY orderOrSortByClause)? windowClause? limitClause? (
+        KW_OFFSET offset=expression
+    )?
+    ;
+
+limitClause
+    : KW_LIMIT (KW_ALL | limit=expression)
+    ;
+
+orderOrSortByClause
+    : sortItem (COMMA sortItem)*
+    ;
+
+clusterOrDistributeBy
+    : expression (COMMA expression)*
     ;
 
 queryTerm
@@ -722,11 +738,7 @@ tableArgumentPartitioning
         | partition+=expression
         )
     )
-    ) (
-        (KW_ORDER | KW_SORT) KW_BY (
-            ((LEFT_PAREN sortItem (COMMA sortItem)* RIGHT_PAREN) | sortItem)
-        )
-    )?
+    ) ((KW_ORDER | KW_SORT) KW_BY ( ((LEFT_PAREN orderOrSortByClause RIGHT_PAREN) | sortItem)))?
     ;
 
 functionTableNamedArgumentExpression
@@ -906,7 +918,7 @@ primaryExpression
     | identifier ARROW expression
     | LEFT_PAREN identifier (COMMA identifier)+ RIGHT_PAREN ARROW expression
     | value=primaryExpression LEFT_BRACKET index=valueExpression RIGHT_BRACKET
-    | identifier
+    | columnNamePath
     | base=primaryExpression DOT fieldName=identifier
     | LEFT_PAREN expression RIGHT_PAREN
     | KW_EXTRACT LEFT_PAREN field=identifier KW_FROM source=valueExpression RIGHT_PAREN
@@ -1161,7 +1173,7 @@ windowSpec
         (KW_PARTITION | KW_DISTRIBUTE) KW_BY partition+=expression (
             COMMA partition+=expression
         )*
-    )? ((KW_ORDER | KW_SORT) KW_BY sortItem (COMMA sortItem)*)?
+    )? ((KW_ORDER | KW_SORT) KW_BY orderOrSortByClause)?
     ) windowFrame? RIGHT_PAREN
     ;
```
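
Because `queryOrganization` now delegates to named sub-rules, the surface syntax it accepts should be unchanged. A minimal smoke test of that claim, not part of this commit, assuming only the public `SparkSQL` entry point and its `validate` method (both exercised in the tests below):

```typescript
import { SparkSQL } from "dt-sql-parser";

const spark = new SparkSQL();

// One query per refactored sub-rule; validate() returns a list of parse
// errors, so an empty list means the query still parses after the refactor.
const queries = [
    "SELECT * FROM person ORDER BY age DESC;", // orderOrSortByClause
    "SELECT * FROM person SORT BY age;", // orderOrSortByClause
    "SELECT * FROM person CLUSTER BY name;", // clusterOrDistributeBy
    "SELECT * FROM person DISTRIBUTE BY name;", // clusterOrDistributeBy
    "SELECT * FROM person LIMIT ALL;", // limitClause
    "SELECT * FROM person LIMIT 10 OFFSET 5;", // limitClause + OFFSET
];

for (const sql of queries) {
    console.log(sql, spark.validate(sql).length === 0 ? "ok" : "error");
}
```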

src/lib/spark/SparkSqlParser.interp

Lines changed: 5 additions & 1 deletion
Large diffs are not rendered by default.

src/lib/spark/SparkSqlParser.ts

Lines changed: 4940 additions & 4786 deletions
Large diffs are not rendered by default.

src/lib/spark/SparkSqlParserListener.ts

Lines changed: 44 additions & 0 deletions
```diff
@@ -139,10 +139,14 @@ import { TableNameContext } from "./SparkSqlParser.js";
 import { ViewNameCreateContext } from "./SparkSqlParser.js";
 import { ViewNameContext } from "./SparkSqlParser.js";
 import { ColumnNameContext } from "./SparkSqlParser.js";
+import { ColumnNamePathContext } from "./SparkSqlParser.js";
 import { ColumnNameSeqContext } from "./SparkSqlParser.js";
 import { ColumnNameCreateContext } from "./SparkSqlParser.js";
 import { IdentifierReferenceContext } from "./SparkSqlParser.js";
 import { QueryOrganizationContext } from "./SparkSqlParser.js";
+import { LimitClauseContext } from "./SparkSqlParser.js";
+import { OrderOrSortByClauseContext } from "./SparkSqlParser.js";
+import { ClusterOrDistributeByContext } from "./SparkSqlParser.js";
 import { QueryTermContext } from "./SparkSqlParser.js";
 import { QueryPrimaryContext } from "./SparkSqlParser.js";
 import { SortItemContext } from "./SparkSqlParser.js";
@@ -1778,6 +1782,16 @@ export class SparkSqlParserListener implements ParseTreeListener {
      * @param ctx the parse tree
      */
     exitColumnName?: (ctx: ColumnNameContext) => void;
+    /**
+     * Enter a parse tree produced by `SparkSqlParser.columnNamePath`.
+     * @param ctx the parse tree
+     */
+    enterColumnNamePath?: (ctx: ColumnNamePathContext) => void;
+    /**
+     * Exit a parse tree produced by `SparkSqlParser.columnNamePath`.
+     * @param ctx the parse tree
+     */
+    exitColumnNamePath?: (ctx: ColumnNamePathContext) => void;
     /**
      * Enter a parse tree produced by `SparkSqlParser.columnNameSeq`.
      * @param ctx the parse tree
@@ -1818,6 +1832,36 @@ export class SparkSqlParserListener implements ParseTreeListener {
      * @param ctx the parse tree
      */
     exitQueryOrganization?: (ctx: QueryOrganizationContext) => void;
+    /**
+     * Enter a parse tree produced by `SparkSqlParser.limitClause`.
+     * @param ctx the parse tree
+     */
+    enterLimitClause?: (ctx: LimitClauseContext) => void;
+    /**
+     * Exit a parse tree produced by `SparkSqlParser.limitClause`.
+     * @param ctx the parse tree
+     */
+    exitLimitClause?: (ctx: LimitClauseContext) => void;
+    /**
+     * Enter a parse tree produced by `SparkSqlParser.orderOrSortByClause`.
+     * @param ctx the parse tree
+     */
+    enterOrderOrSortByClause?: (ctx: OrderOrSortByClauseContext) => void;
+    /**
+     * Exit a parse tree produced by `SparkSqlParser.orderOrSortByClause`.
+     * @param ctx the parse tree
+     */
+    exitOrderOrSortByClause?: (ctx: OrderOrSortByClauseContext) => void;
+    /**
+     * Enter a parse tree produced by `SparkSqlParser.clusterOrDistributeBy`.
+     * @param ctx the parse tree
+     */
+    enterClusterOrDistributeBy?: (ctx: ClusterOrDistributeByContext) => void;
+    /**
+     * Exit a parse tree produced by `SparkSqlParser.clusterOrDistributeBy`.
+     * @param ctx the parse tree
+     */
+    exitClusterOrDistributeBy?: (ctx: ClusterOrDistributeByContext) => void;
     /**
      * Enter a parse tree produced by `SparkSqlParser.queryTerm`.
      * @param ctx the parse tree
```
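
A hedged sketch of consuming the new hook: collecting every column path in a statement. It assumes the generated listener class and context types are re-exported from the package root (as other dialects' are) and uses dt-sql-parser's parse/listen workflow; only `enterColumnNamePath` is new in this commit.

```typescript
import { SparkSQL, SparkSqlParserListener, type ColumnNamePathContext } from "dt-sql-parser";

// Collects the text of every columnNamePath the tree walker enters.
class ColumnPathCollector extends SparkSqlParserListener {
    readonly columns: string[] = [];
    // The generated hooks are optional properties, so assign rather than override.
    enterColumnNamePath = (ctx: ColumnNamePathContext) => {
        this.columns.push(ctx.getText());
    };
}

const spark = new SparkSQL();
const parseTree = spark.parse("SELECT id, employee.deptno FROM employee WHERE age > 10;");
const collector = new ColumnPathCollector();
spark.listen(collector, parseTree);
console.log(collector.columns); // e.g. [ "id", "employee.deptno", "age" ]
```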

src/lib/spark/SparkSqlParserVisitor.ts

Lines changed: 28 additions & 0 deletions
```diff
@@ -139,10 +139,14 @@ import { TableNameContext } from "./SparkSqlParser.js";
 import { ViewNameCreateContext } from "./SparkSqlParser.js";
 import { ViewNameContext } from "./SparkSqlParser.js";
 import { ColumnNameContext } from "./SparkSqlParser.js";
+import { ColumnNamePathContext } from "./SparkSqlParser.js";
 import { ColumnNameSeqContext } from "./SparkSqlParser.js";
 import { ColumnNameCreateContext } from "./SparkSqlParser.js";
 import { IdentifierReferenceContext } from "./SparkSqlParser.js";
 import { QueryOrganizationContext } from "./SparkSqlParser.js";
+import { LimitClauseContext } from "./SparkSqlParser.js";
+import { OrderOrSortByClauseContext } from "./SparkSqlParser.js";
+import { ClusterOrDistributeByContext } from "./SparkSqlParser.js";
 import { QueryTermContext } from "./SparkSqlParser.js";
 import { QueryPrimaryContext } from "./SparkSqlParser.js";
 import { SortItemContext } from "./SparkSqlParser.js";
@@ -1161,6 +1165,12 @@ export class SparkSqlParserVisitor<Result> extends AbstractParseTreeVisitor<Result> {
      * @return the visitor result
      */
     visitColumnName?: (ctx: ColumnNameContext) => Result;
+    /**
+     * Visit a parse tree produced by `SparkSqlParser.columnNamePath`.
+     * @param ctx the parse tree
+     * @return the visitor result
+     */
+    visitColumnNamePath?: (ctx: ColumnNamePathContext) => Result;
     /**
      * Visit a parse tree produced by `SparkSqlParser.columnNameSeq`.
      * @param ctx the parse tree
@@ -1185,6 +1195,24 @@ export class SparkSqlParserVisitor<Result> extends AbstractParseTreeVisitor<Result> {
      * @return the visitor result
      */
     visitQueryOrganization?: (ctx: QueryOrganizationContext) => Result;
+    /**
+     * Visit a parse tree produced by `SparkSqlParser.limitClause`.
+     * @param ctx the parse tree
+     * @return the visitor result
+     */
+    visitLimitClause?: (ctx: LimitClauseContext) => Result;
+    /**
+     * Visit a parse tree produced by `SparkSqlParser.orderOrSortByClause`.
+     * @param ctx the parse tree
+     * @return the visitor result
+     */
+    visitOrderOrSortByClause?: (ctx: OrderOrSortByClauseContext) => Result;
+    /**
+     * Visit a parse tree produced by `SparkSqlParser.clusterOrDistributeBy`.
+     * @param ctx the parse tree
+     * @return the visitor result
+     */
+    visitClusterOrDistributeBy?: (ctx: ClusterOrDistributeByContext) => Result;
     /**
      * Visit a parse tree produced by `SparkSqlParser.queryTerm`.
      * @param ctx the parse tree
```
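
The visitor side, under the same re-export assumption: since the generated visitor extends `AbstractParseTreeVisitor`, a boolean `Result` plus a single `visitLimitClause` definition is enough to answer whether a statement contains a LIMIT.

```typescript
import { SparkSQL, SparkSqlParserVisitor } from "dt-sql-parser";

class HasLimitVisitor extends SparkSqlParserVisitor<boolean> {
    protected defaultResult(): boolean {
        return false;
    }
    // OR child results together so one LIMIT anywhere in the subtree wins.
    protected aggregateResult(aggregate: boolean, next: boolean): boolean {
        return aggregate || next;
    }
    visitLimitClause = () => true;
}

const spark = new SparkSQL();
const tree = spark.parse("SELECT * FROM person ORDER BY age LIMIT 10;");
console.log(new HasLimitVisitor().visit(tree)); // true
```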

src/parser/spark/index.ts

Lines changed: 18 additions & 0 deletions
```diff
@@ -39,6 +39,7 @@ export class SparkSQL extends BasicSQL<SparkSqlLexer, ProgramContext, SparkSqlParser> {
         SparkSqlParser.RULE_functionName,
         SparkSqlParser.RULE_functionNameCreate,
         SparkSqlParser.RULE_columnName,
+        SparkSqlParser.RULE_columnNamePath,
         SparkSqlParser.RULE_columnNameCreate,
     ]);
 
@@ -122,6 +123,23 @@ export class SparkSQL extends BasicSQL<SparkSqlLexer, ProgramContext, SparkSqlParser> {
                 syntaxContextType = EntityContextType.COLUMN_CREATE;
                 break;
             }
+            case SparkSqlParser.RULE_columnNamePath: {
+                if (
+                    candidateRule.ruleList.includes(SparkSqlParser.RULE_whenClause) ||
+                    candidateRule.ruleList.includes(SparkSqlParser.RULE_whereClause) ||
+                    candidateRule.ruleList.includes(SparkSqlParser.RULE_joinRelation) ||
+                    candidateRule.ruleList.includes(SparkSqlParser.RULE_orderOrSortByClause) ||
+                    candidateRule.ruleList.includes(SparkSqlParser.RULE_groupByClause) ||
+                    candidateRule.ruleList.includes(SparkSqlParser.RULE_aggregationClause) ||
+                    candidateRule.ruleList.includes(SparkSqlParser.RULE_havingClause) ||
+                    candidateRule.ruleList.includes(SparkSqlParser.RULE_windowClause) ||
+                    candidateRule.ruleList.includes(SparkSqlParser.RULE_selectClause) ||
+                    candidateRule.ruleList.includes(SparkSqlParser.RULE_limitClause) ||
+                    candidateRule.ruleList.includes(SparkSqlParser.RULE_clusterOrDistributeBy)
+                ) {
+                    syntaxContextType = EntityContextType.COLUMN;
+                }
+            }
             default:
                 break;
         }
```
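
What the new `RULE_columnNamePath` branch buys in practice: when the caret sits inside one of the listed clauses, completion reports a COLUMN entity context. A sketch against the existing `getSuggestionAtCaretPosition` API; the exact `Suggestions` shape (`syntax` entries carrying a `syntaxContextType`) is assumed from the library's other dialects:

```typescript
import { SparkSQL, EntityContextType } from "dt-sql-parser";

const spark = new SparkSQL();
const sql = "SELECT id, name FROM employee WHERE ";

// Caret immediately after WHERE (line/column are 1-based).
const suggestions = spark.getSuggestionAtCaretPosition(sql, {
    lineNumber: 1,
    column: sql.length + 1,
});

const suggestsColumn = suggestions?.syntax.some(
    (item) => item.syntaxContextType === EntityContextType.COLUMN
);
console.log(suggestsColumn); // expected: true
```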

src/parser/spark/sparkErrorListener.ts

Lines changed: 3 additions & 1 deletion
```diff
@@ -14,6 +14,7 @@ export class SparkErrorListener extends ParseErrorListener {
         [SparkSqlParser.RULE_functionName, 'function'],
         [SparkSqlParser.RULE_functionNameCreate, 'function'],
         [SparkSqlParser.RULE_columnName, 'column'],
+        [SparkSqlParser.RULE_columnNamePath, 'column'],
         [SparkSqlParser.RULE_columnNameCreate, 'column'],
     ]);
 
@@ -56,7 +57,8 @@ export class SparkErrorListener extends ParseErrorListener {
             case SparkSqlParser.RULE_tableName:
             case SparkSqlParser.RULE_viewName:
             case SparkSqlParser.RULE_functionName:
-            case SparkSqlParser.RULE_columnName: {
+            case SparkSqlParser.RULE_columnName:
+            case SparkSqlParser.RULE_columnNamePath: {
                 result.push(`{existing}${name}`);
                 break;
             }
```
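
The user-visible payoff of mapping `RULE_columnNamePath` to `'column'`: an incomplete expression now mentions columns in its error text. This mirrors the `sql4` test added below:

```typescript
import { SparkSQL } from "dt-sql-parser";

const spark = new SparkSQL();
const errors = spark.validate("SELECT name, age FROM person ORDER BY length( ");
console.log(errors[0]?.message);
// "Statement is incomplete, expecting an existing function or an existing column or a keyword"
```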

test/parser/spark/errorListener.test.ts

Lines changed: 17 additions & 0 deletions
```diff
@@ -4,6 +4,7 @@ const randomText = `dhsdansdnkla ndjnsla ndnalks`;
 const sql1 = `ALTER VIEW`;
 const sql2 = `SELECT * FROM `;
 const sql3 = `DROP SCHEMA aaa aaa`;
+const sql4 = `SELECT name, age FROM person ORDER BY length( `;
 
 describe('SparkSQL validate invalid sql and test msg', () => {
     const spark = new SparkSQL();
@@ -38,6 +39,14 @@ describe('SparkSQL validate invalid sql and test msg', () => {
         );
     });
 
+    test('validate unComplete sql4', () => {
+        const errors = spark.validate(sql4);
+        expect(errors.length).toBe(1);
+        expect(errors[0].message).toBe(
+            `Statement is incomplete, expecting an existing function or an existing column or a keyword`
+        );
+    });
+
     test('validate random text cn', () => {
         spark.locale = 'zh_CN';
         const errors = spark.validate(randomText);
@@ -64,4 +73,12 @@ describe('SparkSQL validate invalid sql and test msg', () => {
         expect(errors.length).toBe(1);
         expect(errors[0].message).toBe(`'aaa' 在此位置无效,期望一个存在的namespace或者一个关键字`);
     });
+
+    test('validate unComplete sql4 (zh_CN)', () => {
+        const errors = spark.validate(sql4);
+        expect(errors.length).toBe(1);
+        expect(errors[0].message).toBe(
+            `语句不完整,期望一个存在的function或者一个存在的column或者一个关键字`
+        );
+    });
 });
```

test/parser/spark/suggestion/fixtures/syntaxSuggestion.sql

Lines changed: 11 additions & 1 deletion
```diff
@@ -64,4 +64,14 @@ OPTIMIZE db.tb;
 
 OPTIMIZE db.tb ZORDER BY ;
 
-OPTIMIZE db.tb ZORDER BY name, i;
+OPTIMIZE db.tb ZORDER BY name, i;
+
+SELECT name, age FROM person ORDER BY length(age) LIMIT length(name);
+
+SELECT id, CASE id WHEN 100 then 'bigger' WHEN id > 300 THEN '300' ELSE 'small' END FROM person;
+
+INSERT OVERWRITE students PARTITION (student_id = 222222) SELECT name, address FROM persons WHERE name = "Dora Williams";
+
+SELECT id, name, employee.deptno, deptname FROM employee FULL JOIN department ON employee.deptno = department.deptno;
+
+SELECT city, sum(quantity) AS sum FROM dealer GROUP BY sum(city) HAVING max(quantity) > 15;
```
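
Each new fixture exercises one of the clause branches added in `src/parser/spark/index.ts`: ORDER BY and LIMIT with function calls, CASE ... WHEN, WHERE inside an INSERT ... SELECT, JOIN ... ON, and GROUP BY/HAVING with aggregates. The companion caret-position assertions presumably live in the suggestion test file, the eleventh changed file, which is not rendered above.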
