Skip to content

Commit 10aca6b

Browse files
committed
feat: introduce SubstraitCreateStatementParser
Standalone processor for SQL CREATE statements
1 parent 4fe8068 commit 10aca6b

File tree

8 files changed

+191
-97
lines changed

8 files changed

+191
-97
lines changed

isthmus/src/main/java/io/substrait/isthmus/SqlConverterBase.java

Lines changed: 3 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@
22

33
import io.substrait.extension.SimpleExtension;
44
import io.substrait.isthmus.calcite.SubstraitTable;
5-
import io.substrait.isthmus.sql.SubstraitSqlValidator;
6-
import java.util.ArrayList;
5+
import io.substrait.isthmus.sql.SubstraitCreateStatementParser;
76
import java.util.List;
87
import org.apache.calcite.config.CalciteConnectionConfig;
98
import org.apache.calcite.config.CalciteConnectionProperty;
@@ -17,21 +16,13 @@
1716
import org.apache.calcite.rel.metadata.DefaultRelMetadataProvider;
1817
import org.apache.calcite.rel.metadata.ProxyingMetadataHandlerProvider;
1918
import org.apache.calcite.rel.metadata.RelMetadataQuery;
20-
import org.apache.calcite.rel.type.RelDataType;
2119
import org.apache.calcite.rel.type.RelDataTypeFactory;
2220
import org.apache.calcite.rex.RexBuilder;
2321
import org.apache.calcite.schema.Schema;
24-
import org.apache.calcite.sql.SqlNode;
25-
import org.apache.calcite.sql.SqlNodeList;
26-
import org.apache.calcite.sql.ddl.SqlColumnDeclaration;
27-
import org.apache.calcite.sql.ddl.SqlCreateTable;
28-
import org.apache.calcite.sql.ddl.SqlKeyConstraint;
2922
import org.apache.calcite.sql.parser.SqlParseException;
3023
import org.apache.calcite.sql.parser.SqlParser;
31-
import org.apache.calcite.sql.parser.SqlParserPos;
3224
import org.apache.calcite.sql.parser.ddl.SqlDdlParserImpl;
3325
import org.apache.calcite.sql.validate.SqlConformanceEnum;
34-
import org.apache.calcite.sql.validate.SqlValidator;
3526
import org.apache.calcite.sql2rel.SqlToRelConverter;
3627

3728
class SqlConverterBase {
@@ -73,10 +64,10 @@ CalciteCatalogReader registerCreateTables(List<String> tables) throws SqlParseEx
7364
CalciteSchema rootSchema = CalciteSchema.createRootSchema(false);
7465
CalciteCatalogReader catalogReader =
7566
new CalciteCatalogReader(rootSchema, List.of(), factory, config);
76-
SqlValidator validator = new SubstraitSqlValidator(catalogReader);
7767
if (tables != null) {
7868
for (String tableDef : tables) {
79-
List<SubstraitTable> tList = parseCreateTable(factory, validator, tableDef);
69+
List<SubstraitTable> tList =
70+
SubstraitCreateStatementParser.processCreateStatements(tableDef);
8071
for (SubstraitTable t : tList) {
8172
rootSchema.add(t.getName(), t);
8273
}
@@ -93,63 +84,4 @@ CalciteCatalogReader registerSchema(String name, Schema schema) {
9384
}
9485
return new CalciteCatalogReader(rootSchema, List.of(), factory, config);
9586
}
96-
97-
protected List<SubstraitTable> parseCreateTable(
98-
RelDataTypeFactory factory, SqlValidator validator, String sql) throws SqlParseException {
99-
SqlParser parser = SqlParser.create(sql, parserConfig);
100-
List<SubstraitTable> tableList = new ArrayList<>();
101-
102-
SqlNodeList nodeList = parser.parseStmtList();
103-
for (SqlNode parsed : nodeList) {
104-
if (!(parsed instanceof SqlCreateTable)) {
105-
throw fail("Not a valid CREATE TABLE statement.");
106-
}
107-
108-
SqlCreateTable create = (SqlCreateTable) parsed;
109-
if (create.name.names.size() > 1) {
110-
throw fail("Only simple table names are allowed.", create.name.getParserPosition());
111-
}
112-
113-
if (create.query != null) {
114-
throw fail("CTAS not supported.", create.name.getParserPosition());
115-
}
116-
117-
List<String> names = new ArrayList<>();
118-
List<RelDataType> columnTypes = new ArrayList<>();
119-
120-
for (SqlNode node : create.columnList) {
121-
if (!(node instanceof SqlColumnDeclaration)) {
122-
if (node instanceof SqlKeyConstraint) {
123-
// key constraints declarations, like primary key declaration, are valid and should not
124-
// result in parse exceptions. Ignore the constraint declaration.
125-
continue;
126-
}
127-
128-
throw fail("Unexpected column list construction.", node.getParserPosition());
129-
}
130-
131-
SqlColumnDeclaration col = (SqlColumnDeclaration) node;
132-
if (col.name.names.size() != 1) {
133-
throw fail("Expected simple column names.", col.name.getParserPosition());
134-
}
135-
136-
names.add(col.name.names.get(0));
137-
columnTypes.add(col.dataType.deriveType(validator));
138-
}
139-
140-
tableList.add(
141-
new SubstraitTable(
142-
create.name.names.get(0), factory.createStructType(columnTypes, names)));
143-
}
144-
145-
return tableList;
146-
}
147-
148-
protected static SqlParseException fail(String text, SqlParserPos pos) {
149-
return new SqlParseException(text, pos, null, null, new RuntimeException("fake lineage"));
150-
}
151-
152-
protected static SqlParseException fail(String text) {
153-
return fail(text, SqlParserPos.ZERO);
154-
}
15587
}

isthmus/src/main/java/io/substrait/isthmus/SqlExpressionToSubstrait.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import io.substrait.isthmus.calcite.SubstraitTable;
99
import io.substrait.isthmus.expression.RexExpressionConverter;
1010
import io.substrait.isthmus.expression.ScalarFunctionConverter;
11+
import io.substrait.isthmus.sql.SubstraitCreateStatementParser;
1112
import io.substrait.isthmus.sql.SubstraitSqlValidator;
1213
import io.substrait.proto.ExtendedExpression;
1314
import io.substrait.type.NamedStruct;
@@ -144,10 +145,10 @@ private Result registerCreateTablesForExtendedExpression(List<String> tables)
144145
CalciteSchema rootSchema = CalciteSchema.createRootSchema(false);
145146
CalciteCatalogReader catalogReader =
146147
new CalciteCatalogReader(rootSchema, List.of(), factory, config);
147-
SqlValidator validator = new SubstraitSqlValidator(catalogReader);
148148
if (tables != null) {
149149
for (String tableDef : tables) {
150-
List<SubstraitTable> tList = parseCreateTable(factory, validator, tableDef);
150+
List<SubstraitTable> tList =
151+
SubstraitCreateStatementParser.processCreateStatements(tableDef);
151152
for (SubstraitTable t : tList) {
152153
rootSchema.add(t.getName(), t);
153154
for (RelDataTypeField field : t.getRowType(factory).getFieldList()) {
@@ -169,6 +170,7 @@ private Result registerCreateTablesForExtendedExpression(List<String> tables)
169170
}
170171
}
171172
}
173+
SqlValidator validator = new SubstraitSqlValidator(catalogReader);
172174
return new Result(validator, catalogReader, nameToTypeMap, nameToNodeMap);
173175
}
174176

isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ public Plan execute(String sql, Prepare.CatalogReader catalogReader) throws SqlP
4949
}
5050

5151
// Package protected for testing
52-
List<RelRoot> sqlToRelNode(String sql, List<String> tables) throws SqlParseException {
53-
Prepare.CatalogReader catalogReader = registerCreateTables(tables);
52+
List<RelRoot> sqlToRelNode(String sql, Prepare.CatalogReader catalogReader)
53+
throws SqlParseException {
5454
SqlValidator validator = new SubstraitSqlValidator(catalogReader);
5555
return sqlToRelNode(sql, validator, catalogReader);
5656
}
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
package io.substrait.isthmus.sql;
2+
3+
import io.substrait.isthmus.SubstraitTypeSystem;
4+
import io.substrait.isthmus.calcite.SubstraitTable;
5+
import java.util.ArrayList;
6+
import java.util.Collections;
7+
import java.util.List;
8+
import org.apache.calcite.avatica.util.Casing;
9+
import org.apache.calcite.config.CalciteConnectionConfig;
10+
import org.apache.calcite.config.CalciteConnectionProperty;
11+
import org.apache.calcite.jdbc.CalciteSchema;
12+
import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
13+
import org.apache.calcite.prepare.CalciteCatalogReader;
14+
import org.apache.calcite.rel.type.RelDataType;
15+
import org.apache.calcite.rel.type.RelDataTypeFactory;
16+
import org.apache.calcite.sql.SqlNode;
17+
import org.apache.calcite.sql.SqlNodeList;
18+
import org.apache.calcite.sql.ddl.SqlColumnDeclaration;
19+
import org.apache.calcite.sql.ddl.SqlCreateTable;
20+
import org.apache.calcite.sql.ddl.SqlKeyConstraint;
21+
import org.apache.calcite.sql.parser.SqlParseException;
22+
import org.apache.calcite.sql.parser.SqlParser;
23+
import org.apache.calcite.sql.parser.SqlParserPos;
24+
import org.apache.calcite.sql.parser.ddl.SqlDdlParserImpl;
25+
import org.apache.calcite.sql.validate.SqlConformanceEnum;
26+
import org.apache.calcite.sql.validate.SqlValidator;
27+
28+
/** Utility class for parsing CREATE statements into a {@link CalciteSchema} */
29+
public class SubstraitCreateStatementParser {
30+
31+
private static final RelDataTypeFactory TYPE_FACTORY =
32+
new JavaTypeFactoryImpl(SubstraitTypeSystem.TYPE_SYSTEM);
33+
34+
private static final CalciteConnectionConfig CONNECTION_CONFIG =
35+
CalciteConnectionConfig.DEFAULT.set(
36+
CalciteConnectionProperty.CASE_SENSITIVE, Boolean.FALSE.toString());
37+
38+
private static final SqlParser.Config PARSER_CONFIG =
39+
SqlParser.config()
40+
// To process CREATE statements we must use the SqlDdlParserImpl, as the default
41+
// parser does not handle them
42+
.withParserFactory(SqlDdlParserImpl.FACTORY)
43+
.withUnquotedCasing(Casing.TO_UPPER)
44+
.withConformance(SqlConformanceEnum.LENIENT);
45+
46+
private static final CalciteCatalogReader EMPTY_CATALOG =
47+
new CalciteCatalogReader(
48+
CalciteSchema.createRootSchema(false), List.of(), TYPE_FACTORY, CONNECTION_CONFIG);
49+
50+
// A validator is needed to convert the types in column declarations to Calcite types
51+
private static final SqlValidator VALIDATOR =
52+
new SubstraitSqlValidator(
53+
// as we are validating CREATE statements, an empty catalog suffices
54+
EMPTY_CATALOG);
55+
56+
/**
57+
* @param createStatements a SQL string containing only CREATE statements
58+
* @return a list of {@link SubstraitTable}s generated from the CREATE statements
59+
* @throws SqlParseException
60+
*/
61+
public static List<SubstraitTable> processCreateStatements(String createStatements)
62+
throws SqlParseException {
63+
SqlParser parser = SqlParser.create(createStatements, PARSER_CONFIG);
64+
List<SubstraitTable> tableList = new ArrayList<>();
65+
66+
SqlNodeList sqlNode = parser.parseStmtList();
67+
for (SqlNode parsed : sqlNode) {
68+
if (!(parsed instanceof SqlCreateTable create)) {
69+
throw fail("Not a valid CREATE TABLE statement.");
70+
}
71+
72+
if (create.name.names.size() > 1) {
73+
throw fail("Only simple table names are allowed.", create.name.getParserPosition());
74+
}
75+
76+
if (create.query != null) {
77+
throw fail("CTAS not supported.", create.name.getParserPosition());
78+
}
79+
80+
List<String> names = new ArrayList<>();
81+
List<RelDataType> columnTypes = new ArrayList<>();
82+
83+
for (SqlNode node : create.columnList) {
84+
if (!(node instanceof SqlColumnDeclaration col)) {
85+
if (node instanceof SqlKeyConstraint) {
86+
// key constraints declarations, like primary key declaration, are valid and should not
87+
// result in parse exceptions. Ignore the constraint declaration.
88+
continue;
89+
}
90+
91+
throw fail("Unexpected column list construction.", node.getParserPosition());
92+
}
93+
94+
if (col.name.names.size() != 1) {
95+
throw fail("Expected simple column names.", col.name.getParserPosition());
96+
}
97+
98+
names.add(col.name.names.get(0));
99+
columnTypes.add(col.dataType.deriveType(VALIDATOR));
100+
}
101+
102+
tableList.add(
103+
new SubstraitTable(
104+
create.name.names.get(0), TYPE_FACTORY.createStructType(columnTypes, names)));
105+
}
106+
107+
return tableList;
108+
}
109+
110+
/**
111+
* @param createStatements a SQL string containing only CREATE statements
112+
* @return a {@link CalciteCatalogReader} generated from the CREATE statements
113+
* @throws SqlParseException
114+
*/
115+
public static CalciteCatalogReader processCreateStatementsToCatalog(String createStatements)
116+
throws SqlParseException {
117+
List<SubstraitTable> tables = processCreateStatements(createStatements);
118+
CalciteSchema rootSchema = CalciteSchema.createRootSchema(false);
119+
for (SubstraitTable table : tables) {
120+
rootSchema.add(table.getName(), table);
121+
}
122+
List<String> defaultSchema = Collections.emptyList();
123+
return new CalciteCatalogReader(rootSchema, defaultSchema, TYPE_FACTORY, CONNECTION_CONFIG);
124+
}
125+
126+
private static SqlParseException fail(String text, SqlParserPos pos) {
127+
return new SqlParseException(text, pos, null, null, new RuntimeException("fake lineage"));
128+
}
129+
130+
private static SqlParseException fail(String text) {
131+
return fail(text, SqlParserPos.ZERO);
132+
}
133+
}

isthmus/src/test/java/io/substrait/isthmus/ArithmeticFunctionTest.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
11
package io.substrait.isthmus;
22

3-
import java.util.List;
43
import org.junit.jupiter.params.ParameterizedTest;
54
import org.junit.jupiter.params.provider.ValueSource;
65

76
public class ArithmeticFunctionTest extends PlanTestBase {
87

9-
static List<String> CREATES =
10-
List.of(
11-
"CREATE TABLE numbers (i8 TINYINT, i16 SMALLINT, i32 INT, i64 BIGINT, fp32 REAL, fp64 DOUBLE)");
8+
static String CREATES =
9+
"CREATE TABLE numbers (i8 TINYINT, i16 SMALLINT, i32 INT, i64 BIGINT, fp32 REAL, fp64 DOUBLE)";
1210

1311
@ParameterizedTest
1412
@ValueSource(strings = {"i8", "i16", "i32", "i64", "fp32", "fp64"})

isthmus/src/test/java/io/substrait/isthmus/NameRoundtripTest.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,20 @@
33
import static io.substrait.isthmus.SqlConverterBase.EXTENSION_COLLECTION;
44
import static org.junit.jupiter.api.Assertions.assertEquals;
55

6+
import io.substrait.isthmus.sql.SubstraitCreateStatementParser;
67
import io.substrait.plan.Plan;
78
import io.substrait.relation.NamedScan;
89
import java.util.List;
10+
import org.apache.calcite.prepare.CalciteCatalogReader;
911
import org.junit.jupiter.api.Test;
1012

1113
public class NameRoundtripTest extends PlanTestBase {
1214

1315
@Test
1416
void preserveNamesFromSql() throws Exception {
15-
List<String> creates = List.of("CREATE TABLE foo(a BIGINT, b BIGINT)");
17+
String createStatement = "CREATE TABLE foo(a BIGINT, b BIGINT)";
18+
CalciteCatalogReader catalogReader =
19+
SubstraitCreateStatementParser.processCreateStatementsToCatalog(createStatement);
1620

1721
SqlToSubstrait s = new SqlToSubstrait();
1822
var substraitToCalcite = new SubstraitToCalcite(EXTENSION_COLLECTION, typeFactory);
@@ -22,7 +26,7 @@ void preserveNamesFromSql() throws Exception {
2226
""";
2327
List<String> expectedNames = List.of("a", "B");
2428

25-
List<org.apache.calcite.rel.RelRoot> calciteRelRoots = s.sqlToRelNode(query, creates);
29+
List<org.apache.calcite.rel.RelRoot> calciteRelRoots = s.sqlToRelNode(query, catalogReader);
2630
assertEquals(1, calciteRelRoots.size());
2731

2832
org.apache.calcite.rel.RelRoot calciteRelRoot1 = calciteRelRoots.get(0);

isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ void conversionHandlesBuiltInSum0CallAddedByRule() throws SqlParseException, IOE
2525
assertFullRoundTrip(query);
2626

2727
SqlToSubstrait sqlConverter = new SqlToSubstrait();
28-
List<RelRoot> relRoots = sqlConverter.sqlToRelNode(query, tpchSchemaCreateStatements());
28+
List<RelRoot> relRoots = sqlConverter.sqlToRelNode(query, TPCH_CATALOG);
2929
assertEquals(1, relRoots.size());
3030
RelRoot planRoot = relRoots.get(0);
3131
RelNode originalPlan = planRoot.rel;

0 commit comments

Comments
 (0)