From c9f88144056c2e911bc747fdd4a1f75fa3c2706e Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Tue, 1 Jul 2025 13:34:00 +0200 Subject: [PATCH 01/11] Rework primitive --- .../sql/catalyst/parser/SqlBaseParser.g4 | 45 +++--- .../catalyst/parser/DataTypeAstBuilder.scala | 148 ++++++++++-------- .../spark/sql/errors/QueryParsingErrors.scala | 6 +- 3 files changed, 114 insertions(+), 85 deletions(-) diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index fc3d86ca858f0..65d234b1884f9 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -1340,7 +1340,20 @@ collateClause : COLLATE collationName=multipartIdentifier ; -type +primitiveTypeWithParameters + : STRING collateClause? + | (CHARACTER | CHAR) (LEFT_PAREN length=INTEGER_VALUE RIGHT_PAREN)? + | VARCHAR (LEFT_PAREN length=INTEGER_VALUE RIGHT_PAREN)? + | DECIMAL | DEC | NUMERIC + (LEFT_PAREN precision=INTEGER_VALUE (COMMA scale=INTEGER_VALUE)? RIGHT_PAREN)? + | TIME (LEFT_PAREN precision=INTEGER_VALUE RIGHT_PAREN)? (WITHOUT TIME ZONE)? + | INTERVAL ( + fromYearMonth=(YEAR | MONTH) (TO to=MONTH)? | + fromDayTime=(DAY | HOUR | MINUTE | SECOND) (TO to=(HOUR | MINUTE | SECOND))? + )? + ; + +primitiveTypeWithoutParameters : BOOLEAN | TINYINT | BYTE | SMALLINT | SHORT @@ -1349,32 +1362,24 @@ type | FLOAT | REAL | DOUBLE | DATE - | TIME - | TIMESTAMP | TIMESTAMP_NTZ | TIMESTAMP_LTZ - | STRING collateClause? - | CHARACTER | CHAR - | VARCHAR + | TIMESTAMP_LTZ | TIMESTAMP_NTZ | TIMESTAMP (WITHOUT TIME ZONE)? | BINARY - | DECIMAL | DEC | NUMERIC | VOID | INTERVAL | VARIANT - | ARRAY | STRUCT | MAP - | unsupportedType=identifier + ; + +primitiveType + : primitiveTypeWithParameters + | primitiveTypeWithoutParameters + | unsupportedType=identifier (LEFT_PAREN INTEGER_VALUE(COMMA INTEGER_VALUE)* RIGHT_PAREN)? ; dataType - : complex=ARRAY LT dataType GT #complexDataType - | complex=MAP LT dataType COMMA dataType GT #complexDataType - | complex=STRUCT (LT complexColTypeList? GT | NEQ) #complexDataType - | INTERVAL from=(YEAR | MONTH) (TO to=MONTH)? #yearMonthIntervalDataType - | INTERVAL from=(DAY | HOUR | MINUTE | SECOND) - (TO to=(HOUR | MINUTE | SECOND))? #dayTimeIntervalDataType - | TIME (LEFT_PAREN precision=INTEGER_VALUE RIGHT_PAREN)? - (WITHOUT TIME ZONE)? #timeDataType - | (TIMESTAMP_NTZ | TIMESTAMP WITHOUT TIME ZONE) #timestampNtzDataType - | type (LEFT_PAREN INTEGER_VALUE - (COMMA INTEGER_VALUE)* RIGHT_PAREN)? #primitiveDataType + : complex=ARRAY (LT dataType GT)? #complexDataType + | complex=MAP (LT dataType COMMA dataType GT)? #complexDataType + | complex=STRUCT (LT complexColTypeList? GT)? #complexDataType + | primitiveType #primitiveDataType ; qualifiedColTypeWithPositionList diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index e83a987263db4..395698da8a7b9 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -65,74 +65,93 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { ctx.parts.asScala.map(_.getText).toSeq } - /** - * Resolve/create the TIME primitive type. - */ - override def visitTimeDataType(ctx: TimeDataTypeContext): DataType = withOrigin(ctx) { - val precision = if (ctx.precision == null) { - TimeType.DEFAULT_PRECISION - } else { - ctx.precision.getText.toInt - } - TimeType(precision) - } - - /** - * Create the TIMESTAMP_NTZ primitive type. - */ - override def visitTimestampNtzDataType(ctx: TimestampNtzDataTypeContext): DataType = { - withOrigin(ctx)(TimestampNTZType) - } - /** * Resolve/create a primitive type. */ override def visitPrimitiveDataType(ctx: PrimitiveDataTypeContext): DataType = withOrigin(ctx) { - val typeCtx = ctx.`type` - (typeCtx.start.getType, ctx.INTEGER_VALUE().asScala.toList) match { - case (BOOLEAN, Nil) => BooleanType - case (TINYINT | BYTE, Nil) => ByteType - case (SMALLINT | SHORT, Nil) => ShortType - case (INT | INTEGER, Nil) => IntegerType - case (BIGINT | LONG, Nil) => LongType - case (FLOAT | REAL, Nil) => FloatType - case (DOUBLE, Nil) => DoubleType - case (DATE, Nil) => DateType - case (TIMESTAMP, Nil) => SqlApiConf.get.timestampType - case (TIMESTAMP_LTZ, Nil) => TimestampType - case (STRING, Nil) => - typeCtx.children.asScala.toSeq match { - case Seq(_) => StringType - case Seq(_, ctx: CollateClauseContext) => - val collationNameParts = visitCollateClause(ctx).toArray - val collationId = CollationFactory.collationNameToId( - CollationFactory.resolveFullyQualifiedName(collationNameParts)) - StringType(collationId) - } - case (CHARACTER | CHAR, length :: Nil) => CharType(length.getText.toInt) - case (VARCHAR, length :: Nil) => VarcharType(length.getText.toInt) - case (BINARY, Nil) => BinaryType - case (DECIMAL | DEC | NUMERIC, Nil) => DecimalType.USER_DEFAULT - case (DECIMAL | DEC | NUMERIC, precision :: Nil) => - DecimalType(precision.getText.toInt, 0) - case (DECIMAL | DEC | NUMERIC, precision :: scale :: Nil) => - DecimalType(precision.getText.toInt, scale.getText.toInt) - case (VOID, Nil) => NullType - case (INTERVAL, Nil) => CalendarIntervalType - case (VARIANT, Nil) => VariantType - case (CHARACTER | CHAR | VARCHAR, Nil) => - throw QueryParsingErrors.charTypeMissingLengthError(ctx.`type`.getText, ctx) - case (ARRAY | STRUCT | MAP, Nil) => - throw QueryParsingErrors.nestedTypeMissingElementTypeError(ctx.`type`.getText, ctx) - case (_, params) => - val badType = ctx.`type`.getText - val dtStr = if (params.nonEmpty) s"$badType(${params.mkString(",")})" else badType - throw QueryParsingErrors.dataTypeUnsupportedError(dtStr, ctx) + val typeCtx = ctx.`primitiveType` + if (typeCtx.`primitiveTypeWithParameters` != null) { + // This is a primitive type with parameters, e.g. VARCHAR(10), DECIMAL(10, 2), etc. + val currentCtx = typeCtx.`primitiveTypeWithParameters` + currentCtx.start.getType match { + case STRING => + currentCtx.children.asScala.toSeq match { + case Seq(_) => StringType + case Seq(_, ctx: CollateClauseContext) => + val collationNameParts = visitCollateClause(ctx).toArray + val collationId = CollationFactory.collationNameToId( + CollationFactory.resolveFullyQualifiedName(collationNameParts)) + StringType(collationId) + } + case CHARACTER | CHAR => + if (currentCtx.length == null) { + throw QueryParsingErrors.charVarcharTypeMissingLengthError(typeCtx.getText, ctx) + } + else CharType(currentCtx.length.getText.toInt) + case VARCHAR => + if (currentCtx.length == null) { + throw QueryParsingErrors.charVarcharTypeMissingLengthError(typeCtx.getText, ctx) + } + else VarcharType(currentCtx.length.getText.toInt) + case DECIMAL | DEC | NUMERIC => + if (currentCtx.precision == null) { + DecimalType.USER_DEFAULT + } else if (currentCtx.scale == null) { + DecimalType(currentCtx.precision.getText.toInt, 0) + } else { + DecimalType(currentCtx.precision.getText.toInt, currentCtx.scale.getText.toInt) + } + case TIME => + val precision = if (currentCtx.precision == null) { + TimeType.DEFAULT_PRECISION + } else { + currentCtx.precision.getText.toInt + } + TimeType(precision) + case INTERVAL => + if (currentCtx.fromYearMonth != null) { + visitDayTimeIntervalDataType(currentCtx) + } + else if (currentCtx.fromDayTime != null) { + visitYearMonthIntervalDataType(currentCtx) + } + else { + CalendarIntervalType + } + } + } else if (typeCtx.`primitiveTypeWithoutParameters` != null) { + // This is a primitive type without parameters, e.g. BOOLEAN, TINYINT, etc. + typeCtx.`primitiveTypeWithoutParameters`.start.getType match { + case BOOLEAN => BooleanType + case TINYINT | BYTE => ByteType + case SMALLINT | SHORT => ShortType + case INT | INTEGER => IntegerType + case BIGINT | LONG => LongType + case FLOAT | REAL => FloatType + case DOUBLE => DoubleType + case DATE => DateType + case TIMESTAMP => + if (typeCtx.`primitiveTypeWithoutParameters`.WITHOUT() == null) { + SqlApiConf.get.timestampType + } else TimestampNTZType + case TIMESTAMP_LTZ => TimestampType + case TIMESTAMP_NTZ => TimestampNTZType + case BINARY => BinaryType + case VOID => NullType + case VARIANT => VariantType + } + } else { + val badType = typeCtx.unsupportedType.getText + val params = typeCtx.INTEGER_VALUE().asScala.toList + val dtStr = + if (params.nonEmpty) s"$badType(${params.mkString(",")})" + else badType + throw QueryParsingErrors.dataTypeUnsupportedError(dtStr, ctx) } } - override def visitYearMonthIntervalDataType(ctx: YearMonthIntervalDataTypeContext): DataType = { - val startStr = ctx.from.getText.toLowerCase(Locale.ROOT) + private def visitYearMonthIntervalDataType(ctx: PrimitiveTypeWithParametersContext): DataType = { + val startStr = ctx.fromYearMonth.getText.toLowerCase(Locale.ROOT) val start = YearMonthIntervalType.stringToField(startStr) if (ctx.to != null) { val endStr = ctx.to.getText.toLowerCase(Locale.ROOT) @@ -146,8 +165,8 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { } } - override def visitDayTimeIntervalDataType(ctx: DayTimeIntervalDataTypeContext): DataType = { - val startStr = ctx.from.getText.toLowerCase(Locale.ROOT) + private def visitDayTimeIntervalDataType(ctx: PrimitiveTypeWithParametersContext): DataType = { + val startStr = ctx.fromDayTime.getText.toLowerCase(Locale.ROOT) val start = DayTimeIntervalType.stringToField(startStr) if (ctx.to != null) { val endStr = ctx.to.getText.toLowerCase(Locale.ROOT) @@ -165,6 +184,9 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { * Create a complex DataType. Arrays, Maps and Structures are supported. */ override def visitComplexDataType(ctx: ComplexDataTypeContext): DataType = withOrigin(ctx) { + if (ctx.LT() == null) { + throw QueryParsingErrors.nestedTypeMissingElementTypeError(ctx.getText, ctx) + } ctx.complex.getType match { case SqlBaseParser.ARRAY => ArrayType(typedVisit(ctx.dataType(0))) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index 12f986b89fd2b..60ccf7a9282cf 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -324,7 +324,9 @@ private[sql] object QueryParsingErrors extends DataTypeErrorsBase { ctx) } - def charTypeMissingLengthError(dataType: String, ctx: PrimitiveDataTypeContext): Throwable = { + def charVarcharTypeMissingLengthError( + dataType: String, + ctx: PrimitiveDataTypeContext): Throwable = { new ParseException( errorClass = "DATATYPE_MISSING_SIZE", messageParameters = Map("type" -> toSQLType(dataType)), @@ -333,7 +335,7 @@ private[sql] object QueryParsingErrors extends DataTypeErrorsBase { def nestedTypeMissingElementTypeError( dataType: String, - ctx: PrimitiveDataTypeContext): Throwable = { + ctx: ComplexDataTypeContext): Throwable = { dataType.toUpperCase(Locale.ROOT) match { case "ARRAY" => new ParseException( From 96e70321ca3d441258bb8201eeb391e77b0c573e Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Tue, 1 Jul 2025 16:08:00 +0200 Subject: [PATCH 02/11] Fix decimal --- .../org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 65d234b1884f9..55740d83f22de 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -1344,7 +1344,7 @@ primitiveTypeWithParameters : STRING collateClause? | (CHARACTER | CHAR) (LEFT_PAREN length=INTEGER_VALUE RIGHT_PAREN)? | VARCHAR (LEFT_PAREN length=INTEGER_VALUE RIGHT_PAREN)? - | DECIMAL | DEC | NUMERIC + | (DECIMAL | DEC | NUMERIC) (LEFT_PAREN precision=INTEGER_VALUE (COMMA scale=INTEGER_VALUE)? RIGHT_PAREN)? | TIME (LEFT_PAREN precision=INTEGER_VALUE RIGHT_PAREN)? (WITHOUT TIME ZONE)? | INTERVAL ( From 4e17e22c5e8b4a3576e3c3047d25b9aa56d0d0ca Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Tue, 1 Jul 2025 16:17:41 +0200 Subject: [PATCH 03/11] Move interval --- .../sql/catalyst/parser/SqlBaseParser.g4 | 6 ++-- .../catalyst/parser/DataTypeAstBuilder.scala | 28 ++++++++++--------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 55740d83f22de..779468730f637 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -1347,10 +1347,6 @@ primitiveTypeWithParameters | (DECIMAL | DEC | NUMERIC) (LEFT_PAREN precision=INTEGER_VALUE (COMMA scale=INTEGER_VALUE)? RIGHT_PAREN)? | TIME (LEFT_PAREN precision=INTEGER_VALUE RIGHT_PAREN)? (WITHOUT TIME ZONE)? - | INTERVAL ( - fromYearMonth=(YEAR | MONTH) (TO to=MONTH)? | - fromDayTime=(DAY | HOUR | MINUTE | SECOND) (TO to=(HOUR | MINUTE | SECOND))? - )? ; primitiveTypeWithoutParameters @@ -1366,6 +1362,8 @@ primitiveTypeWithoutParameters | BINARY | VOID | INTERVAL + (fromYearMonth=(YEAR | MONTH) (TO to=MONTH)? | + fromDayTime=(DAY | HOUR | MINUTE | SECOND) (TO to=(HOUR | MINUTE | SECOND))?)? | VARIANT ; diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index 395698da8a7b9..635fc9c53dbb7 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -108,20 +108,11 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { currentCtx.precision.getText.toInt } TimeType(precision) - case INTERVAL => - if (currentCtx.fromYearMonth != null) { - visitDayTimeIntervalDataType(currentCtx) - } - else if (currentCtx.fromDayTime != null) { - visitYearMonthIntervalDataType(currentCtx) - } - else { - CalendarIntervalType - } } } else if (typeCtx.`primitiveTypeWithoutParameters` != null) { // This is a primitive type without parameters, e.g. BOOLEAN, TINYINT, etc. - typeCtx.`primitiveTypeWithoutParameters`.start.getType match { + val currentCtx = typeCtx.`primitiveTypeWithoutParameters` + currentCtx.start.getType match { case BOOLEAN => BooleanType case TINYINT | BYTE => ByteType case SMALLINT | SHORT => ShortType @@ -138,6 +129,16 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { case TIMESTAMP_NTZ => TimestampNTZType case BINARY => BinaryType case VOID => NullType + case INTERVAL => + if (currentCtx.fromYearMonth != null) { + visitDayTimeIntervalDataType(currentCtx) + } + else if (currentCtx.fromDayTime != null) { + visitYearMonthIntervalDataType(currentCtx) + } + else { + CalendarIntervalType + } case VARIANT => VariantType } } else { @@ -150,7 +151,8 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { } } - private def visitYearMonthIntervalDataType(ctx: PrimitiveTypeWithParametersContext): DataType = { + private def visitYearMonthIntervalDataType( + ctx: PrimitiveTypeWithoutParametersContext): DataType = { val startStr = ctx.fromYearMonth.getText.toLowerCase(Locale.ROOT) val start = YearMonthIntervalType.stringToField(startStr) if (ctx.to != null) { @@ -165,7 +167,7 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { } } - private def visitDayTimeIntervalDataType(ctx: PrimitiveTypeWithParametersContext): DataType = { + private def visitDayTimeIntervalDataType(ctx: PrimitiveTypeWithoutParametersContext): DataType = { val startStr = ctx.fromDayTime.getText.toLowerCase(Locale.ROOT) val start = DayTimeIntervalType.stringToField(startStr) if (ctx.to != null) { From ae9f7de913ec6c446d72ee6f7e019a6fb37eb38e Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Tue, 1 Jul 2025 18:01:59 +0200 Subject: [PATCH 04/11] Fix interval error --- .../apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index 635fc9c53dbb7..c7b1705518be6 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -130,10 +130,10 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { case BINARY => BinaryType case VOID => NullType case INTERVAL => - if (currentCtx.fromYearMonth != null) { + if (currentCtx.fromDayTime != null) { visitDayTimeIntervalDataType(currentCtx) } - else if (currentCtx.fromDayTime != null) { + else if (currentCtx.fromYearMonth != null) { visitYearMonthIntervalDataType(currentCtx) } else { From 473e101172a555ffa71627b4fa711fc5c8944dc8 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Mon, 7 Jul 2025 13:56:51 +0200 Subject: [PATCH 05/11] Fix struct --- .../org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 779468730f637..ee2204d76b926 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -1376,7 +1376,7 @@ primitiveType dataType : complex=ARRAY (LT dataType GT)? #complexDataType | complex=MAP (LT dataType COMMA dataType GT)? #complexDataType - | complex=STRUCT (LT complexColTypeList? GT)? #complexDataType + | complex=STRUCT ((LT complexColTypeList? GT) | NEQ)? #complexDataType | primitiveType #primitiveDataType ; From c761bbb8bea2e8775390df84aff92228750e9551 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Mon, 7 Jul 2025 15:04:02 +0200 Subject: [PATCH 06/11] Fix struct match --- .../apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index c7b1705518be6..13fdc5078c468 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -186,7 +186,7 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { * Create a complex DataType. Arrays, Maps and Structures are supported. */ override def visitComplexDataType(ctx: ComplexDataTypeContext): DataType = withOrigin(ctx) { - if (ctx.LT() == null) { + if (ctx.LT() == null || ctx.NEQ() == null) { throw QueryParsingErrors.nestedTypeMissingElementTypeError(ctx.getText, ctx) } ctx.complex.getType match { From d3347bba2b03349c093fe099d287b6e3de336c14 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Mon, 7 Jul 2025 17:57:42 +0200 Subject: [PATCH 07/11] Fix struct match --- .../apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index 13fdc5078c468..fb348f213bc26 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -186,7 +186,7 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { * Create a complex DataType. Arrays, Maps and Structures are supported. */ override def visitComplexDataType(ctx: ComplexDataTypeContext): DataType = withOrigin(ctx) { - if (ctx.LT() == null || ctx.NEQ() == null) { + if (ctx.LT() == null && ctx.NEQ() == null) { throw QueryParsingErrors.nestedTypeMissingElementTypeError(ctx.getText, ctx) } ctx.complex.getType match { From 18c5be1871201ff658476db6005f1597da9c25b9 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Tue, 8 Jul 2025 07:55:04 +0200 Subject: [PATCH 08/11] Fix style --- .../sql/catalyst/parser/DataTypeAstBuilder.scala | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index fb348f213bc26..c5e8f9b2b4f1c 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -86,13 +86,11 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { case CHARACTER | CHAR => if (currentCtx.length == null) { throw QueryParsingErrors.charVarcharTypeMissingLengthError(typeCtx.getText, ctx) - } - else CharType(currentCtx.length.getText.toInt) + } else CharType(currentCtx.length.getText.toInt) case VARCHAR => if (currentCtx.length == null) { throw QueryParsingErrors.charVarcharTypeMissingLengthError(typeCtx.getText, ctx) - } - else VarcharType(currentCtx.length.getText.toInt) + } else VarcharType(currentCtx.length.getText.toInt) case DECIMAL | DEC | NUMERIC => if (currentCtx.precision == null) { DecimalType.USER_DEFAULT @@ -132,11 +130,9 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { case INTERVAL => if (currentCtx.fromDayTime != null) { visitDayTimeIntervalDataType(currentCtx) - } - else if (currentCtx.fromYearMonth != null) { + } else if (currentCtx.fromYearMonth != null) { visitYearMonthIntervalDataType(currentCtx) - } - else { + } else { CalendarIntervalType } case VARIANT => VariantType @@ -167,7 +163,8 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { } } - private def visitDayTimeIntervalDataType(ctx: PrimitiveTypeWithoutParametersContext): DataType = { + private def visitDayTimeIntervalDataType( + ctx: PrimitiveTypeWithoutParametersContext): DataType = { val startStr = ctx.fromDayTime.getText.toLowerCase(Locale.ROOT) val start = DayTimeIntervalType.stringToField(startStr) if (ctx.to != null) { From a9bb3e3b7a1ca75b7c3b390f877a5ac0db3999d3 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Tue, 8 Jul 2025 08:04:17 +0200 Subject: [PATCH 09/11] Fix parser namings --- .../apache/spark/sql/catalyst/parser/SqlBaseParser.g4 | 8 ++++---- .../spark/sql/catalyst/parser/DataTypeAstBuilder.scala | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index ee2204d76b926..f7401ca2fdf00 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -1340,7 +1340,7 @@ collateClause : COLLATE collationName=multipartIdentifier ; -primitiveTypeWithParameters +nonTrivialPrimitiveType : STRING collateClause? | (CHARACTER | CHAR) (LEFT_PAREN length=INTEGER_VALUE RIGHT_PAREN)? | VARCHAR (LEFT_PAREN length=INTEGER_VALUE RIGHT_PAREN)? @@ -1349,7 +1349,7 @@ primitiveTypeWithParameters | TIME (LEFT_PAREN precision=INTEGER_VALUE RIGHT_PAREN)? (WITHOUT TIME ZONE)? ; -primitiveTypeWithoutParameters +trivialPrimitiveType : BOOLEAN | TINYINT | BYTE | SMALLINT | SHORT @@ -1368,8 +1368,8 @@ primitiveTypeWithoutParameters ; primitiveType - : primitiveTypeWithParameters - | primitiveTypeWithoutParameters + : nonTrivialPrimitiveType + | trivialPrimitiveType | unsupportedType=identifier (LEFT_PAREN INTEGER_VALUE(COMMA INTEGER_VALUE)* RIGHT_PAREN)? ; diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index c5e8f9b2b4f1c..2411881800847 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -70,9 +70,9 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { */ override def visitPrimitiveDataType(ctx: PrimitiveDataTypeContext): DataType = withOrigin(ctx) { val typeCtx = ctx.`primitiveType` - if (typeCtx.`primitiveTypeWithParameters` != null) { + if (typeCtx.`nonTrivialPrimitiveType` != null) { // This is a primitive type with parameters, e.g. VARCHAR(10), DECIMAL(10, 2), etc. - val currentCtx = typeCtx.`primitiveTypeWithParameters` + val currentCtx = typeCtx.`nonTrivialPrimitiveType` currentCtx.start.getType match { case STRING => currentCtx.children.asScala.toSeq match { @@ -107,9 +107,9 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { } TimeType(precision) } - } else if (typeCtx.`primitiveTypeWithoutParameters` != null) { + } else if (typeCtx.`trivialPrimitiveType` != null) { // This is a primitive type without parameters, e.g. BOOLEAN, TINYINT, etc. - val currentCtx = typeCtx.`primitiveTypeWithoutParameters` + val currentCtx = typeCtx.`trivialPrimitiveType` currentCtx.start.getType match { case BOOLEAN => BooleanType case TINYINT | BYTE => ByteType @@ -120,7 +120,7 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { case DOUBLE => DoubleType case DATE => DateType case TIMESTAMP => - if (typeCtx.`primitiveTypeWithoutParameters`.WITHOUT() == null) { + if (typeCtx.`trivialPrimitiveType`.WITHOUT() == null) { SqlApiConf.get.timestampType } else TimestampNTZType case TIMESTAMP_LTZ => TimestampType From c91c4d5bc530ef973d1b030ff6eb22e124adb69d Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Tue, 8 Jul 2025 08:56:52 +0200 Subject: [PATCH 10/11] Incorporate feedback --- .../sql/catalyst/parser/SqlBaseParser.g4 | 9 ++--- .../catalyst/parser/DataTypeAstBuilder.scala | 35 +++++++++---------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index f7401ca2fdf00..698afa4860027 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -1346,6 +1346,10 @@ nonTrivialPrimitiveType | VARCHAR (LEFT_PAREN length=INTEGER_VALUE RIGHT_PAREN)? | (DECIMAL | DEC | NUMERIC) (LEFT_PAREN precision=INTEGER_VALUE (COMMA scale=INTEGER_VALUE)? RIGHT_PAREN)? + | INTERVAL + (fromYearMonth=(YEAR | MONTH) (TO to=MONTH)? | + fromDayTime=(DAY | HOUR | MINUTE | SECOND) (TO to=(HOUR | MINUTE | SECOND))?)? + | TIMESTAMP (WITHOUT TIME ZONE)? | TIME (LEFT_PAREN precision=INTEGER_VALUE RIGHT_PAREN)? (WITHOUT TIME ZONE)? ; @@ -1358,12 +1362,9 @@ trivialPrimitiveType | FLOAT | REAL | DOUBLE | DATE - | TIMESTAMP_LTZ | TIMESTAMP_NTZ | TIMESTAMP (WITHOUT TIME ZONE)? + | TIMESTAMP_LTZ | TIMESTAMP_NTZ | BINARY | VOID - | INTERVAL - (fromYearMonth=(YEAR | MONTH) (TO to=MONTH)? | - fromDayTime=(DAY | HOUR | MINUTE | SECOND) (TO to=(HOUR | MINUTE | SECOND))?)? | VARIANT ; diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index 2411881800847..d348fc4bf8e6c 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -69,10 +69,10 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { * Resolve/create a primitive type. */ override def visitPrimitiveDataType(ctx: PrimitiveDataTypeContext): DataType = withOrigin(ctx) { - val typeCtx = ctx.`primitiveType` - if (typeCtx.`nonTrivialPrimitiveType` != null) { + val typeCtx = ctx.primitiveType + if (typeCtx.nonTrivialPrimitiveType != null) { // This is a primitive type with parameters, e.g. VARCHAR(10), DECIMAL(10, 2), etc. - val currentCtx = typeCtx.`nonTrivialPrimitiveType` + val currentCtx = typeCtx.nonTrivialPrimitiveType currentCtx.start.getType match { case STRING => currentCtx.children.asScala.toSeq match { @@ -99,6 +99,18 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { } else { DecimalType(currentCtx.precision.getText.toInt, currentCtx.scale.getText.toInt) } + case INTERVAL => + if (currentCtx.fromDayTime != null) { + visitDayTimeIntervalDataType(currentCtx) + } else if (currentCtx.fromYearMonth != null) { + visitYearMonthIntervalDataType(currentCtx) + } else { + CalendarIntervalType + } + case TIMESTAMP => + if (currentCtx.WITHOUT() == null) { + SqlApiConf.get.timestampType + } else TimestampNTZType case TIME => val precision = if (currentCtx.precision == null) { TimeType.DEFAULT_PRECISION @@ -107,10 +119,9 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { } TimeType(precision) } - } else if (typeCtx.`trivialPrimitiveType` != null) { + } else if (typeCtx.trivialPrimitiveType != null) { // This is a primitive type without parameters, e.g. BOOLEAN, TINYINT, etc. - val currentCtx = typeCtx.`trivialPrimitiveType` - currentCtx.start.getType match { + typeCtx.trivialPrimitiveType.start.getType match { case BOOLEAN => BooleanType case TINYINT | BYTE => ByteType case SMALLINT | SHORT => ShortType @@ -119,22 +130,10 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { case FLOAT | REAL => FloatType case DOUBLE => DoubleType case DATE => DateType - case TIMESTAMP => - if (typeCtx.`trivialPrimitiveType`.WITHOUT() == null) { - SqlApiConf.get.timestampType - } else TimestampNTZType case TIMESTAMP_LTZ => TimestampType case TIMESTAMP_NTZ => TimestampNTZType case BINARY => BinaryType case VOID => NullType - case INTERVAL => - if (currentCtx.fromDayTime != null) { - visitDayTimeIntervalDataType(currentCtx) - } else if (currentCtx.fromYearMonth != null) { - visitYearMonthIntervalDataType(currentCtx) - } else { - CalendarIntervalType - } case VARIANT => VariantType } } else { From eda9511b3ac4b1b76719304343c7ec761936b467 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Tue, 8 Jul 2025 14:33:48 +0200 Subject: [PATCH 11/11] Fix compile --- .../spark/sql/catalyst/parser/DataTypeAstBuilder.scala | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index d348fc4bf8e6c..beb7061a841a8 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -146,8 +146,7 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { } } - private def visitYearMonthIntervalDataType( - ctx: PrimitiveTypeWithoutParametersContext): DataType = { + private def visitYearMonthIntervalDataType(ctx: NonTrivialPrimitiveTypeContext): DataType = { val startStr = ctx.fromYearMonth.getText.toLowerCase(Locale.ROOT) val start = YearMonthIntervalType.stringToField(startStr) if (ctx.to != null) { @@ -162,8 +161,7 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { } } - private def visitDayTimeIntervalDataType( - ctx: PrimitiveTypeWithoutParametersContext): DataType = { + private def visitDayTimeIntervalDataType(ctx: NonTrivialPrimitiveTypeContext): DataType = { val startStr = ctx.fromDayTime.getText.toLowerCase(Locale.ROOT) val start = DayTimeIntervalType.stringToField(startStr) if (ctx.to != null) {