Skip to content
Closed
18 changes: 18 additions & 0 deletions common/utils/src/main/resources/error/error-conditions.json
Original file line number Diff line number Diff line change
Expand Up @@ -5518,6 +5518,24 @@
],
"sqlState" : "2201E"
},
"ST_INVALID_ALGORITHM_VALUE" : {
"message" : [
"Invalid or unsupported edge interpolation algorithm value: '<alg>'."
],
"sqlState" : "22023"
},
"ST_INVALID_CRS_VALUE" : {
"message" : [
"Invalid or unsupported CRS (coordinate reference system) value: '<crs>'."
],
"sqlState" : "22023"
},
"ST_INVALID_SRID_VALUE" : {
"message" : [
"Invalid or unsupported SRID (spatial reference identifier) value: <srid>."
],
"sqlState" : "22023"
},
"SUM_OF_LIMIT_AND_OFFSET_EXCEEDS_MAX_INT" : {
"message" : [
"The sum of the LIMIT clause and the OFFSET clause must not be greater than the maximum 32-bit integer value (2,147,483,647) but found limit = <limit>, offset = <offset>."
Expand Down
2 changes: 2 additions & 0 deletions docs/sql-ref-ansi-compliance.md
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,8 @@ Below is a list of all the keywords in Spark SQL.
|FUNCTION|non-reserved|non-reserved|reserved|
|FUNCTIONS|non-reserved|non-reserved|non-reserved|
|GENERATED|non-reserved|non-reserved|non-reserved|
|GEOGRAPHY|non-reserved|non-reserved|non-reserved|
|GEOMETRY|non-reserved|non-reserved|non-reserved|
|GLOBAL|non-reserved|non-reserved|reserved|
|GRANT|reserved|non-reserved|reserved|
|GROUP|reserved|non-reserved|reserved|
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,8 @@ FULL: 'FULL';
FUNCTION: 'FUNCTION';
FUNCTIONS: 'FUNCTIONS';
GENERATED: 'GENERATED';
GEOGRAPHY: 'GEOGRAPHY';
GEOMETRY: 'GEOMETRY';
GLOBAL: 'GLOBAL';
GRANT: 'GRANT';
GROUP: 'GROUP';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1340,6 +1340,8 @@ nonTrivialPrimitiveType
fromDayTime=(DAY | HOUR | MINUTE | SECOND) (TO to=(HOUR | MINUTE | SECOND))?)?
| TIMESTAMP (WITHOUT TIME ZONE)?
| TIME (LEFT_PAREN precision=INTEGER_VALUE RIGHT_PAREN)? (WITHOUT TIME ZONE)?
| GEOGRAPHY (LEFT_PAREN srid=(INTEGER_VALUE | ANY) RIGHT_PAREN)
| GEOMETRY (LEFT_PAREN srid=(INTEGER_VALUE | ANY) RIGHT_PAREN)
;

trivialPrimitiveType
Expand Down Expand Up @@ -1832,6 +1834,8 @@ ansiNonReserved
| FUNCTION
| FUNCTIONS
| GENERATED
| GEOGRAPHY
| GEOMETRY
| GLOBAL
| GROUPING
| HANDLER
Expand Down Expand Up @@ -2210,6 +2214,8 @@ nonReserved
| FUNCTION
| FUNCTIONS
| GENERATED
| GEOGRAPHY
| GEOMETRY
| GLOBAL
| GRANT
| GROUP
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.util.SparkParserUtils.{string, withOrigin}
import org.apache.spark.sql.connector.catalog.IdentityColumnSpec
import org.apache.spark.sql.errors.QueryParsingErrors
import org.apache.spark.sql.internal.SqlApiConf
import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, CalendarIntervalType, CharType, DataType, DateType, DayTimeIntervalType, DecimalType, DoubleType, FloatType, IntegerType, LongType, MapType, MetadataBuilder, NullType, ShortType, StringType, StructField, StructType, TimestampNTZType, TimestampType, TimeType, VarcharType, VariantType, YearMonthIntervalType}
import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, CalendarIntervalType, CharType, DataType, DateType, DayTimeIntervalType, DecimalType, DoubleType, FloatType, GeographyType, GeometryType, IntegerType, LongType, MapType, MetadataBuilder, NullType, ShortType, StringType, StructField, StructType, TimestampNTZType, TimestampType, TimeType, VarcharType, VariantType, YearMonthIntervalType}

class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] {
protected def typedVisit[T](ctx: ParseTree): T = {
Expand Down Expand Up @@ -118,6 +118,30 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] {
currentCtx.precision.getText.toInt
}
TimeType(precision)
case GEOGRAPHY =>
// Unparameterized geography type isn't supported and will be caught by the default branch.
// Here, we only handle the parameterized GEOGRAPHY type syntax, which comes in two forms:
if (currentCtx.srid.getText.toLowerCase(Locale.ROOT) == "any") {
// The special parameterized GEOGRAPHY type syntax uses a single "ANY" string value.
// This implies a mixed GEOGRAPHY type, with potentially different SRIDs across rows.
GeographyType("ANY")
} else {
// The explicitly parameterized GEOGRAPHY syntax uses a specified integer SRID value.
// This implies a fixed GEOGRAPHY type, with a single fixed SRID value across all rows.
GeographyType(currentCtx.srid.getText.toInt)
}
case GEOMETRY =>
// Unparameterized geometry type isn't supported and will be caught by the default branch.
// Here, we only handle the parameterized GEOMETRY type syntax, which comes in two forms:
if (currentCtx.srid.getText.toLowerCase(Locale.ROOT) == "any") {
// The special parameterized GEOMETRY type syntax uses a single "ANY" string value.
// This implies a mixed GEOMETRY type, with potentially different SRIDs across rows.
GeometryType("ANY")
} else {
// The explicitly parameterized GEOMETRY type syntax has a single integer SRID value.
// This implies a fixed GEOMETRY type, with a single fixed SRID value across all rows.
GeometryType(currentCtx.srid.getText.toInt)
}
}
} else if (typeCtx.trivialPrimitiveType != null) {
// This is a primitive type without parameters, e.g. BOOLEAN, TINYINT, etc.
Expand Down
14 changes: 14 additions & 0 deletions sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,10 @@ object DataType {
private val CHAR_TYPE = """char\(\s*(\d+)\s*\)""".r
private val VARCHAR_TYPE = """varchar\(\s*(\d+)\s*\)""".r
private val STRING_WITH_COLLATION = """string\s+collate\s+(\w+)""".r
// Type-name patterns for the geospatial types. They are used as extractors in `case`
// clauses, where a Scala regex pattern must match the entire input string.
//
// geometry(<crs>): captures the CRS, written as `<word>:<word>` with an optional '-'
// immediately after the colon (the shape of "OGC:CRS84").
private val GEOMETRY_TYPE = """geometry\(\s*([\w]+:-?[\w]+)\s*\)""".r
// geography(<crs>): same CRS shape as above, with no algorithm given.
private val GEOGRAPHY_TYPE_CRS = """geography\(\s*(\w+:-?\w+)\s*\)""".r
// geography(<alg>): a single bare word. Since `\w` never matches ':', this pattern cannot
// also match the CRS-only form above, so the two cases are unambiguous.
private val GEOGRAPHY_TYPE_ALG = """geography\(\s*(\w+)\s*\)""".r
// geography(<crs>, <alg>): captures the CRS first and the algorithm second.
private val GEOGRAPHY_TYPE_CRS_ALG = """geography\(\s*(\w+:-?\w+)\s*,\s*(\w+)\s*\)""".r

val COLLATIONS_METADATA_KEY = "__COLLATIONS"

Expand Down Expand Up @@ -217,6 +221,16 @@ object DataType {
case CHAR_TYPE(length) => CharType(length.toInt)
case VARCHAR_TYPE(length) => VarcharType(length.toInt)
case STRING_WITH_COLLATION(collation) => StringType(collation)
// If the coordinate reference system (CRS) value is omitted, Parquet and other storage
// formats (Delta, Iceberg) consider "OGC:CRS84" to be the default value of the crs.
case "geometry" => GeometryType(GeometryType.GEOMETRY_DEFAULT_CRS)
case GEOMETRY_TYPE(crs) => GeometryType(crs)
case "geography" => GeographyType(GeographyType.GEOGRAPHY_DEFAULT_CRS)
case GEOGRAPHY_TYPE_CRS(crs) =>
GeographyType(crs, GeographyType.GEOGRAPHY_DEFAULT_ALGORITHM)
case GEOGRAPHY_TYPE_ALG(alg) =>
GeographyType(GeographyType.GEOGRAPHY_DEFAULT_CRS, alg)
case GEOGRAPHY_TYPE_CRS_ALG(crs, alg) => GeographyType(crs, alg)
// For backwards compatibility, previously the type name of NullType is "null"
case "null" => NullType
case "timestamp_ltz" => TimestampType
Expand Down
Loading