Skip to content

adding config to control Varchar behavior #11090

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,11 @@ config_namespace! {
/// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi.
pub dialect: String, default = "generic".to_string()

/// If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but
/// ignore the length. If false, error if a `VARCHAR` with a length is
/// specified. The Arrow type system does not have a notion of maximum
/// string length and thus DataFusion can not enforce such limits.
pub support_varchar_with_length: bool, default = true
}
}

Expand Down Expand Up @@ -303,6 +308,7 @@ config_namespace! {
/// statistics into the same file groups.
/// Currently experimental
pub split_file_groups_by_statistics: bool, default = false

}
}

Expand Down
1 change: 1 addition & 0 deletions datafusion/core/src/execution/session_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,7 @@ impl SessionState {
ParserOptions {
parse_float_as_decimal: sql_parser_options.parse_float_as_decimal,
enable_ident_normalization: sql_parser_options.enable_ident_normalization,
support_varchar_with_length: sql_parser_options.support_varchar_with_length,
}
}

Expand Down
9 changes: 8 additions & 1 deletion datafusion/sql/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,15 @@ pub trait ContextProvider {
pub struct ParserOptions {
    /// When true, the SQL parser parses float literals as a decimal type.
    pub parse_float_as_decimal: bool,
    /// When true, identifiers are normalized (converted to lowercase when
    /// not quoted).
    pub enable_ident_normalization: bool,
    /// When true, a length on `VARCHAR` (e.g. `VARCHAR(20)`) is accepted but
    /// ignored; when false, specifying a length is a planning error.
    pub support_varchar_with_length: bool,
}

impl Default for ParserOptions {
    /// Builds the stock option set: float literals are not parsed as
    /// decimals, identifier normalization is on, and `VARCHAR(n)` is accepted.
    fn default() -> Self {
        ParserOptions {
            // Permissive switches that are on out of the box.
            support_varchar_with_length: true,
            enable_ident_normalization: true,
            // Opt-in behavior that stays off unless requested.
            parse_float_as_decimal: false,
        }
    }
}
Expand Down Expand Up @@ -404,12 +406,17 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
SQLDataType::UnsignedInt(_) | SQLDataType::UnsignedInteger(_) | SQLDataType::UnsignedInt4(_) => {
Ok(DataType::UInt32)
}
SQLDataType::Varchar(length) => {
match (length, self.options.support_varchar_with_length) {
(Some(_), false) => plan_err!("does not support Varchar with length, please set `support_varchar_with_length` to be true"),
_ => Ok(DataType::Utf8),
}
}
SQLDataType::UnsignedBigInt(_) | SQLDataType::UnsignedInt8(_) => Ok(DataType::UInt64),
SQLDataType::Float(_) => Ok(DataType::Float32),
SQLDataType::Real | SQLDataType::Float4 => Ok(DataType::Float32),
SQLDataType::Double | SQLDataType::DoublePrecision | SQLDataType::Float8 => Ok(DataType::Float64),
SQLDataType::Char(_)
| SQLDataType::Varchar(_)
| SQLDataType::Text
| SQLDataType::String(_) => Ok(DataType::Utf8),
SQLDataType::Timestamp(None, tz_info) => {
Expand Down
2 changes: 2 additions & 0 deletions datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ fn parse_decimals() {
ParserOptions {
parse_float_as_decimal: true,
enable_ident_normalization: false,
support_varchar_with_length: false,
},
);
}
Expand Down Expand Up @@ -137,6 +138,7 @@ fn parse_ident_normalization() {
ParserOptions {
parse_float_as_decimal: false,
enable_ident_normalization,
support_varchar_with_length: false,
},
);
if plan.is_ok() {
Expand Down
2 changes: 2 additions & 0 deletions datafusion/sqllogictest/test_files/information_schema.slt
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ datafusion.optimizer.top_down_join_key_reordering true
datafusion.sql_parser.dialect generic
datafusion.sql_parser.enable_ident_normalization true
datafusion.sql_parser.parse_float_as_decimal false
datafusion.sql_parser.support_varchar_with_length true

# show all variables with verbose
query TTT rowsort
Expand Down Expand Up @@ -318,6 +319,7 @@ datafusion.optimizer.top_down_join_key_reordering true When set to true, the phy
datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi.
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type
datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits.

# show_variable_in_config_options
query TT
Expand Down
49 changes: 49 additions & 0 deletions datafusion/sqllogictest/test_files/strings.slt
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,52 @@ e1
p2
p2e1
p2m1e1

## VARCHAR with length support
##
## `datafusion.sql_parser.support_varchar_with_length` controls whether a
## length on `VARCHAR` is accepted (and ignored) or rejected at planning time.

# Lengths can be used by default; the length is ignored, so the value is not truncated
query T
SELECT '12345'::VARCHAR(2);
----
12345

# Lengths can not be used when the config setting is disabled

statement ok
set datafusion.sql_parser.support_varchar_with_length = false;

# Each failure below must be the planner's VARCHAR-length error, not an
# unrelated one, so the expected message is pinned.
query error does not support Varchar with length
SELECT '12345'::VARCHAR(2);

query error does not support Varchar with length
SELECT s::VARCHAR(2) FROM (VALUES ('12345')) t(s);

statement ok
create table vals(s char) as values('abc'), ('def');

query error does not support Varchar with length
SELECT s::VARCHAR(2) FROM vals

# Lengths can be used when the config setting is enabled

statement ok
set datafusion.sql_parser.support_varchar_with_length = true;

query T
SELECT '12345'::VARCHAR(2)
----
12345

query T
SELECT s::VARCHAR(2) FROM (VALUES ('12345')) t(s)
----
12345

query T
SELECT s::VARCHAR(2) FROM vals
----
abc
def

# Clean up the table created for this section
statement ok
drop table vals;
1 change: 1 addition & 0 deletions docs/source/user-guide/configs.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,4 @@ Environment variables are read during `SessionConfig` initialisation so they mus
| datafusion.sql_parser.parse_float_as_decimal | false | When set to true, SQL parser will parse float as decimal type |
| datafusion.sql_parser.enable_ident_normalization | true | When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) |
| datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. |
| datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. |