From ec3876159977bb1171fa9d1b3efe8418d570a0ce Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Tue, 7 May 2024 13:59:16 +0300 Subject: [PATCH 01/15] Simplify format options --- datafusion-cli/src/helper.rs | 69 +++++++++------ datafusion/common/src/config.rs | 6 +- .../common/src/file_options/csv_writer.rs | 2 +- datafusion/core/src/catalog/listing_schema.rs | 3 - .../core/src/datasource/file_format/csv.rs | 44 +++++++--- .../src/datasource/listing_table_factory.rs | 21 ++--- .../core/src/datasource/physical_plan/csv.rs | 16 ++-- datafusion/core/tests/path_partition.rs | 17 +++- datafusion/expr/src/logical_plan/ddl.rs | 7 -- datafusion/proto/proto/datafusion.proto | 21 ++--- datafusion/proto/src/generated/pbjson.rs | 48 ++--------- datafusion/proto/src/generated/prost.rs | 20 ++--- datafusion/proto/src/logical_plan/mod.rs | 60 +++++-------- .../proto/src/physical_plan/from_proto.rs | 2 +- .../proto/src/physical_plan/to_proto.rs | 5 +- datafusion/sql/src/parser.rs | 85 ++++++------------- datafusion/sql/src/statement.rs | 12 +-- datafusion/sql/tests/sql_integration.rs | 24 +++--- datafusion/sqllogictest/test_files/copy.slt | 6 +- .../sqllogictest/test_files/csv_files.slt | 20 ++--- datafusion/sqllogictest/test_files/ddl.slt | 2 +- datafusion/sqllogictest/test_files/limit.slt | 3 +- datafusion/sqllogictest/test_files/order.slt | 3 +- .../test_files/pg_compat/pg_compat_null.slt | 3 +- .../test_files/pg_compat/pg_compat_simple.slt | 3 +- .../test_files/pg_compat/pg_compat_union.slt | 3 +- .../test_files/pg_compat/pg_compat_window.slt | 3 +- .../sqllogictest/test_files/predicates.slt | 4 +- .../test_files/repartition_scan.slt | 6 +- .../sqllogictest/test_files/subquery.slt | 6 +- .../test_files/tpch/create_tables.slt.part | 16 ++-- 31 files changed, 243 insertions(+), 297 deletions(-) diff --git a/datafusion-cli/src/helper.rs b/datafusion-cli/src/helper.rs index 85f14c1736dc..28cba06723aa 100644 --- a/datafusion-cli/src/helper.rs +++ b/datafusion-cli/src/helper.rs @@ -259,52 +259,69 @@ mod tests { // should be valid let result = readline_direct( - Cursor::new(r"create external table test stored as csv location 'data.csv' delimiter ',';".as_bytes()), - &validator, - )?; + Cursor::new( + r"create external table test stored as csv location 'data.csv' options ('format.delimiter' ',');" + .as_bytes(), + ), + &validator, + )?; assert!(matches!(result, ValidationResult::Valid(None))); let result = readline_direct( - Cursor::new(r"create external table test stored as csv location 'data.csv' delimiter '\0';".as_bytes()), - &validator, - )?; + Cursor::new( + r"create external table test stored as csv location 'data.csv' options ('format.delimiter' '\0');" + .as_bytes()), + &validator, + )?; assert!(matches!(result, ValidationResult::Valid(None))); let result = readline_direct( - Cursor::new(r"create external table test stored as csv location 'data.csv' delimiter '\n';".as_bytes()), - &validator, - )?; + Cursor::new( + r"create external table test stored as csv location 'data.csv' options ('format.delimiter' '\n');" + .as_bytes()), + &validator, + )?; assert!(matches!(result, ValidationResult::Valid(None))); let result = readline_direct( - Cursor::new(r"create external table test stored as csv location 'data.csv' delimiter '\r';".as_bytes()), - &validator, - )?; + Cursor::new( + r"create external table test stored as csv location 'data.csv' options ('format.delimiter' '\r');" + .as_bytes()), + &validator, + )?; assert!(matches!(result, ValidationResult::Valid(None))); let result = readline_direct( -
Cursor::new(r"create external table test stored as csv location 'data.csv' delimiter '\t';".as_bytes()), - &validator, - )?; + Cursor::new( + r"create external table test stored as csv location 'data.csv' options ('format.delimiter' '\t');" + .as_bytes()), + &validator, + )?; assert!(matches!(result, ValidationResult::Valid(None))); let result = readline_direct( - Cursor::new(r"create external table test stored as csv location 'data.csv' delimiter '\\';".as_bytes()), - &validator, - )?; + Cursor::new( + r"create external table test stored as csv location 'data.csv' options ('format.delimiter' '\\');" + .as_bytes()), + &validator, + )?; assert!(matches!(result, ValidationResult::Valid(None))); - // should be invalid let result = readline_direct( - Cursor::new(r"create external table test stored as csv location 'data.csv' delimiter ',,';".as_bytes()), - &validator, - )?; - assert!(matches!(result, ValidationResult::Invalid(Some(_)))); + Cursor::new( + r"create external table test stored as csv location 'data.csv' options ('format.delimiter' ',,');" + .as_bytes()), + &validator, + )?; + assert!(matches!(result, ValidationResult::Valid(None))); + // should be invalid let result = readline_direct( - Cursor::new(r"create external table test stored as csv location 'data.csv' delimiter '\u{07}';".as_bytes()), - &validator, - )?; + Cursor::new( + r"create external table test stored as csv location 'data.csv' options ('format.delimiter' '\u{07}');" + .as_bytes()), + &validator, + )?; assert!(matches!(result, ValidationResult::Invalid(Some(_)))); Ok(()) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 18cd83a47097..1afac3549117 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -1564,7 +1564,7 @@ config_namespace_with_hashmap! { config_namespace! { /// Options controlling CSV format pub struct CsvOptions { - pub has_header: bool, default = true + pub has_header: Option<bool>, default = None pub delimiter: u8, default = b',' pub quote: u8, default = b'"' pub escape: Option<u8>, default = None @@ -1600,13 +1600,13 @@ impl CsvOptions { /// Set true to indicate that the first line is a header. /// - default to true pub fn with_has_header(mut self, has_header: bool) -> Self { - self.has_header = has_header; + self.has_header = Some(has_header); self } /// True if the first line is a header. pub fn has_header(&self) -> bool { - self.has_header + self.has_header.unwrap_or(false) } /// The character separating values within a row.
diff --git a/datafusion/common/src/file_options/csv_writer.rs b/datafusion/common/src/file_options/csv_writer.rs index 5f1a62682f8d..2904ea0f8f03 100644 --- a/datafusion/common/src/file_options/csv_writer.rs +++ b/datafusion/common/src/file_options/csv_writer.rs @@ -50,7 +50,7 @@ impl TryFrom<&CsvOptions> for CsvWriterOptions { fn try_from(value: &CsvOptions) -> Result<Self> { let mut builder = WriterBuilder::default() - .with_header(value.has_header) + .with_header(value.has_header.unwrap_or(false)) .with_delimiter(value.delimiter); if let Some(v) = &value.date_format { diff --git a/datafusion/core/src/catalog/listing_schema.rs b/datafusion/core/src/catalog/listing_schema.rs index a5960b21dff5..15e6764b1033 100644 --- a/datafusion/core/src/catalog/listing_schema.rs +++ b/datafusion/core/src/catalog/listing_schema.rs @@ -27,7 +27,6 @@ use crate::datasource::provider::TableProviderFactory; use crate::datasource::TableProvider; use crate::execution::context::SessionState; -use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::{Constraints, DFSchema, DataFusionError, TableReference}; use datafusion_expr::CreateExternalTable; @@ -140,11 +139,9 @@ impl ListingSchemaProvider { location: table_url, file_type: self.format.clone(), has_header: self.has_header, - delimiter: ',', table_partition_cols: vec![], if_not_exists: false, definition: None, - file_compression_type: CompressionTypeVariant::UNCOMPRESSED, order_exprs: vec![], unbounded: false, options: Default::default(), diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index 645f98cd3fb0..2e5054d4366c 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -136,13 +136,13 @@ impl CsvFormat { /// Set true to indicate that the first line is a header. /// - default to true pub fn with_has_header(mut self, has_header: bool) -> Self { - self.options.has_header = has_header; + self.options.has_header = Some(has_header); self } /// True if the first line is a header. pub fn has_header(&self) -> bool { - self.options.has_header + self.options.has_header.unwrap_or(false) } /// The character separating values within a row. @@ -200,7 +200,7 @@ impl FileFormat for CsvFormat { async fn infer_schema( &self, - _state: &SessionState, + state: &SessionState, store: &Arc<dyn ObjectStore>, objects: &[ObjectMeta], ) -> Result<SchemaRef> { for object in objects { let stream = self.read_to_delimited_chunks(store, object).await; let (schema, records_read) = self - .infer_schema_from_stream(records_to_read, stream) + .infer_schema_from_stream(state, records_to_read, stream) .await?; records_to_read -= records_read; schemas.push(schema); @@ -236,13 +236,15 @@ impl FileFormat for CsvFormat { async fn create_physical_plan( &self, - _state: &SessionState, + state: &SessionState, conf: FileScanConfig, _filters: Option<&Arc<dyn PhysicalExpr>>, ) -> Result<Arc<dyn ExecutionPlan>> { let exec = CsvExec::new( conf, - self.options.has_header, + self.options + .has_header + .unwrap_or(state.config().options().catalog.has_header), self.options.delimiter, self.options.quote, self.options.escape, @@ -286,6 +288,7 @@ impl CsvFormat { /// number of lines that were read async fn infer_schema_from_stream( &self, + state: &SessionState, mut records_to_read: usize, stream: impl Stream<Item = Result<Bytes>>, ) -> Result<(Schema, usize)> { @@ -298,7 +301,12 @@ while let Some(chunk) = stream.next().await.transpose()?
{ let format = arrow::csv::reader::Format::default() - .with_header(self.options.has_header && first_chunk) + .with_header( + self.options + .has_header + .unwrap_or(state.config_options().catalog.has_header) + && first_chunk, + ) .with_delimiter(self.options.delimiter); let (Schema { fields, .. }, records_read) = @@ -538,6 +546,7 @@ mod tests { use datafusion_common::cast::as_string_array; use datafusion_common::stats::Precision; use datafusion_common::{internal_err, GetExt}; + use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; use datafusion_expr::{col, lit}; use chrono::DateTime; @@ -554,7 +563,8 @@ mod tests { let task_ctx = state.task_ctx(); // skip column 9 that overflows the automatically discovered column type of i64 (u64 would work) let projection = Some(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12]); - let exec = get_exec(&state, "aggregate_test_100.csv", projection, None).await?; + let exec = + get_exec(&state, "aggregate_test_100.csv", projection, None, true).await?; let stream = exec.execute(0, task_ctx)?; let tt_batches: i32 = stream @@ -582,7 +592,7 @@ let task_ctx = session_ctx.task_ctx(); let projection = Some(vec![0, 1, 2, 3]); let exec = - get_exec(&state, "aggregate_test_100.csv", projection, Some(1)).await?; + get_exec(&state, "aggregate_test_100.csv", projection, Some(1), true).await?; let batches = collect(exec, task_ctx).await?; assert_eq!(1, batches.len()); assert_eq!(4, batches[0].num_columns()); @@ -597,7 +607,8 @@ let state = session_ctx.state(); let projection = None; - let exec = get_exec(&state, "aggregate_test_100.csv", projection, None).await?; + let exec = + get_exec(&state, "aggregate_test_100.csv", projection, None, true).await?; let x: Vec<String> = exec .schema() @@ -633,7 +644,8 @@ let state = session_ctx.state(); let task_ctx = session_ctx.task_ctx(); let projection = Some(vec![0]); - let exec = get_exec(&state, "aggregate_test_100.csv", projection, None).await?; + let exec = + get_exec(&state, "aggregate_test_100.csv", projection, None, true).await?; let batches = collect(exec, task_ctx).await.expect("Collect batches"); @@ -716,6 +728,10 @@ async fn query_compress_data( file_compression_type: FileCompressionType, ) -> Result<()> { + let runtime = Arc::new(RuntimeEnv::new(RuntimeConfig::new()).unwrap()); + let cfg = SessionConfig::new().set_str("datafusion.catalog.has_header", "true"); + let session_state = SessionState::new_with_config_rt(cfg, runtime); + let integration = LocalFileSystem::new_with_prefix(arrow_test_data()).unwrap(); let path = Path::from("csv/aggregate_test_100.csv"); @@ -757,7 +773,7 @@ .read_to_delimited_chunks_from_stream(compressed_stream.unwrap()) .await; let (schema, records_read) = compressed_csv - .infer_schema_from_stream(records_to_read, decoded_stream) + .infer_schema_from_stream(&session_state, records_to_read, decoded_stream) .await?; assert_eq!(expected, schema); @@ -803,9 +819,11 @@ file_name: &str, projection: Option<Vec<usize>>, limit: Option<usize>, + has_header: bool, ) -> Result<Arc<dyn ExecutionPlan>> { let root = format!("{}/csv", crate::test_util::arrow_test_data()); - let format = CsvFormat::default(); + let mut format = CsvFormat::default(); + format = format.with_has_header(has_header); scan_format(state, &format, &root, file_name, projection, limit).await } diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs index 1a0eb34d1234..8380808d5f3b 100644 ---
a/datafusion/core/src/datasource/listing_table_factory.rs +++ b/datafusion/core/src/datasource/listing_table_factory.rs @@ -34,7 +34,7 @@ use crate::datasource::TableProvider; use crate::execution::context::SessionState; use arrow::datatypes::{DataType, SchemaRef}; -use datafusion_common::{arrow_datafusion_err, DataFusionError, FileType}; +use datafusion_common::{arrow_datafusion_err, plan_err, DataFusionError, FileType}; use datafusion_expr::CreateExternalTable; use async_trait::async_trait; @@ -67,9 +67,13 @@ impl TableProviderFactory for ListingTableFactory { let file_format: Arc<dyn FileFormat> = match file_type { FileType::CSV => { let mut csv_options = table_options.csv; - csv_options.has_header = cmd.has_header; - csv_options.delimiter = cmd.delimiter as u8; - csv_options.compression = cmd.file_compression_type; + if let Some(has_header) = csv_options.has_header { + if cmd.has_header && !has_header { + return plan_err!("Conflicting header options for CSV file"); + } + } else { + csv_options.has_header = Some(cmd.has_header); + } Arc::new(CsvFormat::default().with_options(csv_options)) } #[cfg(feature = "parquet")] @@ -78,9 +82,7 @@ } FileType::AVRO => Arc::new(AvroFormat), FileType::JSON => { - let mut json_options = table_options.json; - json_options.compression = cmd.file_compression_type; - Arc::new(JsonFormat::default().with_options(json_options)) + Arc::new(JsonFormat::default().with_options(table_options.json)) } FileType::ARROW => Arc::new(ArrowFormat), }; @@ -172,7 +174,6 @@ mod tests { use super::*; use crate::execution::context::SessionContext; - use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::{Constraints, DFSchema, TableReference}; #[tokio::test] @@ -192,11 +193,9 @@ location: csv_file.path().to_str().unwrap().to_string(), file_type: "csv".to_string(), has_header: true, - delimiter: ',', schema: Arc::new(DFSchema::empty()), table_partition_cols: vec![], if_not_exists: false, - file_compression_type: CompressionTypeVariant::UNCOMPRESSED, definition: None, order_exprs: vec![], unbounded: false, @@ -233,11 +232,9 @@ location: csv_file.path().to_str().unwrap().to_string(), file_type: "csv".to_string(), has_header: true, - delimiter: ',', schema: Arc::new(DFSchema::empty()), table_partition_cols: vec![], if_not_exists: false, - file_compression_type: CompressionTypeVariant::UNCOMPRESSED, definition: None, order_exprs: vec![], unbounded: false, diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index 879461c2eb1e..cc7c837e471e 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -610,7 +610,8 @@ mod tests { async fn csv_exec_with_mixed_order_projection( file_compression_type: FileCompressionType, ) -> Result<()> { - let session_ctx = SessionContext::new(); + let cfg = SessionConfig::new().set_str("datafusion.catalog.has_header", "true"); + let session_ctx = SessionContext::new_with_config(cfg); let task_ctx = session_ctx.task_ctx(); let file_schema = aggr_test_schema(); let path = format!("{}/csv", arrow_test_data()); @@ -675,7 +676,8 @@ async fn csv_exec_with_limit( file_compression_type: FileCompressionType, ) -> Result<()> { - let session_ctx = SessionContext::new(); + let cfg = SessionConfig::new().set_str("datafusion.catalog.has_header", "true"); + let session_ctx = SessionContext::new_with_config(cfg); let task_ctx =
session_ctx.task_ctx(); let file_schema = aggr_test_schema(); let path = format!("{}/csv", arrow_test_data()); @@ -1017,7 +1019,9 @@ mod tests { // create partitioned input file and context let tmp_dir = TempDir::new()?; let ctx = SessionContext::new_with_config( - SessionConfig::new().with_target_partitions(8), + SessionConfig::new() + .with_target_partitions(8) + .set_str("datafusion.catalog.has_header", "false"), ); let schema = populate_csv_partitions(&tmp_dir, 8, ".csv")?; @@ -1045,7 +1049,9 @@ .await?; // create a new context and verify that the results were saved to a partitioned csv file - let ctx = SessionContext::new(); + let ctx = SessionContext::new_with_config( + SessionConfig::new().set_str("datafusion.catalog.has_header", "false"), + ); let schema = Arc::new(Schema::new(vec![ Field::new("c1", DataType::UInt32, false), @@ -1074,7 +1080,7 @@ panic!("Did not find part_0 in csv output files!") } // register each partition as well as the top level dir - let csv_read_option = CsvReadOptions::new().schema(&schema); + let csv_read_option = CsvReadOptions::new().schema(&schema).has_header(false); ctx.register_csv( "part0", &format!("{out_dir}/{part_0_name}"), diff --git a/datafusion/core/tests/path_partition.rs b/datafusion/core/tests/path_partition.rs index dd8eb52f67c7..6dfed4be795b 100644 --- a/datafusion/core/tests/path_partition.rs +++ b/datafusion/core/tests/path_partition.rs @@ -42,6 +42,7 @@ use datafusion_common::ScalarValue; use async_trait::async_trait; use bytes::Bytes; use chrono::{TimeZone, Utc}; +use datafusion_execution::config::SessionConfig; use futures::stream; use futures::stream::BoxStream; use object_store::{ @@ -202,7 +203,9 @@ fn extract_as_utf(v: &ScalarValue) -> Option<String> { #[tokio::test] async fn csv_filter_with_file_col() -> Result<()> { - let ctx = SessionContext::new(); + let ctx = SessionContext::new_with_config( + SessionConfig::new().set_str("datafusion.catalog.has_header", "true"), + ); register_partitioned_aggregate_csv( &ctx, @@ -238,7 +241,9 @@ #[tokio::test] async fn csv_filter_with_file_nonstring_col() -> Result<()> { - let ctx = SessionContext::new(); + let ctx = SessionContext::new_with_config( + SessionConfig::new().set_str("datafusion.catalog.has_header", "true"), + ); register_partitioned_aggregate_csv( &ctx, @@ -274,7 +279,9 @@ #[tokio::test] async fn csv_projection_on_partition() -> Result<()> { - let ctx = SessionContext::new(); + let ctx = SessionContext::new_with_config( + SessionConfig::new().set_str("datafusion.catalog.has_header", "true"), + ); register_partitioned_aggregate_csv( &ctx, @@ -310,7 +317,9 @@ #[tokio::test] async fn csv_grouping_by_partition() -> Result<()> { - let ctx = SessionContext::new(); + let ctx = SessionContext::new_with_config( + SessionConfig::new().set_str("datafusion.catalog.has_header", "true"), + ); register_partitioned_aggregate_csv( &ctx, diff --git a/datafusion/expr/src/logical_plan/ddl.rs b/datafusion/expr/src/logical_plan/ddl.rs index 8d72c9a8b036..5b1c377dc1e3 100644 --- a/datafusion/expr/src/logical_plan/ddl.rs +++ b/datafusion/expr/src/logical_plan/ddl.rs @@ -25,7 +25,6 @@ use std::{ use crate::{Expr, LogicalPlan, Volatility}; use arrow::datatypes::DataType; -use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::{Constraints, DFSchemaRef, SchemaReference, TableReference}; use
sqlparser::ast::Ident; @@ -192,8 +191,6 @@ pub struct CreateExternalTable { pub file_type: String, /// Whether the CSV file contains a header pub has_header: bool, - /// Delimiter for CSV - pub delimiter: char, /// Partition Columns pub table_partition_cols: Vec<String>, /// Option to not error if table already exists @@ -202,8 +199,6 @@ pub definition: Option<String>, /// Order expressions supplied by user pub order_exprs: Vec<Vec<Expr>>, - /// File compression type (GZIP, BZIP2, XZ, ZSTD) - pub file_compression_type: CompressionTypeVariant, /// Whether the table is an infinite stream pub unbounded: bool, /// Table(provider) specific options @@ -222,11 +217,9 @@ impl Hash for CreateExternalTable { self.location.hash(state); self.file_type.hash(state); self.has_header.hash(state); - self.delimiter.hash(state); self.table_partition_cols.hash(state); self.if_not_exists.hash(state); self.definition.hash(state); - self.file_compression_type.hash(state); self.order_exprs.hash(state); self.unbounded.hash(state); self.options.len().hash(state); // HashMap is not hashable diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index c057ab8acda7..3da326ff37fe 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -201,23 +201,20 @@ message Constraints{ message CreateExternalTableNode { reserved 1; // was string name - TableReference name = 12; + TableReference name = 10; string location = 2; string file_type = 3; bool has_header = 4; DfSchema schema = 5; repeated string table_partition_cols = 6; bool if_not_exists = 7; - string delimiter = 8; - string definition = 9; - reserved 10; // was string file_compression_type - CompressionTypeVariant file_compression_type = 17; - repeated LogicalExprNodeCollection order_exprs = 13; - bool unbounded = 14; - map<string, string> options = 11; - Constraints constraints = 15; - map<string, LogicalExprNode> column_defaults = 16; -} + string definition = 8; + repeated LogicalExprNodeCollection order_exprs = 11; + bool unbounded = 12; + map<string, string> options = 9; + Constraints constraints = 13; + map<string, LogicalExprNode> column_defaults = 14; + } message PrepareNode { string name = 1; @@ -1106,7 +1103,7 @@ message CsvWriterOptions { // Options controlling CSV format message CsvOptions { - bool has_header = 1; // Indicates if the CSV has a header row + bytes has_header = 1; // Indicates if the CSV has a header row bytes delimiter = 2; // Delimiter character as a byte bytes quote = 3; // Quote character as a byte bytes escape = 4; // Optional escape character as a byte diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 994703c5fcfb..a836ed5abcc5 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -4688,15 +4688,9 @@ impl serde::Serialize for CreateExternalTableNode { if self.if_not_exists { len += 1; } - if !self.delimiter.is_empty() { - len += 1; - } if !self.definition.is_empty() { len += 1; } - if self.file_compression_type != 0 { - len += 1; - } if !self.order_exprs.is_empty() { len += 1; } @@ -4734,17 +4728,9 @@ if self.if_not_exists { struct_ser.serialize_field("ifNotExists", &self.if_not_exists)?; } - if !self.delimiter.is_empty() { - struct_ser.serialize_field("delimiter", &self.delimiter)?; - } if !self.definition.is_empty() { struct_ser.serialize_field("definition", &self.definition)?; } - if self.file_compression_type != 0 { - let v =
CompressionTypeVariant::try_from(self.file_compression_type) - .map_err(|_| serde::ser::Error::custom(format!("Invalid variant {}", self.file_compression_type)))?; - struct_ser.serialize_field("fileCompressionType", &v)?; - } if !self.order_exprs.is_empty() { struct_ser.serialize_field("orderExprs", &self.order_exprs)?; } @@ -4781,10 +4767,7 @@ impl<'de> serde::Deserialize<'de> for CreateExternalTableNode { "tablePartitionCols", "if_not_exists", "ifNotExists", - "delimiter", "definition", - "file_compression_type", - "fileCompressionType", "order_exprs", "orderExprs", "unbounded", @@ -4803,9 +4786,7 @@ Schema, TablePartitionCols, IfNotExists, - Delimiter, Definition, - FileCompressionType, OrderExprs, Unbounded, Options, @@ -4839,9 +4820,7 @@ "schema" => Ok(GeneratedField::Schema), "tablePartitionCols" | "table_partition_cols" => Ok(GeneratedField::TablePartitionCols), "ifNotExists" | "if_not_exists" => Ok(GeneratedField::IfNotExists), - "delimiter" => Ok(GeneratedField::Delimiter), "definition" => Ok(GeneratedField::Definition), - "fileCompressionType" | "file_compression_type" => Ok(GeneratedField::FileCompressionType), "orderExprs" | "order_exprs" => Ok(GeneratedField::OrderExprs), "unbounded" => Ok(GeneratedField::Unbounded), "options" => Ok(GeneratedField::Options), @@ -4873,9 +4852,7 @@ let mut schema__ = None; let mut table_partition_cols__ = None; let mut if_not_exists__ = None; - let mut delimiter__ = None; let mut definition__ = None; - let mut file_compression_type__ = None; let mut order_exprs__ = None; let mut unbounded__ = None; let mut options__ = None; @@ -4925,24 +4902,12 @@ } if_not_exists__ = Some(map_.next_value()?); } - GeneratedField::Delimiter => { - if delimiter__.is_some() { - return Err(serde::de::Error::duplicate_field("delimiter")); - } - delimiter__ = Some(map_.next_value()?); - } GeneratedField::Definition => { if definition__.is_some() { return Err(serde::de::Error::duplicate_field("definition")); } definition__ = Some(map_.next_value()?); } - GeneratedField::FileCompressionType => { - if file_compression_type__.is_some() { - return Err(serde::de::Error::duplicate_field("fileCompressionType")); - } - file_compression_type__ = Some(map_.next_value::<CompressionTypeVariant>()?
as i32); - } GeneratedField::OrderExprs => { if order_exprs__.is_some() { return Err(serde::de::Error::duplicate_field("orderExprs")); } @@ -4987,9 +4952,7 @@ schema: schema__, table_partition_cols: table_partition_cols__.unwrap_or_default(), if_not_exists: if_not_exists__.unwrap_or_default(), - delimiter: delimiter__.unwrap_or_default(), definition: definition__.unwrap_or_default(), - file_compression_type: file_compression_type__.unwrap_or_default(), order_exprs: order_exprs__.unwrap_or_default(), unbounded: unbounded__.unwrap_or_default(), options: options__.unwrap_or_default(), @@ -5459,7 +5422,7 @@ impl serde::Serialize for CsvOptions { { use serde::ser::SerializeStruct; let mut len = 0; - if self.has_header { + if !self.has_header.is_empty() { len += 1; } if !self.delimiter.is_empty() { len += 1; } len += 1; } len += 1; } len += 1; } len += 1; } len += 1; } len += 1; } len += 1; } len += 1; } let mut struct_ser = serializer.serialize_struct("datafusion.CsvOptions", len)?; - if self.has_header { - struct_ser.serialize_field("hasHeader", &self.has_header)?; + if !self.has_header.is_empty() { + #[allow(clippy::needless_borrow)] + struct_ser.serialize_field("hasHeader", pbjson::private::base64::encode(&self.has_header).as_str())?; } if !self.delimiter.is_empty() { #[allow(clippy::needless_borrow)] @@ -5654,7 +5618,9 @@ impl<'de> serde::Deserialize<'de> for CsvOptions { if has_header__.is_some() { return Err(serde::de::Error::duplicate_field("hasHeader")); } - has_header__ = Some(map_.next_value()?); + has_header__ = + Some(map_.next_value::<::pbjson::private::BytesDeserialize<_>>()?.0) + ; } GeneratedField::Delimiter => { if delimiter__.is_some() { diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index fc23a9ea05f7..3b48a8050031 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -320,7 +320,7 @@ pub struct Constraints { #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct CreateExternalTableNode { - #[prost(message, optional, tag = "12")] + #[prost(message, optional, tag = "10")] pub name: ::core::option::Option<TableReference>, #[prost(string, tag = "2")] pub location: ::prost::alloc::string::String, @@ -335,23 +335,19 @@ pub if_not_exists: bool, #[prost(string, tag = "8")] - pub delimiter: ::prost::alloc::string::String, - #[prost(string, tag = "9")] pub definition: ::prost::alloc::string::String, - #[prost(enumeration = "CompressionTypeVariant", tag = "17")] - pub file_compression_type: i32, - #[prost(message, repeated, tag = "13")] + #[prost(message, repeated, tag = "11")] pub order_exprs: ::prost::alloc::vec::Vec<LogicalExprNodeCollection>, - #[prost(bool, tag = "14")] + #[prost(bool, tag = "12")] pub unbounded: bool, - #[prost(map = "string, string", tag = "11")] + #[prost(map = "string, string", tag = "9")] pub options: ::std::collections::HashMap< ::prost::alloc::string::String, ::prost::alloc::string::String, >, - #[prost(message, optional, tag = "15")] + #[prost(message, optional, tag = "13")] pub constraints: ::core::option::Option<Constraints>, - #[prost(map = "string, message", tag = "16")] + #[prost(map = "string, message", tag = "14")] pub column_defaults: ::std::collections::HashMap< ::prost::alloc::string::String, LogicalExprNode, >, @@ -1684,8 +1680,8 @@ pub struct CsvWriterOptions { #[derive(Clone, PartialEq, ::prost::Message)] pub struct CsvOptions { /// Indicates if the CSV has a header row
- #[prost(bool, tag = "1")] - pub has_header: bool, + #[prost(bytes = "vec", tag = "1")] + pub has_header: ::prost::alloc::vec::Vec<u8>, /// Delimiter character as a byte #[prost(bytes = "vec", tag = "2")] pub delimiter: ::prost::alloc::vec::Vec<u8>, diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs index cccfb0c27307..474fc93ac104 100644 --- a/datafusion/proto/src/logical_plan/mod.rs +++ b/datafusion/proto/src/logical_plan/mod.rs @@ -539,37 +539,28 @@ impl AsLogicalPlan for LogicalPlanNode { column_defaults.insert(col_name.clone(), expr); } - let file_compression_type = protobuf::CompressionTypeVariant::try_from( - create_extern_table.file_compression_type, - ) - .map_err(|_| { - proto_error(format!( - "Unknown file compression type {}", - create_extern_table.file_compression_type - )) - })?; - - Ok(LogicalPlan::Ddl(DdlStatement::CreateExternalTable(CreateExternalTable { - schema: pb_schema.try_into()?, - name: from_table_reference(create_extern_table.name.as_ref(), "CreateExternalTable")?, - location: create_extern_table.location.clone(), - file_type: create_extern_table.file_type.clone(), - has_header: create_extern_table.has_header, - delimiter: create_extern_table.delimiter.chars().next().ok_or_else(|| { - DataFusionError::Internal(String::from("Protobuf deserialization error, unable to parse CSV delimiter")) - })?, - table_partition_cols: create_extern_table - .table_partition_cols - .clone(), - order_exprs, - if_not_exists: create_extern_table.if_not_exists, - file_compression_type: file_compression_type.into(), - definition, - unbounded: create_extern_table.unbounded, - options: create_extern_table.options.clone(), - constraints: constraints.into(), - column_defaults, - }))) + Ok(LogicalPlan::Ddl(DdlStatement::CreateExternalTable( + CreateExternalTable { + schema: pb_schema.try_into()?, + name: from_table_reference( + create_extern_table.name.as_ref(), + "CreateExternalTable", + )?, + location: create_extern_table.location.clone(), + file_type: create_extern_table.file_type.clone(), + has_header: create_extern_table.has_header, + table_partition_cols: create_extern_table + .table_partition_cols + .clone(), + order_exprs, + if_not_exists: create_extern_table.if_not_exists, + definition, + unbounded: create_extern_table.unbounded, + options: create_extern_table.options.clone(), + constraints: constraints.into(), + column_defaults, + }, + ))) } LogicalPlanType::CreateView(create_view) => { let plan = create_view @@ -1328,12 +1319,10 @@ location, file_type, has_header, - delimiter, schema: df_schema, table_partition_cols, if_not_exists, definition, - file_compression_type, order_exprs, unbounded, options, @@ -1359,9 +1348,6 @@ .insert(col_name.clone(), serialize_expr(expr, extension_codec)?); } - let file_compression_type = - protobuf::CompressionTypeVariant::from(file_compression_type); - Ok(protobuf::LogicalPlanNode { logical_plan_type: Some(LogicalPlanType::CreateExternalTable( protobuf::CreateExternalTableNode { name: Some(name.clone().into()), location: location.clone(), file_type: file_type.clone(), has_header: *has_header, schema: Some(df_schema.try_into()?), table_partition_cols: table_partition_cols.clone(), if_not_exists: *if_not_exists, - delimiter: String::from(*delimiter), order_exprs: converted_order_exprs, definition: definition.clone().unwrap_or_default(), - file_compression_type: file_compression_type.into(), unbounded: *unbounded, options: options.clone(), constraints:
Some(constraints.clone().into()), diff --git a/datafusion/proto/src/physical_plan/from_proto.rs b/datafusion/proto/src/physical_plan/from_proto.rs index 33e632b0d942..6e568c7fb872 100644 --- a/datafusion/proto/src/physical_plan/from_proto.rs +++ b/datafusion/proto/src/physical_plan/from_proto.rs @@ -806,7 +806,7 @@ impl TryFrom<&protobuf::CsvOptions> for CsvOptions { fn try_from(proto_opts: &protobuf::CsvOptions) -> Result<Self, Self::Error> { Ok(CsvOptions { - has_header: proto_opts.has_header, + has_header: proto_opts.has_header.first().map(|h| *h != 0), delimiter: proto_opts.delimiter[0], quote: proto_opts.quote[0], escape: proto_opts.escape.first().copied(), diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index a0a0ee72054b..cddc8ddcc227 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ b/datafusion/proto/src/physical_plan/to_proto.rs @@ -980,7 +980,10 @@ impl TryFrom<&CsvOptions> for protobuf::CsvOptions { fn try_from(opts: &CsvOptions) -> Result<Self, Self::Error> { let compression: protobuf::CompressionTypeVariant = opts.compression.into(); Ok(protobuf::CsvOptions { - has_header: opts.has_header, + has_header: opts + .has_header + .map(|h| h as u8) + .map_or_else(Vec::new, |e| vec![e]), delimiter: vec![opts.delimiter], quote: vec![opts.quote], escape: opts.escape.map_or_else(Vec::new, |e| vec![e]), diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 5a999ab21d30..607276f47b73 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -198,8 +198,6 @@ pub struct CreateExternalTable { pub file_type: String, /// CSV Header row? pub has_header: bool, - /// User defined delimiter for CSVs - pub delimiter: char, /// Path to file pub location: String, /// Partition Columns pub table_partition_cols: Vec<String>, /// Ordered expressions pub order_exprs: Vec<LexOrdering>, /// Option to not error if table already exists pub if_not_exists: bool, - /// File compression type (GZIP, BZIP2, XZ) - pub file_compression_type: CompressionTypeVariant, /// Infinite streams?
pub unbounded: bool, /// Table(provider) specific options @@ -816,14 +812,10 @@ impl<'a> DFParser<'a> { columns, file_type: builder.file_type.unwrap(), has_header: builder.has_header.unwrap_or(false), - delimiter: builder.delimiter.unwrap_or(','), location: builder.location.unwrap(), table_partition_cols: builder.table_partition_cols.unwrap_or(vec![]), order_exprs: builder.order_exprs, if_not_exists, - file_compression_type: builder - .file_compression_type - .unwrap_or(CompressionTypeVariant::UNCOMPRESSED), unbounded, options: builder.options.unwrap_or(HashMap::new()), constraints, @@ -917,7 +909,6 @@ mod tests { use super::*; use sqlparser::ast::Expr::Identifier; use sqlparser::ast::{BinaryOperator, DataType, Expr, Ident}; - use CompressionTypeVariant::UNCOMPRESSED; fn expect_parse_ok(sql: &str, expected: Statement) -> Result<(), ParserError> { let statements = DFParser::parse_sql(sql)?; @@ -970,12 +961,10 @@ mod tests { columns: vec![make_column_def("c1", DataType::Int(display))], file_type: "CSV".to_string(), has_header: false, - delimiter: ',', location: "foo.csv".into(), table_partition_cols: vec![], order_exprs: vec![], if_not_exists: false, - file_compression_type: UNCOMPRESSED, unbounded: false, options: HashMap::new(), constraints: vec![], @@ -989,12 +978,10 @@ mod tests { columns: vec![make_column_def("c1", DataType::Int(None))], file_type: "CSV".to_string(), has_header: false, - delimiter: ',', location: "foo.csv".into(), table_partition_cols: vec![], order_exprs: vec![], if_not_exists: false, - file_compression_type: UNCOMPRESSED, unbounded: false, options: HashMap::new(), constraints: vec![], @@ -1009,12 +996,10 @@ mod tests { columns: vec![make_column_def("c1", DataType::Int(None))], file_type: "CSV".to_string(), has_header: false, - delimiter: ',', location: "foo.csv".into(), table_partition_cols: vec![], order_exprs: vec![], if_not_exists: false, - file_compression_type: UNCOMPRESSED, unbounded: false, options: HashMap::new(), constraints: vec![], @@ -1022,21 +1007,22 @@ mod tests { expect_parse_ok(sql, expected)?; // positive case with delimiter - let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV DELIMITER '|' LOCATION 'foo.csv'"; + let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS ('format.delimiter' '|')"; let display = None; let expected = Statement::CreateExternalTable(CreateExternalTable { name: "t".into(), columns: vec![make_column_def("c1", DataType::Int(display))], file_type: "CSV".to_string(), has_header: false, - delimiter: '|', location: "foo.csv".into(), table_partition_cols: vec![], order_exprs: vec![], if_not_exists: false, - file_compression_type: UNCOMPRESSED, unbounded: false, - options: HashMap::new(), + options: vec!["format.delimiter".to_string()] + .into_iter() + .zip(vec!["|".to_string()]) + .collect(), constraints: vec![], }); expect_parse_ok(sql, expected)?; @@ -1049,12 +1035,10 @@ mod tests { columns: vec![make_column_def("c1", DataType::Int(display))], file_type: "CSV".to_string(), has_header: false, - delimiter: ',', location: "foo.csv".into(), table_partition_cols: vec!["p1".to_string(), "p2".to_string()], order_exprs: vec![], if_not_exists: false, - file_compression_type: UNCOMPRESSED, unbounded: false, options: HashMap::new(), constraints: vec![], @@ -1072,12 +1056,10 @@ mod tests { columns: vec![make_column_def("c1", DataType::Int(display))], file_type: "CSV".to_string(), has_header: true, - delimiter: ',', location: "foo.csv".into(), table_partition_cols: vec![], order_exprs: vec![], if_not_exists: 
false, - file_compression_type: UNCOMPRESSED, unbounded: false, options: HashMap::new(), constraints: vec![], @@ -1087,27 +1069,30 @@ mod tests { // positive case: it is ok for sql stmt with `COMPRESSION TYPE GZIP` tokens let sqls = vec![ - ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV COMPRESSION TYPE GZIP LOCATION 'foo.csv'", "GZIP"), - ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV COMPRESSION TYPE BZIP2 LOCATION 'foo.csv'", "BZIP2"), - ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV COMPRESSION TYPE XZ LOCATION 'foo.csv'", "XZ"), - ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV COMPRESSION TYPE ZSTD LOCATION 'foo.csv'", "ZSTD"), - ]; - for (sql, file_compression_type) in sqls { + ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS + ('format.key.compression' 'GZIP')", "GZIP"), + ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS + ('format.key.compression' 'BZIP2')", "BZIP2"), + ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS + ('format.key.compression' 'XZ')", "XZ"), + ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS + ('format.key.compression' 'ZSTD')", "ZSTD"), + ]; + for (sql, compression) in sqls { let expected = Statement::CreateExternalTable(CreateExternalTable { name: "t".into(), columns: vec![make_column_def("c1", DataType::Int(display))], file_type: "CSV".to_string(), has_header: false, - delimiter: ',', location: "foo.csv".into(), table_partition_cols: vec![], order_exprs: vec![], if_not_exists: false, - file_compression_type: CompressionTypeVariant::from_str( - file_compression_type, - )?, unbounded: false, - options: HashMap::new(), + options: vec!["format.key.compression".to_string()] + .into_iter() + .zip(vec![compression.to_string()]) + .collect(), constraints: vec![], }); expect_parse_ok(sql, expected)?; @@ -1120,12 +1105,10 @@ mod tests { columns: vec![], file_type: "PARQUET".to_string(), has_header: false, - delimiter: ',', location: "foo.parquet".into(), table_partition_cols: vec![], order_exprs: vec![], if_not_exists: false, - file_compression_type: UNCOMPRESSED, unbounded: false, options: HashMap::new(), constraints: vec![], @@ -1139,12 +1122,10 @@ mod tests { columns: vec![], file_type: "PARQUET".to_string(), has_header: false, - delimiter: ',', location: "foo.parquet".into(), table_partition_cols: vec![], order_exprs: vec![], if_not_exists: false, - file_compression_type: UNCOMPRESSED, unbounded: false, options: HashMap::new(), constraints: vec![], @@ -1158,12 +1139,10 @@ mod tests { columns: vec![], file_type: "AVRO".to_string(), has_header: false, - delimiter: ',', location: "foo.avro".into(), table_partition_cols: vec![], order_exprs: vec![], if_not_exists: false, - file_compression_type: UNCOMPRESSED, unbounded: false, options: HashMap::new(), constraints: vec![], @@ -1178,12 +1157,10 @@ mod tests { columns: vec![], file_type: "PARQUET".to_string(), has_header: false, - delimiter: ',', location: "foo.parquet".into(), table_partition_cols: vec![], order_exprs: vec![], if_not_exists: true, - file_compression_type: UNCOMPRESSED, unbounded: false, options: HashMap::new(), constraints: vec![], @@ -1201,12 +1178,10 @@ mod tests { ], file_type: "CSV".to_string(), has_header: false, - delimiter: ',', location: "foo.csv".into(), table_partition_cols: vec!["p1".to_string()], order_exprs: vec![], if_not_exists: false, - file_compression_type: UNCOMPRESSED, unbounded: false, options: HashMap::new(), constraints: vec![], @@ -1231,12 +1206,10 @@ mod tests { columns: 
vec![], file_type: "X".to_string(), has_header: false, - delimiter: ',', location: "blahblah".into(), table_partition_cols: vec![], order_exprs: vec![], if_not_exists: false, - file_compression_type: UNCOMPRESSED, unbounded: false, options: HashMap::from([("k1".into(), "v1".into())]), constraints: vec![], @@ -1251,12 +1224,10 @@ mod tests { columns: vec![], file_type: "X".to_string(), has_header: false, - delimiter: ',', location: "blahblah".into(), table_partition_cols: vec![], order_exprs: vec![], if_not_exists: false, - file_compression_type: UNCOMPRESSED, unbounded: false, options: HashMap::from([ ("k1".into(), "v1".into()), @@ -1293,7 +1264,6 @@ mod tests { columns: vec![make_column_def("c1", DataType::Int(None))], file_type: "CSV".to_string(), has_header: false, - delimiter: ',', location: "foo.csv".into(), table_partition_cols: vec![], order_exprs: vec![vec![OrderByExpr { @@ -1305,7 +1275,6 @@ mod tests { nulls_first, }]], if_not_exists: false, - file_compression_type: UNCOMPRESSED, unbounded: false, options: HashMap::new(), constraints: vec![], @@ -1324,7 +1293,6 @@ mod tests { ], file_type: "CSV".to_string(), has_header: false, - delimiter: ',', location: "foo.csv".into(), table_partition_cols: vec![], order_exprs: vec![vec![ @@ -1346,7 +1314,6 @@ mod tests { }, ]], if_not_exists: false, - file_compression_type: UNCOMPRESSED, unbounded: false, options: HashMap::new(), constraints: vec![], @@ -1364,7 +1331,6 @@ mod tests { ], file_type: "CSV".to_string(), has_header: false, - delimiter: ',', location: "foo.csv".into(), table_partition_cols: vec![], order_exprs: vec![vec![OrderByExpr { @@ -1383,7 +1349,6 @@ mod tests { nulls_first: None, }]], if_not_exists: false, - file_compression_type: UNCOMPRESSED, unbounded: false, options: HashMap::new(), constraints: vec![], @@ -1394,14 +1359,14 @@ mod tests { let sql = " CREATE UNBOUNDED EXTERNAL TABLE IF NOT EXISTS t (c1 int, c2 float) STORED AS PARQUET - DELIMITER '*' WITH HEADER ROW WITH ORDER (c1 - c2 ASC) - COMPRESSION TYPE zstd PARTITIONED BY (c1) LOCATION 'foo.parquet' - OPTIONS (ROW_GROUP_SIZE '1024', 'TRUNCATE' 'NO') - "; + OPTIONS (ROW_GROUP_SIZE '1024', 'TRUNCATE' 'NO', + 'format.compression' 'zstd', + 'format.delimiter' '*') + "; let expected = Statement::CreateExternalTable(CreateExternalTable { name: "t".into(), columns: vec![ @@ -1410,7 +1375,6 @@ mod tests { ], file_type: "PARQUET".to_string(), has_header: true, - delimiter: '*', location: "foo.parquet".into(), table_partition_cols: vec!["c1".into()], order_exprs: vec![vec![OrderByExpr { @@ -1429,9 +1393,10 @@ mod tests { nulls_first: None, }]], if_not_exists: true, - file_compression_type: CompressionTypeVariant::ZSTD, unbounded: true, options: HashMap::from([ + ("format.compression".into(), "zstd".into()), + ("format.delimiter".into(), "*".into()), ("ROW_GROUP_SIZE".into(), "1024".into()), ("TRUNCATE".into(), "NO".into()), ]), diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 137bb5fb20b7..ce440cace3cc 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -969,11 +969,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { columns, file_type, has_header, - delimiter, location, table_partition_cols, if_not_exists, - file_compression_type, order_exprs, unbounded, options, @@ -985,8 +983,14 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let inline_constraints = calc_inline_constraints_from_columns(&columns); all_constraints.extend(inline_constraints); + let compression = options + .get("format.compression") + 
.map(|comp| CompressionTypeVariant::from_str(comp)) + .transpose()?; if (file_type == "PARQUET" || file_type == "AVRO" || file_type == "ARROW") - && file_compression_type != CompressionTypeVariant::UNCOMPRESSED + && compression + .map(|comp| comp != CompressionTypeVariant::UNCOMPRESSED) + .unwrap_or(false) { plan_err!( "File compression type cannot be set for PARQUET, AVRO, or ARROW files." @@ -1018,11 +1022,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { location, file_type, has_header, - delimiter, table_partition_cols, if_not_exists, definition, - file_compression_type, order_exprs: ordered_exprs, unbounded, options, diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 1a300d11b0b2..781d2ce5bb06 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -1903,12 +1903,12 @@ fn create_external_table_csv_no_schema() { fn create_external_table_with_compression_type() { // positive case let sqls = vec![ - "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV COMPRESSION TYPE GZIP LOCATION 'foo.csv.gz'", - "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV COMPRESSION TYPE BZIP2 LOCATION 'foo.csv.bz2'", - "CREATE EXTERNAL TABLE t(c1 int) STORED AS JSON COMPRESSION TYPE GZIP LOCATION 'foo.json.gz'", - "CREATE EXTERNAL TABLE t(c1 int) STORED AS JSON COMPRESSION TYPE BZIP2 LOCATION 'foo.json.bz2'", - "CREATE EXTERNAL TABLE t(c1 int) STORED AS NONSTANDARD COMPRESSION TYPE GZIP LOCATION 'foo.unk'", - ]; + "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv.gz' OPTIONS ('format.compression' 'gzip')", + "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv.bz2' OPTIONS ('format.compression' 'bzip2')", + "CREATE EXTERNAL TABLE t(c1 int) STORED AS JSON LOCATION 'foo.json.gz' OPTIONS ('format.compression' 'gzip')", + "CREATE EXTERNAL TABLE t(c1 int) STORED AS JSON LOCATION 'foo.json.bz2' OPTIONS ('format.compression' 'bzip2')", + "CREATE EXTERNAL TABLE t(c1 int) STORED AS NONSTANDARD LOCATION 'foo.unk' OPTIONS ('format.compression' 'gzip')", + ]; for sql in sqls { let expected = "CreateExternalTable: Bare { table: \"t\" }"; quick_test(sql, expected); @@ -1916,12 +1916,12 @@ fn create_external_table_with_compression_type() { // negative case let sqls = vec![ - "CREATE EXTERNAL TABLE t STORED AS AVRO COMPRESSION TYPE GZIP LOCATION 'foo.avro'", - "CREATE EXTERNAL TABLE t STORED AS AVRO COMPRESSION TYPE BZIP2 LOCATION 'foo.avro'", - "CREATE EXTERNAL TABLE t STORED AS PARQUET COMPRESSION TYPE GZIP LOCATION 'foo.parquet'", - "CREATE EXTERNAL TABLE t STORED AS PARQUET COMPRESSION TYPE BZIP2 LOCATION 'foo.parquet'", - "CREATE EXTERNAL TABLE t STORED AS ARROW COMPRESSION TYPE GZIP LOCATION 'foo.arrow'", - "CREATE EXTERNAL TABLE t STORED AS ARROW COMPRESSION TYPE BZIP2 LOCATION 'foo.arrow'", + "CREATE EXTERNAL TABLE t STORED AS AVRO LOCATION 'foo.avro' OPTIONS ('format.compression' 'gzip')", + "CREATE EXTERNAL TABLE t STORED AS AVRO LOCATION 'foo.avro' OPTIONS ('format.compression' 'bzip2')", + "CREATE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet' OPTIONS ('format.compression' 'gzip')", + "CREATE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet' OPTIONS ('format.compression' 'bzip2')", + "CREATE EXTERNAL TABLE t STORED AS ARROW LOCATION 'foo.arrow' OPTIONS ('format.compression' 'gzip')", + "CREATE EXTERNAL TABLE t STORED AS ARROW LOCATION 'foo.arrow' OPTIONS ('format.compression' 'bzip2')", ]; for sql in sqls { let err = logical_plan(sql).expect_err("query should have failed"); diff 
--git a/datafusion/sqllogictest/test_files/copy.slt b/datafusion/sqllogictest/test_files/copy.slt index 882a4e220758..00bcea7ec154 100644 --- a/datafusion/sqllogictest/test_files/copy.slt +++ b/datafusion/sqllogictest/test_files/copy.slt @@ -384,7 +384,7 @@ COPY source_table to 'test_files/scratch/copy/table_json_gz' STORED AS JSON OPT # validate folder of csv files statement ok -CREATE EXTERNAL TABLE validate_json_gz STORED AS json COMPRESSION TYPE gzip LOCATION 'test_files/scratch/copy/table_json_gz'; +CREATE EXTERNAL TABLE validate_json_gz STORED AS json LOCATION 'test_files/scratch/copy/table_json_gz' OPTIONS ('format.compression' 'gzip'); query IT select * from validate_json_gz; @@ -400,7 +400,7 @@ COPY source_table to 'test_files/scratch/copy/table_csv' STORED AS CSV OPTIONS # validate folder of csv files statement ok -CREATE EXTERNAL TABLE validate_csv STORED AS csv COMPRESSION TYPE gzip LOCATION 'test_files/scratch/copy/table_csv'; +CREATE EXTERNAL TABLE validate_csv STORED AS csv LOCATION 'test_files/scratch/copy/table_csv' OPTIONS ('format.compression' 'gzip'); query IT select * from validate_csv; @@ -416,7 +416,7 @@ COPY source_table to 'test_files/scratch/copy/table.csv'; # Validate single csv output statement ok -CREATE EXTERNAL TABLE validate_single_csv STORED AS csv WITH HEADER ROW LOCATION 'test_files/scratch/copy/table.csv'; +CREATE EXTERNAL TABLE validate_single_csv STORED AS csv LOCATION 'test_files/scratch/copy/table.csv' OPTIONS ('format.has_header' 'false'); query IT select * from validate_single_csv; diff --git a/datafusion/sqllogictest/test_files/csv_files.slt b/datafusion/sqllogictest/test_files/csv_files.slt index be846028d545..1c501634fef5 100644 --- a/datafusion/sqllogictest/test_files/csv_files.slt +++ b/datafusion/sqllogictest/test_files/csv_files.slt @@ -22,9 +22,9 @@ c1 VARCHAR, c2 VARCHAR ) STORED AS CSV WITH HEADER ROW -DELIMITER ',' -OPTIONS ('format.quote' '~') -LOCATION '../core/tests/data/quote.csv'; +LOCATION '../core/tests/data/quote.csv' +OPTIONS ('format.quote' '~', + 'format.delimiter' ','); statement ok CREATE EXTERNAL TABLE csv_with_escape ( @@ -32,8 +32,8 @@ c1 VARCHAR, c2 VARCHAR ) STORED AS CSV WITH HEADER ROW -DELIMITER ',' -OPTIONS ('format.escape' '\') +OPTIONS ('format.escape' '\', + 'format.delimiter' ',') LOCATION '../core/tests/data/escape.csv'; query TT @@ -70,8 +70,8 @@ c1 VARCHAR, c2 VARCHAR ) STORED AS CSV WITH HEADER ROW -DELIMITER ',' -OPTIONS ('format.escape' '"') +OPTIONS ('format.escape' '"', + 'format.delimiter' ',') LOCATION '../core/tests/data/escape.csv'; # TODO: Validate this with better data. 
@@ -136,8 +136,8 @@ CREATE EXTERNAL TABLE partitioned_table ( partition_col INT ) STORED AS CSV -WITH HEADER ROW -LOCATION 'test_files/scratch/csv_files/csv_partitions'; +LOCATION 'test_files/scratch/csv_files/csv_partitions' +OPTIONS ('format.has_header' 'false'); query ITII SELECT * FROM partitioned_table ORDER BY int_col; @@ -160,4 +160,4 @@ logical_plan physical_plan 01)SortPreservingMergeExec: [int_col@0 ASC NULLS LAST] 02)--SortExec: expr=[int_col@0 ASC NULLS LAST], preserve_partitioning=[true] -03)----CsvExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/csv_files/csv_partitions/1.csv], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/csv_files/csv_partitions/2.csv]]}, projection=[int_col, string_col, bigint_col, partition_col], has_header=true +03)----CsvExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/csv_files/csv_partitions/1.csv], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/csv_files/csv_partitions/2.csv]]}, projection=[int_col, string_col, bigint_col, partition_col], has_header=false \ No newline at end of file diff --git a/datafusion/sqllogictest/test_files/ddl.slt b/datafusion/sqllogictest/test_files/ddl.slt index 682972b5572a..26b0e5561806 100644 --- a/datafusion/sqllogictest/test_files/ddl.slt +++ b/datafusion/sqllogictest/test_files/ddl.slt @@ -535,7 +535,7 @@ DROP VIEW y; # create_pipe_delimited_csv_table() statement ok -CREATE EXTERNAL TABLE aggregate_simple STORED AS CSV WITH HEADER ROW DELIMITER '|' LOCATION '../core/tests/data/aggregate_simple_pipe.csv'; +CREATE EXTERNAL TABLE aggregate_simple STORED AS CSV WITH HEADER ROW LOCATION '../core/tests/data/aggregate_simple_pipe.csv' OPTIONS ('format.delimiter' '|'); query RRB diff --git a/datafusion/sqllogictest/test_files/limit.slt b/datafusion/sqllogictest/test_files/limit.slt index faaa593d2c17..2238a4aaeaf4 100644 --- a/datafusion/sqllogictest/test_files/limit.slt +++ b/datafusion/sqllogictest/test_files/limit.slt @@ -529,9 +529,8 @@ CREATE UNBOUNDED EXTERNAL TABLE data ( "column1" INTEGER, "column2" VARCHAR, ) STORED AS CSV -WITH HEADER ROW WITH ORDER ("column1", "column2") -LOCATION 'test_files/scratch/limit/data.csv'; +LOCATION 'test_files/scratch/limit/data.csv' OPTIONS ('format.has_header' 'false'); query IT SELECT * from data LIMIT 3; diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index 0b43b6e31aaf..3f00bd2bb4ea 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -627,7 +627,8 @@ CREATE EXTERNAL TABLE IF NOT EXISTS orders ( o_clerk VARCHAR, o_shippriority INTEGER, o_comment VARCHAR, -) STORED AS CSV WITH ORDER (o_orderkey ASC) DELIMITER ',' WITH HEADER ROW LOCATION '../core/tests/tpch-csv/orders.csv'; +) STORED AS CSV WITH ORDER (o_orderkey ASC) WITH HEADER ROW LOCATION '../core/tests/tpch-csv/orders.csv' +OPTIONS ('format.delimiter' ','); query TT EXPLAIN SELECT o_orderkey, o_orderstatus FROM orders ORDER BY o_orderkey ASC diff --git a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_null.slt b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_null.slt index f933c90acc73..50567a5ef5c1 100644 --- a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_null.slt +++ b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_null.slt @@ -41,8 +41,7 @@ onlyif postgres statement ok COPY aggregate_test_100_by_sql FROM '../../testing/data/csv/aggregate_test_100.csv' - DELIMITER ',' - CSV 
HEADER; + CSV HEADER OPTIONS ('format.delimiter' ','); ### diff --git a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_simple.slt b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_simple.slt index b01ea73c8056..a0593433c16f 100644 --- a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_simple.slt +++ b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_simple.slt @@ -43,8 +43,7 @@ onlyif postgres statement ok COPY aggregate_test_100_by_sql FROM '../../testing/data/csv/aggregate_test_100.csv' - DELIMITER ',' - CSV HEADER; + CSV HEADER OPTIONS ('format.delimiter' ','); ### ## Setup test for datafusion diff --git a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_union.slt b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_union.slt index 05343de32268..2641d2ee8379 100644 --- a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_union.slt +++ b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_union.slt @@ -40,8 +40,7 @@ onlyif postgres statement ok COPY aggregate_test_100_by_sql FROM '../../testing/data/csv/aggregate_test_100.csv' - DELIMITER ',' - CSV HEADER; + CSV HEADER OPTIONS ('format.delimiter' ','); ### ## Setup test for datafusion diff --git a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_window.slt b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_window.slt index cec51d472075..de780adf6f32 100644 --- a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_window.slt +++ b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_window.slt @@ -40,8 +40,7 @@ onlyif postgres statement ok COPY aggregate_test_100_by_sql FROM '../../testing/data/csv/aggregate_test_100.csv' - DELIMITER ',' - CSV HEADER; + CSV HEADER OPTIONS ('format.delimiter' ','); ### ## Setup test for datafusion diff --git a/datafusion/sqllogictest/test_files/predicates.slt b/datafusion/sqllogictest/test_files/predicates.slt index caf79abcfa4e..12f3ebccea3c 100644 --- a/datafusion/sqllogictest/test_files/predicates.slt +++ b/datafusion/sqllogictest/test_files/predicates.slt @@ -621,7 +621,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS lineitem ( l_shipinstruct VARCHAR, l_shipmode VARCHAR, l_comment VARCHAR, -) STORED AS CSV DELIMITER ',' WITH HEADER ROW LOCATION '../core/tests/tpch-csv/lineitem.csv'; +) STORED AS CSV WITH HEADER ROW LOCATION '../core/tests/tpch-csv/lineitem.csv' OPTIONS ('format.delimiter' ','); statement ok CREATE EXTERNAL TABLE IF NOT EXISTS part ( @@ -634,7 +634,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS part ( p_container VARCHAR, p_retailprice DECIMAL(15, 2), p_comment VARCHAR, -) STORED AS CSV DELIMITER ',' WITH HEADER ROW LOCATION '../core/tests/tpch-csv/part.csv'; +) STORED AS CSV WITH HEADER ROW LOCATION '../core/tests/tpch-csv/part.csv' OPTIONS ('format.delimiter' ','); query TT EXPLAIN SELECT l_partkey FROM diff --git a/datafusion/sqllogictest/test_files/repartition_scan.slt b/datafusion/sqllogictest/test_files/repartition_scan.slt index 4a620a695fbf..6ce7d7211202 100644 --- a/datafusion/sqllogictest/test_files/repartition_scan.slt +++ b/datafusion/sqllogictest/test_files/repartition_scan.slt @@ -158,13 +158,13 @@ DROP TABLE parquet_table_with_order; # create a single csv file statement ok COPY (VALUES (1), (2), (3), (4), (5)) TO 'test_files/scratch/repartition_scan/csv_table/1.csv' -STORED AS CSV WITH HEADER ROW; +STORED AS CSV OPTIONS ('format.has_header' 'true'); statement ok CREATE EXTERNAL TABLE csv_table(column1 int) STORED AS csv -WITH HEADER ROW -LOCATION 'test_files/scratch/repartition_scan/csv_table/'; +LOCATION 
'test_files/scratch/repartition_scan/csv_table/' OPTIONS + ('format.has_header' 'true'); query I select * from csv_table ORDER BY column1; diff --git a/datafusion/sqllogictest/test_files/subquery.slt b/datafusion/sqllogictest/test_files/subquery.slt index 7196418af1c7..85f85c3a524c 100644 --- a/datafusion/sqllogictest/test_files/subquery.slt +++ b/datafusion/sqllogictest/test_files/subquery.slt @@ -66,7 +66,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS customer ( c_acctbal DECIMAL(15, 2), c_mktsegment VARCHAR, c_comment VARCHAR, -) STORED AS CSV DELIMITER ',' WITH HEADER ROW LOCATION '../core/tests/tpch-csv/customer.csv'; +) STORED AS CSV WITH HEADER ROW LOCATION '../core/tests/tpch-csv/customer.csv' OPTIONS ('format.delimiter' ','); statement ok CREATE EXTERNAL TABLE IF NOT EXISTS orders ( @@ -79,7 +79,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS orders ( o_clerk VARCHAR, o_shippriority INTEGER, o_comment VARCHAR, -) STORED AS CSV DELIMITER ',' WITH HEADER ROW LOCATION '../core/tests/tpch-csv/orders.csv'; +) STORED AS CSV WITH HEADER ROW LOCATION '../core/tests/tpch-csv/orders.csv' OPTIONS ('format.delimiter' ','); statement ok CREATE EXTERNAL TABLE IF NOT EXISTS lineitem ( @@ -99,7 +99,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS lineitem ( l_shipinstruct VARCHAR, l_shipmode VARCHAR, l_comment VARCHAR, -) STORED AS CSV DELIMITER ',' WITH HEADER ROW LOCATION '../core/tests/tpch-csv/lineitem.csv'; +) STORED AS CSV WITH HEADER ROW LOCATION '../core/tests/tpch-csv/lineitem.csv' OPTIONS ('format.delimiter' ','); # in_subquery_to_join_with_correlated_outer_filter query ITI rowsort diff --git a/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part b/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part index 2f5e2d5a7616..75bcbc198bef 100644 --- a/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part @@ -31,7 +31,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS supplier ( s_acctbal DECIMAL(15, 2), s_comment VARCHAR, s_rev VARCHAR, -) STORED AS CSV DELIMITER '|' LOCATION 'test_files/tpch/data/supplier.tbl'; +) STORED AS CSV LOCATION 'test_files/tpch/data/supplier.tbl' OPTIONS ('format.delimiter' '|'); statement ok CREATE EXTERNAL TABLE IF NOT EXISTS part ( @@ -45,7 +45,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS part ( p_retailprice DECIMAL(15, 2), p_comment VARCHAR, p_rev VARCHAR, -) STORED AS CSV DELIMITER '|' LOCATION 'test_files/tpch/data/part.tbl'; +) STORED AS CSV LOCATION 'test_files/tpch/data/part.tbl' OPTIONS ('format.delimiter' '|'); statement ok @@ -56,7 +56,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS partsupp ( ps_supplycost DECIMAL(15, 2), ps_comment VARCHAR, ps_rev VARCHAR, -) STORED AS CSV DELIMITER '|' LOCATION 'test_files/tpch/data/partsupp.tbl'; +) STORED AS CSV LOCATION 'test_files/tpch/data/partsupp.tbl' OPTIONS ('format.delimiter' '|'); statement ok CREATE EXTERNAL TABLE IF NOT EXISTS customer ( @@ -69,7 +69,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS customer ( c_mktsegment VARCHAR, c_comment VARCHAR, c_rev VARCHAR, -) STORED AS CSV DELIMITER '|' LOCATION 'test_files/tpch/data/customer.tbl'; +) STORED AS CSV LOCATION 'test_files/tpch/data/customer.tbl' OPTIONS ('format.delimiter' '|'); statement ok CREATE EXTERNAL TABLE IF NOT EXISTS orders ( @@ -83,7 +83,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS orders ( o_shippriority INTEGER, o_comment VARCHAR, o_rev VARCHAR, -) STORED AS CSV DELIMITER '|' LOCATION 'test_files/tpch/data/orders.tbl'; +) STORED AS CSV LOCATION 'test_files/tpch/data/orders.tbl' OPTIONS 
('format.delimiter' '|'); statement ok CREATE EXTERNAL TABLE IF NOT EXISTS lineitem ( @@ -104,7 +104,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS lineitem ( l_shipmode VARCHAR, l_comment VARCHAR, l_rev VARCHAR, -) STORED AS CSV DELIMITER '|' LOCATION 'test_files/tpch/data/lineitem.tbl'; +) STORED AS CSV LOCATION 'test_files/tpch/data/lineitem.tbl' OPTIONS ('format.delimiter' '|'); statement ok CREATE EXTERNAL TABLE IF NOT EXISTS nation ( @@ -113,7 +113,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS nation ( n_regionkey BIGINT, n_comment VARCHAR, n_rev VARCHAR, -) STORED AS CSV DELIMITER '|' LOCATION 'test_files/tpch/data/nation.tbl'; +) STORED AS CSV LOCATION 'test_files/tpch/data/nation.tbl' OPTIONS ('format.delimiter' '|'); statement ok CREATE EXTERNAL TABLE IF NOT EXISTS region ( @@ -121,4 +121,4 @@ CREATE EXTERNAL TABLE IF NOT EXISTS region ( r_name VARCHAR, r_comment VARCHAR, r_rev VARCHAR, -) STORED AS CSV DELIMITER '|' LOCATION 'test_files/tpch/data/region.tbl'; +) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl' OPTIONS ('format.delimiter' '|'); From d1611a9b9b98ba40d0116da8e8b7e608c3befc30 Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Tue, 7 May 2024 14:40:05 +0300 Subject: [PATCH 02/15] Keep PG copy from tests same --- .../sqllogictest/test_files/pg_compat/pg_compat_null.slt | 3 ++- .../sqllogictest/test_files/pg_compat/pg_compat_simple.slt | 3 ++- .../sqllogictest/test_files/pg_compat/pg_compat_union.slt | 3 ++- .../sqllogictest/test_files/pg_compat/pg_compat_window.slt | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_null.slt b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_null.slt index 50567a5ef5c1..f933c90acc73 100644 --- a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_null.slt +++ b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_null.slt @@ -41,7 +41,8 @@ onlyif postgres statement ok COPY aggregate_test_100_by_sql FROM '../../testing/data/csv/aggregate_test_100.csv' - CSV HEADER OPTIONS ('format.delimiter' ','); + DELIMITER ',' + CSV HEADER; ### diff --git a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_simple.slt b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_simple.slt index a0593433c16f..b01ea73c8056 100644 --- a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_simple.slt +++ b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_simple.slt @@ -43,7 +43,8 @@ onlyif postgres statement ok COPY aggregate_test_100_by_sql FROM '../../testing/data/csv/aggregate_test_100.csv' - CSV HEADER OPTIONS ('format.delimiter' ','); + DELIMITER ',' + CSV HEADER; ### ## Setup test for datafusion diff --git a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_union.slt b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_union.slt index 2641d2ee8379..05343de32268 100644 --- a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_union.slt +++ b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_union.slt @@ -40,7 +40,8 @@ onlyif postgres statement ok COPY aggregate_test_100_by_sql FROM '../../testing/data/csv/aggregate_test_100.csv' - CSV HEADER OPTIONS ('format.delimiter' ','); + DELIMITER ',' + CSV HEADER; ### ## Setup test for datafusion diff --git a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_window.slt b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_window.slt index de780adf6f32..cec51d472075 100644 --- a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_window.slt +++ 
b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_window.slt
@@ -40,7 +40,8 @@ onlyif postgres
 statement ok
 COPY aggregate_test_100_by_sql
 FROM '../../testing/data/csv/aggregate_test_100.csv'
-    CSV HEADER OPTIONS ('format.delimiter' ',');
+    DELIMITER ','
+    CSV HEADER;

 ###
 ## Setup test for datafusion

From 99fae814fb25375195738e8d4c348c8ad98672c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Berkay=20=C5=9Eahin?= <124376117+berkaysynnada@users.noreply.github.com>
Date: Wed, 8 May 2024 09:47:17 +0300
Subject: [PATCH 03/15] Update datafusion/common/src/config.rs

Co-authored-by: Andrew Lamb
---
 datafusion/common/src/config.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
index 1afac3549117..547bcfac7df3 100644
--- a/datafusion/common/src/config.rs
+++ b/datafusion/common/src/config.rs
@@ -1564,6 +1564,8 @@ config_namespace_with_hashmap! {
 config_namespace! {
     /// Options controlling CSV format
     pub struct CsvOptions {
+        /// If the first line of the CSV is column names.
+        /// If not specified, uses default from `CREATE TABLE` command, if any.
         pub has_header: Option<bool>, default = None
         pub delimiter: u8, default = b','
         pub quote: u8, default = b'"'

From 13c4db64e787fb79a2e42bc76c376e0ed9eb86c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Berkay=20=C5=9Eahin?= <124376117+berkaysynnada@users.noreply.github.com>
Date: Wed, 8 May 2024 09:48:24 +0300
Subject: [PATCH 04/15] Update datafusion/core/src/datasource/file_format/csv.rs

Co-authored-by: Andrew Lamb
---
 datafusion/core/src/datasource/file_format/csv.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs
index 2e5054d4366c..874bc2cf4e5d 100644
--- a/datafusion/core/src/datasource/file_format/csv.rs
+++ b/datafusion/core/src/datasource/file_format/csv.rs
@@ -819,7 +819,7 @@ mod tests {
         file_name: &str,
         projection: Option<Vec<usize>>,
         limit: Option<usize>,
-        hash_header: bool,
+        has_header: bool,
     ) -> Result<Vec<RecordBatch>> {

From c613a336f3dfb27d6f501f431e8da967b7a20801 Mon Sep 17 00:00:00 2001
From: berkaysynnada
Date: Thu, 9 May 2024 11:34:40 +0300
Subject: [PATCH 05/15] Remove WITH HEADER ROW

---
 datafusion/common/src/config.rs               | 13 +--
 datafusion/core/src/catalog/listing_schema.rs |  4 -
 .../core/src/datasource/file_format/csv.rs    | 19 ++--
 .../core/src/datasource/listing/table.rs      | 12 ---
 .../src/datasource/listing_table_factory.rs   | 18 +---
 datafusion/core/src/datasource/stream.rs      |  1 -
 datafusion/core/src/execution/context/mod.rs  | 11 +--
 datafusion/core/tests/fifo.rs                 |  8 +-
 datafusion/core/tests/sql/mod.rs              |  4 +-
 datafusion/expr/src/logical_plan/ddl.rs       |  3 -
 datafusion/proto/proto/datafusion.proto       | 21 ++--
 datafusion/proto/src/generated/pbjson.rs      | 18 ----
 datafusion/proto/src/generated/prost.rs       | 22 ++---
 datafusion/proto/src/logical_plan/mod.rs      |  3 -
 .../tests/cases/roundtrip_logical_plan.rs     |  8 +-
 datafusion/sql/src/parser.rs                  | 95 ++++---------------
 datafusion/sql/src/statement.rs               |  2 -
 .../test_files/agg_func_substitute.slt        |  4 +-
 .../sqllogictest/test_files/aggregate.slt     | 10 +-
 datafusion/sqllogictest/test_files/avro.slt   | 18 ++--
 .../test_files/create_external_table.slt      | 12 ---
 .../sqllogictest/test_files/csv_files.slt     | 12 +--
 datafusion/sqllogictest/test_files/cte.slt    | 10 +-
 datafusion/sqllogictest/test_files/ddl.slt    | 24 ++---
 .../sqllogictest/test_files/decimal.slt       |  8 +-
 .../sqllogictest/test_files/describe.slt      |  4 +-
 .../sqllogictest/test_files/distinct_on.slt   |  2 +-
 datafusion/sqllogictest/test_files/errors.slt |  2 +-
 .../sqllogictest/test_files/explain.slt       | 14 +--
 datafusion/sqllogictest/test_files/expr.slt   |  2 +-
 .../sqllogictest/test_files/functions.slt     |  2 +-
 datafusion/sqllogictest/test_files/group.slt  |  4 +-
 .../sqllogictest/test_files/group_by.slt      | 26 ++---
 .../sqllogictest/test_files/identifiers.slt   |  2 +-
 .../test_files/information_schema.slt         |  3 +-
 datafusion/sqllogictest/test_files/insert.slt |  2 +-
 .../test_files/insert_to_external.slt         |  2 +-
 .../join_disable_repartition_joins.slt        |  4 +-
 datafusion/sqllogictest/test_files/joins.slt  |  8 +-
 datafusion/sqllogictest/test_files/limit.slt  |  2 +-
 datafusion/sqllogictest/test_files/math.slt   |  2 +-
 .../test_files/monotonic_projection_test.slt  |  4 +-
 datafusion/sqllogictest/test_files/order.slt  | 12 +--
 .../sqllogictest/test_files/parquet.slt       | 30 +++---
 .../test_files/pg_compat/pg_compat_null.slt   |  2 +-
 .../test_files/pg_compat/pg_compat_simple.slt |  2 +-
 .../test_files/pg_compat/pg_compat_union.slt  |  2 +-
 .../test_files/pg_compat/pg_compat_window.slt |  2 +-
 .../sqllogictest/test_files/predicates.slt    |  8 +-
 .../sqllogictest/test_files/projection.slt    |  4 +-
 .../sqllogictest/test_files/references.slt    |  2 +-
 .../sqllogictest/test_files/repartition.slt   |  4 +-
 .../test_files/repartition_scan.slt           |  2 +-
 datafusion/sqllogictest/test_files/scalar.slt |  2 +-
 datafusion/sqllogictest/test_files/select.slt |  8 +-
 .../sqllogictest/test_files/subquery.slt      |  6 +-
 datafusion/sqllogictest/test_files/topk.slt   |  2 +-
 datafusion/sqllogictest/test_files/union.slt  | 10 +-
 .../sqllogictest/test_files/wildcard.slt      |  2 +-
 datafusion/sqllogictest/test_files/window.slt | 29 +++---
 docs/source/user-guide/cli/datasources.md     |  4 +-
 docs/source/user-guide/sql/ddl.md             | 23 ++---
 docs/source/user-guide/sql/write_options.md   |  4 +-
 63 files changed, 233 insertions(+), 372 deletions(-)

diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
index 547bcfac7df3..49d31257c8ba 100644
--- a/datafusion/common/src/config.rs
+++ b/datafusion/common/src/config.rs
@@ -1564,9 +1564,9 @@ config_namespace_with_hashmap! {
 config_namespace! {
     /// Options controlling CSV format
     pub struct CsvOptions {
-    /// If the first line of the CSV is column names.
-    /// If not specified, uses default from `CREATE TABLE` command, if any.
-    pub has_header: Option<bool>, default = None
+        /// If the first line of the CSV is column names.
+        /// If not specified, uses default from `CREATE TABLE` command, if any.
+        pub has_header: Option<bool>, default = None
         pub delimiter: u8, default = b','
         pub quote: u8, default = b'"'
         pub escape: Option<u8>, default = None
@@ -1606,9 +1606,10 @@ impl CsvOptions {
         self
     }

-    /// True if the first line is a header.
-    pub fn has_header(&self) -> bool {
-        self.has_header.unwrap_or(false)
+    /// True if the first line is a header. If not set by the format
+    /// options, the session state decides it.
+    pub fn has_header(&self, config_opt: &ConfigOptions) -> bool {
+        self.has_header.unwrap_or(config_opt.catalog.has_header)
     }

     /// The character separating values within a row.
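The `Option<bool>` above makes header detection two-level: an explicit 'format.has_header' option wins, otherwise the session-level `datafusion.catalog.has_header` default applies. A minimal, self-contained sketch of that resolution order (simplified stand-in types, not the real DataFusion definitions):

    // Stand-ins for the session-level and per-format option structs.
    struct CatalogOptions { has_header: bool }
    struct ConfigOptions { catalog: CatalogOptions }
    struct CsvOptions { has_header: Option<bool> }

    impl CsvOptions {
        // Per-format option wins; otherwise the session default decides.
        fn has_header(&self, config_opt: &ConfigOptions) -> bool {
            self.has_header.unwrap_or(config_opt.catalog.has_header)
        }
    }

    fn main() {
        let session = ConfigOptions { catalog: CatalogOptions { has_header: true } };
        let explicit = CsvOptions { has_header: Some(false) };
        let unset = CsvOptions { has_header: None };
        assert!(!explicit.has_header(&session)); // OPTIONS ('format.has_header' 'false') wins
        assert!(unset.has_header(&session));     // falls back to the session default
    }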
diff --git a/datafusion/core/src/catalog/listing_schema.rs b/datafusion/core/src/catalog/listing_schema.rs
index 15e6764b1033..29f3e4ad8181 100644
--- a/datafusion/core/src/catalog/listing_schema.rs
+++ b/datafusion/core/src/catalog/listing_schema.rs
@@ -57,7 +57,6 @@ pub struct ListingSchemaProvider {
     store: Arc<dyn ObjectStore>,
     tables: Arc<Mutex<HashMap<String, Arc<dyn TableProvider>>>>,
     format: String,
-    has_header: bool,
 }

 impl ListingSchemaProvider {
@@ -76,7 +75,6 @@ impl ListingSchemaProvider {
         factory: Arc<dyn TableProviderFactory>,
         store: Arc<dyn ObjectStore>,
         format: String,
-        has_header: bool,
     ) -> Self {
         Self {
             authority,
@@ -85,7 +83,6 @@ impl ListingSchemaProvider {
             store,
             tables: Arc::new(Mutex::new(HashMap::new())),
             format,
-            has_header,
         }
     }
@@ -138,7 +135,6 @@ impl ListingSchemaProvider {
                 name,
                 location: table_url,
                 file_type: self.format.clone(),
-                has_header: self.has_header,
                 table_partition_cols: vec![],
                 if_not_exists: false,
                 definition: None,
diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs
index 874bc2cf4e5d..ab2582b4e820 100644
--- a/datafusion/core/src/datasource/file_format/csv.rs
+++ b/datafusion/core/src/datasource/file_format/csv.rs
@@ -39,7 +39,7 @@ use arrow::array::RecordBatch;
 use arrow::csv::WriterBuilder;
 use arrow::datatypes::SchemaRef;
 use arrow::datatypes::{DataType, Field, Fields, Schema};
-use datafusion_common::config::CsvOptions;
+use datafusion_common::config::{ConfigOptions, CsvOptions};
 use datafusion_common::file_options::csv_writer::CsvWriterOptions;
 use datafusion_common::{exec_err, not_impl_err, DataFusionError, FileType};
 use datafusion_execution::TaskContext;
@@ -141,8 +141,8 @@ impl CsvFormat {
     }

     /// True if the first line is a header.
-    pub fn has_header(&self) -> bool {
-        self.options.has_header.unwrap_or(false)
+    pub fn has_header(&self, config_opt: &ConfigOptions) -> bool {
+        self.options.has_header(config_opt)
     }

     /// The character separating values within a row.
@@ -242,9 +242,9 @@ impl FileFormat for CsvFormat {
     ) -> Result<Arc<dyn ExecutionPlan>> {
         let exec = CsvExec::new(
             conf,
-            self.options
-                .has_header
-                .unwrap_or(state.config().options().catalog.has_header),
+            // If not set by the format options, the session state
+            // decides whether the first line is a header.
+            self.options.has_header(state.config_options()),
             self.options.delimiter,
             self.options.quote,
             self.options.escape,
@@ -302,10 +302,7 @@ impl CsvFormat {
         while let Some(chunk) = stream.next().await.transpose()? {
             let format = arrow::csv::reader::Format::default()
                 .with_header(
-                    self.options
-                        .has_header
-                        .unwrap_or(state.config_options().catalog.has_header)
-                        && first_chunk,
+                    self.options.has_header(state.config_options()) && first_chunk,
                 )
                 .with_delimiter(self.options.delimiter);
@@ -823,7 +820,7 @@ mod tests {
     ) -> Result<Vec<RecordBatch>> {
         let root = format!("{}/csv", crate::test_util::arrow_test_data());
         let mut format = CsvFormat::default();
-        format = format.with_has_header(hash_header);
+        format = format.with_has_header(has_header);
         scan_format(state, &format, &root, file_name, projection, limit).await
     }

diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs
index e6f763f52426..7488091fff70 100644
--- a/datafusion/core/src/datasource/listing/table.rs
+++ b/datafusion/core/src/datasource/listing/table.rs
@@ -1561,18 +1561,6 @@ mod tests {
         Ok(())
     }

-    #[tokio::test]
-    async fn test_insert_into_sql_csv_defaults_header_row() -> Result<()> {
-        helper_test_insert_into_sql(
-            "csv",
-            FileCompressionType::UNCOMPRESSED,
-            "WITH HEADER ROW",
-            None,
-        )
-        .await?;
-        Ok(())
-    }
-
     #[tokio::test]
     async fn test_insert_into_sql_json_defaults() -> Result<()> {
         helper_test_insert_into_sql("json", FileCompressionType::UNCOMPRESSED, "", None)
diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs
index 8380808d5f3b..1d33944330da 100644
--- a/datafusion/core/src/datasource/listing_table_factory.rs
+++ b/datafusion/core/src/datasource/listing_table_factory.rs
@@ -34,7 +34,7 @@ use crate::datasource::TableProvider;
 use crate::execution::context::SessionState;

 use arrow::datatypes::{DataType, SchemaRef};
-use datafusion_common::{arrow_datafusion_err, plan_err, DataFusionError, FileType};
+use datafusion_common::{arrow_datafusion_err, DataFusionError, FileType};
 use datafusion_expr::CreateExternalTable;

 use async_trait::async_trait;
@@ -66,15 +66,7 @@ impl TableProviderFactory for ListingTableFactory {
         let file_extension = get_extension(cmd.location.as_str());
         let file_format: Arc<dyn FileFormat> = match file_type {
             FileType::CSV => {
-                let mut csv_options = table_options.csv;
-                if let Some(has_header) = csv_options.has_header {
-                    if cmd.has_header && !has_header {
-                        return plan_err!("Conflicting header options for CSV file");
-                    }
-                } else {
-                    csv_options.has_header = Some(cmd.has_header);
-                }
-                Arc::new(CsvFormat::default().with_options(csv_options))
+                Arc::new(CsvFormat::default().with_options(table_options.csv))
             }
             #[cfg(feature = "parquet")]
             FileType::PARQUET => {
@@ -192,14 +184,13 @@ mod tests {
             name,
             location: csv_file.path().to_str().unwrap().to_string(),
             file_type: "csv".to_string(),
-            has_header: true,
             schema: Arc::new(DFSchema::empty()),
             table_partition_cols: vec![],
             if_not_exists: false,
             definition: None,
             order_exprs: vec![],
             unbounded: false,
-            options: HashMap::new(),
+            options: HashMap::from([("format.has_header".into(), "true".into())]),
             constraints: Constraints::empty(),
             column_defaults: HashMap::new(),
         };
@@ -231,14 +222,13 @@ mod tests {
             name,
             location: csv_file.path().to_str().unwrap().to_string(),
             file_type: "csv".to_string(),
-            has_header: true,
             schema: Arc::new(DFSchema::empty()),
             table_partition_cols: vec![],
             if_not_exists: false,
             definition: None,
             order_exprs: vec![],
             unbounded: false,
-            options,
+            options: HashMap::from([("format.has_header".into(), "true".into())]),
             constraints: Constraints::empty(),
             column_defaults: HashMap::new(),
         };
diff --git a/datafusion/core/src/datasource/stream.rs b/datafusion/core/src/datasource/stream.rs
index 0a64e7c516ef..5e485f42b516 100644
--- a/datafusion/core/src/datasource/stream.rs
+++ b/datafusion/core/src/datasource/stream.rs
@@ -62,7 +62,6 @@ impl TableProviderFactory for StreamTableFactory {
         let config = StreamConfig::new_file(schema, location.into())
             .with_encoding(encoding)
             .with_order(cmd.order_exprs.clone())
-            .with_header(cmd.has_header)
             .with_batch_size(state.config().batch_size())
             .with_constraints(cmd.constraints.clone());
diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs
index f3af31f895f9..6c2dc692dae0 100644
--- a/datafusion/core/src/execution/context/mod.rs
+++ b/datafusion/core/src/execution/context/mod.rs
@@ -1560,7 +1560,6 @@ impl SessionState {
         let url = url.to_string();
         let format = format.to_string();
-        let has_header = config.options().catalog.has_header;
         let url = Url::parse(url.as_str()).expect("Invalid default catalog location!");
         let authority = match url.host_str() {
             Some(host) => format!("{}://{}", url.scheme(), host),
@@ -1578,14 +1577,8 @@ impl SessionState {
             Some(factory) => factory,
             _ => return,
         };
-        let schema = ListingSchemaProvider::new(
-            authority,
-            path,
-            factory.clone(),
-            store,
-            format,
-            has_header,
-        );
+        let schema =
+            ListingSchemaProvider::new(authority, path, factory.clone(), store, format);
         let _ = default_catalog
             .register_schema("default", Arc::new(schema))
             .expect("Failed to register default schema");
diff --git a/datafusion/core/tests/fifo.rs b/datafusion/core/tests/fifo.rs
index 9b132f18c7a5..a63240d03d94 100644
--- a/datafusion/core/tests/fifo.rs
+++ b/datafusion/core/tests/fifo.rs
@@ -384,8 +384,8 @@ mod unix_test {
                 a2 INT NOT NULL
             )
             STORED AS CSV
-            WITH HEADER ROW
-            LOCATION '{source_display_fifo_path}'"
+            LOCATION '{source_display_fifo_path}'
+            OPTIONS ('format.has_header' 'true')"
         ))
         .await?;
@@ -396,8 +396,8 @@ mod unix_test {
                 a2 INT NOT NULL
             )
             STORED AS CSV
-            WITH HEADER ROW
-            LOCATION '{sink_display_fifo_path}'"
+            LOCATION '{sink_display_fifo_path}'
+            OPTIONS ('format.has_header' 'true')"
         ))
         .await?;
diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs
index 9b7828a777c8..00f091d77cff 100644
--- a/datafusion/core/tests/sql/mod.rs
+++ b/datafusion/core/tests/sql/mod.rs
@@ -85,8 +85,8 @@ async fn register_aggregate_csv_by_sql(ctx: &SessionContext) {
             c13 VARCHAR NOT NULL
         )
         STORED AS CSV
-        WITH HEADER ROW
-        LOCATION '{testdata}/csv/aggregate_test_100.csv'
+        LOCATION '{testdata}/csv/aggregate_test_100.csv'
+        OPTIONS ('format.has_header' 'true')
         "
     ))
     .await
diff --git a/datafusion/expr/src/logical_plan/ddl.rs b/datafusion/expr/src/logical_plan/ddl.rs
index 5b1c377dc1e3..4538ff52c052 100644
--- a/datafusion/expr/src/logical_plan/ddl.rs
+++ b/datafusion/expr/src/logical_plan/ddl.rs
@@ -189,8 +189,6 @@ pub struct CreateExternalTable {
     pub location: String,
     /// The file type of physical file
     pub file_type: String,
-    /// Whether the CSV file contains a header
-    pub has_header: bool,
     /// Partition Columns
     pub table_partition_cols: Vec<String>,
     /// Option to not error if table already exists
@@ -216,7 +214,6 @@ impl Hash for CreateExternalTable {
         self.name.hash(state);
         self.location.hash(state);
         self.file_type.hash(state);
-        self.has_header.hash(state);
         self.table_partition_cols.hash(state);
         self.if_not_exists.hash(state);
         self.definition.hash(state);
diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto
index 3da326ff37fe..dcb6a983978c 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -201,19 +201,18 @@ message Constraints{

 message CreateExternalTableNode {
   reserved 1; // was string name
-  TableReference name = 10;
+  TableReference name = 9;
   string location = 2;
   string file_type = 3;
-  bool has_header = 4;
-  DfSchema schema = 5;
-  repeated string table_partition_cols = 6;
-  bool if_not_exists = 7;
-  string definition = 8;
-  repeated LogicalExprNodeCollection order_exprs = 11;
-  bool unbounded = 12;
-  map<string, string> options = 9;
-  Constraints constraints = 13;
-  map<string, LogicalExprNode> column_defaults = 14;
+  DfSchema schema = 4;
+  repeated string table_partition_cols = 5;
+  bool if_not_exists = 6;
+  string definition = 7;
+  repeated LogicalExprNodeCollection order_exprs = 10;
+  bool unbounded = 11;
+  map<string, string> options = 8;
+  Constraints constraints = 12;
+  map<string, LogicalExprNode> column_defaults = 13;
 }

 message PrepareNode {
diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs
index a836ed5abcc5..7284e52f3cdb 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -4676,9 +4676,6 @@ impl serde::Serialize for CreateExternalTableNode {
         if !self.file_type.is_empty() {
             len += 1;
         }
-        if self.has_header {
-            len += 1;
-        }
         if self.schema.is_some() {
             len += 1;
         }
@@ -4716,9 +4713,6 @@ impl serde::Serialize for CreateExternalTableNode {
         if !self.file_type.is_empty() {
             struct_ser.serialize_field("fileType", &self.file_type)?;
         }
-        if self.has_header {
-            struct_ser.serialize_field("hasHeader", &self.has_header)?;
-        }
         if let Some(v) = self.schema.as_ref() {
             struct_ser.serialize_field("schema", v)?;
         }
@@ -4760,8 +4754,6 @@ impl<'de> serde::Deserialize<'de> for CreateExternalTableNode {
             "location",
             "file_type",
             "fileType",
-            "has_header",
-            "hasHeader",
             "schema",
             "table_partition_cols",
             "tablePartitionCols",
@@ -4782,7 +4774,6 @@ impl<'de> serde::Deserialize<'de> for CreateExternalTableNode {
             Name,
             Location,
             FileType,
-            HasHeader,
             Schema,
             TablePartitionCols,
             IfNotExists,
@@ -4816,7 +4807,6 @@ impl<'de> serde::Deserialize<'de> for CreateExternalTableNode {
                             "name" => Ok(GeneratedField::Name),
                             "location" => Ok(GeneratedField::Location),
                             "fileType" | "file_type" => Ok(GeneratedField::FileType),
-                            "hasHeader" | "has_header" => Ok(GeneratedField::HasHeader),
                             "schema" => Ok(GeneratedField::Schema),
                             "tablePartitionCols" | "table_partition_cols" => Ok(GeneratedField::TablePartitionCols),
                             "ifNotExists" | "if_not_exists" => Ok(GeneratedField::IfNotExists),
@@ -4848,7 +4838,6 @@ impl<'de> serde::Deserialize<'de> for CreateExternalTableNode {
                 let mut name__ = None;
                 let mut location__ = None;
                 let mut file_type__ = None;
-                let mut has_header__ = None;
                 let mut schema__ = None;
                 let mut table_partition_cols__ = None;
                 let mut if_not_exists__ = None;
@@ -4878,12 +4867,6 @@ impl<'de> serde::Deserialize<'de> for CreateExternalTableNode {
                         }
                         file_type__ = Some(map_.next_value()?);
                     }
-                    GeneratedField::HasHeader => {
-                        if has_header__.is_some() {
-                            return Err(serde::de::Error::duplicate_field("hasHeader"));
-                        }
-                        has_header__ = Some(map_.next_value()?);
-                    }
                    GeneratedField::Schema => {
                        if schema__.is_some() {
                            return Err(serde::de::Error::duplicate_field("schema"));
@@ -4948,7 +4931,6 @@ impl<'de> serde::Deserialize<'de> for CreateExternalTableNode {
                    name: name__,
                    location: location__.unwrap_or_default(),
                    file_type: file_type__.unwrap_or_default(),
-                    has_header: has_header__.unwrap_or_default(),
                    schema: schema__,
                    table_partition_cols: table_partition_cols__.unwrap_or_default(),
                    if_not_exists: if_not_exists__.unwrap_or_default(),
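Alongside the generated-code churn, the user-facing shape of the change is worth seeing in one place. The statements below are illustrative only (table name and path invented), but the option keys and values are exactly the ones the new parser errors point to:

    -- Rejected after this series; each clause now errors with a migration hint:
    CREATE EXTERNAL TABLE t (c1 INT)
    STORED AS CSV WITH HEADER ROW DELIMITER '|' COMPRESSION TYPE GZIP
    LOCATION 'data/t.csv.gz';

    -- Equivalent statement using the consolidated OPTIONS clause:
    CREATE EXTERNAL TABLE t (c1 INT)
    STORED AS CSV
    LOCATION 'data/t.csv.gz'
    OPTIONS (
        'format.has_header' 'true',
        'format.delimiter' '|',
        'format.compression' 'gzip'
    );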
diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs
index 3b48a8050031..78a9f2bf34a7 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -320,34 +320,32 @@ pub struct Constraints {
 #[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct CreateExternalTableNode {
-    #[prost(message, optional, tag = "10")]
+    #[prost(message, optional, tag = "9")]
     pub name: ::core::option::Option<TableReference>,
     #[prost(string, tag = "2")]
     pub location: ::prost::alloc::string::String,
     #[prost(string, tag = "3")]
     pub file_type: ::prost::alloc::string::String,
-    #[prost(bool, tag = "4")]
-    pub has_header: bool,
-    #[prost(message, optional, tag = "5")]
+    #[prost(message, optional, tag = "4")]
     pub schema: ::core::option::Option<DfSchema>,
-    #[prost(string, repeated, tag = "6")]
+    #[prost(string, repeated, tag = "5")]
     pub table_partition_cols: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
-    #[prost(bool, tag = "7")]
+    #[prost(bool, tag = "6")]
     pub if_not_exists: bool,
-    #[prost(string, tag = "8")]
+    #[prost(string, tag = "7")]
     pub definition: ::prost::alloc::string::String,
-    #[prost(message, repeated, tag = "11")]
+    #[prost(message, repeated, tag = "10")]
     pub order_exprs: ::prost::alloc::vec::Vec<LogicalExprNodeCollection>,
-    #[prost(bool, tag = "12")]
+    #[prost(bool, tag = "11")]
     pub unbounded: bool,
-    #[prost(map = "string, string", tag = "9")]
+    #[prost(map = "string, string", tag = "8")]
     pub options: ::std::collections::HashMap<
         ::prost::alloc::string::String,
         ::prost::alloc::string::String,
     >,
-    #[prost(message, optional, tag = "13")]
+    #[prost(message, optional, tag = "12")]
     pub constraints: ::core::option::Option<Constraints>,
-    #[prost(map = "string, message", tag = "14")]
+    #[prost(map = "string, message", tag = "13")]
     pub column_defaults: ::std::collections::HashMap<
         ::prost::alloc::string::String,
         LogicalExprNode,
diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs
index 474fc93ac104..a6352bcefc3e 100644
--- a/datafusion/proto/src/logical_plan/mod.rs
+++ b/datafusion/proto/src/logical_plan/mod.rs
@@ -548,7 +548,6 @@ impl AsLogicalPlan for LogicalPlanNode {
                     )?,
                     location: create_extern_table.location.clone(),
                     file_type: create_extern_table.file_type.clone(),
-                    has_header: create_extern_table.has_header,
                     table_partition_cols: create_extern_table
                         .table_partition_cols
                         .clone(),
@@ -1318,7 +1317,6 @@ impl AsLogicalPlan for LogicalPlanNode {
                 name,
                 location,
                 file_type,
-                has_header,
                 schema: df_schema,
                 table_partition_cols,
                 if_not_exists,
@@ -1354,7 +1352,6 @@ impl AsLogicalPlan for LogicalPlanNode {
                         name: Some(name.clone().into()),
                         location: location.clone(),
                         file_type: file_type.clone(),
-                        has_header: *has_header,
                         schema: Some(df_schema.try_into()?),
                         table_partition_cols: table_partition_cols.clone(),
                         if_not_exists: *if_not_exists,
diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
index 3800b672b5e2..74df7178b0f2 100644
--- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
+++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
@@ -236,10 +236,10 @@ async fn roundtrip_custom_listing_tables() -> Result<()> {
               primary key(c)
             )
             STORED AS CSV
-            WITH HEADER ROW
             WITH ORDER (a ASC, b ASC)
             WITH ORDER (c ASC)
-            LOCATION '../core/tests/data/window_2.csv';";
+            LOCATION '../core/tests/data/window_2.csv'
+            OPTIONS ('format.has_header' 'true')";

     let plan = ctx.state().create_logical_plan(query).await?;
@@ -266,10 +266,10 @@ async fn roundtrip_logical_plan_aggregation_with_pk() -> Result<()> {
               primary key(c)
             )
             STORED AS CSV
-            WITH HEADER ROW
             WITH ORDER (a ASC, b ASC)
             WITH ORDER (c ASC)
-            LOCATION '../core/tests/data/window_2.csv';",
+            LOCATION '../core/tests/data/window_2.csv'
+            OPTIONS ('format.has_header' 'true')",
     )
     .await?;
diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs
index 607276f47b73..7468527662f6 100644
--- a/datafusion/sql/src/parser.rs
+++ b/datafusion/sql/src/parser.rs
@@ -19,9 +19,7 @@

 use std::collections::{HashMap, VecDeque};
 use std::fmt;
-use std::str::FromStr;

-use datafusion_common::parsers::CompressionTypeVariant;
 use sqlparser::{
     ast::{
         ColumnDef, ColumnOptionDef, ObjectName, OrderByExpr, Query,
@@ -130,10 +128,6 @@ impl fmt::Display for CopyToStatement {
             write!(f, " PARTITIONED BY ({})", partitioned_by.join(", "))?;
         }

-        if self.has_header {
-            write!(f, " WITH HEADER ROW")?;
-        }
-
         if !options.is_empty() {
             let opts: Vec<_> =
                 options.iter().map(|(k, v)| format!("{k} {v}")).collect();
             write!(f, " OPTIONS ({})", opts.join(", "))?;
@@ -172,9 +166,6 @@ pub(crate) type LexOrdering = Vec<OrderByExpr>;
/// [ IF NOT EXISTS ]
/// <TABLE_NAME>[ (<column_definition>) ]
/// STORED AS <file_type>
-/// [ WITH HEADER ROW ]
-/// [ DELIMITER <char> ]
-/// [ COMPRESSION TYPE <GZIP | BZIP2 | XZ | ZSTD> ]
/// [ PARTITIONED BY (<column_definition_list> | <column_list>) ]
/// [ WITH ORDER (<ordered column list>)
/// [ OPTIONS (<key_value_list>) ]
@@ -196,8 +187,6 @@ pub struct CreateExternalTable {
    pub columns: Vec<ColumnDef>,
    /// File type (Parquet, NDJSON, CSV, etc)
    pub file_type: String,
-    /// CSV Header row?
-    pub has_header: bool,
    /// Path to file
    pub location: String,
    /// Partition Columns
@@ -424,8 +413,8 @@ impl<'a> DFParser<'a> {
                Keyword::WITH => {
                    self.parser.expect_keyword(Keyword::HEADER)?;
                    self.parser.expect_keyword(Keyword::ROW)?;
-                    ensure_not_set(&builder.has_header, "WITH HEADER ROW")?;
-                    builder.has_header = Some(true);
+                    return parser_err!("\"WITH HEADER ROW\" clause is no longer in use. Please use the \"OPTIONS\"
+                    clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')");
                }
                Keyword::PARTITIONED => {
                    self.parser.expect_keyword(Keyword::BY)?;
@@ -695,9 +684,6 @@ impl<'a> DFParser<'a> {
        struct Builder {
            file_type: Option<String>,
            location: Option<String>,
-            has_header: Option<bool>,
-            delimiter: Option<char>,
-            file_compression_type: Option<CompressionTypeVariant>,
            table_partition_cols: Option<Vec<String>>,
            order_exprs: Vec<LexOrdering>,
            options: Option<HashMap<String, String>>,
@@ -730,22 +716,18 @@ impl<'a> DFParser<'a> {
                    } else {
                        self.parser.expect_keyword(Keyword::HEADER)?;
                        self.parser.expect_keyword(Keyword::ROW)?;
-                        ensure_not_set(&builder.has_header, "WITH HEADER ROW")?;
-                        builder.has_header = Some(true);
+                        return parser_err!("\"WITH HEADER ROW\" clause is no longer in use. Please use the \"OPTIONS\"
+                        clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')");
                    }
                }
                Keyword::DELIMITER => {
-                    ensure_not_set(&builder.delimiter, "DELIMITER")?;
-                    builder.delimiter = Some(self.parse_delimiter()?);
+                    return parser_err!("\"DELIMITER\" clause is no longer in use. Please use the \"OPTIONS\"
+                    clause with 'format.delimiter' set appropriately, e.g., OPTIONS ('format.delimiter' ',')");
                }
                Keyword::COMPRESSION => {
                    self.parser.expect_keyword(Keyword::TYPE)?;
-                    ensure_not_set(
-                        &builder.file_compression_type,
-                        "COMPRESSION TYPE",
-                    )?;
-                    builder.file_compression_type =
-                        Some(self.parse_file_compression_type()?);
+                    return parser_err!("\"COMPRESSION TYPE\" clause is no longer in use. Please use the \"OPTIONS\"
+                    clause with 'format.compression' set appropriately, e.g., OPTIONS ('format.compression' 'gzip')");
                }
                Keyword::PARTITIONED => {
                    self.parser.expect_keyword(Keyword::BY)?;
@@ -811,7 +793,6 @@ impl<'a> DFParser<'a> {
            name: table_name.to_string(),
            columns,
            file_type: builder.file_type.unwrap(),
-            has_header: builder.has_header.unwrap_or(false),
            location: builder.location.unwrap(),
            table_partition_cols: builder.table_partition_cols.unwrap_or(vec![]),
            order_exprs: builder.order_exprs,
@@ -832,17 +813,6 @@ impl<'a> DFParser<'a> {
        }
    }

-    /// Parses the set of
-    fn parse_file_compression_type(
-        &mut self,
-    ) -> Result<CompressionTypeVariant, ParserError> {
-        let token = self.parser.next_token();
-        match &token.token {
-            Token::Word(w) => CompressionTypeVariant::from_str(&w.value),
-            _ => self.expected("one of GZIP, BZIP2, XZ, ZSTD", token),
-        }
-    }
-
    /// Parses (key value) style options where the values are literal strings.
    fn parse_string_options(&mut self) -> Result<HashMap<String, String>, ParserError> {
        let mut options = HashMap::new();
@@ -892,16 +862,6 @@ impl<'a> DFParser<'a> {
        }
        Ok(options)
    }
-
-    fn parse_delimiter(&mut self) -> Result<char, ParserError> {
-        let token = self.parser.parse_literal_string()?;
-        match token.len() {
-            1 => Ok(token.chars().next().unwrap()),
-            _ => Err(ParserError::TokenizerError(
-                "Delimiter must be a single char".to_string(),
-            )),
-        }
-    }
 }

 #[cfg(test)]
 mod tests {
@@ -960,7 +920,6 @@ mod tests {
            name: "t".into(),
            columns: vec![make_column_def("c1", DataType::Int(display))],
            file_type: "CSV".to_string(),
-            has_header: false,
            location: "foo.csv".into(),
            table_partition_cols: vec![],
            order_exprs: vec![],
@@ -977,7 +936,6 @@ mod tests {
            name: "t".into(),
            columns: vec![make_column_def("c1", DataType::Int(None))],
            file_type: "CSV".to_string(),
-            has_header: false,
            location: "foo.csv".into(),
            table_partition_cols: vec![],
            order_exprs: vec![],
@@ -995,7 +953,6 @@ mod tests {
            name: "t".into(),
            columns: vec![make_column_def("c1", DataType::Int(None))],
            file_type: "CSV".to_string(),
-            has_header: false,
            location: "foo.csv".into(),
            table_partition_cols: vec![],
            order_exprs: vec![],
@@ -1013,16 +970,12 @@ mod tests {
            name: "t".into(),
            columns: vec![make_column_def("c1", DataType::Int(display))],
            file_type: "CSV".to_string(),
-            has_header: false,
            location: "foo.csv".into(),
            table_partition_cols: vec![],
            order_exprs: vec![],
            if_not_exists: false,
            unbounded: false,
-            options: vec!["format.delimiter".to_string()]
-                .into_iter()
-                .zip(vec!["|".to_string()])
-                .collect(),
+            options: HashMap::from([("format.delimiter".into(), "|".into())]),
            constraints: vec![],
        });
        expect_parse_ok(sql, expected)?;
@@ -1034,7 +987,6 @@ mod tests {
            name: "t".into(),
            columns: vec![make_column_def("c1", DataType::Int(display))],
            file_type: "CSV".to_string(),
-            has_header: false,
            location: "foo.csv".into(),
            table_partition_cols: vec!["p1".to_string(), "p2".to_string()],
            order_exprs: vec![],
            if_not_exists: false,
            unbounded: false,
            options: HashMap::new(),
            constraints: vec![],
        });
        expect_parse_ok(sql, expected)?;

-        // positive case: it is ok for case insensitive sql stmt with `WITH HEADER ROW` tokens
+        // positive case: it is ok for case insensitive sql stmt with has_header option tokens
        let sqls = vec![
-            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH HEADER ROW LOCATION 'foo.csv'",
-            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV with header row LOCATION 'foo.csv'"
+            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS ('FORMAT.HAS_HEADER' 'TRUE')",
+            "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS ('format.has_header' 'true')",
        ];
        for sql in sqls {
            let
expected = Statement::CreateExternalTable(CreateExternalTable { name: "t".into(), columns: vec![make_column_def("c1", DataType::Int(display))], file_type: "CSV".to_string(), - has_header: true, location: "foo.csv".into(), table_partition_cols: vec![], order_exprs: vec![], if_not_exists: false, unbounded: false, - options: HashMap::new(), + options: HashMap::from([("format.has_header".into(), "true".into())]), constraints: vec![], }); expect_parse_ok(sql, expected)?; @@ -1083,7 +1034,6 @@ mod tests { name: "t".into(), columns: vec![make_column_def("c1", DataType::Int(display))], file_type: "CSV".to_string(), - has_header: false, location: "foo.csv".into(), table_partition_cols: vec![], order_exprs: vec![], @@ -1104,7 +1054,6 @@ mod tests { name: "t".into(), columns: vec![], file_type: "PARQUET".to_string(), - has_header: false, location: "foo.parquet".into(), table_partition_cols: vec![], order_exprs: vec![], @@ -1121,7 +1070,6 @@ mod tests { name: "t".into(), columns: vec![], file_type: "PARQUET".to_string(), - has_header: false, location: "foo.parquet".into(), table_partition_cols: vec![], order_exprs: vec![], @@ -1138,7 +1086,6 @@ mod tests { name: "t".into(), columns: vec![], file_type: "AVRO".to_string(), - has_header: false, location: "foo.avro".into(), table_partition_cols: vec![], order_exprs: vec![], @@ -1156,7 +1103,6 @@ mod tests { name: "t".into(), columns: vec![], file_type: "PARQUET".to_string(), - has_header: false, location: "foo.parquet".into(), table_partition_cols: vec![], order_exprs: vec![], @@ -1177,7 +1123,6 @@ mod tests { make_column_def("p1", DataType::Int(None)), ], file_type: "CSV".to_string(), - has_header: false, location: "foo.csv".into(), table_partition_cols: vec!["p1".to_string()], order_exprs: vec![], @@ -1205,7 +1150,6 @@ mod tests { name: "t".into(), columns: vec![], file_type: "X".to_string(), - has_header: false, location: "blahblah".into(), table_partition_cols: vec![], order_exprs: vec![], @@ -1223,7 +1167,6 @@ mod tests { name: "t".into(), columns: vec![], file_type: "X".to_string(), - has_header: false, location: "blahblah".into(), table_partition_cols: vec![], order_exprs: vec![], @@ -1263,7 +1206,6 @@ mod tests { name: "t".into(), columns: vec![make_column_def("c1", DataType::Int(None))], file_type: "CSV".to_string(), - has_header: false, location: "foo.csv".into(), table_partition_cols: vec![], order_exprs: vec![vec![OrderByExpr { @@ -1292,7 +1234,6 @@ mod tests { make_column_def("c2", DataType::Int(display)), ], file_type: "CSV".to_string(), - has_header: false, location: "foo.csv".into(), table_partition_cols: vec![], order_exprs: vec![vec![ @@ -1330,7 +1271,6 @@ mod tests { make_column_def("c2", DataType::Int(display)), ], file_type: "CSV".to_string(), - has_header: false, location: "foo.csv".into(), table_partition_cols: vec![], order_exprs: vec![vec![OrderByExpr { @@ -1359,13 +1299,13 @@ mod tests { let sql = " CREATE UNBOUNDED EXTERNAL TABLE IF NOT EXISTS t (c1 int, c2 float) STORED AS PARQUET - WITH HEADER ROW WITH ORDER (c1 - c2 ASC) PARTITIONED BY (c1) LOCATION 'foo.parquet' OPTIONS (ROW_GROUP_SIZE '1024', 'TRUNCATE' 'NO', 'format.compression' 'zstd', - 'format.delimiter' '*') + 'format.delimiter' '*', + 'format.has_header' 'true') "; let expected = Statement::CreateExternalTable(CreateExternalTable { name: "t".into(), @@ -1374,7 +1314,6 @@ mod tests { make_column_def("c2", DataType::Float(None)), ], file_type: "PARQUET".to_string(), - has_header: true, location: "foo.parquet".into(), table_partition_cols: vec!["c1".into()], order_exprs: 
vec![vec![OrderByExpr { @@ -1399,6 +1338,7 @@ mod tests { ("format.delimiter".into(), "*".into()), ("ROW_GROUP_SIZE".into(), "1024".into()), ("TRUNCATE".into(), "NO".into()), + ("format.has_header".into(), "true".into()), ]), constraints: vec![], }); @@ -1484,7 +1424,8 @@ mod tests { panic!("Expected query, got {statement:?}"); }; - let sql = "COPY (SELECT 1) TO bar STORED AS CSV WITH HEADER ROW"; + let sql = + "COPY (SELECT 1) TO bar STORED AS CSV OPTIONS ('format.has_header' 'true')"; let expected = Statement::CopyTo(CopyToStatement { source: CopyToSource::Query(query), target: "bar".to_string(), diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index ef5561335626..e6eda0b2787d 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -968,7 +968,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { name, columns, file_type, - has_header, location, table_partition_cols, if_not_exists, @@ -1021,7 +1020,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { name, location, file_type, - has_header, table_partition_cols, if_not_exists, definition, diff --git a/datafusion/sqllogictest/test_files/agg_func_substitute.slt b/datafusion/sqllogictest/test_files/agg_func_substitute.slt index c5cd78f1259b..342d45e7fb24 100644 --- a/datafusion/sqllogictest/test_files/agg_func_substitute.slt +++ b/datafusion/sqllogictest/test_files/agg_func_substitute.slt @@ -27,10 +27,10 @@ CREATE EXTERNAL TABLE multiple_ordered_table ( d INTEGER ) STORED AS CSV -WITH HEADER ROW WITH ORDER (a ASC, b ASC) WITH ORDER (c ASC) -LOCATION '../../datafusion/core/tests/data/window_2.csv'; +LOCATION '../../datafusion/core/tests/data/window_2.csv' +OPTIONS ('format.has_header' 'true'); query TT diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index bc677b73fb94..a8393e08630b 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -35,8 +35,8 @@ CREATE EXTERNAL TABLE aggregate_test_100 ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); statement ok CREATE TABLE d_table (c1 decimal(10,3), c2 varchar) @@ -116,8 +116,8 @@ c2 INT NOT NULL, c3 INT NOT NULL ) STORED AS CSV -WITH HEADER ROW -LOCATION '../core/tests/data/aggregate_agg_multi_order.csv'; +LOCATION '../core/tests/data/aggregate_agg_multi_order.csv' +OPTIONS ('format.has_header' 'true'); # test array_agg with order by multiple columns query ? 
@@ -3846,11 +3846,11 @@ c2 INT NOT NULL, c3 INT NOT NULL ) STORED AS CSV -WITH HEADER ROW WITH ORDER (c1 ASC) WITH ORDER (c2 DESC) WITH ORDER (c3 ASC) -LOCATION '../core/tests/data/convert_first_last.csv'; +LOCATION '../core/tests/data/convert_first_last.csv' +OPTIONS ('format.has_header' 'true'); # test first to last, the result does not show difference, we need to check the conversion by `explain` query TT diff --git a/datafusion/sqllogictest/test_files/avro.slt b/datafusion/sqllogictest/test_files/avro.slt index 7b9fbe556fee..f9f55f0dc153 100644 --- a/datafusion/sqllogictest/test_files/avro.slt +++ b/datafusion/sqllogictest/test_files/avro.slt @@ -31,8 +31,8 @@ CREATE EXTERNAL TABLE alltypes_plain ( timestamp_col TIMESTAMP NOT NULL, ) STORED AS AVRO -WITH HEADER ROW LOCATION '../../testing/data/avro/alltypes_plain.avro' +OPTIONS ('format.has_header' 'true'); statement ok CREATE EXTERNAL TABLE alltypes_plain_snappy ( @@ -49,8 +49,8 @@ CREATE EXTERNAL TABLE alltypes_plain_snappy ( timestamp_col TIMESTAMP NOT NULL, ) STORED AS AVRO -WITH HEADER ROW LOCATION '../../testing/data/avro/alltypes_plain.snappy.avro' +OPTIONS ('format.has_header' 'true'); statement ok CREATE EXTERNAL TABLE alltypes_plain_bzip2 ( @@ -67,8 +67,8 @@ CREATE EXTERNAL TABLE alltypes_plain_bzip2 ( timestamp_col TIMESTAMP NOT NULL, ) STORED AS AVRO -WITH HEADER ROW LOCATION '../../testing/data/avro/alltypes_plain.bzip2.avro' +OPTIONS ('format.has_header' 'true'); statement ok CREATE EXTERNAL TABLE alltypes_plain_xz ( @@ -85,8 +85,8 @@ CREATE EXTERNAL TABLE alltypes_plain_xz ( timestamp_col TIMESTAMP NOT NULL, ) STORED AS AVRO -WITH HEADER ROW LOCATION '../../testing/data/avro/alltypes_plain.xz.avro' +OPTIONS ('format.has_header' 'true'); statement ok CREATE EXTERNAL TABLE alltypes_plain_zstandard ( @@ -103,34 +103,34 @@ CREATE EXTERNAL TABLE alltypes_plain_zstandard ( timestamp_col TIMESTAMP NOT NULL, ) STORED AS AVRO -WITH HEADER ROW LOCATION '../../testing/data/avro/alltypes_plain.zstandard.avro' +OPTIONS ('format.has_header' 'true'); statement ok CREATE EXTERNAL TABLE single_nan ( mycol FLOAT ) STORED AS AVRO -WITH HEADER ROW LOCATION '../../testing/data/avro/single_nan.avro' +OPTIONS ('format.has_header' 'true'); statement ok CREATE EXTERNAL TABLE nested_records STORED AS AVRO -WITH HEADER ROW LOCATION '../../testing/data/avro/nested_records.avro' +OPTIONS ('format.has_header' 'true'); statement ok CREATE EXTERNAL TABLE simple_enum STORED AS AVRO -WITH HEADER ROW LOCATION '../../testing/data/avro/simple_enum.avro' +OPTIONS ('format.has_header' 'true'); statement ok CREATE EXTERNAL TABLE simple_fixed STORED AS AVRO -WITH HEADER ROW LOCATION '../../testing/data/avro/simple_fixed.avro' +OPTIONS ('format.has_header' 'true'); # test avro query query IT diff --git a/datafusion/sqllogictest/test_files/create_external_table.slt b/datafusion/sqllogictest/test_files/create_external_table.slt index 9f1fc523f559..4724bc87a099 100644 --- a/datafusion/sqllogictest/test_files/create_external_table.slt +++ b/datafusion/sqllogictest/test_files/create_external_table.slt @@ -68,18 +68,6 @@ CREATE EXTERNAL TABLE t STORED AS CSV STORED AS PARQUET LOCATION 'foo.parquet' statement error DataFusion error: SQL error: ParserError\("LOCATION specified more than once"\) CREATE EXTERNAL TABLE t STORED AS CSV LOCATION 'foo.csv' LOCATION 'bar.csv' -# Duplicate `WITH HEADER ROW` clause -statement error DataFusion error: SQL error: ParserError\("WITH HEADER ROW specified more than once"\) -CREATE EXTERNAL TABLE t STORED AS CSV WITH HEADER ROW WITH 
HEADER ROW LOCATION 'foo.csv' - -# Duplicate `DELIMITER` clause -statement error DataFusion error: SQL error: ParserError\("DELIMITER specified more than once"\) -CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV DELIMITER '-' DELIMITER '+' LOCATION 'foo.csv' - -# Duplicate `COMPRESSION TYPE` clause -statement error DataFusion error: SQL error: ParserError\("COMPRESSION TYPE specified more than once"\) -CREATE EXTERNAL TABLE t STORED AS CSV COMPRESSION TYPE BZIP2 COMPRESSION TYPE XZ COMPRESSION TYPE ZSTD COMPRESSION TYPE GZIP LOCATION 'foo.csv' - # Duplicate `PARTITIONED BY` clause statement error DataFusion error: SQL error: ParserError\("PARTITIONED BY specified more than once"\) create EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV PARTITIONED BY (c1) partitioned by (c2) LOCATION 'foo.csv' diff --git a/datafusion/sqllogictest/test_files/csv_files.slt b/datafusion/sqllogictest/test_files/csv_files.slt index 1c501634fef5..5e30d6ed09ae 100644 --- a/datafusion/sqllogictest/test_files/csv_files.slt +++ b/datafusion/sqllogictest/test_files/csv_files.slt @@ -21,19 +21,19 @@ CREATE EXTERNAL TABLE csv_with_quote ( c1 VARCHAR, c2 VARCHAR ) STORED AS CSV -WITH HEADER ROW LOCATION '../core/tests/data/quote.csv' OPTIONS ('format.quote' '~', - 'format.delimiter' ','); + 'format.delimiter' ',', + 'format.has_header' 'true'); statement ok CREATE EXTERNAL TABLE csv_with_escape ( c1 VARCHAR, c2 VARCHAR ) STORED AS CSV -WITH HEADER ROW OPTIONS ('format.escape' '\', - 'format.delimiter' ',') + 'format.delimiter' ',', + 'format.has_header' 'true') LOCATION '../core/tests/data/escape.csv'; query TT @@ -69,9 +69,9 @@ CREATE EXTERNAL TABLE csv_with_escape_2 ( c1 VARCHAR, c2 VARCHAR ) STORED AS CSV -WITH HEADER ROW OPTIONS ('format.escape' '"', - 'format.delimiter' ',') + 'format.delimiter' ',', + 'format.has_header' 'true') LOCATION '../core/tests/data/escape.csv'; # TODO: Validate this with better data. diff --git a/datafusion/sqllogictest/test_files/cte.slt b/datafusion/sqllogictest/test_files/cte.slt index e9c508cb27fb..8d086e1b2dd2 100644 --- a/datafusion/sqllogictest/test_files/cte.slt +++ b/datafusion/sqllogictest/test_files/cte.slt @@ -126,11 +126,11 @@ physical_plan # setup statement ok -CREATE EXTERNAL TABLE balance STORED as CSV WITH HEADER ROW LOCATION '../core/tests/data/recursive_cte/balance.csv' +CREATE EXTERNAL TABLE balance STORED as CSV LOCATION '../core/tests/data/recursive_cte/balance.csv' OPTIONS ('format.has_header' 'true'); # setup statement ok -CREATE EXTERNAL TABLE growth STORED as CSV WITH HEADER ROW LOCATION '../core/tests/data/recursive_cte/growth.csv' +CREATE EXTERNAL TABLE growth STORED as CSV LOCATION '../core/tests/data/recursive_cte/growth.csv' OPTIONS ('format.has_header' 'true'); # setup statement ok @@ -407,7 +407,7 @@ FROM # setup statement ok -CREATE EXTERNAL TABLE prices STORED as CSV WITH HEADER ROW LOCATION '../core/tests/data/recursive_cte/prices.csv' +CREATE EXTERNAL TABLE prices STORED as CSV LOCATION '../core/tests/data/recursive_cte/prices.csv' OPTIONS ('format.has_header' 'true'); # CTE within window function inside nested CTE works. This test demonstrates using a nested window function to recursively iterate over a column. 
query RRII @@ -598,11 +598,11 @@ ORDER BY # setup statement ok -CREATE EXTERNAL TABLE sales STORED as CSV WITH HEADER ROW LOCATION '../core/tests/data/recursive_cte/sales.csv' +CREATE EXTERNAL TABLE sales STORED as CSV LOCATION '../core/tests/data/recursive_cte/sales.csv' OPTIONS ('format.has_header' 'true'); # setup statement ok -CREATE EXTERNAL TABLE salespersons STORED as CSV WITH HEADER ROW LOCATION '../core/tests/data/recursive_cte/salespersons.csv' +CREATE EXTERNAL TABLE salespersons STORED as CSV LOCATION '../core/tests/data/recursive_cte/salespersons.csv' OPTIONS ('format.has_header' 'true'); # group by works within recursive cte. This test case demonstrates rolling up a hierarchy of salespeople to their managers. diff --git a/datafusion/sqllogictest/test_files/ddl.slt b/datafusion/sqllogictest/test_files/ddl.slt index 26b0e5561806..a35e688479e7 100644 --- a/datafusion/sqllogictest/test_files/ddl.slt +++ b/datafusion/sqllogictest/test_files/ddl.slt @@ -256,7 +256,7 @@ DROP VIEW non_existent_view ########## statement ok -CREATE external table aggregate_simple(c1 real, c2 double, c3 boolean) STORED as CSV WITH HEADER ROW LOCATION '../core/tests/data/aggregate_simple.csv'; +CREATE external table aggregate_simple(c1 real, c2 double, c3 boolean) STORED as CSV LOCATION '../core/tests/data/aggregate_simple.csv' OPTIONS ('format.has_header' 'true'); # create_table_as statement ok @@ -455,8 +455,8 @@ CREATE EXTERNAL TABLE aggregate_test_100 ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW -LOCATION '../../testing/data/csv/aggregate_test_100.csv'; +LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); query TIIIIIIIIIRRT SELECT c1, c2, c3, c4, c5, c6, c7, c8, c9, 10, c11, c12, c13 FROM aggregate_test_100 LIMIT 1; @@ -535,7 +535,7 @@ DROP VIEW y; # create_pipe_delimited_csv_table() statement ok -CREATE EXTERNAL TABLE aggregate_simple STORED AS CSV WITH HEADER ROW LOCATION '../core/tests/data/aggregate_simple_pipe.csv' OPTIONS ('format.delimiter' '|'); +CREATE EXTERNAL TABLE aggregate_simple STORED AS CSV LOCATION '../core/tests/data/aggregate_simple_pipe.csv' OPTIONS ('format.delimiter' '|', 'format.has_header' 'true'); query RRB @@ -581,14 +581,14 @@ statement ok CREATE TABLE IF NOT EXISTS table_without_values(field1 BIGINT, field2 BIGINT); statement ok -CREATE EXTERNAL TABLE aggregate_simple STORED AS CSV WITH HEADER ROW LOCATION '../core/tests/data/aggregate_simple.csv' +CREATE EXTERNAL TABLE aggregate_simple STORED AS CSV LOCATION '../core/tests/data/aggregate_simple.csv' OPTIONS ('format.has_header' 'true'); # Should not recreate the same EXTERNAL table statement error Execution error: Table 'aggregate_simple' already exists -CREATE EXTERNAL TABLE aggregate_simple STORED AS CSV WITH HEADER ROW LOCATION '../core/tests/data/aggregate_simple.csv' +CREATE EXTERNAL TABLE aggregate_simple STORED AS CSV LOCATION '../core/tests/data/aggregate_simple.csv' OPTIONS ('format.has_header' 'true'); statement ok -CREATE EXTERNAL TABLE IF NOT EXISTS aggregate_simple STORED AS CSV WITH HEADER ROW LOCATION '../core/tests/data/aggregate_simple.csv' +CREATE EXTERNAL TABLE IF NOT EXISTS aggregate_simple STORED AS CSV LOCATION '../core/tests/data/aggregate_simple.csv' OPTIONS ('format.has_header' 'true'); # create bad custom table statement error DataFusion error: Execution error: Unable to find factory for DELTATABLE @@ -690,7 +690,7 @@ drop table foo; # create csv table with empty csv file statement ok -CREATE EXTERNAL TABLE empty STORED AS CSV WITH HEADER ROW 
LOCATION '../core/tests/data/empty.csv'; +CREATE EXTERNAL TABLE empty STORED AS CSV LOCATION '../core/tests/data/empty.csv' OPTIONS ('format.has_header' 'true'); query TTI select column_name, data_type, ordinal_position from information_schema.columns where table_name='empty';; @@ -742,8 +742,8 @@ DROP SCHEMA empty_schema; statement ok CREATE UNBOUNDED external table t(c1 integer, c2 integer, c3 integer) STORED as CSV -WITH HEADER ROW -LOCATION '../core/tests/data/empty.csv'; +LOCATION '../core/tests/data/empty.csv' +OPTIONS ('format.has_header' 'true'); # should see infinite_source=true in the explain query TT @@ -760,8 +760,8 @@ drop table t; statement ok CREATE external table t(c1 integer, c2 integer, c3 integer) STORED as CSV -WITH HEADER ROW -LOCATION '../core/tests/data/empty.csv'; +LOCATION '../core/tests/data/empty.csv' +OPTIONS ('format.has_header' 'true'); # expect to see no infinite_source in the explain query TT diff --git a/datafusion/sqllogictest/test_files/decimal.slt b/datafusion/sqllogictest/test_files/decimal.slt index 3f75e42d9304..8db28c32f13b 100644 --- a/datafusion/sqllogictest/test_files/decimal.slt +++ b/datafusion/sqllogictest/test_files/decimal.slt @@ -44,8 +44,8 @@ c4 BOOLEAN NOT NULL, c5 DECIMAL(12,7) NOT NULL ) STORED AS CSV -WITH HEADER ROW -LOCATION '../core/tests/data/decimal_data.csv'; +LOCATION '../core/tests/data/decimal_data.csv' +OPTIONS ('format.has_header' 'true'); query TT @@ -639,8 +639,8 @@ c4 BOOLEAN NOT NULL, c5 DECIMAL(52,7) NOT NULL ) STORED AS CSV -WITH HEADER ROW -LOCATION '../core/tests/data/decimal_data.csv'; +LOCATION '../core/tests/data/decimal_data.csv' +OPTIONS ('format.has_header' 'true'); query TT select arrow_typeof(c1), arrow_typeof(c5) from decimal256_simple limit 1; diff --git a/datafusion/sqllogictest/test_files/describe.slt b/datafusion/sqllogictest/test_files/describe.slt index f94a2e453884..a15c3a109cab 100644 --- a/datafusion/sqllogictest/test_files/describe.slt +++ b/datafusion/sqllogictest/test_files/describe.slt @@ -24,7 +24,7 @@ statement ok set datafusion.catalog.information_schema = true statement ok -CREATE external table aggregate_simple(c1 real, c2 double, c3 boolean) STORED as CSV WITH HEADER ROW LOCATION '../core/tests/data/aggregate_simple.csv'; +CREATE external table aggregate_simple(c1 real, c2 double, c3 boolean) STORED as CSV LOCATION '../core/tests/data/aggregate_simple.csv' OPTIONS ('format.has_header' 'true'); query TTT rowsort DESCRIBE aggregate_simple; @@ -44,7 +44,7 @@ statement ok set datafusion.catalog.information_schema = false statement ok -CREATE external table aggregate_simple(c1 real, c2 double, c3 boolean) STORED as CSV WITH HEADER ROW LOCATION '../core/tests/data/aggregate_simple.csv'; +CREATE external table aggregate_simple(c1 real, c2 double, c3 boolean) STORED as CSV LOCATION '../core/tests/data/aggregate_simple.csv' OPTIONS ('format.has_header' 'true'); query TTT rowsort DESCRIBE aggregate_simple; diff --git a/datafusion/sqllogictest/test_files/distinct_on.slt b/datafusion/sqllogictest/test_files/distinct_on.slt index 0bb931c88cbc..cdef2990fa3c 100644 --- a/datafusion/sqllogictest/test_files/distinct_on.slt +++ b/datafusion/sqllogictest/test_files/distinct_on.slt @@ -32,8 +32,8 @@ CREATE EXTERNAL TABLE aggregate_test_100 ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); # Basic example: distinct on the first column project the second one, and # order by the third diff --git 
a/datafusion/sqllogictest/test_files/errors.slt b/datafusion/sqllogictest/test_files/errors.slt index ab281eac31f5..c9c96df2b6a9 100644 --- a/datafusion/sqllogictest/test_files/errors.slt +++ b/datafusion/sqllogictest/test_files/errors.slt @@ -34,8 +34,8 @@ CREATE EXTERNAL TABLE aggregate_test_100 ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); # csv_query_error statement error DataFusion error: Error during planning: No function matches the given name and argument types 'sin\(Utf8\)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tsin\(Float64/Float32\) diff --git a/datafusion/sqllogictest/test_files/explain.slt b/datafusion/sqllogictest/test_files/explain.slt index 205306882b4a..3a4ac747ebd6 100644 --- a/datafusion/sqllogictest/test_files/explain.slt +++ b/datafusion/sqllogictest/test_files/explain.slt @@ -32,8 +32,8 @@ CREATE EXTERNAL TABLE aggregate_test_100 ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW -LOCATION '../../testing/data/csv/aggregate_test_100.csv'; +LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); query TT explain SELECT c1 FROM aggregate_test_100 where c2 > 10 @@ -68,9 +68,9 @@ CREATE EXTERNAL TABLE aggregate_test_100_with_order ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW WITH ORDER (c1 ASC) -LOCATION '../core/tests/data/aggregate_test_100_order_by_c1_asc.csv'; +LOCATION '../core/tests/data/aggregate_test_100_order_by_c1_asc.csv' +OPTIONS ('format.has_header' 'true'); query TT explain SELECT c1 FROM aggregate_test_100_with_order order by c1 ASC limit 10 @@ -128,8 +128,8 @@ CREATE EXTERNAL TABLE simple_explain_test ( c INT ) STORED AS CSV -WITH HEADER ROW LOCATION '../core/tests/data/example.csv' +OPTIONS ('format.has_header' 'true'); query TT EXPLAIN SELECT a, b, c FROM simple_explain_test @@ -156,8 +156,8 @@ CREATE UNBOUNDED EXTERNAL TABLE sink_table ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW -LOCATION '../../testing/data/csv/aggregate_test_100.csv'; +LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); query TT EXPLAIN INSERT INTO sink_table SELECT * FROM aggregate_test_100 ORDER by c1 diff --git a/datafusion/sqllogictest/test_files/expr.slt b/datafusion/sqllogictest/test_files/expr.slt index 7e7ebd8529da..e3a7db7aef3c 100644 --- a/datafusion/sqllogictest/test_files/expr.slt +++ b/datafusion/sqllogictest/test_files/expr.slt @@ -1935,8 +1935,8 @@ CREATE EXTERNAL TABLE aggregate_test_100_by_sql ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index d03b33d0c8e5..4bb553767b45 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -549,8 +549,8 @@ CREATE EXTERNAL TABLE aggregate_test_100 ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); # sqrt_f32_vs_f64 diff --git a/datafusion/sqllogictest/test_files/group.slt b/datafusion/sqllogictest/test_files/group.slt index 2a28efa73a62..a6b5f9b72a53 100644 --- a/datafusion/sqllogictest/test_files/group.slt +++ b/datafusion/sqllogictest/test_files/group.slt @@ -32,11 +32,11 @@ CREATE EXTERNAL TABLE 
aggregate_test_100 ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); statement ok -CREATE external table aggregate_simple(c1 real, c2 double, c3 boolean) STORED as CSV WITH HEADER ROW LOCATION '../core/tests/data/aggregate_simple.csv'; +CREATE external table aggregate_simple(c1 real, c2 double, c3 boolean) STORED as CSV LOCATION '../core/tests/data/aggregate_simple.csv' OPTIONS ('format.has_header' 'true'); # csv_query_group_by_int_min_max diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt index 5a605ea58b6c..43bbf6bed643 100644 --- a/datafusion/sqllogictest/test_files/group_by.slt +++ b/datafusion/sqllogictest/test_files/group_by.slt @@ -2042,9 +2042,9 @@ CREATE UNBOUNDED EXTERNAL TABLE annotated_data_infinite2 ( d INTEGER ) STORED AS CSV -WITH HEADER ROW WITH ORDER (a ASC, b ASC, c ASC) -LOCATION '../core/tests/data/window_2.csv'; +LOCATION '../core/tests/data/window_2.csv' +OPTIONS ('format.has_header' 'true'); # Create a table with 2 ordered columns. # In the next step, we will expect to observe the removed sort execs. @@ -2057,10 +2057,10 @@ CREATE EXTERNAL TABLE multiple_ordered_table ( d INTEGER ) STORED AS CSV -WITH HEADER ROW WITH ORDER (a ASC, b ASC) WITH ORDER (c ASC) -LOCATION '../core/tests/data/window_2.csv'; +LOCATION '../core/tests/data/window_2.csv' +OPTIONS ('format.has_header' 'true'); # Expected a sort exec for b DESC query TT @@ -3894,10 +3894,10 @@ CREATE EXTERNAL TABLE multiple_ordered_table_with_pk ( primary key(c) ) STORED AS CSV -WITH HEADER ROW WITH ORDER (a ASC, b ASC) WITH ORDER (c ASC) -LOCATION '../core/tests/data/window_2.csv'; +LOCATION '../core/tests/data/window_2.csv' +OPTIONS ('format.has_header' 'true'); # We can use column b during selection # even if it is not among group by expressions @@ -3935,10 +3935,10 @@ CREATE EXTERNAL TABLE multiple_ordered_table_with_pk ( d INTEGER ) STORED AS CSV -WITH HEADER ROW WITH ORDER (a ASC, b ASC) WITH ORDER (c ASC) -LOCATION '../core/tests/data/window_2.csv'; +LOCATION '../core/tests/data/window_2.csv' +OPTIONS ('format.has_header' 'true'); # We can use column b during selection # even if it is not among group by expressions @@ -4377,8 +4377,8 @@ CREATE EXTERNAL TABLE aggregate_test_100 ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); query TIIII SELECT c1, count(distinct c2), min(distinct c2), min(c3), max(c4) FROM aggregate_test_100 GROUP BY c1 ORDER BY c1; @@ -4454,10 +4454,10 @@ CREATE EXTERNAL TABLE unbounded_multiple_ordered_table_with_pk ( d INTEGER ) STORED AS CSV -WITH HEADER ROW WITH ORDER (a ASC, b ASC) WITH ORDER (c ASC) -LOCATION '../core/tests/data/window_2.csv'; +LOCATION '../core/tests/data/window_2.csv' +OPTIONS ('format.has_header' 'true'); # Query below can be executed, since c is primary key. 
query III rowsort @@ -4538,8 +4538,8 @@ CREATE EXTERNAL TABLE timestamp_table ( c2 INT, ) STORED AS CSV -WITH HEADER ROW -LOCATION 'test_files/scratch/group_by/timestamp_table'; +LOCATION 'test_files/scratch/group_by/timestamp_table' +OPTIONS ('format.has_header' 'true'); # Group By using date_trunc query PI rowsort diff --git a/datafusion/sqllogictest/test_files/identifiers.slt b/datafusion/sqllogictest/test_files/identifiers.slt index f60d60b2bfe0..755d617e7a2a 100644 --- a/datafusion/sqllogictest/test_files/identifiers.slt +++ b/datafusion/sqllogictest/test_files/identifiers.slt @@ -22,8 +22,8 @@ CREATE EXTERNAL TABLE case_insensitive_test ( c INT ) STORED AS CSV -WITH HEADER ROW LOCATION '../core/tests/data/example.csv' +OPTIONS ('format.has_header' 'true'); # normalized column identifiers query II diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index f52147142cb7..de00cf9d0547 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -571,7 +571,8 @@ DROP VIEW test.xyz statement ok CREATE EXTERNAL TABLE abc STORED AS CSV -WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv'; +LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); query TTTT SHOW CREATE TABLE abc; diff --git a/datafusion/sqllogictest/test_files/insert.slt b/datafusion/sqllogictest/test_files/insert.slt index 7c9bc4abe767..126e4120e9be 100644 --- a/datafusion/sqllogictest/test_files/insert.slt +++ b/datafusion/sqllogictest/test_files/insert.slt @@ -37,8 +37,8 @@ CREATE EXTERNAL TABLE aggregate_test_100 ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); # test_insert_into diff --git a/datafusion/sqllogictest/test_files/insert_to_external.slt b/datafusion/sqllogictest/test_files/insert_to_external.slt index 70eb2b75a75a..dfb333d16e23 100644 --- a/datafusion/sqllogictest/test_files/insert_to_external.slt +++ b/datafusion/sqllogictest/test_files/insert_to_external.slt @@ -37,8 +37,8 @@ CREATE EXTERNAL TABLE aggregate_test_100 ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); statement ok diff --git a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt index 60a14f78bdf5..8de8c478fbc4 100644 --- a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt +++ b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt @@ -34,9 +34,9 @@ CREATE EXTERNAL TABLE annotated_data ( d INTEGER ) STORED AS CSV -WITH HEADER ROW WITH ORDER (a ASC, b ASC, c ASC) -LOCATION '../core/tests/data/window_2.csv'; +LOCATION '../core/tests/data/window_2.csv' +OPTIONS ('format.has_header' 'true'); query TT EXPLAIN SELECT t2.a diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index f4a86fd02709..95834c4c6ebb 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -3162,9 +3162,9 @@ CREATE EXTERNAL TABLE annotated_data ( d INTEGER ) STORED AS CSV -WITH HEADER ROW WITH ORDER (a ASC NULLS FIRST, b ASC, c ASC) -LOCATION '../core/tests/data/window_2.csv'; +LOCATION '../core/tests/data/window_2.csv' +OPTIONS 
('format.has_header' 'true'); # sort merge join should propagate ordering equivalence of the left side # for inner join. Hence final requirement rn1 ASC is already satisfied at @@ -3379,10 +3379,10 @@ CREATE EXTERNAL TABLE multiple_ordered_table ( d INTEGER ) STORED AS CSV -WITH HEADER ROW WITH ORDER (a ASC, b ASC) WITH ORDER (c ASC) -LOCATION '../core/tests/data/window_2.csv'; +LOCATION '../core/tests/data/window_2.csv' +OPTIONS ('format.has_header' 'true'); query TT EXPLAIN SELECT LAST_VALUE(l.d ORDER BY l.a) AS amount_usd diff --git a/datafusion/sqllogictest/test_files/limit.slt b/datafusion/sqllogictest/test_files/limit.slt index 2238a4aaeaf4..2c65b1da4474 100644 --- a/datafusion/sqllogictest/test_files/limit.slt +++ b/datafusion/sqllogictest/test_files/limit.slt @@ -36,8 +36,8 @@ CREATE EXTERNAL TABLE aggregate_test_100 ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); # async fn csv_query_limit query T diff --git a/datafusion/sqllogictest/test_files/math.slt b/datafusion/sqllogictest/test_files/math.slt index 802323ca45ee..7ddf9d9d0522 100644 --- a/datafusion/sqllogictest/test_files/math.slt +++ b/datafusion/sqllogictest/test_files/math.slt @@ -20,7 +20,7 @@ ########## statement ok -CREATE external table aggregate_simple(c1 real, c2 double, c3 boolean) STORED as CSV WITH HEADER ROW LOCATION '../core/tests/data/aggregate_simple.csv'; +CREATE external table aggregate_simple(c1 real, c2 double, c3 boolean) STORED as CSV LOCATION '../core/tests/data/aggregate_simple.csv' OPTIONS ('format.has_header' 'true'); # Round query R diff --git a/datafusion/sqllogictest/test_files/monotonic_projection_test.slt b/datafusion/sqllogictest/test_files/monotonic_projection_test.slt index 0c89fb3bcdaa..d41b78dcd3f2 100644 --- a/datafusion/sqllogictest/test_files/monotonic_projection_test.slt +++ b/datafusion/sqllogictest/test_files/monotonic_projection_test.slt @@ -25,10 +25,10 @@ CREATE EXTERNAL TABLE multiple_ordered_table ( d INTEGER ) STORED AS CSV -WITH HEADER ROW WITH ORDER (a ASC, b ASC) WITH ORDER (c ASC) -LOCATION '../core/tests/data/window_2.csv'; +LOCATION '../core/tests/data/window_2.csv' +OPTIONS ('format.has_header' 'true'); # test for substitute CAST scenario query TT diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index 3f00bd2bb4ea..0f869fc0b419 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -36,8 +36,8 @@ CREATE EXTERNAL TABLE aggregate_test_100 ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); # test_sort_unprojected_col query I @@ -422,11 +422,11 @@ CREATE EXTERNAL TABLE multiple_ordered_table ( d INTEGER ) STORED AS CSV -WITH HEADER ROW WITH ORDER (a ASC) WITH ORDER (b ASC) WITH ORDER (c ASC) -LOCATION '../core/tests/data/window_2.csv'; +LOCATION '../core/tests/data/window_2.csv' +OPTIONS ('format.has_header' 'true'); query TT EXPLAIN SELECT (b+a+c) AS result @@ -511,10 +511,10 @@ CREATE EXTERNAL TABLE aggregate_test_100 ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW WITH ORDER(c11) WITH ORDER(c12 DESC) LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); query TT EXPLAIN SELECT ATAN(c11) as atan_c11 @@ -627,8 +627,8 @@ CREATE EXTERNAL TABLE IF NOT EXISTS orders ( o_clerk VARCHAR, o_shippriority INTEGER, 
o_comment VARCHAR, -) STORED AS CSV WITH ORDER (o_orderkey ASC) WITH HEADER ROW LOCATION '../core/tests/tpch-csv/orders.csv' -OPTIONS ('format.delimiter' ','); +) STORED AS CSV WITH ORDER (o_orderkey ASC) LOCATION '../core/tests/tpch-csv/orders.csv' +OPTIONS ('format.delimiter' ',', 'format.has_header' 'true'); query TT EXPLAIN SELECT o_orderkey, o_orderstatus FROM orders ORDER BY o_orderkey ASC diff --git a/datafusion/sqllogictest/test_files/parquet.slt b/datafusion/sqllogictest/test_files/parquet.slt index aa1681de6201..9c90a1aaaeae 100644 --- a/datafusion/sqllogictest/test_files/parquet.slt +++ b/datafusion/sqllogictest/test_files/parquet.slt @@ -66,8 +66,8 @@ CREATE EXTERNAL TABLE test_table ( date_col DATE ) STORED AS PARQUET -WITH HEADER ROW -LOCATION 'test_files/scratch/parquet/test_table'; +LOCATION 'test_files/scratch/parquet/test_table' +OPTIONS ('format.has_header' 'true'); # Basic query: query ITID @@ -107,9 +107,9 @@ CREATE EXTERNAL TABLE test_table ( date_col DATE ) STORED AS PARQUET -WITH HEADER ROW WITH ORDER (string_col ASC NULLS LAST, int_col ASC NULLS LAST) -LOCATION 'test_files/scratch/parquet/test_table'; +LOCATION 'test_files/scratch/parquet/test_table' +OPTIONS ('format.has_header' 'true'); # Check output plan, expect an "output_ordering" clause in the physical_plan -> ParquetExec: query TT @@ -189,8 +189,8 @@ CREATE EXTERNAL TABLE alltypes_plain ( timestamp_col TIMESTAMP NOT NULL, ) STORED AS PARQUET -WITH HEADER ROW LOCATION '../../parquet-testing/data/alltypes_plain.parquet' +OPTIONS ('format.has_header' 'true') # Test a basic query with a CAST: query IT @@ -214,8 +214,8 @@ DROP TABLE alltypes_plain; statement ok CREATE EXTERNAL TABLE test_binary STORED AS PARQUET -WITH HEADER ROW -LOCATION '../core/tests/data/test_binary.parquet'; +LOCATION '../core/tests/data/test_binary.parquet' +OPTIONS ('format.has_header' 'true'); # Check size of table: query I @@ -247,8 +247,8 @@ DROP TABLE test_binary; statement ok CREATE EXTERNAL TABLE timestamp_with_tz STORED AS PARQUET -WITH HEADER ROW -LOCATION '../core/tests/data/timestamp_with_tz.parquet'; +LOCATION '../core/tests/data/timestamp_with_tz.parquet' +OPTIONS ('format.has_header' 'true'); # Check size of table: query I @@ -288,8 +288,8 @@ STORED AS PARQUET; statement ok CREATE EXTERNAL TABLE listing_table STORED AS PARQUET -WITH HEADER ROW -LOCATION 'test_files/scratch/parquet/test_table/*.parquet'; +LOCATION 'test_files/scratch/parquet/test_table/*.parquet' +OPTIONS ('format.has_header' 'true'); statement ok set datafusion.execution.listing_table_ignore_subdirectory = true; @@ -317,8 +317,8 @@ DROP TABLE timestamp_with_tz; statement ok CREATE EXTERNAL TABLE single_nan STORED AS PARQUET -WITH HEADER ROW -LOCATION '../../parquet-testing/data/single_nan.parquet'; +LOCATION '../../parquet-testing/data/single_nan.parquet' +OPTIONS ('format.has_header' 'true'); # Check table size: query I @@ -339,8 +339,8 @@ DROP TABLE single_nan; statement ok CREATE EXTERNAL TABLE list_columns STORED AS PARQUET -WITH HEADER ROW -LOCATION '../../parquet-testing/data/list_columns.parquet'; +LOCATION '../../parquet-testing/data/list_columns.parquet' +OPTIONS ('format.has_header' 'true'); query ?? 
SELECT int64_list, utf8_list FROM list_columns diff --git a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_null.slt b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_null.slt index f933c90acc73..d14b6ca81f67 100644 --- a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_null.slt +++ b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_null.slt @@ -66,8 +66,8 @@ CREATE EXTERNAL TABLE aggregate_test_100_by_sql ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); statement ok diff --git a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_simple.slt b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_simple.slt index b01ea73c8056..25b4924715ca 100644 --- a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_simple.slt +++ b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_simple.slt @@ -67,8 +67,8 @@ CREATE EXTERNAL TABLE aggregate_test_100_by_sql ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); diff --git a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_union.slt b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_union.slt index 05343de32268..e02c19016790 100644 --- a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_union.slt +++ b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_union.slt @@ -64,8 +64,8 @@ CREATE EXTERNAL TABLE aggregate_test_100_by_sql ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); query I rowsort SELECT * FROM ( diff --git a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_window.slt b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_window.slt index cec51d472075..edad3747a203 100644 --- a/datafusion/sqllogictest/test_files/pg_compat/pg_compat_window.slt +++ b/datafusion/sqllogictest/test_files/pg_compat/pg_compat_window.slt @@ -64,8 +64,8 @@ CREATE EXTERNAL TABLE aggregate_test_100_by_sql ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); query IIIIIIIIII SELECT diff --git a/datafusion/sqllogictest/test_files/predicates.slt b/datafusion/sqllogictest/test_files/predicates.slt index 12f3ebccea3c..5483750c6a03 100644 --- a/datafusion/sqllogictest/test_files/predicates.slt +++ b/datafusion/sqllogictest/test_files/predicates.slt @@ -39,8 +39,8 @@ CREATE EXTERNAL TABLE aggregate_test_100 ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW -LOCATION '../../testing/data/csv/aggregate_test_100.csv' +LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); statement ok CREATE EXTERNAL TABLE alltypes_plain STORED AS PARQUET LOCATION '../../parquet-testing/data/alltypes_plain.parquet'; @@ -621,7 +621,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS lineitem ( l_shipinstruct VARCHAR, l_shipmode VARCHAR, l_comment VARCHAR, -) STORED AS CSV WITH HEADER ROW LOCATION '../core/tests/tpch-csv/lineitem.csv' OPTIONS ('format.delimiter' ','); +) STORED AS CSV LOCATION '../core/tests/tpch-csv/lineitem.csv' OPTIONS ('format.delimiter' ',', 'format.has_header' 'true'); statement ok CREATE EXTERNAL TABLE IF NOT EXISTS part ( @@ -634,7 +634,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS part ( p_container VARCHAR, p_retailprice DECIMAL(15, 2), p_comment VARCHAR, -) STORED AS CSV WITH HEADER 
ROW LOCATION '../core/tests/tpch-csv/part.csv' OPTIONS ('format.delimiter' ','); +) STORED AS CSV LOCATION '../core/tests/tpch-csv/part.csv' OPTIONS ('format.delimiter' ',', 'format.has_header' 'true'); query TT EXPLAIN SELECT l_partkey FROM diff --git a/datafusion/sqllogictest/test_files/projection.slt b/datafusion/sqllogictest/test_files/projection.slt index b752f5644b7f..843ab71091f5 100644 --- a/datafusion/sqllogictest/test_files/projection.slt +++ b/datafusion/sqllogictest/test_files/projection.slt @@ -37,8 +37,8 @@ CREATE EXTERNAL TABLE aggregate_test_100 ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); statement ok CREATE EXTERNAL TABLE aggregate_simple ( @@ -47,8 +47,8 @@ CREATE EXTERNAL TABLE aggregate_simple ( c3 BOOLEAN NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../core/tests/data/aggregate_simple.csv' +OPTIONS ('format.has_header' 'true'); statement ok CREATE TABLE memory_table(a INT NOT NULL, b INT NOT NULL, c INT NOT NULL) AS VALUES diff --git a/datafusion/sqllogictest/test_files/references.slt b/datafusion/sqllogictest/test_files/references.slt index fd77d57c06f1..4c3ac68aebd1 100644 --- a/datafusion/sqllogictest/test_files/references.slt +++ b/datafusion/sqllogictest/test_files/references.slt @@ -39,8 +39,8 @@ CREATE EXTERNAL TABLE aggregate_test_100 ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); query I SELECT COUNT(*) FROM aggregate_test_100; diff --git a/datafusion/sqllogictest/test_files/repartition.slt b/datafusion/sqllogictest/test_files/repartition.slt index 3f9e6e61f1d0..a301982740c7 100644 --- a/datafusion/sqllogictest/test_files/repartition.slt +++ b/datafusion/sqllogictest/test_files/repartition.slt @@ -95,8 +95,8 @@ CREATE UNBOUNDED EXTERNAL TABLE sink_table ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW -LOCATION '../../testing/data/csv/aggregate_test_100.csv'; +LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); query TII SELECT c1, c2, c3 FROM sink_table WHERE c3 > 0 LIMIT 5; diff --git a/datafusion/sqllogictest/test_files/repartition_scan.slt b/datafusion/sqllogictest/test_files/repartition_scan.slt index 6ce7d7211202..2d4ca1baf23d 100644 --- a/datafusion/sqllogictest/test_files/repartition_scan.slt +++ b/datafusion/sqllogictest/test_files/repartition_scan.slt @@ -277,8 +277,8 @@ DROP TABLE arrow_table; statement ok CREATE EXTERNAL TABLE avro_table STORED AS AVRO -WITH HEADER ROW LOCATION '../../testing/data/avro/simple_enum.avro' +OPTIONS ('format.has_header' 'true'); # It would be great to see the file read as "4" groups with even sizes (offsets) eventually diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index 7fb2d55ff84a..2b1a1ef289aa 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -1352,8 +1352,8 @@ CREATE EXTERNAL TABLE aggregate_test_100 ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); # c8 = i32; c6 = i64 query TTT diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index 24163e37dec3..d73157570d8a 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt 
@@ -33,8 +33,8 @@ CREATE EXTERNAL TABLE aggregate_test_100 ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); statement ok CREATE EXTERNAL TABLE aggregate_simple ( @@ -43,8 +43,8 @@ CREATE EXTERNAL TABLE aggregate_simple ( c3 BOOLEAN NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../core/tests/data/aggregate_simple.csv' +OPTIONS ('format.has_header' 'true'); ########## ## SELECT Tests ########## @@ -1067,9 +1067,9 @@ CREATE EXTERNAL TABLE annotated_data_finite2 ( d INTEGER ) STORED AS CSV -WITH HEADER ROW WITH ORDER (a ASC, b ASC, c ASC) -LOCATION '../core/tests/data/window_2.csv'; +LOCATION '../core/tests/data/window_2.csv' +OPTIONS ('format.has_header' 'true'); # test_source_projection diff --git a/datafusion/sqllogictest/test_files/subquery.slt b/datafusion/sqllogictest/test_files/subquery.slt index 069404d6129d..ba4d644345d4 100644 --- a/datafusion/sqllogictest/test_files/subquery.slt +++ b/datafusion/sqllogictest/test_files/subquery.slt @@ -66,7 +66,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS customer ( c_acctbal DECIMAL(15, 2), c_mktsegment VARCHAR, c_comment VARCHAR, -) STORED AS CSV WITH HEADER ROW LOCATION '../core/tests/tpch-csv/customer.csv' OPTIONS ('format.delimiter' ','); +) STORED AS CSV LOCATION '../core/tests/tpch-csv/customer.csv' OPTIONS ('format.delimiter' ',', 'format.has_header' 'true'); statement ok CREATE EXTERNAL TABLE IF NOT EXISTS orders ( @@ -79,7 +79,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS orders ( o_clerk VARCHAR, o_shippriority INTEGER, o_comment VARCHAR, -) STORED AS CSV WITH HEADER ROW LOCATION '../core/tests/tpch-csv/orders.csv' OPTIONS ('format.delimiter' ','); +) STORED AS CSV LOCATION '../core/tests/tpch-csv/orders.csv' OPTIONS ('format.delimiter' ',', 'format.has_header' 'true'); statement ok CREATE EXTERNAL TABLE IF NOT EXISTS lineitem ( @@ -99,7 +99,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS lineitem ( l_shipinstruct VARCHAR, l_shipmode VARCHAR, l_comment VARCHAR, -) STORED AS CSV WITH HEADER ROW LOCATION '../core/tests/tpch-csv/lineitem.csv' OPTIONS ('format.delimiter' ','); +) STORED AS CSV LOCATION '../core/tests/tpch-csv/lineitem.csv' OPTIONS ('format.delimiter' ',', 'format.has_header' 'true'); # in_subquery_to_join_with_correlated_outer_filter query ITI rowsort diff --git a/datafusion/sqllogictest/test_files/topk.slt b/datafusion/sqllogictest/test_files/topk.slt index 19412defcd40..616794f84918 100644 --- a/datafusion/sqllogictest/test_files/topk.slt +++ b/datafusion/sqllogictest/test_files/topk.slt @@ -69,8 +69,8 @@ CREATE EXTERNAL TABLE aggregate_test_100 ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); query TT explain select * from aggregate_test_100 ORDER BY c13 desc limit 5; diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index c168890b7b00..36f024961875 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -105,8 +105,8 @@ CREATE EXTERNAL TABLE aggregate_test_100 ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); query I select COUNT(*) from ( @@ -475,9 +475,9 @@ CREATE EXTERNAL TABLE t1 ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW WITH ORDER (c1 ASC) -LOCATION '../../testing/data/csv/aggregate_test_100.csv'; +LOCATION
'../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); statement ok CREATE EXTERNAL TABLE t2 ( @@ -496,9 +496,9 @@ CREATE EXTERNAL TABLE t2 ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW WITH ORDER (c1a ASC) -LOCATION '../../testing/data/csv/aggregate_test_100.csv'; +LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); query TT explain diff --git a/datafusion/sqllogictest/test_files/wildcard.slt b/datafusion/sqllogictest/test_files/wildcard.slt index f83e84804a37..9285bdbf2306 100644 --- a/datafusion/sqllogictest/test_files/wildcard.slt +++ b/datafusion/sqllogictest/test_files/wildcard.slt @@ -40,8 +40,8 @@ CREATE EXTERNAL TABLE aggregate_simple ( c3 BOOLEAN NOT NULL, ) STORED AS CSV -WITH HEADER ROW LOCATION '../core/tests/data/aggregate_simple.csv' +OPTIONS ('format.has_header' 'true'); ########## diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index af09e644c9bf..be1517aa75c1 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -32,8 +32,8 @@ CREATE EXTERNAL TABLE aggregate_test_100 ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); statement ok CREATE EXTERNAL TABLE null_cases( @@ -42,8 +42,8 @@ CREATE EXTERNAL TABLE null_cases( c3 BIGINT NULL ) STORED AS CSV -WITH HEADER ROW -LOCATION '../core/tests/data/null_cases.csv'; +LOCATION '../core/tests/data/null_cases.csv' +OPTIONS ('format.has_header' 'true'); ### This is the same table as ### execute_with_partition with 4 partitions @@ -2488,10 +2488,9 @@ CREATE EXTERNAL TABLE annotated_data_finite ( desc_col INTEGER, ) STORED AS CSV -WITH HEADER ROW WITH ORDER (ts ASC) LOCATION '../core/tests/data/window_1.csv' -; +OPTIONS ('format.has_header' 'true'); # 100 rows. Columns in the table are ts, inc_col, desc_col. # Source is CsvExec which is ordered by ts column. @@ -2503,9 +2502,9 @@ CREATE UNBOUNDED EXTERNAL TABLE annotated_data_infinite ( desc_col INTEGER, ) STORED AS CSV -WITH HEADER ROW WITH ORDER (ts ASC) -LOCATION '../core/tests/data/window_1.csv'; +LOCATION '../core/tests/data/window_1.csv' +OPTIONS ('format.has_header' 'true'); # test_source_sorted_aggregate @@ -2910,9 +2909,9 @@ CREATE EXTERNAL TABLE annotated_data_finite2 ( d INTEGER ) STORED AS CSV -WITH HEADER ROW WITH ORDER (a ASC, b ASC, c ASC) -LOCATION '../core/tests/data/window_2.csv'; +LOCATION '../core/tests/data/window_2.csv' +OPTIONS ('format.has_header' 'true'); # Columns in the table are a,b,c,d. Source is CsvExec which is ordered by # a,b,c column. Column a has cardinality 2, column b has cardinality 4. 
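Every hunk in this and the neighboring test files applies the same mechanical rewrite, so its general shape is worth seeing once in isolation. The sketch below is illustrative only (the table name and path are invented, not taken from any one hunk) and follows the clause order used throughout this series, with `OPTIONS` last:

```sql
-- Before this series: the header flag was a dedicated clause.
-- CREATE EXTERNAL TABLE example (a INT, b INT) STORED AS CSV
-- WITH HEADER ROW
-- WITH ORDER (a ASC)
-- LOCATION '/path/to/example.csv';

-- After: the header flag becomes a key/value pair in OPTIONS.
CREATE EXTERNAL TABLE example (a INT, b INT)
STORED AS CSV
WITH ORDER (a ASC)
LOCATION '/path/to/example.csv'
OPTIONS ('format.has_header' 'true');
```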
@@ -2926,9 +2925,9 @@ CREATE UNBOUNDED EXTERNAL TABLE annotated_data_infinite2 ( d INTEGER ) STORED AS CSV -WITH HEADER ROW WITH ORDER (a ASC, b ASC, c ASC) -LOCATION '../core/tests/data/window_2.csv'; +LOCATION '../core/tests/data/window_2.csv' +OPTIONS ('format.has_header' 'true'); # test_infinite_source_partition_by @@ -3402,10 +3401,10 @@ CREATE EXTERNAL TABLE multiple_ordered_table ( d INTEGER ) STORED AS CSV -WITH HEADER ROW WITH ORDER (a ASC, b ASC) WITH ORDER (c ASC) -LOCATION '../core/tests/data/window_2.csv'; +LOCATION '../core/tests/data/window_2.csv' +OPTIONS ('format.has_header' 'true'); # Since column b is constant after filter b=0, # There should be no SortExec(b ASC) in the plan @@ -3455,10 +3454,10 @@ CREATE UNBOUNDED EXTERNAL TABLE multiple_ordered_table_inf ( d INTEGER ) STORED AS CSV -WITH HEADER ROW WITH ORDER (a ASC, b ASC) WITH ORDER (c ASC) -LOCATION '../core/tests/data/window_2.csv'; +LOCATION '../core/tests/data/window_2.csv' +OPTIONS ('format.has_header' 'true'); # All of the window execs in the physical plan should work in the # sorted mode. diff --git a/docs/source/user-guide/cli/datasources.md b/docs/source/user-guide/cli/datasources.md index c2c00b633479..8b254a896f90 100644 --- a/docs/source/user-guide/cli/datasources.md +++ b/docs/source/user-guide/cli/datasources.md @@ -166,8 +166,8 @@ Register a single file csv datasource with a header row. ```sql CREATE EXTERNAL TABLE test STORED AS CSV -WITH HEADER ROW -LOCATION '/path/to/aggregate_test_100.csv'; +LOCATION '/path/to/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); ``` Register a single file csv datasource with explicitly defined schema. diff --git a/docs/source/user-guide/sql/ddl.md b/docs/source/user-guide/sql/ddl.md index 3d8b632f6ec2..41980851df1e 100644 --- a/docs/source/user-guide/sql/ddl.md +++ b/docs/source/user-guide/sql/ddl.md @@ -60,9 +60,6 @@ CREATE [UNBOUNDED] EXTERNAL TABLE [ IF NOT EXISTS ] [ () ] STORED AS -[ WITH HEADER ROW ] -[ DELIMITER ] -[ COMPRESSION TYPE ] [ PARTITIONED BY () ] [ WITH ORDER () ] [ OPTIONS () ] @@ -100,8 +97,8 @@ scanning a subset of the file. ```sql CREATE EXTERNAL TABLE test STORED AS CSV -WITH HEADER ROW -LOCATION '/path/to/aggregate_simple.csv'; +LOCATION '/path/to/aggregate_simple.csv' +OPTIONS ('format.has_header' 'true'); ``` It is also possible to use compressed files, such as `.csv.gz`: @@ -110,8 +107,8 @@ It is also possible to use compressed files, such as `.csv.gz`: CREATE EXTERNAL TABLE test STORED AS CSV COMPRESSION TYPE GZIP -WITH HEADER ROW -LOCATION '/path/to/aggregate_simple.csv.gz'; +LOCATION '/path/to/aggregate_simple.csv.gz' +OPTIONS ('format.has_header' 'true'); ``` It is also possible to specify the schema manually. @@ -133,8 +130,8 @@ CREATE EXTERNAL TABLE test ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW -LOCATION '/path/to/aggregate_test_100.csv'; +LOCATION '/path/to/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); ``` It is also possible to specify a directory that contains a partitioned table (multiple files with the same schema) ```sql CREATE EXTERNAL TABLE test STORED AS CSV -WITH HEADER ROW -LOCATION '/path/to/directory/of/files'; +LOCATION '/path/to/directory/of/files' +OPTIONS ('format.has_header' 'true'); ``` With the `CREATE UNBOUNDED EXTERNAL TABLE` SQL statement,
we can create unbounded data sources such as the following: @@ -181,9 +178,9 @@ CREATE EXTERNAL TABLE test ( c13 VARCHAR NOT NULL ) STORED AS CSV -WITH HEADER ROW WITH ORDER (c2 ASC, c5 + c8 DESC NULL FIRST) -LOCATION '/path/to/aggregate_test_100.csv'; +LOCATION '/path/to/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); ``` Where the `WITH ORDER` clause specifies the sort order: diff --git a/docs/source/user-guide/sql/write_options.md b/docs/source/user-guide/sql/write_options.md index 7631525f13e5..c496d8d48a02 100644 --- a/docs/source/user-guide/sql/write_options.md +++ b/docs/source/user-guide/sql/write_options.md @@ -38,11 +38,11 @@ CREATE EXTERNAL TABLE my_table(a bigint, b bigint) STORED AS csv COMPRESSION TYPE gzip - WITH HEADER ROW DELIMITER ';' LOCATION '/test/location/my_csv_table/' OPTIONS( - NULL_VALUE 'NAN' + NULL_VALUE 'NAN', + 'format.has_header' 'true' ) ``` From 4873d61d54800290b1523f89b421e400a91c11d5 Mon Sep 17 00:00:00 2001 From: Mehmet Ozan Kabak Date: Thu, 9 May 2024 12:58:15 +0300 Subject: [PATCH 06/15] Review Part 1 --- datafusion-cli/src/helper.rs | 17 +++++--------- datafusion/common/src/config.rs | 23 ++++++++++--------- .../core/src/datasource/file_format/csv.rs | 12 +++++----- 3 files changed, 24 insertions(+), 28 deletions(-) diff --git a/datafusion-cli/src/helper.rs b/datafusion-cli/src/helper.rs index 28cba06723aa..f93aaec4218d 100644 --- a/datafusion-cli/src/helper.rs +++ b/datafusion-cli/src/helper.rs @@ -20,25 +20,20 @@ use std::borrow::Cow; +use crate::highlighter::{NoSyntaxHighlighter, SyntaxHighlighter}; + use datafusion::common::sql_datafusion_err; use datafusion::error::DataFusionError; use datafusion::sql::parser::{DFParser, Statement}; use datafusion::sql::sqlparser::dialect::dialect_from_str; use datafusion::sql::sqlparser::parser::ParserError; + -use rustyline::completion::Completer; -use rustyline::completion::FilenameCompleter; -use rustyline::completion::Pair; + +use rustyline::completion::{Completer, FilenameCompleter, Pair}; use rustyline::error::ReadlineError; use rustyline::highlight::Highlighter; use rustyline::hint::Hinter; -use rustyline::validate::ValidationContext; -use rustyline::validate::ValidationResult; -use rustyline::validate::Validator; -use rustyline::Context; -use rustyline::Helper; -use rustyline::Result; - -use crate::highlighter::{NoSyntaxHighlighter, SyntaxHighlighter}; +use rustyline::validate::{ValidationContext, ValidationResult, Validator}; +use rustyline::{Context, Helper, Result}; pub struct CliHelper { completer: FilenameCompleter, diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 49d31257c8ba..a1445dfb2f40 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -1564,20 +1564,21 @@ config_namespace_with_hashmap! { config_namespace! { /// Options controlling CSV format pub struct CsvOptions { - /// If the first line of the CSV is column names. - /// If not specified, uses default from `CREATE TABLE` command, if any. - pub has_header: Option, default =None + /// Specifies whether there is a CSV header (i.e. the first line + /// consists of column names). If not specified, uses default from + /// the `CREATE TABLE` command, if any.
+ pub has_header: Option, default = None pub delimiter: u8, default = b',' pub quote: u8, default = b'"' pub escape: Option, default = None pub compression: CompressionTypeVariant, default = CompressionTypeVariant::UNCOMPRESSED pub schema_infer_max_rec: usize, default = 100 - pub date_format: Option, default = None - pub datetime_format: Option, default = None - pub timestamp_format: Option, default = None - pub timestamp_tz_format: Option, default = None - pub time_format: Option, default = None - pub null_value: Option, default = None + pub date_format: Option, default = None + pub datetime_format: Option, default = None + pub timestamp_format: Option, default = None + pub timestamp_tz_format: Option, default = None + pub time_format: Option, default = None + pub null_value: Option, default = None } } @@ -1606,8 +1607,8 @@ impl CsvOptions { self } - /// True if the first line is a header. If the condition of having header - /// is not set by the format options, session state decides it. + /// Returns true if the first line is a header. If format options does not + /// specify whether there is a header, consults the configuration. pub fn has_header(&self, config_opt: &ConfigOptions) -> bool { self.has_header.unwrap_or(config_opt.catalog.has_header) } diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index ab2582b4e820..3e5c7af1f82e 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -32,8 +32,9 @@ use crate::datasource::physical_plan::{ use crate::error::Result; use crate::execution::context::SessionState; use crate::physical_plan::insert::{DataSink, DataSinkExec}; -use crate::physical_plan::{DisplayAs, DisplayFormatType, Statistics}; -use crate::physical_plan::{ExecutionPlan, SendableRecordBatchStream}; +use crate::physical_plan::{ + DisplayAs, DisplayFormatType, ExecutionPlan, SendableRecordBatchStream, Statistics, +}; use arrow::array::RecordBatch; use arrow::csv::WriterBuilder; @@ -242,8 +243,8 @@ impl FileFormat for CsvFormat { ) -> Result> { let exec = CsvExec::new( conf, - // If the condition of having header is not set by - // the format options, session state decides it. + // If format options does not specify whether there is a header, + // we consult configuration options. self.options.has_header(state.config_options()), self.options.delimiter, self.options.quote, @@ -819,8 +820,7 @@ mod tests { has_header: bool, ) -> Result> { let root = format!("{}/csv", crate::test_util::arrow_test_data()); - let mut format = CsvFormat::default(); - format = format.with_has_header(has_header); + let format = CsvFormat::default().with_has_header(has_header); scan_format(state, &format, &root, file_name, projection, limit).await } From f0f6f09dcb966f97c50f54d62757932cd8b75177 Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Thu, 9 May 2024 13:12:52 +0300 Subject: [PATCH 07/15] . 
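`format.has_header` is a CSV-specific format option: it belongs in the shared key/value options map, and it has no meaning for a self-describing format such as Parquet, which is why the Parquet table below loses it. An illustrative sketch of the resulting usage (paths are invented):

```sql
-- CSV: the header flag travels with the other format options.
CREATE EXTERNAL TABLE csv_table STORED AS CSV
LOCATION '/data/example.csv'
OPTIONS ('format.has_header' 'true');

-- Parquet: schema and layout come from the file itself,
-- so no header option applies.
CREATE EXTERNAL TABLE parquet_table STORED AS PARQUET
LOCATION '/data/example.parquet';
```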
--- datafusion/core/src/datasource/listing_table_factory.rs | 3 ++- datafusion/sqllogictest/test_files/parquet.slt | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs index 1d33944330da..987b9e12a424 100644 --- a/datafusion/core/src/datasource/listing_table_factory.rs +++ b/datafusion/core/src/datasource/listing_table_factory.rs @@ -218,6 +218,7 @@ mod tests { let mut options = HashMap::new(); options.insert("format.schema_infer_max_rec".to_owned(), "1000".to_owned()); + options.insert("format.has_header".into(), "true".into()); let cmd = CreateExternalTable { name, location: csv_file.path().to_str().unwrap().to_string(), @@ -228,7 +229,7 @@ mod tests { definition: None, order_exprs: vec![], unbounded: false, - options: HashMap::from([("format.has_header".into(), "true".into())]), + options, constraints: Constraints::empty(), column_defaults: HashMap::new(), }; diff --git a/datafusion/sqllogictest/test_files/parquet.slt b/datafusion/sqllogictest/test_files/parquet.slt index 9c90a1aaaeae..25463eb36273 100644 --- a/datafusion/sqllogictest/test_files/parquet.slt +++ b/datafusion/sqllogictest/test_files/parquet.slt @@ -66,8 +66,7 @@ CREATE EXTERNAL TABLE test_table ( date_col DATE ) STORED AS PARQUET -LOCATION 'test_files/scratch/parquet/test_table' -OPTIONS ('format.has_header' 'true'); +LOCATION 'test_files/scratch/parquet/test_table'; # Basic query: query ITID From 9d816017f2c11d0197c35e4d8a98c249840a6f96 Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Thu, 9 May 2024 14:19:58 +0300 Subject: [PATCH 08/15] Fix failing tests --- datafusion/common/src/config.rs | 6 +- .../core/src/datasource/file_format/csv.rs | 15 ++- datafusion/core/src/datasource/stream.rs | 13 ++- datafusion/core/tests/sql/mod.rs | 2 +- .../tests/cases/roundtrip_logical_plan.rs | 4 +- datafusion/sql/src/parser.rs | 105 ++++-------------- datafusion/sql/src/statement.rs | 31 +----- .../test_files/create_external_table.slt | 8 -- .../test_files/repartition_scan.slt | 1 - 9 files changed, 49 insertions(+), 136 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index a1445dfb2f40..584e69cd8591 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -1566,7 +1566,7 @@ config_namespace! { pub struct CsvOptions { /// Specifies whether there is a CSV header (i.e. the first line /// consists of column names). If not specified, uses default from - /// the `CREATE TABLE` command, if any. + /// the session state, if any. pub has_header: Option, default = None pub delimiter: u8, default = b',' pub quote: u8, default = b'"' pub escape: Option, default = None pub compression: CompressionTypeVariant, default = CompressionTypeVariant::UNCOMPRESSED pub schema_infer_max_rec: usize, default = 100 @@ -1609,8 +1609,8 @@ impl CsvOptions { /// Returns true if the first line is a header. If format options does not /// specify whether there is a header, consults the configuration. - pub fn has_header(&self, config_opt: &ConfigOptions) -> bool { - self.has_header.unwrap_or(config_opt.catalog.has_header) + pub fn has_header(&self) -> Option { + self.has_header } /// The character separating values within a row.
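With `has_header` held as an optional value, a table definition that omits the option defers to the session-level `datafusion.catalog.has_header` setting rather than a hard-coded default; the CSV call sites below perform that fallback via `unwrap_or`. An illustrative SQL view of the intended behavior (paths are invented):

```sql
-- Session-wide default, consulted only when a table omits the option.
SET datafusion.catalog.has_header = true;

-- No per-table option: the header flag falls back to the session default.
CREATE EXTERNAL TABLE t1 STORED AS CSV
LOCATION '/data/t1.csv';

-- Explicit per-table option: overrides the session default.
CREATE EXTERNAL TABLE t2 STORED AS CSV
LOCATION '/data/t2.csv'
OPTIONS ('format.has_header' 'false');
```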
diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index 3e5c7af1f82e..bfd0975a52a9 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -40,7 +40,7 @@ use arrow::array::RecordBatch; use arrow::csv::WriterBuilder; use arrow::datatypes::SchemaRef; use arrow::datatypes::{DataType, Field, Fields, Schema}; -use datafusion_common::config::{ConfigOptions, CsvOptions}; +use datafusion_common::config::CsvOptions; use datafusion_common::file_options::csv_writer::CsvWriterOptions; use datafusion_common::{exec_err, not_impl_err, DataFusionError, FileType}; use datafusion_execution::TaskContext; @@ -142,8 +142,8 @@ impl CsvFormat { } /// True if the first line is a header. - pub fn has_header(&self, config_opt: &ConfigOptions) -> bool { - self.options.has_header(config_opt) + pub fn has_header(&self) -> Option { + self.options.has_header } /// The character separating values within a row. @@ -245,7 +245,9 @@ impl FileFormat for CsvFormat { conf, // If format options does not specify whether there is a header, // we consult configuration options. - self.options.has_header(state.config_options()), + self.options + .has_header + .unwrap_or(state.config_options().catalog.has_header), self.options.delimiter, self.options.quote, self.options.escape, @@ -303,7 +305,10 @@ impl CsvFormat { while let Some(chunk) = stream.next().await.transpose()? { let format = arrow::csv::reader::Format::default() .with_header( - self.options.has_header(state.config_options()) && first_chunk, + self.options + .has_header + .unwrap_or(state.config_options().catalog.has_header) + && first_chunk, ) .with_delimiter(self.options.delimiter); diff --git a/datafusion/core/src/datasource/stream.rs b/datafusion/core/src/datasource/stream.rs index 5e485f42b516..41296c1b8d23 100644 --- a/datafusion/core/src/datasource/stream.rs +++ b/datafusion/core/src/datasource/stream.rs @@ -30,7 +30,7 @@ use arrow_schema::SchemaRef; use async_trait::async_trait; use futures::StreamExt; -use datafusion_common::{plan_err, Constraints, DataFusionError, Result}; +use datafusion_common::{config_err, plan_err, Constraints, DataFusionError, Result}; use datafusion_common_runtime::SpawnedTask; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_expr::{CreateExternalTable, Expr, TableType}; @@ -58,11 +58,22 @@ impl TableProviderFactory for StreamTableFactory { let schema: SchemaRef = Arc::new(cmd.schema.as_ref().into()); let location = cmd.location.clone(); let encoding = cmd.file_type.parse()?; + let header = if let Ok(opt) = cmd + .options + .get("format.has_header") + .map(|has_header| bool::from_str(has_header)) + .transpose() + { + opt.unwrap_or(false) + } else { + return config_err!("format.has_header can either be true or false"); + }; let config = StreamConfig::new_file(schema, location.into()) .with_encoding(encoding) .with_order(cmd.order_exprs.clone()) .with_batch_size(state.config().batch_size()) + .with_header(header) .with_constraints(cmd.constraints.clone()); Ok(Arc::new(StreamTable(Arc::new(config)))) diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index 00f091d77cff..995ce35c5bc2 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -85,7 +85,7 @@ async fn register_aggregate_csv_by_sql(ctx: &SessionContext) { c13 VARCHAR NOT NULL ) STORED AS CSV - LOCATION '{testdata}/csv/aggregate_test_100.csv + LOCATION 
'{testdata}/csv/aggregate_test_100.csv' OPTIONS ('format.has_header' 'true') " )) diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 74df7178b0f2..63e9177ffaf8 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -238,7 +238,7 @@ async fn roundtrip_custom_listing_tables() -> Result<()> { STORED AS CSV WITH ORDER (a ASC, b ASC) WITH ORDER (c ASC) - LOCATION '../core/tests/data/window_2.csv'; + LOCATION '../core/tests/data/window_2.csv' OPTIONS ('format.has_header' 'true')"; let plan = ctx.state().create_logical_plan(query).await?; @@ -268,7 +268,7 @@ async fn roundtrip_logical_plan_aggregation_with_pk() -> Result<()> { STORED AS CSV WITH ORDER (a ASC, b ASC) WITH ORDER (c ASC) - LOCATION '../core/tests/data/window_2.csv'; + LOCATION '../core/tests/data/window_2.csv' OPTIONS ('format.has_header' 'true')", ) .await?; diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 7468527662f6..d3ae7a13ff3d 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -101,12 +101,10 @@ pub struct CopyToStatement { pub target: String, /// Partition keys pub partitioned_by: Vec, - /// Indicates whether there is a header row (e.g. CSV) - pub has_header: bool, /// File type (Parquet, NDJSON, CSV etc.) pub stored_as: Option, /// Target specific options - pub options: Vec<(String, Value)>, + pub options: HashMap, } impl fmt::Display for CopyToStatement { @@ -129,7 +127,10 @@ impl fmt::Display for CopyToStatement { } if !options.is_empty() { - let opts: Vec<_> = options.iter().map(|(k, v)| format!("{k} {v}")).collect(); + let opts: Vec<_> = options + .iter() + .map(|(k, v)| format!("'{k}' '{v}'")) + .collect(); write!(f, " OPTIONS ({})", opts.join(", "))?; } @@ -386,8 +387,7 @@ impl<'a> DFParser<'a> { stored_as: Option, target: Option, partitioned_by: Option>, - has_header: Option, - options: Option>, + options: Option>, } let mut builder = Builder::default(); @@ -423,7 +423,7 @@ impl<'a> DFParser<'a> { } Keyword::OPTIONS => { ensure_not_set(&builder.options, "OPTIONS")?; - builder.options = Some(self.parse_value_options()?); + builder.options = Some(self.parse_string_options()?); } _ => { unreachable!() @@ -451,9 +451,8 @@ impl<'a> DFParser<'a> { source, target, partitioned_by: builder.partitioned_by.unwrap_or(vec![]), - has_header: builder.has_header.unwrap_or(false), stored_as: builder.stored_as, - options: builder.options.unwrap_or(vec![]), + options: builder.options.unwrap_or(HashMap::new()), })) } @@ -835,33 +834,6 @@ impl<'a> DFParser<'a> { } Ok(options) } - - /// Parses (key value) style options into a map of String --> [`Value`]. - /// - /// Unlike [`Self::parse_string_options`], this method supports - /// keywords as key names as well as multiple value types such as - /// Numbers as well as Strings. 
- fn parse_value_options(&mut self) -> Result, ParserError> { - let mut options = vec![]; - self.parser.expect_token(&Token::LParen)?; - - loop { - let key = self.parse_option_key()?; - let value = self.parse_option_value()?; - options.push((key, value)); - let comma = self.parser.consume_token(&Token::Comma); - if self.parser.consume_token(&Token::RParen) { - // allow a trailing comma, even though it's not in standard - break; - } else if !comma { - return self.expected( - "',' or ')' after option definition", - self.parser.peek_token(), - ); - } - } - Ok(options) - } } #[cfg(test)] @@ -997,27 +969,6 @@ mod tests { }); expect_parse_ok(sql, expected)?; - // positive case: it is ok for case insensitive sql stmt with has_header option tokens - let sqls = vec![ - "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS ('FORMAT.HAS_HEADER' 'TRUE')", - "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS ('format.has_header' 'true')", - ]; - for sql in sqls { - let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), - columns: vec![make_column_def("c1", DataType::Int(display))], - file_type: "CSV".to_string(), - location: "foo.csv".into(), - table_partition_cols: vec![], - order_exprs: vec![], - if_not_exists: false, - unbounded: false, - options: HashMap::from([("format.has_header".into(), "true".into())]), - constraints: vec![], - }); - expect_parse_ok(sql, expected)?; - } - // positive case: it is ok for sql stmt with `COMPRESSION TYPE GZIP` tokens let sqls = vec![ ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS @@ -1357,9 +1308,8 @@ mod tests { source: object_name("foo"), target: "bar".to_string(), partitioned_by: vec![], - has_header: false, stored_as: Some("CSV".to_owned()), - options: vec![], + options: HashMap::new(), }); assert_eq!(verified_stmt(sql), expected); @@ -1393,9 +1343,8 @@ mod tests { source: object_name("foo"), target: "bar".to_string(), partitioned_by: vec![], - has_header: false, stored_as: Some("PARQUET".to_owned()), - options: vec![], + options: HashMap::new(), }); let expected = Statement::Explain(ExplainStatement { analyze, @@ -1430,9 +1379,8 @@ mod tests { source: CopyToSource::Query(query), target: "bar".to_string(), partitioned_by: vec![], - has_header: true, stored_as: Some("CSV".to_owned()), - options: vec![], + options: HashMap::from([("format.has_header".into(), "true".into())]), }); assert_eq!(verified_stmt(sql), expected); Ok(()) @@ -1445,12 +1393,8 @@ mod tests { source: object_name("foo"), target: "bar".to_string(), partitioned_by: vec![], - has_header: false, stored_as: Some("CSV".to_owned()), - options: vec![( - "row_group_size".to_string(), - Value::Number("55".to_string(), false), - )], + options: HashMap::from([("row_group_size".into(), "55".into())]), }); assert_eq!(verified_stmt(sql), expected); Ok(()) @@ -1458,17 +1402,13 @@ mod tests { #[test] fn copy_to_partitioned_by() -> Result<(), ParserError> { - let sql = "COPY foo TO bar STORED AS CSV PARTITIONED BY (a) OPTIONS (row_group_size 55)"; + let sql = "COPY foo TO bar STORED AS CSV PARTITIONED BY (a) OPTIONS ('row_group_size' '55')"; let expected = Statement::CopyTo(CopyToStatement { source: object_name("foo"), target: "bar".to_string(), partitioned_by: vec!["a".to_string()], - has_header: false, stored_as: Some("CSV".to_owned()), - options: vec![( - "row_group_size".to_string(), - Value::Number("55".to_string(), false), - )], + options: HashMap::from([("row_group_size".to_string(), "55".into())]), }); 
assert_eq!(verified_stmt(sql), expected); Ok(()) @@ -1478,18 +1418,12 @@ mod tests { fn copy_to_multi_options() -> Result<(), ParserError> { // order of options is preserved let sql = - "COPY foo TO bar STORED AS parquet OPTIONS ('format.row_group_size' 55, 'format.compression' snappy)"; + "COPY foo TO bar STORED AS parquet OPTIONS ('format.row_group_size' '55', 'format.compression' 'snappy')"; - let expected_options = vec![ - ( - "format.row_group_size".to_string(), - Value::Number("55".to_string(), false), - ), - ( - "format.compression".to_string(), - Value::UnQuotedString("snappy".to_string()), - ), - ]; + let expected_options = HashMap::from([ + ("format.row_group_size".to_string(), "55".into()), + ("format.compression".to_string(), "snappy".into()), + ]); let mut statements = DFParser::parse_sql(sql).unwrap(); assert_eq!(statements.len(), 1); @@ -1527,6 +1461,7 @@ mod tests { /// `canonical` sql string fn one_statement_parses_to(sql: &str, canonical: &str) -> Statement { let mut statements = DFParser::parse_sql(sql).unwrap(); + println!("{:?}", statements[0]); assert_eq!(statements.len(), 1); if sql != canonical { diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index e6eda0b2787d..e5eef838b301 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -849,36 +849,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } }; - let mut options = HashMap::new(); - for (key, value) in statement.options { - let value_string = match value { - Value::SingleQuotedString(s) => s.to_string(), - Value::DollarQuotedString(s) => s.to_string(), - Value::UnQuotedString(s) => s.to_string(), - Value::Number(_, _) | Value::Boolean(_) => value.to_string(), - Value::DoubleQuotedString(_) - | Value::EscapedStringLiteral(_) - | Value::NationalStringLiteral(_) - | Value::SingleQuotedByteStringLiteral(_) - | Value::DoubleQuotedByteStringLiteral(_) - | Value::RawStringLiteral(_) - | Value::HexStringLiteral(_) - | Value::Null - | Value::Placeholder(_) => { - return plan_err!("Unsupported Value in COPY statement {}", value); - } - }; - if !(&key.contains('.')) { - // If config does not belong to any namespace, assume it is - // a format option and apply the format prefix for backwards - // compatibility. 
- - let renamed_key = format!("format.{}", key); - options.insert(renamed_key.to_lowercase(), value_string.to_lowercase()); - } else { - options.insert(key.to_lowercase(), value_string.to_lowercase()); - } - } + let options = statement.options; let file_type = if let Some(file_type) = statement.stored_as { FileType::from_str(&file_type).map_err(|_| { diff --git a/datafusion/sqllogictest/test_files/create_external_table.slt b/datafusion/sqllogictest/test_files/create_external_table.slt index 4724bc87a099..9068c77ab422 100644 --- a/datafusion/sqllogictest/test_files/create_external_table.slt +++ b/datafusion/sqllogictest/test_files/create_external_table.slt @@ -52,14 +52,6 @@ CREATE EXTERNAL TABLE t STORED AS CSV WITH HEADER LOCATION 'abc' statement error DataFusion error: SQL error: ParserError\("Expected BY, found: LOCATION"\) CREATE EXTERNAL TABLE t STORED AS CSV PARTITIONED LOCATION 'abc' -# Missing `TYPE` in COMPRESSION clause -statement error DataFusion error: SQL error: ParserError\("Expected TYPE, found: LOCATION"\) -CREATE EXTERNAL TABLE t STORED AS CSV COMPRESSION LOCATION 'abc' - -# Invalid compression type -statement error DataFusion error: SQL error: ParserError\("Unsupported file compression type ZZZ"\) -CREATE EXTERNAL TABLE t STORED AS CSV COMPRESSION TYPE ZZZ LOCATION 'blahblah' - # Duplicate `STORED AS` clause statement error DataFusion error: SQL error: ParserError\("STORED AS specified more than once"\) CREATE EXTERNAL TABLE t STORED AS CSV STORED AS PARQUET LOCATION 'foo.parquet' diff --git a/datafusion/sqllogictest/test_files/repartition_scan.slt b/datafusion/sqllogictest/test_files/repartition_scan.slt index 2d4ca1baf23d..6484d437e179 100644 --- a/datafusion/sqllogictest/test_files/repartition_scan.slt +++ b/datafusion/sqllogictest/test_files/repartition_scan.slt @@ -277,8 +277,8 @@ statement ok CREATE EXTERNAL TABLE avro_table STORED AS AVRO LOCATION '../../testing/data/avro/simple_enum.avro' -OPTIONS ('format.has_header' 'true'); # It would be great to see the file read as "4" groups with even sizes (offsets) eventually From bb61cc182affe386ecb047aeaaef35751d7a83c5 Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Thu, 9 May 2024 15:33:39 +0300 Subject: [PATCH 09/15] Revert "Fix failing tests" This reverts commit 9d816017f2c11d0197c35e4d8a98c249840a6f96. --- datafusion/common/src/config.rs | 6 +- .../core/src/datasource/file_format/csv.rs | 15 +-- datafusion/core/src/datasource/stream.rs | 13 +-- datafusion/core/tests/sql/mod.rs | 2 +- .../tests/cases/roundtrip_logical_plan.rs | 4 +- datafusion/sql/src/parser.rs | 105 ++++++++++++++---- datafusion/sql/src/statement.rs | 31 +++++- .../test_files/create_external_table.slt | 8 ++ .../test_files/repartition_scan.slt | 1 + 9 files changed, 136 insertions(+), 49 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 584e69cd8591..a1445dfb2f40 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -1566,7 +1566,7 @@ config_namespace! { pub struct CsvOptions { /// Specifies whether there is a CSV header (i.e. the first line /// consists of column names). If not specified, uses default from - /// the session state, if any. + /// the `CREATE TABLE` command, if any. pub has_header: Option, default = None pub delimiter: u8, default = b',' pub quote: u8, default = b'"' pub escape: Option, default = None pub compression: CompressionTypeVariant, default = CompressionTypeVariant::UNCOMPRESSED pub schema_infer_max_rec: usize, default = 100 @@ -1609,8 +1609,8 @@ impl CsvOptions { /// Returns true if the first line is a header. If format options does not /// specify whether there is a header, consults the configuration.
- pub fn has_header(&self) -> Option { - self.has_header + pub fn has_header(&self, config_opt: &ConfigOptions) -> bool { + self.has_header.unwrap_or(config_opt.catalog.has_header) } /// The character separating values within a row. diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index bfd0975a52a9..3e5c7af1f82e 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -40,7 +40,7 @@ use arrow::array::RecordBatch; use arrow::csv::WriterBuilder; use arrow::datatypes::SchemaRef; use arrow::datatypes::{DataType, Field, Fields, Schema}; -use datafusion_common::config::CsvOptions; +use datafusion_common::config::{ConfigOptions, CsvOptions}; use datafusion_common::file_options::csv_writer::CsvWriterOptions; use datafusion_common::{exec_err, not_impl_err, DataFusionError, FileType}; use datafusion_execution::TaskContext; @@ -142,8 +142,8 @@ impl CsvFormat { } /// True if the first line is a header. - pub fn has_header(&self) -> Option { - self.options.has_header + pub fn has_header(&self, config_opt: &ConfigOptions) -> bool { + self.options.has_header(config_opt) } /// The character separating values within a row. @@ -245,9 +245,7 @@ impl FileFormat for CsvFormat { conf, // If format options does not specify whether there is a header, // we consult configuration options. - self.options - .has_header - .unwrap_or(state.config_options().catalog.has_header), + self.options.has_header(state.config_options()), self.options.delimiter, self.options.quote, self.options.escape, @@ -305,10 +303,7 @@ impl CsvFormat { while let Some(chunk) = stream.next().await.transpose()? { let format = arrow::csv::reader::Format::default() .with_header( - self.options - .has_header - .unwrap_or(state.config_options().catalog.has_header) - && first_chunk, + self.options.has_header(state.config_options()) && first_chunk, ) .with_delimiter(self.options.delimiter); diff --git a/datafusion/core/src/datasource/stream.rs b/datafusion/core/src/datasource/stream.rs index 41296c1b8d23..5e485f42b516 100644 --- a/datafusion/core/src/datasource/stream.rs +++ b/datafusion/core/src/datasource/stream.rs @@ -30,7 +30,7 @@ use arrow_schema::SchemaRef; use async_trait::async_trait; use futures::StreamExt; -use datafusion_common::{config_err, plan_err, Constraints, DataFusionError, Result}; +use datafusion_common::{plan_err, Constraints, DataFusionError, Result}; use datafusion_common_runtime::SpawnedTask; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_expr::{CreateExternalTable, Expr, TableType}; @@ -58,22 +58,11 @@ impl TableProviderFactory for StreamTableFactory { let schema: SchemaRef = Arc::new(cmd.schema.as_ref().into()); let location = cmd.location.clone(); let encoding = cmd.file_type.parse()?; - let header = if let Ok(opt) = cmd - .options - .get("format.has_header") - .map(|has_header| bool::from_str(has_header)) - .transpose() - { - opt.unwrap_or(false) - } else { - return config_err!("format.has_header can either be true or false"); - }; let config = StreamConfig::new_file(schema, location.into()) .with_encoding(encoding) .with_order(cmd.order_exprs.clone()) .with_batch_size(state.config().batch_size()) - .with_header(header) .with_constraints(cmd.constraints.clone()); Ok(Arc::new(StreamTable(Arc::new(config)))) diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index 995ce35c5bc2..00f091d77cff 100644 --- 
a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -85,7 +85,7 @@ async fn register_aggregate_csv_by_sql(ctx: &SessionContext) { c13 VARCHAR NOT NULL ) STORED AS CSV - LOCATION '{testdata}/csv/aggregate_test_100.csv' + LOCATION '{testdata}/csv/aggregate_test_100.csv OPTIONS ('format.has_header' 'true') " )) diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 63e9177ffaf8..74df7178b0f2 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -238,7 +238,7 @@ async fn roundtrip_custom_listing_tables() -> Result<()> { STORED AS CSV WITH ORDER (a ASC, b ASC) WITH ORDER (c ASC) - LOCATION '../core/tests/data/window_2.csv' + LOCATION '../core/tests/data/window_2.csv'; OPTIONS ('format.has_header' 'true')"; let plan = ctx.state().create_logical_plan(query).await?; @@ -268,7 +268,7 @@ async fn roundtrip_logical_plan_aggregation_with_pk() -> Result<()> { STORED AS CSV WITH ORDER (a ASC, b ASC) WITH ORDER (c ASC) - LOCATION '../core/tests/data/window_2.csv' + LOCATION '../core/tests/data/window_2.csv'; OPTIONS ('format.has_header' 'true')", ) .await?; diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index d3ae7a13ff3d..7468527662f6 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -101,10 +101,12 @@ pub struct CopyToStatement { pub target: String, /// Partition keys pub partitioned_by: Vec, + /// Indicates whether there is a header row (e.g. CSV) + pub has_header: bool, /// File type (Parquet, NDJSON, CSV etc.) pub stored_as: Option, /// Target specific options - pub options: HashMap, + pub options: Vec<(String, Value)>, } impl fmt::Display for CopyToStatement { @@ -127,10 +129,7 @@ impl fmt::Display for CopyToStatement { } if !options.is_empty() { - let opts: Vec<_> = options - .iter() - .map(|(k, v)| format!("'{k}' '{v}'")) - .collect(); + let opts: Vec<_> = options.iter().map(|(k, v)| format!("{k} {v}")).collect(); write!(f, " OPTIONS ({})", opts.join(", "))?; } @@ -387,7 +386,8 @@ impl<'a> DFParser<'a> { stored_as: Option, target: Option, partitioned_by: Option>, - options: Option>, + has_header: Option, + options: Option>, } let mut builder = Builder::default(); @@ -423,7 +423,7 @@ impl<'a> DFParser<'a> { } Keyword::OPTIONS => { ensure_not_set(&builder.options, "OPTIONS")?; - builder.options = Some(self.parse_string_options()?); + builder.options = Some(self.parse_value_options()?); } _ => { unreachable!() @@ -451,8 +451,9 @@ impl<'a> DFParser<'a> { source, target, partitioned_by: builder.partitioned_by.unwrap_or(vec![]), + has_header: builder.has_header.unwrap_or(false), stored_as: builder.stored_as, - options: builder.options.unwrap_or(HashMap::new()), + options: builder.options.unwrap_or(vec![]), })) } @@ -834,6 +835,33 @@ impl<'a> DFParser<'a> { } Ok(options) } + + /// Parses (key value) style options into a map of String --> [`Value`]. + /// + /// Unlike [`Self::parse_string_options`], this method supports + /// keywords as key names as well as multiple value types such as + /// Numbers as well as Strings. 
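+    ///
+    /// For example (illustrative), `OPTIONS ('format.delimiter' '|', row_group_size 55)`
+    /// parses to `SingleQuotedString` and `Number` values, in the order written.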
+ fn parse_value_options(&mut self) -> Result, ParserError> { + let mut options = vec![]; + self.parser.expect_token(&Token::LParen)?; + + loop { + let key = self.parse_option_key()?; + let value = self.parse_option_value()?; + options.push((key, value)); + let comma = self.parser.consume_token(&Token::Comma); + if self.parser.consume_token(&Token::RParen) { + // allow a trailing comma, even though it's not in standard + break; + } else if !comma { + return self.expected( + "',' or ')' after option definition", + self.parser.peek_token(), + ); + } + } + Ok(options) + } } #[cfg(test)] @@ -969,6 +997,27 @@ mod tests { }); expect_parse_ok(sql, expected)?; + // positive case: it is ok for case insensitive sql stmt with has_header option tokens + let sqls = vec![ + "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS ('FORMAT.HAS_HEADER' 'TRUE')", + "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS ('format.has_header' 'true')", + ]; + for sql in sqls { + let expected = Statement::CreateExternalTable(CreateExternalTable { + name: "t".into(), + columns: vec![make_column_def("c1", DataType::Int(display))], + file_type: "CSV".to_string(), + location: "foo.csv".into(), + table_partition_cols: vec![], + order_exprs: vec![], + if_not_exists: false, + unbounded: false, + options: HashMap::from([("format.has_header".into(), "true".into())]), + constraints: vec![], + }); + expect_parse_ok(sql, expected)?; + } + // positive case: it is ok for sql stmt with `COMPRESSION TYPE GZIP` tokens let sqls = vec![ ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS @@ -1308,8 +1357,9 @@ mod tests { source: object_name("foo"), target: "bar".to_string(), partitioned_by: vec![], + has_header: false, stored_as: Some("CSV".to_owned()), - options: HashMap::new(), + options: vec![], }); assert_eq!(verified_stmt(sql), expected); @@ -1343,8 +1393,9 @@ mod tests { source: object_name("foo"), target: "bar".to_string(), partitioned_by: vec![], + has_header: false, stored_as: Some("PARQUET".to_owned()), - options: HashMap::new(), + options: vec![], }); let expected = Statement::Explain(ExplainStatement { analyze, @@ -1379,8 +1430,9 @@ mod tests { source: CopyToSource::Query(query), target: "bar".to_string(), partitioned_by: vec![], + has_header: true, stored_as: Some("CSV".to_owned()), - options: HashMap::from([("format.has_header".into(), "true".into())]), + options: vec![], }); assert_eq!(verified_stmt(sql), expected); Ok(()) @@ -1393,8 +1445,12 @@ mod tests { source: object_name("foo"), target: "bar".to_string(), partitioned_by: vec![], + has_header: false, stored_as: Some("CSV".to_owned()), - options: HashMap::from([("row_group_size".into(), "55".into())]), + options: vec![( + "row_group_size".to_string(), + Value::Number("55".to_string(), false), + )], }); assert_eq!(verified_stmt(sql), expected); Ok(()) @@ -1402,13 +1458,17 @@ mod tests { #[test] fn copy_to_partitioned_by() -> Result<(), ParserError> { - let sql = "COPY foo TO bar STORED AS CSV PARTITIONED BY (a) OPTIONS ('row_group_size' '55')"; + let sql = "COPY foo TO bar STORED AS CSV PARTITIONED BY (a) OPTIONS (row_group_size 55)"; let expected = Statement::CopyTo(CopyToStatement { source: object_name("foo"), target: "bar".to_string(), partitioned_by: vec!["a".to_string()], + has_header: false, stored_as: Some("CSV".to_owned()), - options: HashMap::from([("row_group_size".to_string(), "55".into())]), + options: vec![( + "row_group_size".to_string(), + Value::Number("55".to_string(), false), + )], }); 
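        // Round-trip note (illustrative): with the unquoted `key value` rendering
        // in Display, `verified_stmt` reproduces the input SQL exactly.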
assert_eq!(verified_stmt(sql), expected); Ok(()) @@ -1418,12 +1478,18 @@ mod tests { fn copy_to_multi_options() -> Result<(), ParserError> { // order of options is preserved let sql = - "COPY foo TO bar STORED AS parquet OPTIONS ('format.row_group_size' '55', 'format.compression' 'snappy')"; + "COPY foo TO bar STORED AS parquet OPTIONS ('format.row_group_size' 55, 'format.compression' snappy)"; - let expected_options = HashMap::from([ - ("format.row_group_size".to_string(), "55".into()), - ("format.compression".to_string(), "snappy".into()), - ]); + let expected_options = vec![ + ( + "format.row_group_size".to_string(), + Value::Number("55".to_string(), false), + ), + ( + "format.compression".to_string(), + Value::UnQuotedString("snappy".to_string()), + ), + ]; let mut statements = DFParser::parse_sql(sql).unwrap(); assert_eq!(statements.len(), 1); @@ -1461,7 +1527,6 @@ mod tests { /// `canonical` sql string fn one_statement_parses_to(sql: &str, canonical: &str) -> Statement { let mut statements = DFParser::parse_sql(sql).unwrap(); - println!("{:?}", statements[0]); assert_eq!(statements.len(), 1); if sql != canonical { diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index e5eef838b301..e6eda0b2787d 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -849,7 +849,36 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } }; - let options = statement.options; + let mut options = HashMap::new(); + for (key, value) in statement.options { + let value_string = match value { + Value::SingleQuotedString(s) => s.to_string(), + Value::DollarQuotedString(s) => s.to_string(), + Value::UnQuotedString(s) => s.to_string(), + Value::Number(_, _) | Value::Boolean(_) => value.to_string(), + Value::DoubleQuotedString(_) + | Value::EscapedStringLiteral(_) + | Value::NationalStringLiteral(_) + | Value::SingleQuotedByteStringLiteral(_) + | Value::DoubleQuotedByteStringLiteral(_) + | Value::RawStringLiteral(_) + | Value::HexStringLiteral(_) + | Value::Null + | Value::Placeholder(_) => { + return plan_err!("Unsupported Value in COPY statement {}", value); + } + }; + if !(&key.contains('.')) { + // If config does not belong to any namespace, assume it is + // a format option and apply the format prefix for backwards + // compatibility. 
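+                // For example, a bare `compression 'zstd'` is stored under the
+                // namespaced key 'format.compression' (illustrative).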
+ + let renamed_key = format!("format.{}", key); + options.insert(renamed_key.to_lowercase(), value_string.to_lowercase()); + } else { + options.insert(key.to_lowercase(), value_string.to_lowercase()); + } + } let file_type = if let Some(file_type) = statement.stored_as { FileType::from_str(&file_type).map_err(|_| { diff --git a/datafusion/sqllogictest/test_files/create_external_table.slt b/datafusion/sqllogictest/test_files/create_external_table.slt index 9068c77ab422..4724bc87a099 100644 --- a/datafusion/sqllogictest/test_files/create_external_table.slt +++ b/datafusion/sqllogictest/test_files/create_external_table.slt @@ -52,6 +52,14 @@ CREATE EXTERNAL TABLE t STORED AS CSV WITH HEADER LOCATION 'abc' statement error DataFusion error: SQL error: ParserError\("Expected BY, found: LOCATION"\) CREATE EXTERNAL TABLE t STORED AS CSV PARTITIONED LOCATION 'abc' +# Missing `TYPE` in COMPRESSION clause +statement error DataFusion error: SQL error: ParserError\("Expected TYPE, found: LOCATION"\) +CREATE EXTERNAL TABLE t STORED AS CSV COMPRESSION LOCATION 'abc' + +# Invalid compression type +statement error DataFusion error: SQL error: ParserError\("Unsupported file compression type ZZZ"\) +CREATE EXTERNAL TABLE t STORED AS CSV COMPRESSION TYPE ZZZ LOCATION 'blahblah' + # Duplicate `STORED AS` clause statement error DataFusion error: SQL error: ParserError\("STORED AS specified more than once"\) CREATE EXTERNAL TABLE t STORED AS CSV STORED AS PARQUET LOCATION 'foo.parquet' diff --git a/datafusion/sqllogictest/test_files/repartition_scan.slt b/datafusion/sqllogictest/test_files/repartition_scan.slt index 6484d437e179..2d4ca1baf23d 100644 --- a/datafusion/sqllogictest/test_files/repartition_scan.slt +++ b/datafusion/sqllogictest/test_files/repartition_scan.slt @@ -278,6 +278,7 @@ statement ok CREATE EXTERNAL TABLE avro_table STORED AS AVRO LOCATION '../../testing/data/avro/simple_enum.avro' +OPTIONS ('format.has_header' 'true'); # It would be great to see the file read as "4" groups with even sizes (offsets) eventually From 43be99b6f0d55f1336daa9b18e407c7c10cb90de Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Thu, 9 May 2024 17:17:04 +0300 Subject: [PATCH 10/15] Final commit --- datafusion/common/src/config.rs | 6 +- .../core/src/datasource/file_format/csv.rs | 15 +- datafusion/core/src/datasource/stream.rs | 13 +- datafusion/core/tests/sql/mod.rs | 2 +- .../tests/cases/roundtrip_logical_plan.rs | 4 +- datafusion/sql/src/parser.rs | 185 +++++++----------- datafusion/sql/src/statement.rs | 39 +++- .../test_files/create_external_table.slt | 10 +- .../sqllogictest/test_files/parquet.slt | 21 +- .../test_files/repartition_scan.slt | 3 +- .../sqllogictest/test_files/subquery.slt | 2 +- 11 files changed, 147 insertions(+), 153 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index a1445dfb2f40..86f35944700c 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -1566,7 +1566,7 @@ config_namespace! { pub struct CsvOptions { /// Specifies whether there is a CSV header (i.e. the first line /// consists of is column names). If not specified, uses default from - /// the `CREATE TABLE` command, if any. + /// the session state. pub has_header: Option, default = None pub delimiter: u8, default = b',' pub quote: u8, default = b'"' @@ -1609,8 +1609,8 @@ impl CsvOptions { /// Returns true if the first line is a header. If format options does not /// specify whether there is a header, consults the configuration. 
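    /// Illustrative caller-side resolution (names assumed):
    /// `opts.has_header().unwrap_or(config.catalog.has_header)`.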
- pub fn has_header(&self, config_opt: &ConfigOptions) -> bool { - self.has_header.unwrap_or(config_opt.catalog.has_header) + pub fn has_header(&self) -> Option { + self.has_header } /// The character separating values within a row. diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index 3e5c7af1f82e..bfd0975a52a9 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -40,7 +40,7 @@ use arrow::array::RecordBatch; use arrow::csv::WriterBuilder; use arrow::datatypes::SchemaRef; use arrow::datatypes::{DataType, Field, Fields, Schema}; -use datafusion_common::config::{ConfigOptions, CsvOptions}; +use datafusion_common::config::CsvOptions; use datafusion_common::file_options::csv_writer::CsvWriterOptions; use datafusion_common::{exec_err, not_impl_err, DataFusionError, FileType}; use datafusion_execution::TaskContext; @@ -142,8 +142,8 @@ impl CsvFormat { } /// True if the first line is a header. - pub fn has_header(&self, config_opt: &ConfigOptions) -> bool { - self.options.has_header(config_opt) + pub fn has_header(&self) -> Option { + self.options.has_header } /// The character separating values within a row. @@ -245,7 +245,9 @@ impl FileFormat for CsvFormat { conf, // If format options does not specify whether there is a header, // we consult configuration options. - self.options.has_header(state.config_options()), + self.options + .has_header + .unwrap_or(state.config_options().catalog.has_header), self.options.delimiter, self.options.quote, self.options.escape, @@ -303,7 +305,10 @@ impl CsvFormat { while let Some(chunk) = stream.next().await.transpose()? { let format = arrow::csv::reader::Format::default() .with_header( - self.options.has_header(state.config_options()) && first_chunk, + self.options + .has_header + .unwrap_or(state.config_options().catalog.has_header) + && first_chunk, ) .with_delimiter(self.options.delimiter); diff --git a/datafusion/core/src/datasource/stream.rs b/datafusion/core/src/datasource/stream.rs index 5e485f42b516..41296c1b8d23 100644 --- a/datafusion/core/src/datasource/stream.rs +++ b/datafusion/core/src/datasource/stream.rs @@ -30,7 +30,7 @@ use arrow_schema::SchemaRef; use async_trait::async_trait; use futures::StreamExt; -use datafusion_common::{plan_err, Constraints, DataFusionError, Result}; +use datafusion_common::{config_err, plan_err, Constraints, DataFusionError, Result}; use datafusion_common_runtime::SpawnedTask; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_expr::{CreateExternalTable, Expr, TableType}; @@ -58,11 +58,22 @@ impl TableProviderFactory for StreamTableFactory { let schema: SchemaRef = Arc::new(cmd.schema.as_ref().into()); let location = cmd.location.clone(); let encoding = cmd.file_type.parse()?; + let header = if let Ok(opt) = cmd + .options + .get("format.has_header") + .map(|has_header| bool::from_str(has_header)) + .transpose() + { + opt.unwrap_or(false) + } else { + return config_err!("format.has_header can either be true or false"); + }; let config = StreamConfig::new_file(schema, location.into()) .with_encoding(encoding) .with_order(cmd.order_exprs.clone()) .with_batch_size(state.config().batch_size()) + .with_header(header) .with_constraints(cmd.constraints.clone()); Ok(Arc::new(StreamTable(Arc::new(config)))) diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index 00f091d77cff..995ce35c5bc2 100644 --- 
a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -85,7 +85,7 @@ async fn register_aggregate_csv_by_sql(ctx: &SessionContext) { c13 VARCHAR NOT NULL ) STORED AS CSV - LOCATION '{testdata}/csv/aggregate_test_100.csv + LOCATION '{testdata}/csv/aggregate_test_100.csv' OPTIONS ('format.has_header' 'true') " )) diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 74df7178b0f2..63e9177ffaf8 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -238,7 +238,7 @@ async fn roundtrip_custom_listing_tables() -> Result<()> { STORED AS CSV WITH ORDER (a ASC, b ASC) WITH ORDER (c ASC) - LOCATION '../core/tests/data/window_2.csv'; + LOCATION '../core/tests/data/window_2.csv' OPTIONS ('format.has_header' 'true')"; let plan = ctx.state().create_logical_plan(query).await?; @@ -268,7 +268,7 @@ async fn roundtrip_logical_plan_aggregation_with_pk() -> Result<()> { STORED AS CSV WITH ORDER (a ASC, b ASC) WITH ORDER (c ASC) - LOCATION '../core/tests/data/window_2.csv'; + LOCATION '../core/tests/data/window_2.csv' OPTIONS ('format.has_header' 'true')", ) .await?; diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 7468527662f6..6a0798fbcfb4 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -17,7 +17,7 @@ //! [`DFParser`]: DataFusion SQL Parser based on [`sqlparser`] -use std::collections::{HashMap, VecDeque}; +use std::collections::VecDeque; use std::fmt; use sqlparser::{ @@ -101,8 +101,6 @@ pub struct CopyToStatement { pub target: String, /// Partition keys pub partitioned_by: Vec, - /// Indicates whether there is a header row (e.g. CSV) - pub has_header: bool, /// File type (Parquet, NDJSON, CSV etc.) pub stored_as: Option, /// Target specific options @@ -129,7 +127,8 @@ impl fmt::Display for CopyToStatement { } if !options.is_empty() { - let opts: Vec<_> = options.iter().map(|(k, v)| format!("{k} {v}")).collect(); + let opts: Vec<_> = + options.iter().map(|(k, v)| format!("'{k}' {v}")).collect(); write!(f, " OPTIONS ({})", opts.join(", "))?; } @@ -198,7 +197,7 @@ pub struct CreateExternalTable { /// Infinite streams? pub unbounded: bool, /// Table(provider) specific options - pub options: HashMap, + pub options: Vec<(String, Value)>, /// A table-level constraint pub constraints: Vec, } @@ -386,7 +385,6 @@ impl<'a> DFParser<'a> { stored_as: Option, target: Option, partitioned_by: Option>, - has_header: Option, options: Option>, } @@ -413,8 +411,7 @@ impl<'a> DFParser<'a> { Keyword::WITH => { self.parser.expect_keyword(Keyword::HEADER)?; self.parser.expect_keyword(Keyword::ROW)?; - return parser_err!("\"WITH HEADER ROW\" clause is no longer in use. Please use the \"OPTIONS\" - clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')"); + return parser_err!("WITH HEADER ROW clause is no longer in use. 
Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')"); } Keyword::PARTITIONED => { self.parser.expect_keyword(Keyword::BY)?; @@ -451,7 +448,6 @@ impl<'a> DFParser<'a> { source, target, partitioned_by: builder.partitioned_by.unwrap_or(vec![]), - has_header: builder.has_header.unwrap_or(false), stored_as: builder.stored_as, options: builder.options.unwrap_or(vec![]), })) @@ -686,7 +682,7 @@ impl<'a> DFParser<'a> { location: Option, table_partition_cols: Option>, order_exprs: Vec, - options: Option>, + options: Option>, } let mut builder = Builder::default(); @@ -716,18 +712,15 @@ impl<'a> DFParser<'a> { } else { self.parser.expect_keyword(Keyword::HEADER)?; self.parser.expect_keyword(Keyword::ROW)?; - return parser_err!("\"WITH HEADER ROW\" clause is no longer in use. Please use the \"OPTIONS\" - clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')"); + return parser_err!("WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')"); } } Keyword::DELIMITER => { - return parser_err!("\"DELIMITER\" clause is no longer in use. Please use the \"OPTIONS\" - clause with 'format.delimiter' set appropriately, e.g., OPTIONS ('format.delimiter' ',')"); + return parser_err!("DELIMITER clause is no longer in use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, e.g., OPTIONS ('format.delimiter' ',')"); } Keyword::COMPRESSION => { self.parser.expect_keyword(Keyword::TYPE)?; - return parser_err!("\"COMPRESSION TYPE\" clause is no longer in use. Please use the \"OPTIONS\" - clause with 'format.compression' set appropriately, e.g., OPTIONS ('format.compression' 'gzip')"); + return parser_err!("COMPRESSION TYPE clause is no longer in use. Please use the OPTIONS clause with 'format.compression' set appropriately, e.g., OPTIONS ('format.compression' 'gzip')"); } Keyword::PARTITIONED => { self.parser.expect_keyword(Keyword::BY)?; @@ -759,7 +752,7 @@ impl<'a> DFParser<'a> { } Keyword::OPTIONS => { ensure_not_set(&builder.options, "OPTIONS")?; - builder.options = Some(self.parse_string_options()?); + builder.options = Some(self.parse_value_options()?); } _ => { unreachable!() @@ -798,7 +791,7 @@ impl<'a> DFParser<'a> { order_exprs: builder.order_exprs, if_not_exists, unbounded, - options: builder.options.unwrap_or(HashMap::new()), + options: builder.options.unwrap_or(Vec::new()), constraints, }; Ok(Statement::CreateExternalTable(create)) @@ -813,34 +806,10 @@ impl<'a> DFParser<'a> { } } - /// Parses (key value) style options where the values are literal strings. - fn parse_string_options(&mut self) -> Result, ParserError> { - let mut options = HashMap::new(); - self.parser.expect_token(&Token::LParen)?; - - loop { - let key = self.parser.parse_literal_string()?; - let value = self.parser.parse_literal_string()?; - options.insert(key, value); - let comma = self.parser.consume_token(&Token::Comma); - if self.parser.consume_token(&Token::RParen) { - // allow a trailing comma, even though it's not in standard - break; - } else if !comma { - return self.expected( - "',' or ')' after option definition", - self.parser.peek_token(), - ); - } - } - Ok(options) - } - /// Parses (key value) style options into a map of String --> [`Value`]. 
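    /// The pairs are returned in the order they were written (a `Vec` of
    /// key/value pairs rather than an actual map).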
/// - /// Unlike [`Self::parse_string_options`], this method supports - /// keywords as key names as well as multiple value types such as - /// Numbers as well as Strings. + /// This method supports keywords as key names as well as multiple + /// value types such as Numbers as well as Strings. fn parse_value_options(&mut self) -> Result, ParserError> { let mut options = vec![]; self.parser.expect_token(&Token::LParen)?; @@ -925,7 +894,7 @@ mod tests { order_exprs: vec![], if_not_exists: false, unbounded: false, - options: HashMap::new(), + options: Vec::new(), constraints: vec![], }); expect_parse_ok(sql, expected)?; @@ -941,7 +910,7 @@ mod tests { order_exprs: vec![], if_not_exists: false, unbounded: false, - options: HashMap::new(), + options: Vec::new(), constraints: vec![], }); expect_parse_ok(sql, expected)?; @@ -958,7 +927,7 @@ mod tests { order_exprs: vec![], if_not_exists: false, unbounded: false, - options: HashMap::new(), + options: vec![], constraints: vec![], }); expect_parse_ok(sql, expected)?; @@ -975,7 +944,10 @@ mod tests { order_exprs: vec![], if_not_exists: false, unbounded: false, - options: HashMap::from([("format.delimiter".into(), "|".into())]), + options: vec![( + "format.delimiter".into(), + Value::SingleQuotedString("|".into()), + )], constraints: vec![], }); expect_parse_ok(sql, expected)?; @@ -992,42 +964,21 @@ mod tests { order_exprs: vec![], if_not_exists: false, unbounded: false, - options: HashMap::new(), + options: vec![], constraints: vec![], }); expect_parse_ok(sql, expected)?; - // positive case: it is ok for case insensitive sql stmt with has_header option tokens - let sqls = vec![ - "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS ('FORMAT.HAS_HEADER' 'TRUE')", - "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS ('format.has_header' 'true')", - ]; - for sql in sqls { - let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), - columns: vec![make_column_def("c1", DataType::Int(display))], - file_type: "CSV".to_string(), - location: "foo.csv".into(), - table_partition_cols: vec![], - order_exprs: vec![], - if_not_exists: false, - unbounded: false, - options: HashMap::from([("format.has_header".into(), "true".into())]), - constraints: vec![], - }); - expect_parse_ok(sql, expected)?; - } - // positive case: it is ok for sql stmt with `COMPRESSION TYPE GZIP` tokens let sqls = vec![ ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS - ('format.key.compression' 'GZIP')", "GZIP"), + ('format.compression' 'GZIP')", "GZIP"), ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS - ('format.key.compression' 'BZIP2')", "BZIP2"), + ('format.compression' 'BZIP2')", "BZIP2"), ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS - ('format.key.compression' 'XZ')", "XZ"), + ('format.compression' 'XZ')", "XZ"), ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS - ('format.key.compression' 'ZSTD')", "ZSTD"), + ('format.compression' 'ZSTD')", "ZSTD"), ]; for (sql, compression) in sqls { let expected = Statement::CreateExternalTable(CreateExternalTable { @@ -1039,10 +990,10 @@ mod tests { order_exprs: vec![], if_not_exists: false, unbounded: false, - options: vec!["format.key.compression".to_string()] - .into_iter() - .zip(vec![compression.to_string()]) - .collect(), + options: vec![( + "format.compression".into(), + Value::SingleQuotedString(compression.into()), + )], constraints: vec![], }); 
expect_parse_ok(sql, expected)?; @@ -1059,7 +1010,7 @@ mod tests { order_exprs: vec![], if_not_exists: false, unbounded: false, - options: HashMap::new(), + options: vec![], constraints: vec![], }); expect_parse_ok(sql, expected)?; @@ -1075,7 +1026,7 @@ mod tests { order_exprs: vec![], if_not_exists: false, unbounded: false, - options: HashMap::new(), + options: vec![], constraints: vec![], }); expect_parse_ok(sql, expected)?; @@ -1091,7 +1042,7 @@ mod tests { order_exprs: vec![], if_not_exists: false, unbounded: false, - options: HashMap::new(), + options: vec![], constraints: vec![], }); expect_parse_ok(sql, expected)?; @@ -1108,7 +1059,7 @@ mod tests { order_exprs: vec![], if_not_exists: true, unbounded: false, - options: HashMap::new(), + options: vec![], constraints: vec![], }); expect_parse_ok(sql, expected)?; @@ -1128,7 +1079,7 @@ mod tests { order_exprs: vec![], if_not_exists: false, unbounded: false, - options: HashMap::new(), + options: vec![], constraints: vec![], }); expect_parse_ok(sql, expected)?; @@ -1155,7 +1106,7 @@ mod tests { order_exprs: vec![], if_not_exists: false, unbounded: false, - options: HashMap::from([("k1".into(), "v1".into())]), + options: vec![("k1".into(), Value::SingleQuotedString("v1".into()))], constraints: vec![], }); expect_parse_ok(sql, expected)?; @@ -1172,10 +1123,10 @@ mod tests { order_exprs: vec![], if_not_exists: false, unbounded: false, - options: HashMap::from([ - ("k1".into(), "v1".into()), - ("k2".into(), "v2".into()), - ]), + options: vec![ + ("k1".into(), Value::SingleQuotedString("v1".into())), + ("k2".into(), Value::UnQuotedString("v2".into())), + ], constraints: vec![], }); expect_parse_ok(sql, expected)?; @@ -1218,7 +1169,7 @@ mod tests { }]], if_not_exists: false, unbounded: false, - options: HashMap::new(), + options: vec![], constraints: vec![], }); expect_parse_ok(sql, expected)?; @@ -1256,7 +1207,7 @@ mod tests { ]], if_not_exists: false, unbounded: false, - options: HashMap::new(), + options: vec![], constraints: vec![], }); expect_parse_ok(sql, expected)?; @@ -1290,7 +1241,7 @@ mod tests { }]], if_not_exists: false, unbounded: false, - options: HashMap::new(), + options: vec![], constraints: vec![], }); expect_parse_ok(sql, expected)?; @@ -1302,11 +1253,11 @@ mod tests { WITH ORDER (c1 - c2 ASC) PARTITIONED BY (c1) LOCATION 'foo.parquet' - OPTIONS (ROW_GROUP_SIZE '1024', 'TRUNCATE' 'NO', - 'format.compression' 'zstd', + OPTIONS ('format.compression' 'zstd', 'format.delimiter' '*', - 'format.has_header' 'true') - "; + 'ROW_GROUP_SIZE' '1024', + 'TRUNCATE' 'NO', + 'format.has_header' 'true')"; let expected = Statement::CreateExternalTable(CreateExternalTable { name: "t".into(), columns: vec![ @@ -1333,13 +1284,25 @@ mod tests { }]], if_not_exists: true, unbounded: true, - options: HashMap::from([ - ("format.compression".into(), "zstd".into()), - ("format.delimiter".into(), "*".into()), - ("ROW_GROUP_SIZE".into(), "1024".into()), - ("TRUNCATE".into(), "NO".into()), - ("format.has_header".into(), "true".into()), - ]), + options: vec![ + ( + "format.compression".into(), + Value::SingleQuotedString("zstd".into()), + ), + ( + "format.delimiter".into(), + Value::SingleQuotedString("*".into()), + ), + ( + "ROW_GROUP_SIZE".into(), + Value::SingleQuotedString("1024".into()), + ), + ("TRUNCATE".into(), Value::SingleQuotedString("NO".into())), + ( + "format.has_header".into(), + Value::SingleQuotedString("true".into()), + ), + ], constraints: vec![], }); expect_parse_ok(sql, expected)?; @@ -1357,7 +1320,6 @@ mod tests { source: 
object_name("foo"), target: "bar".to_string(), partitioned_by: vec![], - has_header: false, stored_as: Some("CSV".to_owned()), options: vec![], }); @@ -1393,7 +1355,6 @@ mod tests { source: object_name("foo"), target: "bar".to_string(), partitioned_by: vec![], - has_header: false, stored_as: Some("PARQUET".to_owned()), options: vec![], }); @@ -1430,9 +1391,11 @@ mod tests { source: CopyToSource::Query(query), target: "bar".to_string(), partitioned_by: vec![], - has_header: true, stored_as: Some("CSV".to_owned()), - options: vec![], + options: vec![( + "format.has_header".into(), + Value::SingleQuotedString("true".into()), + )], }); assert_eq!(verified_stmt(sql), expected); Ok(()) @@ -1440,16 +1403,15 @@ mod tests { #[test] fn copy_to_options() -> Result<(), ParserError> { - let sql = "COPY foo TO bar STORED AS CSV OPTIONS (row_group_size 55)"; + let sql = "COPY foo TO bar STORED AS CSV OPTIONS ('row_group_size' '55')"; let expected = Statement::CopyTo(CopyToStatement { source: object_name("foo"), target: "bar".to_string(), partitioned_by: vec![], - has_header: false, stored_as: Some("CSV".to_owned()), options: vec![( "row_group_size".to_string(), - Value::Number("55".to_string(), false), + Value::SingleQuotedString("55".to_string()), )], }); assert_eq!(verified_stmt(sql), expected); @@ -1458,16 +1420,15 @@ mod tests { #[test] fn copy_to_partitioned_by() -> Result<(), ParserError> { - let sql = "COPY foo TO bar STORED AS CSV PARTITIONED BY (a) OPTIONS (row_group_size 55)"; + let sql = "COPY foo TO bar STORED AS CSV PARTITIONED BY (a) OPTIONS ('row_group_size' '55')"; let expected = Statement::CopyTo(CopyToStatement { source: object_name("foo"), target: "bar".to_string(), partitioned_by: vec!["a".to_string()], - has_header: false, stored_as: Some("CSV".to_owned()), options: vec![( "row_group_size".to_string(), - Value::Number("55".to_string(), false), + Value::SingleQuotedString("55".to_string()), )], }); assert_eq!(verified_stmt(sql), expected); diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index e6eda0b2787d..6bdca77f0b02 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -872,7 +872,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // If config does not belong to any namespace, assume it is // a format option and apply the format prefix for backwards // compatibility. 
- let renamed_key = format!("format.{}", key); options.insert(renamed_key.to_lowercase(), value_string.to_lowercase()); } else { @@ -982,7 +981,41 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let inline_constraints = calc_inline_constraints_from_columns(&columns); all_constraints.extend(inline_constraints); - let compression = options + let mut options_map = HashMap::::new(); + for (key, value) in options { + let value_string = match value { + Value::SingleQuotedString(s) => s.to_string(), + Value::DollarQuotedString(s) => s.to_string(), + Value::UnQuotedString(s) => s.to_string(), + Value::Number(_, _) | Value::Boolean(_) => value.to_string(), + Value::DoubleQuotedString(_) + | Value::EscapedStringLiteral(_) + | Value::NationalStringLiteral(_) + | Value::SingleQuotedByteStringLiteral(_) + | Value::DoubleQuotedByteStringLiteral(_) + | Value::RawStringLiteral(_) + | Value::HexStringLiteral(_) + | Value::Null + | Value::Placeholder(_) => { + return plan_err!( + "Unsupported Value in CREATE EXTERNAL TABLE statement {}", + value + ); + } + }; + if !(&key.contains('.')) { + // If config does not belong to any namespace, assume it is + // a format option and apply the format prefix for backwards + // compatibility. + let renamed_key = format!("format.{}", key); + options_map + .insert(renamed_key.to_lowercase(), value_string.to_lowercase()); + } else { + options_map.insert(key.to_lowercase(), value_string.to_lowercase()); + } + } + + let compression = options_map .get("format.compression") .map(|comp| CompressionTypeVariant::from_str(comp)) .transpose()?; @@ -1025,7 +1058,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { definition, order_exprs: ordered_exprs, unbounded, - options, + options: options_map, constraints, column_defaults, }, diff --git a/datafusion/sqllogictest/test_files/create_external_table.slt b/datafusion/sqllogictest/test_files/create_external_table.slt index 4724bc87a099..ff96220dba60 100644 --- a/datafusion/sqllogictest/test_files/create_external_table.slt +++ b/datafusion/sqllogictest/test_files/create_external_table.slt @@ -33,7 +33,7 @@ statement error DataFusion error: SQL error: ParserError\("Missing LOCATION clau CREATE EXTERNAL TABLE t STORED AS CSV # Option value is missing -statement error DataFusion error: SQL error: ParserError\("Expected literal string, found: \)"\) +statement error DataFusion error: SQL error: ParserError\("Expected string or numeric value, found: \)"\) CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2, k3) LOCATION 'blahblah' # Missing `(` in WITH ORDER clause @@ -52,14 +52,6 @@ CREATE EXTERNAL TABLE t STORED AS CSV WITH HEADER LOCATION 'abc' statement error DataFusion error: SQL error: ParserError\("Expected BY, found: LOCATION"\) CREATE EXTERNAL TABLE t STORED AS CSV PARTITIONED LOCATION 'abc' -# Missing `TYPE` in COMPRESSION clause -statement error DataFusion error: SQL error: ParserError\("Expected TYPE, found: LOCATION"\) -CREATE EXTERNAL TABLE t STORED AS CSV COMPRESSION LOCATION 'abc' - -# Invalid compression type -statement error DataFusion error: SQL error: ParserError\("Unsupported file compression type ZZZ"\) -CREATE EXTERNAL TABLE t STORED AS CSV COMPRESSION TYPE ZZZ LOCATION 'blahblah' - # Duplicate `STORED AS` clause statement error DataFusion error: SQL error: ParserError\("STORED AS specified more than once"\) CREATE EXTERNAL TABLE t STORED AS CSV STORED AS PARQUET LOCATION 'foo.parquet' diff --git a/datafusion/sqllogictest/test_files/parquet.slt b/datafusion/sqllogictest/test_files/parquet.slt index 
25463eb36273..e70f800bde74 100644 --- a/datafusion/sqllogictest/test_files/parquet.slt +++ b/datafusion/sqllogictest/test_files/parquet.slt @@ -107,8 +107,7 @@ CREATE EXTERNAL TABLE test_table ( ) STORED AS PARQUET WITH ORDER (string_col ASC NULLS LAST, int_col ASC NULLS LAST) -LOCATION 'test_files/scratch/parquet/test_table' -OPTIONS ('format.has_header' 'true'); +LOCATION 'test_files/scratch/parquet/test_table'; # Check output plan, expect an "output_ordering" clause in the physical_plan -> ParquetExec: query TT @@ -188,8 +187,7 @@ CREATE EXTERNAL TABLE alltypes_plain ( timestamp_col TIMESTAMP NOT NULL, ) STORED AS PARQUET -LOCATION '../../parquet-testing/data/alltypes_plain.parquet' -OPTIONS ('format.has_header' 'true') +LOCATION '../../parquet-testing/data/alltypes_plain.parquet'; # Test a basic query with a CAST: query IT @@ -213,8 +211,7 @@ DROP TABLE alltypes_plain; statement ok CREATE EXTERNAL TABLE test_binary STORED AS PARQUET -LOCATION '../core/tests/data/test_binary.parquet' -OPTIONS ('format.has_header' 'true'); +LOCATION '../core/tests/data/test_binary.parquet'; # Check size of table: query I @@ -246,8 +243,7 @@ DROP TABLE test_binary; statement ok CREATE EXTERNAL TABLE timestamp_with_tz STORED AS PARQUET -LOCATION '../core/tests/data/timestamp_with_tz.parquet' -OPTIONS ('format.has_header' 'true'); +LOCATION '../core/tests/data/timestamp_with_tz.parquet'; # Check size of table: query I @@ -287,8 +283,7 @@ STORED AS PARQUET; statement ok CREATE EXTERNAL TABLE listing_table STORED AS PARQUET -LOCATION 'test_files/scratch/parquet/test_table/*.parquet' -OPTIONS ('format.has_header' 'true'); +LOCATION 'test_files/scratch/parquet/test_table/*.parquet'; statement ok set datafusion.execution.listing_table_ignore_subdirectory = true; @@ -316,8 +311,7 @@ DROP TABLE timestamp_with_tz; statement ok CREATE EXTERNAL TABLE single_nan STORED AS PARQUET -LOCATION '../../parquet-testing/data/single_nan.parquet' -OPTIONS ('format.has_header' 'true'); +LOCATION '../../parquet-testing/data/single_nan.parquet'; # Check table size: query I @@ -338,8 +332,7 @@ DROP TABLE single_nan; statement ok CREATE EXTERNAL TABLE list_columns STORED AS PARQUET -LOCATION '../../parquet-testing/data/list_columns.parquet' -OPTIONS ('format.has_header' 'true'); +LOCATION '../../parquet-testing/data/list_columns.parquet'; query ?? 
SELECT int64_list, utf8_list FROM list_columns diff --git a/datafusion/sqllogictest/test_files/repartition_scan.slt b/datafusion/sqllogictest/test_files/repartition_scan.slt index 2d4ca1baf23d..1e71696f296a 100644 --- a/datafusion/sqllogictest/test_files/repartition_scan.slt +++ b/datafusion/sqllogictest/test_files/repartition_scan.slt @@ -277,8 +277,7 @@ DROP TABLE arrow_table; statement ok CREATE EXTERNAL TABLE avro_table STORED AS AVRO -LOCATION '../../testing/data/avro/simple_enum.avro' -OPTIONS ('format.has_header' 'true'); +LOCATION '../../testing/data/avro/simple_enum.avro'; # It would be great to see the file read as "4" groups with even sizes (offsets) eventually diff --git a/datafusion/sqllogictest/test_files/subquery.slt b/datafusion/sqllogictest/test_files/subquery.slt index ba4d644345d4..4a9fb38e7db1 100644 --- a/datafusion/sqllogictest/test_files/subquery.slt +++ b/datafusion/sqllogictest/test_files/subquery.slt @@ -99,7 +99,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS lineitem ( l_shipinstruct VARCHAR, l_shipmode VARCHAR, l_comment VARCHAR, -) STORED AS CSV LOCATION '../core/tests/tpch-csv/lineitem.csv' OPTIONS ('format.delimiter' ',', format.has_header' 'true'); +) STORED AS CSV LOCATION '../core/tests/tpch-csv/lineitem.csv' OPTIONS ('format.delimiter' ',', 'format.has_header' 'true'); # in_subquery_to_join_with_correlated_outer_filter query ITI rowsort From fe505a2fad289e619fbe9d103955173eb382fe60 Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Thu, 9 May 2024 17:59:12 +0300 Subject: [PATCH 11/15] Minor --- datafusion/core/src/datasource/listing/table.rs | 12 ++++++++++++ datafusion/sql/src/parser.rs | 4 ++-- datafusion/sql/src/statement.rs | 5 +++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 7488091fff70..b59ad439f6d8 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -1561,6 +1561,18 @@ mod tests { Ok(()) } + #[tokio::test] + async fn test_insert_into_sql_csv_defaults_header_row() -> Result<()> { + helper_test_insert_into_sql( + "csv", + FileCompressionType::UNCOMPRESSED, + "", + Some(HashMap::from([("has_header".into(), "true".into())])), + ) + .await?; + Ok(()) + } + #[tokio::test] async fn test_insert_into_sql_json_defaults() -> Result<()> { helper_test_insert_into_sql("json", FileCompressionType::UNCOMPRESSED, "", None) diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 6a0798fbcfb4..f61c9cda6345 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -894,7 +894,7 @@ mod tests { order_exprs: vec![], if_not_exists: false, unbounded: false, - options: Vec::new(), + options: vec![], constraints: vec![], }); expect_parse_ok(sql, expected)?; @@ -910,7 +910,7 @@ mod tests { order_exprs: vec![], if_not_exists: false, unbounded: false, - options: Vec::new(), + options: vec![], constraints: vec![], }); expect_parse_ok(sql, expected)?; diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 6bdca77f0b02..79cd1b643654 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -983,6 +983,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let mut options_map = HashMap::::new(); for (key, value) in options { + if options_map.contains_key(&key) { + return plan_err!("Option {key} is specified multiple times"); + } + let value_string = match value { Value::SingleQuotedString(s) => 
s.to_string(), Value::DollarQuotedString(s) => s.to_string(), @@ -1003,6 +1007,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { ); } }; + if !(&key.contains('.')) { // If config does not belong to any namespace, assume it is // a format option and apply the format prefix for backwards From 8ab014ce893988b0c87bbbdad27583526b7a4630 Mon Sep 17 00:00:00 2001 From: Mehmet Ozan Kabak Date: Fri, 10 May 2024 03:23:13 +0300 Subject: [PATCH 12/15] Review --- datafusion/common/src/config.rs | 7 ++++--- datafusion/core/src/datasource/file_format/csv.rs | 12 +++++++----- datafusion/core/src/datasource/listing/table.rs | 1 - datafusion/core/src/datasource/stream.rs | 12 ++++++------ datafusion/core/src/execution/context/mod.rs | 14 ++++++-------- datafusion/core/tests/path_partition.rs | 5 ++--- datafusion/proto/src/physical_plan/to_proto.rs | 5 +---- .../proto/tests/cases/roundtrip_logical_plan.rs | 7 ++++--- datafusion/sql/src/statement.rs | 11 +++++------ datafusion/sql/tests/sql_integration.rs | 2 +- datafusion/sqllogictest/test_files/csv_files.slt | 2 +- .../sqllogictest/test_files/repartition_scan.slt | 4 ++-- 12 files changed, 39 insertions(+), 43 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 86f35944700c..c60f843393f8 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -1565,8 +1565,8 @@ config_namespace! { /// Options controlling CSV format pub struct CsvOptions { /// Specifies whether there is a CSV header (i.e. the first line - /// consists of is column names). If not specified, uses default from - /// the session state. + /// consists of is column names). The value `None` indicates that + /// the configuration should be consulted. pub has_header: Option, default = None pub delimiter: u8, default = b',' pub quote: u8, default = b'"' @@ -1608,7 +1608,8 @@ impl CsvOptions { } /// Returns true if the first line is a header. If format options does not - /// specify whether there is a header, consults the configuration. + /// specify whether there is a header, returns `None` (indicating that the + /// configuration should be consulted). pub fn has_header(&self) -> Option { self.has_header } diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index bfd0975a52a9..7cf510910b75 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -141,7 +141,8 @@ impl CsvFormat { self } - /// True if the first line is a header. + /// Returns `Some(true)` if the first line is a header, `Some(false)` if + /// it is not, and `None` if it is not specified. pub fn has_header(&self) -> Option { self.options.has_header } @@ -305,10 +306,11 @@ impl CsvFormat { while let Some(chunk) = stream.next().await.transpose()? 
{ let format = arrow::csv::reader::Format::default() .with_header( - self.options - .has_header - .unwrap_or(state.config_options().catalog.has_header) - && first_chunk, + first_chunk + && self + .options + .has_header + .unwrap_or(state.config_options().catalog.has_header), ) .with_delimiter(self.options.delimiter); diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index b59ad439f6d8..cf70894806a3 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -1033,7 +1033,6 @@ impl ListingTable { #[cfg(test)] mod tests { - use super::*; #[cfg(feature = "parquet")] use crate::datasource::{provider_as_source, MemTable}; diff --git a/datafusion/core/src/datasource/stream.rs b/datafusion/core/src/datasource/stream.rs index 41296c1b8d23..4962e410587e 100644 --- a/datafusion/core/src/datasource/stream.rs +++ b/datafusion/core/src/datasource/stream.rs @@ -25,11 +25,12 @@ use std::path::PathBuf; use std::str::FromStr; use std::sync::Arc; +use crate::datasource::provider::TableProviderFactory; +use crate::datasource::{create_ordering, TableProvider}; +use crate::execution::context::SessionState; + use arrow_array::{RecordBatch, RecordBatchReader, RecordBatchWriter}; use arrow_schema::SchemaRef; -use async_trait::async_trait; -use futures::StreamExt; - use datafusion_common::{config_err, plan_err, Constraints, DataFusionError, Result}; use datafusion_common_runtime::SpawnedTask; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; @@ -40,9 +41,8 @@ use datafusion_physical_plan::stream::RecordBatchReceiverStreamBuilder; use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; use datafusion_physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan}; -use crate::datasource::provider::TableProviderFactory; -use crate::datasource::{create_ordering, TableProvider}; -use crate::execution::context::SessionState; +use async_trait::async_trait; +use futures::StreamExt; /// A [`TableProviderFactory`] for [`StreamTable`] #[derive(Debug, Default)] diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 6c2dc692dae0..6b29f352dc82 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -23,6 +23,8 @@ use std::ops::ControlFlow; use std::sync::{Arc, Weak}; use super::options::ReadOptions; +#[cfg(feature = "array_expressions")] +use crate::functions_array; use crate::{ catalog::information_schema::{InformationSchemaProvider, INFORMATION_SCHEMA}, catalog::listing_schema::ListingSchemaProvider, @@ -53,14 +55,12 @@ use crate::{ }, optimizer::analyzer::{Analyzer, AnalyzerRule}, optimizer::optimizer::{Optimizer, OptimizerConfig, OptimizerRule}, + physical_expr::{create_physical_expr, PhysicalExpr}, physical_optimizer::optimizer::{PhysicalOptimizer, PhysicalOptimizerRule}, physical_plan::ExecutionPlan, physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner}, variable::{VarProvider, VarType}, }; - -#[cfg(feature = "array_expressions")] -use crate::functions_array; use crate::{functions, functions_aggregate}; use arrow::datatypes::{DataType, SchemaRef}; @@ -75,10 +75,13 @@ use datafusion_common::{ }; use datafusion_execution::registry::SerializerRegistry; use datafusion_expr::{ + expr_rewriter::FunctionRewrite, logical_plan::{DdlStatement, Statement}, + simplify::SimplifyInfo, var_provider::is_system_variables, Expr, ExprSchemable, StringifiedPlan, 
UserDefinedLogicalNode, WindowUDF, }; +use datafusion_optimizer::simplify_expressions::ExprSimplifier; use datafusion_sql::{ parser::{CopyToSource, CopyToStatement, DFParser}, planner::{object_name_to_table_reference, ContextProvider, ParserOptions, SqlToRel}, @@ -93,14 +96,9 @@ use sqlparser::dialect::dialect_from_str; use url::Url; use uuid::Uuid; -use crate::physical_expr::PhysicalExpr; pub use datafusion_execution::config::SessionConfig; pub use datafusion_execution::TaskContext; pub use datafusion_expr::execution_props::ExecutionProps; -use datafusion_expr::expr_rewriter::FunctionRewrite; -use datafusion_expr::simplify::SimplifyInfo; -use datafusion_optimizer::simplify_expressions::ExprSimplifier; -use datafusion_physical_expr::create_physical_expr; mod avro; mod csv; diff --git a/datafusion/core/tests/path_partition.rs b/datafusion/core/tests/path_partition.rs index 6dfed4be795b..ffe0494dae99 100644 --- a/datafusion/core/tests/path_partition.rs +++ b/datafusion/core/tests/path_partition.rs @@ -38,13 +38,12 @@ use datafusion::{ }; use datafusion_common::stats::Precision; use datafusion_common::ScalarValue; +use datafusion_execution::config::SessionConfig; use async_trait::async_trait; use bytes::Bytes; use chrono::{TimeZone, Utc}; -use datafusion_execution::config::SessionConfig; -use futures::stream; -use futures::stream::BoxStream; +use futures::stream::{self, BoxStream}; use object_store::{ path::Path, GetOptions, GetResult, GetResultPayload, ListResult, MultipartId, ObjectMeta, ObjectStore, PutOptions, PutResult, diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index 85ad6cb6ff4c..ffe2aeaaa25a 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ b/datafusion/proto/src/physical_plan/to_proto.rs @@ -975,10 +975,7 @@ impl TryFrom<&CsvOptions> for protobuf::CsvOptions { fn try_from(opts: &CsvOptions) -> Result { let compression: protobuf::CompressionTypeVariant = opts.compression.into(); Ok(protobuf::CsvOptions { - has_header: opts - .has_header - .map(|h| h as u8) - .map_or_else(Vec::new, |e| vec![e]), + has_header: opts.has_header.map_or_else(Vec::new, |h| vec![h as u8]), delimiter: vec![opts.delimiter], quote: vec![opts.quote], escape: opts.escape.map_or_else(Vec::new, |e| vec![e]), diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 63e9177ffaf8..8a2032fd46cc 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -30,6 +30,7 @@ use datafusion::datasource::provider::TableProviderFactory; use datafusion::datasource::TableProvider; use datafusion::execution::context::SessionState; use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv}; +use datafusion::execution::FunctionRegistry; use datafusion::functions_aggregate::covariance::covar_samp; use datafusion::functions_aggregate::expr_fn::first_value; use datafusion::prelude::*; @@ -57,11 +58,11 @@ use datafusion_proto::bytes::{ logical_plan_to_bytes, logical_plan_to_bytes_with_extension_codec, }; use datafusion_proto::logical_plan::to_proto::serialize_expr; -use datafusion_proto::logical_plan::LogicalExtensionCodec; -use datafusion_proto::logical_plan::{from_proto, DefaultLogicalExtensionCodec}; +use datafusion_proto::logical_plan::{ + from_proto, DefaultLogicalExtensionCodec, LogicalExtensionCodec, +}; use datafusion_proto::protobuf; -use datafusion::execution::FunctionRegistry; use prost::Message; 
#[cfg(feature = "json")] diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 79cd1b643654..13d2e05661a8 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -1009,12 +1009,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { }; if !(&key.contains('.')) { - // If config does not belong to any namespace, assume it is + // If a config does not belong to any namespace, we assume it is // a format option and apply the format prefix for backwards // compatibility. - let renamed_key = format!("format.{}", key); - options_map - .insert(renamed_key.to_lowercase(), value_string.to_lowercase()); + let renamed_key = format!("format.{}", key.to_lowercase()); + options_map.insert(renamed_key, value_string.to_lowercase()); } else { options_map.insert(key.to_lowercase(), value_string.to_lowercase()); } @@ -1022,11 +1021,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let compression = options_map .get("format.compression") - .map(|comp| CompressionTypeVariant::from_str(comp)) + .map(|c| CompressionTypeVariant::from_str(c)) .transpose()?; if (file_type == "PARQUET" || file_type == "AVRO" || file_type == "ARROW") && compression - .map(|comp| comp != CompressionTypeVariant::UNCOMPRESSED) + .map(|c| c != CompressionTypeVariant::UNCOMPRESSED) .unwrap_or(false) { plan_err!( diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 781d2ce5bb06..af4dac5f3f89 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -32,13 +32,13 @@ use datafusion_expr::{ AggregateUDF, ColumnarValue, ScalarUDF, ScalarUDFImpl, Signature, TableSource, Volatility, WindowUDF, }; +use datafusion_functions::{string, unicode}; use datafusion_sql::unparser::{expr_to_sql, plan_to_sql}; use datafusion_sql::{ parser::DFParser, planner::{ContextProvider, ParserOptions, PlannerContext, SqlToRel}, }; -use datafusion_functions::{string, unicode}; use rstest::rstest; use sqlparser::dialect::{Dialect, GenericDialect, HiveDialect, MySqlDialect}; use sqlparser::parser::Parser; diff --git a/datafusion/sqllogictest/test_files/csv_files.slt b/datafusion/sqllogictest/test_files/csv_files.slt index 5e30d6ed09ae..50477e1dab45 100644 --- a/datafusion/sqllogictest/test_files/csv_files.slt +++ b/datafusion/sqllogictest/test_files/csv_files.slt @@ -160,4 +160,4 @@ logical_plan physical_plan 01)SortPreservingMergeExec: [int_col@0 ASC NULLS LAST] 02)--SortExec: expr=[int_col@0 ASC NULLS LAST], preserve_partitioning=[true] -03)----CsvExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/csv_files/csv_partitions/1.csv], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/csv_files/csv_partitions/2.csv]]}, projection=[int_col, string_col, bigint_col, partition_col], has_header=false \ No newline at end of file +03)----CsvExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/csv_files/csv_partitions/1.csv], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/csv_files/csv_partitions/2.csv]]}, projection=[int_col, string_col, bigint_col, partition_col], has_header=false diff --git a/datafusion/sqllogictest/test_files/repartition_scan.slt b/datafusion/sqllogictest/test_files/repartition_scan.slt index 1e71696f296a..6b9cb521f5f8 100644 --- a/datafusion/sqllogictest/test_files/repartition_scan.slt +++ b/datafusion/sqllogictest/test_files/repartition_scan.slt @@ -163,8 +163,8 @@ STORED AS CSV OPTIONS ('format.has_header' 'true'); 
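# Alternatively (illustrative), the per-table option can be omitted in favor of
# the session default: SET datafusion.catalog.has_header = true;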
statement ok CREATE EXTERNAL TABLE csv_table(column1 int) STORED AS csv -LOCATION 'test_files/scratch/repartition_scan/csv_table/' OPTIONS - ('format.has_header' 'true'); +LOCATION 'test_files/scratch/repartition_scan/csv_table/' +OPTIONS ('format.has_header' 'true'); query I select * from csv_table ORDER BY column1; From 86e473e472706b2a3250ce329f94b0db7510a786 Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Fri, 10 May 2024 11:17:59 +0300 Subject: [PATCH 13/15] Update avro.slt --- datafusion/sqllogictest/test_files/avro.slt | 27 +++++++-------------- 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/datafusion/sqllogictest/test_files/avro.slt b/datafusion/sqllogictest/test_files/avro.slt index f9f55f0dc153..fced1924ced9 100644 --- a/datafusion/sqllogictest/test_files/avro.slt +++ b/datafusion/sqllogictest/test_files/avro.slt @@ -31,8 +31,7 @@ CREATE EXTERNAL TABLE alltypes_plain ( timestamp_col TIMESTAMP NOT NULL, ) STORED AS AVRO -LOCATION '../../testing/data/avro/alltypes_plain.avro' -OPTIONS ('format.has_header' 'true'); +LOCATION '../../testing/data/avro/alltypes_plain.avro'; statement ok CREATE EXTERNAL TABLE alltypes_plain_snappy ( @@ -49,8 +48,7 @@ CREATE EXTERNAL TABLE alltypes_plain_snappy ( timestamp_col TIMESTAMP NOT NULL, ) STORED AS AVRO -LOCATION '../../testing/data/avro/alltypes_plain.snappy.avro' -OPTIONS ('format.has_header' 'true'); +LOCATION '../../testing/data/avro/alltypes_plain.snappy.avro'; statement ok CREATE EXTERNAL TABLE alltypes_plain_bzip2 ( @@ -67,8 +65,7 @@ CREATE EXTERNAL TABLE alltypes_plain_bzip2 ( timestamp_col TIMESTAMP NOT NULL, ) STORED AS AVRO -LOCATION '../../testing/data/avro/alltypes_plain.bzip2.avro' -OPTIONS ('format.has_header' 'true'); +LOCATION '../../testing/data/avro/alltypes_plain.bzip2.avro'; statement ok CREATE EXTERNAL TABLE alltypes_plain_xz ( @@ -85,8 +82,7 @@ CREATE EXTERNAL TABLE alltypes_plain_xz ( timestamp_col TIMESTAMP NOT NULL, ) STORED AS AVRO -LOCATION '../../testing/data/avro/alltypes_plain.xz.avro' -OPTIONS ('format.has_header' 'true'); +LOCATION '../../testing/data/avro/alltypes_plain.xz.avro'; statement ok CREATE EXTERNAL TABLE alltypes_plain_zstandard ( @@ -103,34 +99,29 @@ CREATE EXTERNAL TABLE alltypes_plain_zstandard ( timestamp_col TIMESTAMP NOT NULL, ) STORED AS AVRO -LOCATION '../../testing/data/avro/alltypes_plain.zstandard.avro' -OPTIONS ('format.has_header' 'true'); +LOCATION '../../testing/data/avro/alltypes_plain.zstandard.avro'; statement ok CREATE EXTERNAL TABLE single_nan ( mycol FLOAT ) STORED AS AVRO -LOCATION '../../testing/data/avro/single_nan.avro' -OPTIONS ('format.has_header' 'true'); +LOCATION '../../testing/data/avro/single_nan.avro'; statement ok CREATE EXTERNAL TABLE nested_records STORED AS AVRO -LOCATION '../../testing/data/avro/nested_records.avro' -OPTIONS ('format.has_header' 'true'); +LOCATION '../../testing/data/avro/nested_records.avro'; statement ok CREATE EXTERNAL TABLE simple_enum STORED AS AVRO -LOCATION '../../testing/data/avro/simple_enum.avro' -OPTIONS ('format.has_header' 'true'); +LOCATION '../../testing/data/avro/simple_enum.avro'; statement ok CREATE EXTERNAL TABLE simple_fixed STORED AS AVRO -LOCATION '../../testing/data/avro/simple_fixed.avro' -OPTIONS ('format.has_header' 'true'); +LOCATION '../../testing/data/avro/simple_fixed.avro'; # test avro query query IT From 3af0bfe722f7c3508cc1c5cfd8ad766d92763f6d Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Mon, 13 May 2024 10:26:33 +0300 Subject: [PATCH 14/15] Apply suggestions --- 
.../core/src/datasource/file_format/csv.rs | 3 ++- datafusion/core/src/datasource/stream.rs | 2 +- .../test_files/create_external_table.slt | 20 +++++++++++++++++++ .../test_files/tpch/create_tables.slt.part | 2 +- docs/source/user-guide/cli/datasources.md | 2 +- docs/source/user-guide/sql/ddl.md | 10 +++++----- docs/source/user-guide/sql/write_options.md | 2 +- 7 files changed, 31 insertions(+), 10 deletions(-) diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index 7cf510910b75..17bc7aafce85 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -734,7 +734,8 @@ mod tests { file_compression_type: FileCompressionType, ) -> Result<()> { let runtime = Arc::new(RuntimeEnv::new(RuntimeConfig::new()).unwrap()); - let cfg = SessionConfig::new().set_str("datafusion.catalog.has_header", "true"); + let mut cfg = SessionConfig::new(); + cfg.options_mut().catalog.has_header = true; let session_state = SessionState::new_with_config_rt(cfg, runtime); let integration = LocalFileSystem::new_with_prefix(arrow_test_data()).unwrap(); diff --git a/datafusion/core/src/datasource/stream.rs b/datafusion/core/src/datasource/stream.rs index 4962e410587e..f0795bdbb0d8 100644 --- a/datafusion/core/src/datasource/stream.rs +++ b/datafusion/core/src/datasource/stream.rs @@ -66,7 +66,7 @@ impl TableProviderFactory for StreamTableFactory { { opt.unwrap_or(false) } else { - return config_err!("format.has_header can either be true or false"); + return config_err!("Valid values for format.has_header option are 'true' or 'false'"); }; let config = StreamConfig::new_file(schema, location.into()) diff --git a/datafusion/sqllogictest/test_files/create_external_table.slt b/datafusion/sqllogictest/test_files/create_external_table.slt index ff96220dba60..fca177bb61f0 100644 --- a/datafusion/sqllogictest/test_files/create_external_table.slt +++ b/datafusion/sqllogictest/test_files/create_external_table.slt @@ -185,3 +185,23 @@ CREATE EXTERNAL TABLE test3(name string) PARTITIONED BY (month string, year string) STORED AS parquet LOCATION 'test_files/scratch/create_external_table/manual_partitioning/'; + +# Duplicate key assignment in OPTIONS clause +statement error DataFusion error: Error during planning: Option format.delimiter is specified multiple times +CREATE EXTERNAL TABLE t STORED AS CSV OPTIONS ( + 'format.delimiter' '*', + 'format.has_header' 'true', + 'format.delimiter' '|') +LOCATION 'foo.csv'; + +# If a config does not belong to any namespace, we assume it is a 'format' option and apply the 'format' prefix for backwards compatibility. 
+statement ok +CREATE EXTERNAL TABLE IF NOT EXISTS region ( + r_regionkey BIGINT, + r_name VARCHAR, + r_comment VARCHAR, + r_rev VARCHAR, +) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl' +OPTIONS ( + 'format.delimiter' '|', + 'has_header' 'false'); \ No newline at end of file diff --git a/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part b/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part index 75bcbc198bef..111d24773055 100644 --- a/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part @@ -121,4 +121,4 @@ CREATE EXTERNAL TABLE IF NOT EXISTS region ( r_name VARCHAR, r_comment VARCHAR, r_rev VARCHAR, -) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl' OPTIONS ('format.delimiter' '|'); +) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl' OPTIONS ('format.delimiter' '|'); \ No newline at end of file diff --git a/docs/source/user-guide/cli/datasources.md b/docs/source/user-guide/cli/datasources.md index 8b254a896f90..2b11645c471a 100644 --- a/docs/source/user-guide/cli/datasources.md +++ b/docs/source/user-guide/cli/datasources.md @@ -167,7 +167,7 @@ Register a single file csv datasource with a header row. CREATE EXTERNAL TABLE test STORED AS CSV LOCATION '/path/to/aggregate_test_100.csv' -OPTIONS ('format.has_header' 'true'); +OPTIONS ('has_header' 'true'); ``` Register a single file csv datasource with explicitly defined schema. diff --git a/docs/source/user-guide/sql/ddl.md b/docs/source/user-guide/sql/ddl.md index 41980851df1e..e16b9681eb80 100644 --- a/docs/source/user-guide/sql/ddl.md +++ b/docs/source/user-guide/sql/ddl.md @@ -98,7 +98,7 @@ scanning a subset of the file. CREATE EXTERNAL TABLE test STORED AS CSV LOCATION '/path/to/aggregate_simple.csv' -OPTIONS ('format.has_header' 'true'); +OPTIONS ('has_header' 'true'); ``` It is also possible to use compressed files, such as `.csv.gz`: @@ -108,7 +108,7 @@ CREATE EXTERNAL TABLE test STORED AS CSV COMPRESSION TYPE GZIP LOCATION '/path/to/aggregate_simple.csv.gz' -OPTIONS ('format.has_header' 'true'); +OPTIONS ('has_header' 'true'); ``` It is also possible to specify the schema manually. @@ -131,7 +131,7 @@ CREATE EXTERNAL TABLE test ( ) STORED AS CSV LOCATION '/path/to/aggregate_test_100.csv' -OPTIONS ('format.has_header' 'true'); +OPTIONS ('has_header' 'true'); ``` It is also possible to specify a directory that contains a partitioned @@ -141,7 +141,7 @@ table (multiple files with the same schema) CREATE EXTERNAL TABLE test STORED AS CSV LOCATION '/path/to/directory/of/files' -OPTIONS ('format.has_header' 'true'); +OPTIONS ('has_header' 'true'); ``` With `CREATE UNBOUNDED EXTERNAL TABLE` SQL statement. 
 We can create unbounded data sources as follows:

@@ -180,7 +180,7 @@ CREATE EXTERNAL TABLE test (
 STORED AS CSV
 WITH ORDER (c2 ASC, c5 + c8 DESC NULL FIRST)
 LOCATION '/path/to/aggregate_test_100.csv'
-OPTIONS ('format.has_header' 'true');
+OPTIONS ('has_header' 'true');
 ```

 Where the `WITH ORDER` clause specifies the sort order:
diff --git a/docs/source/user-guide/sql/write_options.md b/docs/source/user-guide/sql/write_options.md
index c496d8d48a02..3c4790dd0255 100644
--- a/docs/source/user-guide/sql/write_options.md
+++ b/docs/source/user-guide/sql/write_options.md
@@ -42,7 +42,7 @@ CREATE EXTERNAL TABLE
 LOCATION '/test/location/my_csv_table/'
 OPTIONS(
 NULL_VALUE 'NAN',
-'format.has_header' 'true'
+'has_header' 'true'
 )
 ```

From a817dc86d4622d70482d3e27351e390a6a39a878 Mon Sep 17 00:00:00 2001
From: berkaysynnada
Date: Mon, 13 May 2024 10:27:42 +0300
Subject: [PATCH 15/15] Fix imports

---
 datafusion/core/src/datasource/stream.rs     |  4 +++-
 datafusion/core/src/execution/context/mod.rs | 21 ++++++++++---------
 .../tests/cases/roundtrip_logical_plan.rs    | 17 +++++++--------
 3 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/datafusion/core/src/datasource/stream.rs b/datafusion/core/src/datasource/stream.rs
index f0795bdbb0d8..bcce3c1b6422 100644
--- a/datafusion/core/src/datasource/stream.rs
+++ b/datafusion/core/src/datasource/stream.rs
@@ -66,7 +66,9 @@ impl TableProviderFactory for StreamTableFactory {
         {
             opt.unwrap_or(false)
         } else {
-            return config_err!("Valid values for format.has_header option are 'true' or 'false'");
+            return config_err!(
+                "Valid values for format.has_header option are 'true' or 'false'"
+            );
         };

         let config = StreamConfig::new_file(schema, location.into())
diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs
index 6b29f352dc82..e69a249410b1 100644
--- a/datafusion/core/src/execution/context/mod.rs
+++ b/datafusion/core/src/execution/context/mod.rs
@@ -23,8 +23,6 @@ use std::ops::ControlFlow;
 use std::sync::{Arc, Weak};

 use super::options::ReadOptions;
-#[cfg(feature = "array_expressions")]
-use crate::functions_array;
 use crate::{
     catalog::information_schema::{InformationSchemaProvider, INFORMATION_SCHEMA},
     catalog::listing_schema::ListingSchemaProvider,
@@ -55,17 +53,22 @@ use crate::{
     },
     optimizer::analyzer::{Analyzer, AnalyzerRule},
     optimizer::optimizer::{Optimizer, OptimizerConfig, OptimizerRule},
-    physical_expr::{create_physical_expr, PhysicalExpr},
     physical_optimizer::optimizer::{PhysicalOptimizer, PhysicalOptimizerRule},
     physical_plan::ExecutionPlan,
     physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner},
     variable::{VarProvider, VarType},
 };
+
+#[cfg(feature = "array_expressions")]
+use crate::functions_array;
 use crate::{functions, functions_aggregate};

 use arrow::datatypes::{DataType, SchemaRef};
 use arrow::record_batch::RecordBatch;
 use arrow_schema::Schema;
+use async_trait::async_trait;
+use chrono::{DateTime, Utc};
+use datafusion_common::tree_node::TreeNode;
 use datafusion_common::{
     alias::AliasGenerator,
     config::{ConfigExtension, TableOptions},
@@ -75,30 +78,28 @@ use datafusion_common::{
 };
 use datafusion_execution::registry::SerializerRegistry;
 use datafusion_expr::{
-    expr_rewriter::FunctionRewrite,
     logical_plan::{DdlStatement, Statement},
-    simplify::SimplifyInfo,
     var_provider::is_system_variables,
     Expr, ExprSchemable, StringifiedPlan,
     UserDefinedLogicalNode, WindowUDF,
 };
-use datafusion_optimizer::simplify_expressions::ExprSimplifier;
 use datafusion_sql::{
parser::{CopyToSource, CopyToStatement, DFParser}, planner::{object_name_to_table_reference, ContextProvider, ParserOptions, SqlToRel}, ResolvedTableReference, }; - -use async_trait::async_trait; -use chrono::{DateTime, Utc}; -use datafusion_common::tree_node::TreeNode; use parking_lot::RwLock; use sqlparser::dialect::dialect_from_str; use url::Url; use uuid::Uuid; +use crate::physical_expr::PhysicalExpr; pub use datafusion_execution::config::SessionConfig; pub use datafusion_execution::TaskContext; pub use datafusion_expr::execution_props::ExecutionProps; +use datafusion_expr::expr_rewriter::FunctionRewrite; +use datafusion_expr::simplify::SimplifyInfo; +use datafusion_optimizer::simplify_expressions::ExprSimplifier; +use datafusion_physical_expr::create_physical_expr; mod avro; mod csv; diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 4ef7da0f04aa..e5e57c0bc893 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -15,12 +15,6 @@ // specific language governing permissions and limitations // under the License. -use std::any::Any; -use std::collections::HashMap; -use std::fmt::{self, Debug, Formatter}; -use std::sync::Arc; -use std::vec; - use arrow::array::{ArrayRef, FixedSizeListArray}; use arrow::datatypes::{ DataType, Field, Fields, Int32Type, IntervalDayTimeType, IntervalMonthDayNanoType, @@ -57,11 +51,16 @@ use datafusion_proto::bytes::{ logical_plan_to_bytes, logical_plan_to_bytes_with_extension_codec, }; use datafusion_proto::logical_plan::to_proto::serialize_expr; -use datafusion_proto::logical_plan::{ - from_proto, DefaultLogicalExtensionCodec, LogicalExtensionCodec, -}; +use datafusion_proto::logical_plan::LogicalExtensionCodec; +use datafusion_proto::logical_plan::{from_proto, DefaultLogicalExtensionCodec}; use datafusion_proto::protobuf; +use std::any::Any; +use std::collections::HashMap; +use std::fmt::{self, Debug, Formatter}; +use std::sync::Arc; +use std::vec; +use datafusion::execution::FunctionRegistry; use prost::Message; #[cfg(feature = "json")]
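
Taken together, the series implements two small rules: OPTIONS keys without a namespace are rewritten with a `format.` prefix (which is why the plain `has_header` and `delimiter` spellings in the docs and tests above keep working), and the now-optional CSV `has_header` flag is carried through protobuf as an empty or single-byte `bytes` field. The standalone sketch below uses hypothetical helper names, not DataFusion's public API, to illustrate both rules:

```rust
use std::collections::HashMap;

/// Key normalization mirroring the `statement.rs` hunk: a key without a
/// namespace (no '.') is assumed to be a format option and receives the
/// `format.` prefix for backwards compatibility; namespaced keys are only
/// lowercased. (Hypothetical helper, not DataFusion's actual function.)
fn normalize_option_key(key: &str) -> String {
    if !key.contains('.') {
        format!("format.{}", key.to_lowercase())
    } else {
        key.to_lowercase()
    }
}

/// Encoding mirroring the `to_proto.rs` hunk: an optional bool is stored in
/// a proto `bytes` field as an empty vector (unset) or one 0/1 byte.
fn encode_has_header(has_header: Option<bool>) -> Vec<u8> {
    has_header.map_or_else(Vec::new, |h| vec![h as u8])
}

fn main() {
    let mut options_map: HashMap<String, String> = HashMap::new();
    // 'has_header' has no namespace, so it becomes 'format.has_header'.
    options_map.insert(normalize_option_key("has_header"), "true".to_string());
    // An already-namespaced key passes through, lowercased.
    options_map.insert(normalize_option_key("format.DELIMITER"), "|".to_string());

    assert_eq!(
        options_map.get("format.has_header").map(String::as_str),
        Some("true")
    );
    assert_eq!(
        options_map.get("format.delimiter").map(String::as_str),
        Some("|")
    );

    // An unset header preference serializes to an empty bytes field;
    // Some(true) serializes to a single byte with value 1.
    assert_eq!(encode_has_header(None), Vec::<u8>::new());
    assert_eq!(encode_has_header(Some(true)), vec![1u8]);
}
```

The asserts encode the same expectations as the backwards-compatibility test added to `create_external_table.slt` and the `CsvOptions` serialization change in `to_proto.rs`.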