Skip to content

Commit 502f8fd

Browse files
authored
Merge branch 'spiceai' into sgrebnov/sqlite-retries
2 parents 9ed6f1e + b9cfc4d commit 502f8fd

File tree

8 files changed

+199
-41
lines changed

8 files changed

+199
-41
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ datafusion-federation-sql = { git = "https://github.com/spiceai/datafusion-feder
6161
itertools = "0.13.0"
6262
dyn-clone = { version = "1.0.17", optional = true }
6363
geo-types = "0.7.13"
64+
fundu = "2.0.1"
6465

6566
[dev-dependencies]
6667
anyhow = "1.0.86"

src/sql/arrow_sql_gen/arrow.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use arrow::{
99
TimestampNanosecondBuilder, TimestampSecondBuilder, UInt16Builder, UInt32Builder,
1010
UInt64Builder, UInt8Builder,
1111
},
12-
datatypes::{DataType, TimeUnit},
12+
datatypes::{DataType, TimeUnit, UInt16Type},
1313
};
1414

1515
pub fn map_data_type_to_array_builder_optional(
@@ -21,6 +21,7 @@ pub fn map_data_type_to_array_builder_optional(
2121
}
2222
}
2323

24+
#[allow(clippy::too_many_lines)]
2425
pub fn map_data_type_to_array_builder(data_type: &DataType) -> Box<dyn ArrayBuilder> {
2526
match data_type {
2627
DataType::Int8 => Box::new(Int8Builder::new()),
@@ -67,6 +68,9 @@ pub fn map_data_type_to_array_builder(data_type: &DataType) -> Box<dyn ArrayBuil
6768
(DataType::Int8, DataType::Utf8) => {
6869
Box::new(StringDictionaryBuilder::<Int8Type>::new())
6970
}
71+
(DataType::UInt16, DataType::Utf8) => {
72+
Box::new(StringDictionaryBuilder::<UInt16Type>::new())
73+
}
7074
_ => unimplemented!("Unimplemented dictionary type"),
7175
},
7276
DataType::Date32 => Box::new(Date32Builder::new()),

src/sql/arrow_sql_gen/mysql.rs

Lines changed: 53 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ use arrow::{
44
ArrayBuilder, ArrayRef, BinaryBuilder, Date32Builder, Decimal256Builder, Decimal128Builder, Float32Builder,
55
Float64Builder, Int16Builder, Int32Builder, Int64Builder, Int8Builder, LargeBinaryBuilder,
66
LargeStringBuilder, NullBuilder, RecordBatch, RecordBatchOptions,
7-
StringBuilder, Time64NanosecondBuilder, TimestampMicrosecondBuilder, UInt64Builder,
7+
StringBuilder, Time64NanosecondBuilder, StringDictionaryBuilder, TimestampMicrosecondBuilder, UInt64Builder,
88
},
9-
datatypes::{i256, DataType, Date32Type, Field, Schema, SchemaRef, TimeUnit},
9+
datatypes::{i256, DataType, Date32Type, Field, Schema, SchemaRef, TimeUnit, UInt16Type},
1010
};
1111
use bigdecimal::BigDecimal;
1212
use chrono::{NaiveDate, NaiveTime, Timelike};
@@ -92,6 +92,7 @@ pub fn rows_to_arrow(rows: &[Row], projected_schema: &Option<SchemaRef>) -> Resu
9292
let mut mysql_types: Vec<ColumnType> = Vec::new();
9393
let mut column_names: Vec<String> = Vec::new();
9494
let mut column_is_binary_stats: Vec<bool> = Vec::new();
95+
let mut column_is_enum_stats: Vec<bool> = Vec::new();
9596
let mut column_use_large_str_or_blob_stats: Vec<bool> = Vec::new();
9697

9798
if !rows.is_empty() {
@@ -100,6 +101,7 @@ pub fn rows_to_arrow(rows: &[Row], projected_schema: &Option<SchemaRef>) -> Resu
100101
let column_name = column.name_str();
101102
let column_type = column.column_type();
102103
let column_is_binary = column.flags().contains(ColumnFlags::BINARY_FLAG);
104+
let column_is_enum = column.flags().contains(ColumnFlags::ENUM_FLAG);
103105
let column_use_large_str_or_blob = column.column_length() > 2_u32.pow(31) - 1;
104106

105107
let (decimal_precision, decimal_scale) = match column_type {
@@ -120,6 +122,7 @@ pub fn rows_to_arrow(rows: &[Row], projected_schema: &Option<SchemaRef>) -> Resu
120122
let data_type = map_column_to_data_type(
121123
column_type,
122124
column_is_binary,
125+
column_is_enum,
123126
column_use_large_str_or_blob,
124127
decimal_precision,
125128
decimal_scale,
@@ -135,6 +138,7 @@ pub fn rows_to_arrow(rows: &[Row], projected_schema: &Option<SchemaRef>) -> Resu
135138
mysql_types.push(column_type);
136139
column_names.push(column_name.to_string());
137140
column_is_binary_stats.push(column_is_binary);
141+
column_is_enum_stats.push(column_is_enum);
138142
column_use_large_str_or_blob_stats.push(column_use_large_str_or_blob);
139143
}
140144
}
@@ -299,8 +303,7 @@ pub fn rows_to_arrow(rows: &[Row], projected_schema: &Option<SchemaRef>) -> Resu
299303
}
300304
}
301305
column_type @ (ColumnType::MYSQL_TYPE_VARCHAR
302-
| ColumnType::MYSQL_TYPE_JSON
303-
| ColumnType::MYSQL_TYPE_ENUM) => {
306+
| ColumnType::MYSQL_TYPE_JSON) => {
304307
handle_primitive_type!(
305308
builder,
306309
column_type,
@@ -349,9 +352,40 @@ pub fn rows_to_arrow(rows: &[Row], projected_schema: &Option<SchemaRef>) -> Resu
349352
),
350353
}
351354
}
355+
ColumnType::MYSQL_TYPE_ENUM => {
356+
// ENUM and SET values are returned as strings. For these, check that the type value is MYSQL_TYPE_STRING and that the ENUM_FLAG or SET_FLAG flag is set in the flags value.
357+
// https://dev.mysql.com/doc/c-api/9.0/en/c-api-data-structures.html
358+
unreachable!()
359+
}
352360
column_type @ (ColumnType::MYSQL_TYPE_STRING
353361
| ColumnType::MYSQL_TYPE_VAR_STRING) => {
354-
if column_is_binary_stats[i] {
362+
// Handle MYSQL_TYPE_ENUM value
363+
if column_is_enum_stats[i] {
364+
let Some(builder) = builder else {
365+
return NoBuilderForIndexSnafu { index: i }.fail();
366+
};
367+
let Some(builder) = builder
368+
.as_any_mut()
369+
.downcast_mut::<StringDictionaryBuilder<UInt16Type>>()
370+
else {
371+
return FailedToDowncastBuilderSnafu {
372+
mysql_type: format!("{mysql_type:?}"),
373+
}
374+
.fail();
375+
};
376+
377+
let v = handle_null_error(row.get_opt::<String, usize>(i).transpose())
378+
.context(FailedToGetRowValueSnafu {
379+
mysql_type: ColumnType::MYSQL_TYPE_ENUM,
380+
})?;
381+
382+
match v {
383+
Some(v) => {
384+
builder.append_value(v);
385+
}
386+
None => builder.append_null(),
387+
}
388+
} else if column_is_binary_stats[i] {
355389
handle_primitive_type!(
356390
builder,
357391
column_type,
@@ -361,7 +395,14 @@ pub fn rows_to_arrow(rows: &[Row], projected_schema: &Option<SchemaRef>) -> Resu
361395
i
362396
);
363397
} else {
364-
handle_primitive_type!(builder, column_type, StringBuilder, String, row, i);
398+
handle_primitive_type!(
399+
builder,
400+
column_type,
401+
StringBuilder,
402+
String,
403+
row,
404+
i
405+
);
365406
}
366407
}
367408
ColumnType::MYSQL_TYPE_DATE => {
@@ -463,6 +504,7 @@ pub fn rows_to_arrow(rows: &[Row], projected_schema: &Option<SchemaRef>) -> Resu
463504
pub fn map_column_to_data_type(
464505
column_type: ColumnType,
465506
column_is_binary: bool,
507+
column_is_enum: bool,
466508
column_use_large_str_or_blob: bool,
467509
column_decimal_precision: Option<u8>,
468510
column_decimal_scale: Option<i8>,
@@ -491,9 +533,7 @@ pub fn map_column_to_data_type(
491533
Some(DataType::Time64(TimeUnit::Nanosecond))
492534
}
493535
ColumnType::MYSQL_TYPE_VARCHAR
494-
| ColumnType::MYSQL_TYPE_JSON
495-
| ColumnType::MYSQL_TYPE_ENUM
496-
| ColumnType::MYSQL_TYPE_SET => Some(DataType::LargeUtf8),
536+
| ColumnType::MYSQL_TYPE_JSON => Some(DataType::LargeUtf8),
497537
// MYSQL_TYPE_BLOB includes TINYBLOB, BLOB, MEDIUMBLOB, LONGBLOB, TINYTEXT, TEXT, MEDIUMTEXT, LONGTEXT https://dev.mysql.com/doc/c-api/8.0/en/c-api-data-structures.html
498538
// MySQL String Type Storage requirement: https://dev.mysql.com/doc/refman/8.4/en/storage-requirements.html
499539
// Binary / Utf8 stores up to 2^31 - 1 length binary / non-binary string
@@ -505,9 +545,12 @@ pub fn map_column_to_data_type(
505545
(false, false) => Some(DataType::Utf8),
506546
}
507547
}
548+
ColumnType::MYSQL_TYPE_ENUM | ColumnType::MYSQL_TYPE_SET => unreachable!(),
508549
ColumnType::MYSQL_TYPE_STRING
509550
| ColumnType::MYSQL_TYPE_VAR_STRING => {
510-
if column_is_binary {
551+
if column_is_enum {
552+
Some(DataType::Dictionary(Box::new(DataType::UInt16), Box::new(DataType::Utf8)))
553+
} else if column_is_binary {
511554
Some(DataType::Binary)
512555
} else {
513556
Some(DataType::Utf8)

src/sql/db_connection_pool/dbconnection/mysqlconn.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ fn columns_meta_to_schema(columns_meta: Vec<Row>) -> Result<SchemaRef> {
185185

186186
let column_type = map_str_type_to_column_type(&data_type)?;
187187
let column_is_binary = map_str_type_to_is_binary(&data_type);
188+
let column_is_enum = map_str_type_to_is_enum(&data_type);
188189
let column_use_large_str_or_blob = map_str_type_to_use_large_str_or_blob(&data_type);
189190

190191
let (precision, scale) = match column_type {
@@ -199,6 +200,7 @@ fn columns_meta_to_schema(columns_meta: Vec<Row>) -> Result<SchemaRef> {
199200
let arrow_data_type = map_column_to_data_type(
200201
column_type,
201202
column_is_binary,
203+
column_is_enum,
202204
column_use_large_str_or_blob,
203205
precision,
204206
scale,
@@ -235,8 +237,9 @@ fn map_str_type_to_column_type(data_type: &str) -> Result<ColumnType> {
235237
_ if data_type.starts_with("array") => ColumnType::MYSQL_TYPE_TYPED_ARRAY,
236238
_ if data_type.starts_with("json") => ColumnType::MYSQL_TYPE_JSON,
237239
_ if data_type.starts_with("newdecimal") => ColumnType::MYSQL_TYPE_NEWDECIMAL,
238-
_ if data_type.starts_with("enum") => ColumnType::MYSQL_TYPE_ENUM,
239-
_ if data_type.starts_with("set") => ColumnType::MYSQL_TYPE_SET,
240+
// MySQL ENUM & SET value is exported as MYSQL_TYPE_STRING under c api: https://dev.mysql.com/doc/c-api/9.0/en/c-api-data-structures.html
241+
_ if data_type.starts_with("enum") => ColumnType::MYSQL_TYPE_STRING,
242+
_ if data_type.starts_with("set") => ColumnType::MYSQL_TYPE_STRING,
240243
_ if data_type.starts_with("tinyblob") => ColumnType::MYSQL_TYPE_BLOB,
241244
_ if data_type.starts_with("tinytext") => ColumnType::MYSQL_TYPE_BLOB,
242245
_ if data_type.starts_with("mediumblob") => ColumnType::MYSQL_TYPE_BLOB,
@@ -276,6 +279,13 @@ fn map_str_type_to_use_large_str_or_blob(data_type: &str) -> bool {
276279
false
277280
}
278281

282+
fn map_str_type_to_is_enum(data_type: &str) -> bool {
283+
if data_type.starts_with("enum") {
284+
return true;
285+
}
286+
false
287+
}
288+
279289
fn extract_decimal_precision_and_scale(data_type: &str) -> Result<(u8, i8)> {
280290
let (start, end) = match (data_type.find('('), data_type.find(')')) {
281291
(Some(start), Some(end)) => (start, end),

0 commit comments

Comments
 (0)