@@ -2,9 +2,9 @@ use crate::sql::arrow_sql_gen::arrow::map_data_type_to_array_builder_optional;
22use arrow:: {
33 array:: {
44 ArrayBuilder , ArrayRef , BinaryBuilder , Date32Builder , Decimal256Builder , Decimal128Builder , Float32Builder ,
5- Float64Builder , Int16Builder , Int32Builder , Int64Builder , Int8Builder , LargeStringBuilder ,
6- NullBuilder , RecordBatch , RecordBatchOptions , StringBuilder , Time64NanosecondBuilder ,
7- TimestampMicrosecondBuilder , UInt64Builder ,
5+ Float64Builder , Int16Builder , Int32Builder , Int64Builder , Int8Builder , LargeBinaryBuilder ,
6+ LargeStringBuilder , NullBuilder , RecordBatch , RecordBatchOptions ,
7+ StringBuilder , Time64NanosecondBuilder , TimestampMicrosecondBuilder , UInt64Builder ,
88 } ,
99 datatypes:: { i256, DataType , Date32Type , Field , Schema , SchemaRef , TimeUnit } ,
1010} ;
@@ -92,13 +92,15 @@ pub fn rows_to_arrow(rows: &[Row], projected_schema: &Option<SchemaRef>) -> Resu
9292 let mut mysql_types: Vec < ColumnType > = Vec :: new ( ) ;
9393 let mut column_names: Vec < String > = Vec :: new ( ) ;
9494 let mut column_is_binary_stats: Vec < bool > = Vec :: new ( ) ;
95+ let mut column_use_large_str_or_blob_stats: Vec < bool > = Vec :: new ( ) ;
9596
9697 if !rows. is_empty ( ) {
9798 let row = & rows[ 0 ] ;
9899 for column in row. columns ( ) . iter ( ) {
99100 let column_name = column. name_str ( ) ;
100101 let column_type = column. column_type ( ) ;
101102 let column_is_binary = column. flags ( ) . contains ( ColumnFlags :: BINARY_FLAG ) ;
103+ let column_use_large_str_or_blob = column. column_length ( ) > 2_u32 . pow ( 31 ) - 1 ;
102104
103105 let ( decimal_precision, decimal_scale) = match column_type {
104106 ColumnType :: MYSQL_TYPE_DECIMAL | ColumnType :: MYSQL_TYPE_NEWDECIMAL => {
@@ -118,6 +120,7 @@ pub fn rows_to_arrow(rows: &[Row], projected_schema: &Option<SchemaRef>) -> Resu
118120 let data_type = map_column_to_data_type (
119121 column_type,
120122 column_is_binary,
123+ column_use_large_str_or_blob,
121124 decimal_precision,
122125 decimal_scale,
123126 ) ;
@@ -132,6 +135,7 @@ pub fn rows_to_arrow(rows: &[Row], projected_schema: &Option<SchemaRef>) -> Resu
132135 mysql_types. push ( column_type) ;
133136 column_names. push ( column_name. to_string ( ) ) ;
134137 column_is_binary_stats. push ( column_is_binary) ;
138+ column_use_large_str_or_blob_stats. push ( column_use_large_str_or_blob) ;
135139 }
136140 }
137141
@@ -296,10 +300,6 @@ pub fn rows_to_arrow(rows: &[Row], projected_schema: &Option<SchemaRef>) -> Resu
296300 }
297301 column_type @ ( ColumnType :: MYSQL_TYPE_VARCHAR
298302 | ColumnType :: MYSQL_TYPE_JSON
299- | ColumnType :: MYSQL_TYPE_TINY_BLOB
300- | ColumnType :: MYSQL_TYPE_BLOB
301- | ColumnType :: MYSQL_TYPE_MEDIUM_BLOB
302- | ColumnType :: MYSQL_TYPE_LONG_BLOB
303303 | ColumnType :: MYSQL_TYPE_ENUM ) => {
304304 handle_primitive_type ! (
305305 builder,
@@ -310,6 +310,45 @@ pub fn rows_to_arrow(rows: &[Row], projected_schema: &Option<SchemaRef>) -> Resu
310310 i
311311 ) ;
312312 }
313+ ColumnType :: MYSQL_TYPE_BLOB => {
314+ match (
315+ column_use_large_str_or_blob_stats[ i] ,
316+ column_is_binary_stats[ i] ,
317+ ) {
318+ ( true , true ) => handle_primitive_type ! (
319+ builder,
320+ ColumnType :: MYSQL_TYPE_BLOB ,
321+ LargeBinaryBuilder ,
322+ Vec <u8 >,
323+ row,
324+ i
325+ ) ,
326+ ( true , false ) => handle_primitive_type ! (
327+ builder,
328+ ColumnType :: MYSQL_TYPE_BLOB ,
329+ LargeStringBuilder ,
330+ String ,
331+ row,
332+ i
333+ ) ,
334+ ( false , true ) => handle_primitive_type ! (
335+ builder,
336+ ColumnType :: MYSQL_TYPE_BLOB ,
337+ BinaryBuilder ,
338+ Vec <u8 >,
339+ row,
340+ i
341+ ) ,
342+ ( false , false ) => handle_primitive_type ! (
343+ builder,
344+ ColumnType :: MYSQL_TYPE_BLOB ,
345+ StringBuilder ,
346+ String ,
347+ row,
348+ i
349+ ) ,
350+ }
351+ }
313352 column_type @ ( ColumnType :: MYSQL_TYPE_STRING
314353 | ColumnType :: MYSQL_TYPE_VAR_STRING ) => {
315354 if column_is_binary_stats[ i] {
@@ -424,6 +463,7 @@ pub fn rows_to_arrow(rows: &[Row], projected_schema: &Option<SchemaRef>) -> Resu
424463pub fn map_column_to_data_type (
425464 column_type : ColumnType ,
426465 column_is_binary : bool ,
466+ column_use_large_str_or_blob : bool ,
427467 column_decimal_precision : Option < u8 > ,
428468 column_decimal_scale : Option < i8 > ,
429469) -> Option < DataType > {
@@ -453,11 +493,18 @@ pub fn map_column_to_data_type(
453493 ColumnType :: MYSQL_TYPE_VARCHAR
454494 | ColumnType :: MYSQL_TYPE_JSON
455495 | ColumnType :: MYSQL_TYPE_ENUM
456- | ColumnType :: MYSQL_TYPE_SET
457- | ColumnType :: MYSQL_TYPE_TINY_BLOB
458- | ColumnType :: MYSQL_TYPE_BLOB
459- | ColumnType :: MYSQL_TYPE_MEDIUM_BLOB
460- | ColumnType :: MYSQL_TYPE_LONG_BLOB => Some ( DataType :: LargeUtf8 ) ,
496+ | ColumnType :: MYSQL_TYPE_SET => Some ( DataType :: LargeUtf8 ) ,
497+ // MYSQL_TYPE_BLOB covers every BLOB and TEXT variant (TINYBLOB, BLOB, MEDIUMBLOB, LONGBLOB, TINYTEXT, TEXT, MEDIUMTEXT, LONGTEXT): https://dev.mysql.com/doc/c-api/8.0/en/c-api-data-structures.html
498+ // MySQL string type storage requirements: https://dev.mysql.com/doc/refman/8.4/en/storage-requirements.html
499+ // Arrow Binary / Utf8 can hold at most 2^31 - 1 bytes of binary / non-binary string data, so columns whose declared length exceeds that are mapped to LargeBinary / LargeUtf8 instead.
500+ ColumnType :: MYSQL_TYPE_BLOB => {
501+ match ( column_use_large_str_or_blob, column_is_binary) {
502+ ( true , true ) => Some ( DataType :: LargeBinary ) ,
503+ ( true , false ) => Some ( DataType :: LargeUtf8 ) ,
504+ ( false , true ) => Some ( DataType :: Binary ) ,
505+ ( false , false ) => Some ( DataType :: Utf8 ) ,
506+ }
507+ }
461508 ColumnType :: MYSQL_TYPE_STRING
462509 | ColumnType :: MYSQL_TYPE_VAR_STRING => {
463510 if column_is_binary {
@@ -475,6 +522,9 @@ pub fn map_column_to_data_type(
475522 | ColumnType :: MYSQL_TYPE_TIMESTAMP2
476523 | ColumnType :: MYSQL_TYPE_DATETIME2
477524 | ColumnType :: MYSQL_TYPE_TIME2
525+ | ColumnType :: MYSQL_TYPE_LONG_BLOB
526+ | ColumnType :: MYSQL_TYPE_TINY_BLOB
527+ | ColumnType :: MYSQL_TYPE_MEDIUM_BLOB
478528 | ColumnType :: MYSQL_TYPE_GEOMETRY => {
479529 unimplemented ! ( "Unsupported column type {:?}" , column_type)
480530 }
0 commit comments