Skip to content

Commit 8e872c3

Browse files
committed
Initial commit
1 parent 54592e8 commit 8e872c3

File tree

15 files changed

+342
-216
lines changed

15 files changed

+342
-216
lines changed

Cargo.lock

Lines changed: 252 additions & 132 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -89,20 +89,22 @@ ahash = { version = "0.8", default-features = false, features = [
8989
"runtime-rng",
9090
] }
9191
apache-avro = { version = "0.17", default-features = false }
92-
arrow = { version = "55.2.0", features = [
92+
arrow = { git = "https://github.com/rok/arrow-rs.git", branch = "multi-threaded_encrypted_writing" , features = [
9393
"prettyprint",
9494
"chrono-tz",
9595
] }
96-
arrow-buffer = { version = "55.2.0", default-features = false }
97-
arrow-flight = { version = "55.2.0", features = [
96+
97+
98+
arrow-buffer = { git = "https://github.com/rok/arrow-rs.git", branch = "multi-threaded_encrypted_writing", default-features = false }
99+
arrow-flight = {git = "https://github.com/rok/arrow-rs.git", features = [
98100
"flight-sql-experimental",
99101
] }
100-
arrow-ipc = { version = "55.2.0", default-features = false, features = [
102+
arrow-ipc = { git = "https://github.com/rok/arrow-rs.git", branch = "multi-threaded_encrypted_writing" , default-features = false, features = [
101103
"lz4",
102104
"zstd",
103105
] }
104-
arrow-ord = { version = "55.2.0", default-features = false }
105-
arrow-schema = { version = "55.2.0", default-features = false }
106+
arrow-ord = { git = "https://github.com/rok/arrow-rs.git", branch = "multi-threaded_encrypted_writing" , default-features = false }
107+
arrow-schema = { git = "https://github.com/rok/arrow-rs.git", branch = "multi-threaded_encrypted_writing" , default-features = false }
106108
async-trait = "0.1.88"
107109
bigdecimal = "0.4.8"
108110
bytes = "1.10"
@@ -155,7 +157,7 @@ itertools = "0.14"
155157
log = "^0.4"
156158
object_store = { version = "0.12.2", default-features = false }
157159
parking_lot = "0.12"
158-
parquet = { version = "55.2.0", default-features = false, features = [
160+
parquet = { git = "https://github.com/rok/arrow-rs.git", branch = "multi-threaded_encrypted_writing" , default-features = false, features = [
159161
"arrow",
160162
"async",
161163
"object_store",

datafusion-examples/Cargo.toml

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,6 @@ rust-version = { workspace = true }
3232
[lints]
3333
workspace = true
3434

35-
[[example]]
36-
name = "flight_sql_server"
37-
path = "examples/flight/flight_sql_server.rs"
38-
39-
[[example]]
40-
name = "flight_server"
41-
path = "examples/flight/flight_server.rs"
42-
43-
[[example]]
44-
name = "flight_client"
45-
path = "examples/flight/flight_client.rs"
46-
4735
[[example]]
4836
name = "dataframe_to_s3"
4937
path = "examples/external_dependency/dataframe-to-s3.rs"

datafusion/common/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ log = { workspace = true }
6565
object_store = { workspace = true, optional = true }
6666
parquet = { workspace = true, optional = true, default-features = true }
6767
paste = "1.0.15"
68-
pyo3 = { version = "0.24.2", optional = true }
68+
pyo3 = { version = "0.25.1", optional = true }
6969
recursive = { workspace = true, optional = true }
7070
sqlparser = { workspace = true }
7171
tokio = { workspace = true }

datafusion/common/src/file_options/parquet_writer.rs

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ use crate::{
2525
DataFusionError, Result, _internal_datafusion_err,
2626
};
2727

28+
pub const DEFAULT_MAX_STATISTICS_SIZE: usize = 4096;
29+
2830
use arrow::datatypes::Schema;
2931
// TODO: handle once deprecated
3032
#[allow(deprecated)]
@@ -35,7 +37,7 @@ use parquet::{
3537
metadata::KeyValue,
3638
properties::{
3739
EnabledStatistics, WriterProperties, WriterPropertiesBuilder, WriterVersion,
38-
DEFAULT_MAX_STATISTICS_SIZE, DEFAULT_STATISTICS_ENABLED,
40+
DEFAULT_STATISTICS_ENABLED,
3941
},
4042
},
4143
schema::types::ColumnPath,
@@ -167,13 +169,13 @@ impl TryFrom<&TableParquetOptions> for WriterPropertiesBuilder {
167169

168170
// max_statistics_size is deprecated, currently it is not being used
169171
// TODO: remove once deprecated
170-
#[allow(deprecated)]
171-
if let Some(max_statistics_size) = options.max_statistics_size {
172-
builder = {
173-
#[allow(deprecated)]
174-
builder.set_column_max_statistics_size(path, max_statistics_size)
175-
}
176-
}
172+
// #[allow(deprecated)]
173+
// if let Some(max_statistics_size) = options.max_statistics_size {
174+
// builder = {
175+
// #[allow(deprecated)]
176+
// builder.set_column_max_statistics_size(path, max_statistics_size)
177+
// }
178+
// }
177179
}
178180

179181
Ok(builder)
@@ -222,7 +224,7 @@ impl ParquetOptions {
222224
dictionary_enabled,
223225
dictionary_page_size_limit,
224226
statistics_enabled,
225-
max_statistics_size,
227+
max_statistics_size: _max_statistics_size,
226228
max_row_group_size,
227229
created_by,
228230
column_index_truncate_length,
@@ -268,12 +270,12 @@ impl ParquetOptions {
268270
.set_data_page_row_count_limit(*data_page_row_count_limit)
269271
.set_bloom_filter_enabled(*bloom_filter_on_write);
270272

271-
builder = {
272-
#[allow(deprecated)]
273-
builder.set_max_statistics_size(
274-
max_statistics_size.unwrap_or(DEFAULT_MAX_STATISTICS_SIZE),
275-
)
276-
};
273+
// builder = {
274+
// #[allow(deprecated)]
275+
// builder.set_max_statistics_size(
276+
// max_statistics_size.unwrap_or(DEFAULT_MAX_STATISTICS_SIZE),
277+
// )
278+
// };
277279

278280
if let Some(bloom_filter_fpp) = bloom_filter_fpp {
279281
builder = builder.set_bloom_filter_fpp(*bloom_filter_fpp);
@@ -557,7 +559,7 @@ mod tests {
557559
),
558560
bloom_filter_fpp: bloom_filter_default_props.map(|p| p.fpp),
559561
bloom_filter_ndv: bloom_filter_default_props.map(|p| p.ndv),
560-
max_statistics_size: Some(props.max_statistics_size(&col)),
562+
max_statistics_size: Some(DEFAULT_MAX_STATISTICS_SIZE),
561563
}
562564
}
563565

datafusion/common/src/scalar/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2134,7 +2134,9 @@ impl ScalarValue {
21342134
| DataType::Time64(TimeUnit::Millisecond)
21352135
| DataType::RunEndEncoded(_, _)
21362136
| DataType::ListView(_)
2137-
| DataType::LargeListView(_) => {
2137+
| DataType::LargeListView(_)
2138+
| DataType::Decimal32(_, _)
2139+
| DataType::Decimal64(_, _) => {
21382140
return _not_impl_err!(
21392141
"Unsupported creation of {:?} array from ScalarValue {:?}",
21402142
data_type,

datafusion/common/src/types/native.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,10 @@ impl From<DataType> for NativeType {
407407
DataType::Union(union_fields, _) => {
408408
Union(LogicalUnionFields::from(&union_fields))
409409
}
410-
DataType::Decimal128(p, s) | DataType::Decimal256(p, s) => Decimal(p, s),
410+
DataType::Decimal32(p, s)
411+
| DataType::Decimal64(p, s)
412+
| DataType::Decimal128(p, s)
413+
| DataType::Decimal256(p, s) => Decimal(p, s),
411414
DataType::Map(field, _) => Map(Arc::new(field.as_ref().into())),
412415
DataType::Dictionary(_, data_type) => data_type.as_ref().clone().into(),
413416
DataType::RunEndEncoded(_, field) => field.data_type().clone().into(),

datafusion/core/src/dataframe/parquet.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,7 @@ mod tests {
277277
// Write encrypted parquet using write_parquet
278278
let mut options = TableParquetOptions::default();
279279
options.crypto.file_encryption = Some((&encrypt).into());
280+
options.global.allow_single_file_parallelism = true;
280281

281282
df.write_parquet(
282283
tempfile_str.as_str(),

datafusion/core/tests/fuzz_cases/pruning.rs

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -314,18 +314,12 @@ async fn execute_with_predicate(
314314
}
315315

316316
async fn write_parquet_file(
317-
truncation_length: Option<usize>,
317+
_truncation_length: Option<usize>,
318318
schema: Arc<Schema>,
319319
row_groups: Vec<Vec<String>>,
320320
) -> Bytes {
321321
let mut buf = BytesMut::new().writer();
322322
let mut props = WriterProperties::builder();
323-
if let Some(truncation_length) = truncation_length {
324-
props = {
325-
#[allow(deprecated)]
326-
props.set_max_statistics_size(truncation_length)
327-
}
328-
}
329323
props = props.set_statistics_enabled(EnabledStatistics::Chunk); // row group level
330324
let props = props.build();
331325
{

datafusion/datasource-avro/src/avro_to_arrow/schema.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,8 @@ fn default_field_name(dt: &DataType) -> &str {
237237
}
238238
DataType::Decimal128(_, _) => "decimal",
239239
DataType::Decimal256(_, _) => "decimal",
240+
DataType::Decimal32(_, _) => "decimal",
241+
DataType::Decimal64(_, _) => "decimal",
240242
}
241243
}
242244

0 commit comments

Comments
 (0)