Skip to content

Commit 659d1a2

Browse files
authored
Merge pull request #9171 from sundy-li/binary-arrow
feat(query): bump arrow2 crate
2 parents 9de72eb + 8b25af6 commit 659d1a2

File tree

5 files changed

+59
-20
lines changed

5 files changed

+59
-20
lines changed

Cargo.lock

Lines changed: 41 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/common/arrow/Cargo.toml

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -34,12 +34,13 @@ simd = ["arrow/simd"]
3434
# Workspace dependencies
3535

3636
# Crates.io dependencies
37-
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "95e117d", default-features = false, features = [
37+
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "1da33ac", default-features = false, features = [
3838
"io_parquet",
3939
"io_parquet_compression",
4040
] }
41+
4142
arrow-format = { version = "0.8.0", features = ["flight-data", "flight-service", "ipc"] }
4243
futures = "0.3.24"
43-
parquet2 = { version = "0.16.3", default_features = false }
44+
parquet2 = { version = "0.17.0", default_features = false }
4445

4546
[dev-dependencies]

src/query/datablocks/src/serialize.rs

Lines changed: 9 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@ pub fn serialize_data_blocks_with_compression(
4040
write_statistics: false,
4141
compression,
4242
version: Version::V2,
43+
data_pagesize_limit: None,
4344
};
4445
let batches = blocks
4546
.into_iter()
@@ -87,16 +88,12 @@ pub fn serialize_data_blocks(
8788
serialize_data_blocks_with_compression(blocks, schema, buf, CompressionOptions::Lz4Raw)
8889
}
8990

90-
fn col_encoding(_data_type: &ArrowDataType) -> Encoding {
91-
// Although encoding does work, parquet2 has not implemented decoding of DeltaLengthByteArray yet, we fallback to Plain
92-
// From parquet2: Decoding "DeltaLengthByteArray"-encoded required V2 pages is not yet implemented for Binary.
93-
//
94-
// match data_type {
95-
// ArrowDataType::Binary
96-
// | ArrowDataType::LargeBinary
97-
// | ArrowDataType::Utf8
98-
// | ArrowDataType::LargeUtf8 => Encoding::DeltaLengthByteArray,
99-
// _ => Encoding::Plain,
100-
//}
101-
Encoding::Plain
91+
fn col_encoding(data_type: &ArrowDataType) -> Encoding {
92+
match data_type {
93+
ArrowDataType::Binary
94+
| ArrowDataType::LargeBinary
95+
| ArrowDataType::Utf8
96+
| ArrowDataType::LargeUtf8 => Encoding::DeltaLengthByteArray,
97+
_ => Encoding::Plain,
98+
}
10299
}

src/query/storages/fuse/fuse/src/io/read/block_reader.rs

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -430,6 +430,9 @@ impl BlockReader {
430430
Err(ErrorCode::StorageOther(err_msg))
431431
}
432432
Compression::Lz4Raw => Ok(ParquetCompression::Lz4Raw),
433+
Compression::Snappy => Ok(ParquetCompression::Snappy),
434+
Compression::Zstd => Ok(ParquetCompression::Zstd),
435+
Compression::Gzip => Ok(ParquetCompression::Gzip),
433436
}
434437
}
435438

src/query/storages/table-meta/src/meta/common.rs

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -85,6 +85,9 @@ where Self: Sized
8585
pub enum Compression {
8686
Lz4,
8787
Lz4Raw,
88+
Snappy,
89+
Zstd,
90+
Gzip,
8891
}
8992

9093
impl Compression {

0 commit comments

Comments
 (0)