Skip to content

Commit 4556bd0

Browse files
authored
fix: wrong usage of compression_ratio in parquet table. (#18333)
1 parent 91100fe commit 4556bd0

File tree

1 file changed

+3
-6
lines changed

1 file changed

+3
-6
lines changed

src/query/storages/parquet/src/parquet_part.rs

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -167,7 +167,7 @@ fn collect_small_file_parts(
167167
}
168168
if max_compressed_size == 0 {
169169
// there are no large files, so we choose a default value.
170-
max_compressed_size = ((128usize << 20) as f64 / max_compression_ratio) as u64;
170+
max_compressed_size = 128u64 << 20;
171171
}
172172
let mut num_small_files = small_files.len();
173173
stats.read_rows += num_small_files;
@@ -195,7 +195,7 @@ fn collect_small_file_parts(
195195
.map(|(path, size, dedup_key)| ParquetFilePart {
196196
file: path,
197197
compressed_size: size,
198-
estimated_uncompressed_size: (size as f64 / max_compression_ratio) as u64,
198+
estimated_uncompressed_size: (size as f64 * max_compression_ratio) as u64,
199199
dedup_key,
200200
bucket_option: None,
201201
})
@@ -284,18 +284,15 @@ pub(crate) fn collect_parts(
284284
);
285285

286286
if !small_files.is_empty() {
287-
let mut max_compression_ratio = compression_ratio;
288287
let mut max_compressed_size = 0u64;
289288
for part in partitions.partitions.iter() {
290289
let p = part.as_any().downcast_ref::<ParquetPart>().unwrap();
291-
max_compression_ratio = max_compression_ratio
292-
.max(p.uncompressed_size() as f64 / p.compressed_size() as f64);
293290
max_compressed_size = max_compressed_size.max(p.compressed_size());
294291
}
295292

296293
collect_small_file_parts(
297294
small_files,
298-
max_compression_ratio,
295+
compression_ratio,
299296
max_compressed_size,
300297
&mut partitions,
301298
&mut stats,

0 commit comments

Comments
 (0)