Skip to content

Commit b0c9521

Browse files
authored
chore: include the path when reading an invalid Parquet file. (#18343)
1 parent 11afe9d commit b0c9521

File tree

3 files changed

+18
-10
lines changed

3 files changed

+18
-10
lines changed

src/common/storage/src/parquet.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ pub async fn read_metadata_async(
116116
let buffer_len = buffer.len();
117117

118118
let map_err =
119-
|e: ParquetError| ErrorCode::BadBytes(format!("Invalid Parquet File {path}: {e}",));
119+
|e: ParquetError| ErrorCode::BadBytes(format!("Invalid Parquet file '{path}': {e}",));
120120
let footer_tail = ParquetMetaDataReader::decode_footer_tail(
121121
&buffer[(buffer_len - FOOTER_SIZE as usize)..]
122122
.try_into()
@@ -160,7 +160,7 @@ pub fn read_metadata_sync(
160160
check_footer_size(file_size, path)?;
161161

162162
let map_err =
163-
|e: ParquetError| ErrorCode::BadBytes(format!("Invalid Parquet File {path}: {e}",));
163+
|e: ParquetError| ErrorCode::BadBytes(format!("Invalid Parquet file '{path}': {e}",));
164164
// read and cache up to DEFAULT_FOOTER_READ_SIZE bytes from the end and process the footer
165165
let default_end_len = DEFAULT_FOOTER_READ_SIZE.min(file_size);
166166
let buffer = blocking
@@ -198,7 +198,7 @@ pub fn read_metadata_sync(
198198
fn check_footer_size(file_size: u64, path: &str) -> Result<()> {
199199
if file_size < FOOTER_SIZE {
200200
Err(ErrorCode::BadBytes(format!(
201-
"Invalid Parquet file {path}. Size is smaller than footer."
201+
"Not a parquet file ({path}): only {file_size} bytes."
202202
)))
203203
} else {
204204
Ok(())
@@ -209,7 +209,7 @@ fn check_footer_size(file_size: u64, path: &str) -> Result<()> {
209209
fn check_meta_size(file_size: u64, metadata_len: u64, path: &str) -> Result<()> {
210210
if metadata_len + FOOTER_SIZE > file_size {
211211
Err(ErrorCode::BadBytes(format!(
212-
"Invalid Parquet file {path}. Reported metadata length of {} + {} byte footer, but file is only {} bytes",
212+
"Invalid Parquet file '{path}': Reported metadata length of {} + {} byte footer, but file is only {} bytes",
213213
metadata_len, FOOTER_SIZE, file_size
214214
)))
215215
} else {

src/query/storages/parquet/src/parquet_reader/reader/full_reader.rs

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -179,12 +179,11 @@ impl ParquetWholeFileReader {
179179

180180
/// Read a [`DataBlock`] from bytes.
181181
pub fn read_blocks_from_binary(&self, bytes: Bytes, path: &str) -> Result<DataBlockIterator> {
182-
let mut builder = ParquetRecordBatchReaderBuilder::try_new_with_options(
183-
bytes,
184-
ArrowReaderOptions::new(),
185-
)?
186-
.with_projection(self.projection.clone())
187-
.with_batch_size(self.batch_size);
182+
let mut builder =
183+
ParquetRecordBatchReaderBuilder::try_new_with_options(bytes, ArrowReaderOptions::new())
184+
.map_err(|e| format!("Invalid Parquet file '{path}': {e}"))?
185+
.with_projection(self.projection.clone())
186+
.with_batch_size(self.batch_size);
188187

189188
// Prune row groups.
190189
let file_meta = builder.metadata().clone();

tests/sqllogictests/suites/stage/formats/parquet/select_parquet.test

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,12 @@ query I
5959
select count() from @data/parquet/no-stats.parquet (pattern => '') where line_item_usage_start_date < '2024-09-11 06:00:00'
6060
----
6161
0
62+
63+
query error gen
64+
settings (parquet_fast_read_bytes=0) select * from @data/parquet/diff_schema/ (files=>('f1.parquet', 'gen.py'));
65+
66+
query error gen
67+
settings select * from @data/parquet/diff_schema/ (files=>('f1.parquet', 'gen.py'));
68+
69+
query error gen
70+
settings select * from @data/parquet/diff_schema/ (files=>('gen.py', 'f1.parquet'));

0 commit comments

Comments
 (0)