Skip to content

Commit f80bc5f

Browse files
authored
parquet writer: Raise an error when the row_group_index overflows i16 (#6378)
Previously, the overflow caused confusing panics down the line because the resulting 'ordinal' was negative.
1 parent 60ec869 commit f80bc5f

File tree

1 file changed

+14
-3
lines changed

1 file changed

+14
-3
lines changed

parquet/src/file/writer.rs

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -190,14 +190,25 @@ impl<W: Write + Send> SerializedFileWriter<W> {
190190
/// Creates new row group from this file writer.
191191
/// In case of IO error or Thrift error, returns `Err`.
192192
///
193-
/// There is no limit on a number of row groups in a file; however, row groups have
193+
/// There can be at most 2^15 row groups in a file, and row groups have
194194
/// to be written sequentially. Every time the next row group is requested, the
195195
/// previous row group must be finalised and closed using `RowGroupWriter::close` method.
196196
pub fn next_row_group(&mut self) -> Result<SerializedRowGroupWriter<'_, W>> {
197197
self.assert_previous_writer_closed()?;
198198
let ordinal = self.row_group_index;
199199

200-
self.row_group_index += 1;
200+
let ordinal: i16 = ordinal.try_into().map_err(|_| {
201+
ParquetError::General(format!(
202+
"Parquet does not support more than {} row groups per file (currently: {})",
203+
i16::MAX,
204+
ordinal
205+
))
206+
})?;
207+
208+
self.row_group_index = self
209+
.row_group_index
210+
.checked_add(1)
211+
.expect("SerializedFileWriter::row_group_index overflowed");
201212

202213
let bloom_filter_position = self.properties().bloom_filter_position();
203214
let row_groups = &mut self.row_groups;
@@ -227,7 +238,7 @@ impl<W: Write + Send> SerializedFileWriter<W> {
227238
self.descr.clone(),
228239
self.props.clone(),
229240
&mut self.buf,
230-
ordinal as i16,
241+
ordinal,
231242
Some(Box::new(on_close)),
232243
);
233244
Ok(row_group_writer)

0 commit comments

Comments
 (0)