Skip to content

Commit 2288c01

Browse files
committed
Merge remote-tracking branch 'upstream/branch-25.02'
2 parents 50b2d0f + 1fe744f commit 2288c01

File tree

8 files changed

+111
-96
lines changed

8 files changed

+111
-96
lines changed

conda/environments/all_cuda-118_arch-x86_64.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ dependencies:
6060
- numpy>=1.23,<3.0a0
6161
- numpydoc
6262
- nvcc_linux-64=11.8
63-
- nvcomp==4.1.0.6
63+
- nvcomp==4.2.0.11
6464
- nvtx>=0.2.1
6565
- openpyxl
6666
- packaging

conda/environments/all_cuda-128_arch-x86_64.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ dependencies:
5858
- numba>=0.59.1,<0.61.0a0
5959
- numpy>=1.23,<3.0a0
6060
- numpydoc
61-
- nvcomp==4.1.0.6
61+
- nvcomp==4.2.0.11
6262
- nvtx>=0.2.1
6363
- openpyxl
6464
- packaging

conda/recipes/libcudf/conda_build_config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ flatbuffers_version:
3232
- "=24.3.25"
3333

3434
nvcomp_version:
35-
- "=4.1.0.6"
35+
- "=4.2.0.11"
3636

3737
zlib_version:
3838
- ">=1.2.13"

cpp/src/io/parquet/reader_impl_preprocess.cu

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1463,7 +1463,7 @@ void reader::impl::preprocess_subpass_pages(read_mode mode, size_t chunk_read_li
14631463
page_input,
14641464
chunk_row_output_iter{pass.pages.device_ptr()});
14651465

1466-
// copy chunk row into the subpass pages
1466+
// copy chunk_row into the subpass pages
14671467
// only need to do this if we are not processing the whole pass in one subpass
14681468
if (!subpass.single_subpass) {
14691469
thrust::for_each(rmm::exec_policy_nosync(_stream),
@@ -1481,31 +1481,42 @@ void reader::impl::preprocess_subpass_pages(read_mode mode, size_t chunk_read_li
14811481
// able to decode for this pass. we will have selected a set of pages for each column in the
14821482
// row group, but not every page will have the same number of rows. so, we can only read as many
14831483
// rows as the smallest batch (by column) we have decompressed.
1484-
size_t page_index = 0;
1485-
size_t max_row = std::numeric_limits<size_t>::max();
1484+
size_t first_page_index = 0;
1485+
size_t max_row = std::numeric_limits<size_t>::max();
14861486
auto const last_pass_row =
14871487
_file_itm_data.input_pass_start_row_count[_file_itm_data._current_input_pass + 1];
1488+
// for each column
14881489
for (size_t idx = 0; idx < subpass.column_page_count.size(); idx++) {
1489-
auto const& last_page = subpass.pages[page_index + (subpass.column_page_count[idx] - 1)];
1490-
auto const& chunk = pass.chunks[last_page.chunk_idx];
1490+
// compute max row for this column in the subpass
1491+
auto const& last_page = subpass.pages[first_page_index + (subpass.column_page_count[idx] - 1)];
1492+
auto const& last_chunk = pass.chunks[last_page.chunk_idx];
1493+
auto max_col_row = static_cast<size_t>(last_chunk.start_row) +
1494+
static_cast<size_t>(last_page.chunk_row) +
1495+
static_cast<size_t>(last_page.num_rows);
14911496

1492-
size_t max_col_row =
1493-
static_cast<size_t>(chunk.start_row + last_page.chunk_row + last_page.num_rows);
14941497
// special case. list rows can span page boundaries, but we can't tell if that is happening
14951498
// here because we have not yet decoded the pages. the very last row starting in the page may
14961499
// not terminate in the page. to handle this, only decode up to the second to last row in the
14971500
// subpass since we know that will be safely completed.
1498-
bool const is_list = chunk.max_level[level_type::REPETITION] > 0;
1501+
bool const is_list = last_chunk.max_level[level_type::REPETITION] > 0;
1502+
// corner case: only decode up to the second-to-last row, except if this is the last page in the
1503+
// entire pass. this handles the case where we only have 1 chunk, 1 page, and potentially even
1504+
// just 1 row.
14991505
if (is_list && max_col_row < last_pass_row) {
1500-
auto const& first_page = subpass.pages[page_index];
1501-
size_t const min_col_row = static_cast<size_t>(chunk.start_row + first_page.chunk_row);
1506+
// compute min row for this column in the subpass
1507+
auto const& first_page = subpass.pages[first_page_index];
1508+
auto const& first_chunk = pass.chunks[first_page.chunk_idx];
1509+
auto const min_col_row =
1510+
static_cast<size_t>(first_chunk.start_row) + static_cast<size_t>(first_page.chunk_row);
1511+
1512+
// must have at least 2 rows in the subpass.
15021513
CUDF_EXPECTS((max_col_row - min_col_row) > 1, "Unexpected short subpass");
15031514
max_col_row--;
15041515
}
15051516

15061517
max_row = min(max_row, max_col_row);
15071518

1508-
page_index += subpass.column_page_count[idx];
1519+
first_page_index += subpass.column_page_count[idx];
15091520
}
15101521
subpass.skip_rows = pass.skip_rows + pass.processed_rows;
15111522
auto const pass_end = pass.skip_rows + pass.num_rows;

0 commit comments

Comments
 (0)