Skip to content

Commit 00cd766

Browse files
authored
Add C++ tests for different compression implementations (#18690)
Add tests that are parameterized on the compression codec and the implementation (nvCOMP, internal kernel, host) to ensure all available implementations are tested for all supported codecs. More tests will be added as issues with non-nvCOMP implementations are fixed. Also: tests uncovered another bug in the Parquet writer; fixed it in this PR. Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - David Wendt (https://github.com/davidwendt) - MithunR (https://github.com/mythrocks) URL: #18690
1 parent e0ab818 commit 00cd766

File tree

8 files changed

+405
-105
lines changed

8 files changed

+405
-105
lines changed

cpp/src/io/parquet/page_enc.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -689,6 +689,7 @@ CUDF_KERNEL void __launch_bounds__(128)
689689
util::round_up_unsafe(page_g.max_hdr_size + page_g.max_data_size, page_align);
690690
if (not comp_page_sizes.empty()) {
691691
comp_page_offset += page_g.max_hdr_size + comp_page_sizes[ck_g.first_page];
692+
page_g.comp_data_size = comp_page_sizes[ck_g.first_page + num_pages];
692693
}
693694
page_headers_size += page_g.max_hdr_size;
694695
max_page_data_size = max(max_page_data_size, page_g.max_data_size);

cpp/tests/io/compression_common.hpp

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
/*
2+
* Copyright (c) 2025, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#pragma once
18+
19+
#include <cudf/io/types.hpp>
#include <cudf/utilities/error.hpp>

#include <gtest/gtest.h>

#include <cstdlib>
#include <list>
#include <optional>
#include <stdexcept>
#include <string>
#include <tuple>
#include <utility>
28+
29+
/**
 * @brief Scoped override of an environment variable.
 *
 * The constructor records the variable's current value (when it is set) and then
 * overwrites it with the requested value. The destructor puts the recorded value
 * back, or unsets the variable if it was not set at construction time.
 *
 * Copy and move operations are deleted, so every override is restored exactly once.
 */
class tmp_env_var {
 public:
  /**
   * @brief Overrides the environment variable `name` with `value`.
   *
   * @param name Name of the environment variable to override
   * @param value Value the variable holds while this object is alive
   * @throws std::runtime_error if `setenv` reports a failure
   */
  explicit tmp_env_var(std::string name, std::string const& value) : name_(std::move(name))
  {
    // Copy the old value before calling setenv; POSIX allows setenv to invalidate the
    // pointer returned by getenv.
    if (auto const* previous_value = std::getenv(name_.c_str()); previous_value != nullptr) {
      previous_value_ = std::string(previous_value);
    }

    // Fail loudly: a silently ignored failure would leave the test running with the
    // wrong configuration.
    if (setenv(name_.c_str(), value.c_str(), 1) != 0) {
      throw std::runtime_error("Failed to set environment variable " + name_);
    }
  }

  tmp_env_var(tmp_env_var const&)            = delete;
  tmp_env_var& operator=(tmp_env_var const&) = delete;
  tmp_env_var(tmp_env_var&&)                 = delete;
  tmp_env_var& operator=(tmp_env_var&&)      = delete;

  /**
   * @brief Restores the variable to the state recorded by the constructor.
   */
  ~tmp_env_var()
  {
    // Errors are intentionally not reported here: a destructor must not throw.
    if (previous_value_.has_value()) {
      setenv(name_.c_str(), previous_value_->c_str(), 1);
    } else {
      unsetenv(name_.c_str());
    }
  }

 private:
  std::string name_;                           ///< Name of the overridden variable
  std::optional<std::string> previous_value_;  ///< Value before the override; empty if unset
};
57+
58+
// Names of the environment variables that the compression/decompression test fixtures
// in this header override through tmp_env_var.
// Declared `inline` so that all translation units including this header share a single
// definition; the fixture templates bind references to these objects.
inline constexpr char const* host_comp_env_var     = "LIBCUDF_HOST_COMPRESSION";
inline constexpr char const* host_decomp_env_var   = "LIBCUDF_HOST_DECOMPRESSION";
inline constexpr char const* nvcomp_policy_env_var = "LIBCUDF_NVCOMP_POLICY";
61+
62+
template <typename Base>
63+
struct CompressionTest
64+
: public Base,
65+
public ::testing::WithParamInterface<std::tuple<std::string, cudf::io::compression_type>> {
66+
CompressionTest()
67+
{
68+
auto const comp_impl = std::get<0>(GetParam());
69+
70+
if (comp_impl == "NVCOMP") {
71+
env_vars.emplace_back(host_comp_env_var, "OFF");
72+
env_vars.emplace_back(nvcomp_policy_env_var, "ALWAYS");
73+
} else if (comp_impl == "DEVICE_INTERNAL") {
74+
env_vars.emplace_back(host_comp_env_var, "OFF");
75+
env_vars.emplace_back(nvcomp_policy_env_var, "OFF");
76+
} else if (comp_impl == "HOST") {
77+
env_vars.emplace_back(host_comp_env_var, "ON");
78+
} else {
79+
CUDF_FAIL("Invalid test parameter");
80+
}
81+
}
82+
83+
private:
84+
std::list<tmp_env_var> env_vars;
85+
};
86+
87+
template <typename Base>
88+
struct DecompressionTest
89+
: public Base,
90+
public ::testing::WithParamInterface<std::tuple<std::string, cudf::io::compression_type>> {
91+
DecompressionTest()
92+
{
93+
auto const comp_impl = std::get<0>(GetParam());
94+
95+
if (comp_impl == "NVCOMP") {
96+
env_vars.emplace_back(host_decomp_env_var, "OFF");
97+
env_vars.emplace_back(nvcomp_policy_env_var, "ALWAYS");
98+
} else if (comp_impl == "DEVICE_INTERNAL") {
99+
env_vars.emplace_back(host_decomp_env_var, "OFF");
100+
env_vars.emplace_back(nvcomp_policy_env_var, "OFF");
101+
} else if (comp_impl == "HOST") {
102+
env_vars.emplace_back(host_decomp_env_var, "ON");
103+
} else {
104+
CUDF_FAIL("Invalid test parameter");
105+
}
106+
}
107+
108+
private:
109+
std::list<tmp_env_var> env_vars;
110+
};

cpp/tests/io/orc_chunked_reader_test.cu

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -14,6 +14,8 @@
1414
* limitations under the License.
1515
*/
1616

17+
#include "compression_common.hpp"
18+
1719
#include <cudf_test/base_fixture.hpp>
1820
#include <cudf_test/column_utilities.hpp>
1921
#include <cudf_test/column_wrapper.hpp>
@@ -161,6 +163,8 @@ auto chunked_read(std::string const& filepath,
161163

162164
// Fixture for the ORC chunked reader tests; adds nothing on top of cudf::test::BaseFixture.
struct OrcChunkedReaderTest : public cudf::test::BaseFixture {};
163165

166+
// ORC chunked reader fixture wrapped in DecompressionTest (from compression_common.hpp), which
// parameterizes tests on an (implementation name, compression type) tuple.
using OrcChunkedDecompressionTest = DecompressionTest<OrcChunkedReaderTest>;
167+
164168
TEST_F(OrcChunkedReaderTest, TestChunkedReadNoData)
165169
{
166170
std::vector<std::unique_ptr<cudf::column>> input_columns;
@@ -1477,3 +1481,62 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow)
14771481

14781482
#endif // LOCAL_TEST
14791483
}
1484+
1485+
// Writes a two-column table to an ORC file using the parameterized compression type, then
// checks that chunked reads with three different input limits each reproduce the table.
TEST_P(OrcChunkedDecompressionTest, RoundTripBasic)
{
  auto const codec    = std::get<1>(GetParam());
  auto const num_rows = 12'345;

  // Source values: the row index divided by four
  auto quarter_iter = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i / 4; });

  std::vector<std::unique_ptr<cudf::column>> columns;
  columns.emplace_back(int32s_col(quarter_iter, quarter_iter + num_rows).release());
  columns.emplace_back(int64s_col(quarter_iter, quarter_iter + num_rows).release());
  auto expected = std::make_unique<cudf::table>(std::move(columns));

  auto const filepath = temp_env->get_temp_filepath("chunked_read_compressions.orc");
  auto const writer_options =
    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, *expected)
      .compression(codec)
      .stripe_size_rows(2'000)
      .row_index_stride(1'000)
      .build();
  cudf::io::write_orc(writer_options);

  // Each read is kept in its own scope so a failure points at the specific input limit
  {
    auto const [result, num_chunks] =
      chunked_read(filepath, output_limit{0}, input_limit{2'400'000});
    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
  }

  {
    auto const [result, num_chunks] =
      chunked_read(filepath, output_limit{0}, input_limit{240'000});
    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
  }

  {
    auto const [result, num_chunks] =
      chunked_read(filepath, output_limit{0}, input_limit{24'000});
    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
  }
}
1522+
1523+
// Instantiates the decompression tests with the "NVCOMP" implementation parameter for each
// listed compression type. Uses INSTANTIATE_TEST_SUITE_P, the current GoogleTest spelling of
// the deprecated INSTANTIATE_TEST_CASE_P.
INSTANTIATE_TEST_SUITE_P(Nvcomp,
                         OrcChunkedDecompressionTest,
                         ::testing::Combine(::testing::Values("NVCOMP"),
                                            ::testing::Values(cudf::io::compression_type::AUTO,
                                                              cudf::io::compression_type::SNAPPY,
                                                              cudf::io::compression_type::LZ4,
                                                              cudf::io::compression_type::ZSTD)));
1530+
1531+
// Instantiates the decompression tests with the "DEVICE_INTERNAL" implementation parameter for
// each listed compression type. Uses INSTANTIATE_TEST_SUITE_P, the current GoogleTest spelling
// of the deprecated INSTANTIATE_TEST_CASE_P.
INSTANTIATE_TEST_SUITE_P(DeviceInternal,
                         OrcChunkedDecompressionTest,
                         ::testing::Combine(::testing::Values("DEVICE_INTERNAL"),
                                            ::testing::Values(cudf::io::compression_type::AUTO,
                                                              cudf::io::compression_type::SNAPPY)));
1536+
1537+
// Instantiates the decompression tests with the "HOST" implementation parameter for each
// listed compression type. Uses INSTANTIATE_TEST_SUITE_P, the current GoogleTest spelling of
// the deprecated INSTANTIATE_TEST_CASE_P.
INSTANTIATE_TEST_SUITE_P(Host,
                         OrcChunkedDecompressionTest,
                         ::testing::Combine(::testing::Values("HOST"),
                                            ::testing::Values(cudf::io::compression_type::AUTO,
                                                              cudf::io::compression_type::SNAPPY,
                                                              cudf::io::compression_type::ZSTD)));

0 commit comments

Comments
 (0)