Skip to content

Commit 36ce34b

Browse files
authored
feat: update to DuckDB 1.3.0 (#161)
1 parent ae71f52 commit 36ce34b

File tree

10 files changed

+213
-37
lines changed

10 files changed

+213
-37
lines changed

.github/workflows/c_tests.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,4 @@ jobs:
2626
run: make release_c_unit_test
2727

2828
- name: Run C unit tests
29-
# Disabled until iceberg can be loaded/installed again.
30-
if: false
3129
run: ./build/release/extension/substrait/test/c/test_substrait_exe

.github/workflows/distribution.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ jobs:
2626
name: Build extension binaries
2727
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
2828
with:
29-
duckdb_version: bbdc794b99a41eb5da97ba9e7f09c621eee6f300
30-
ci_tools_version: cca140d4cc47f3f3e40f29b49c305bd92845771f
29+
duckdb_version: 71c5c07cdd295e9409c0505885033ae9eb6b5ddd
30+
ci_tools_version: 71d20029c5314dfc34f3bbdab808b9bce03b8003
3131
exclude_archs: "wasm_mvp;wasm_eh;wasm_threads;windows_amd64;windows_amd64_mingw"
3232
extension_name: substrait
3333

@@ -37,8 +37,8 @@ jobs:
3737
uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@main
3838
secrets: inherit
3939
with:
40-
duckdb_version: bbdc794b99a41eb5da97ba9e7f09c621eee6f300
41-
ci_tools_version: cca140d4cc47f3f3e40f29b49c305bd92845771f
40+
duckdb_version: 71c5c07cdd295e9409c0505885033ae9eb6b5ddd
41+
ci_tools_version: 71d20029c5314dfc34f3bbdab808b9bce03b8003
4242
exclude_archs: "wasm_mvp;wasm_eh;wasm_threads;windows_amd64;windows_amd64_mingw"
4343
extension_name: substrait
4444
deploy_latest: true

.github/workflows/main_distribution.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ jobs:
2121
name: Build extension binaries
2222
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
2323
with:
24-
duckdb_version: bbdc794b99a41eb5da97ba9e7f09c621eee6f300
25-
ci_tools_version: cca140d4cc47f3f3e40f29b49c305bd92845771f
24+
duckdb_version: 71c5c07cdd295e9409c0505885033ae9eb6b5ddd
25+
ci_tools_version: 71d20029c5314dfc34f3bbdab808b9bce03b8003
2626
exclude_archs: "wasm_mvp;wasm_eh;wasm_threads;windows_amd64;windows_amd64_mingw;windows_amd64_rtools"
2727
extension_name: substrait
2828

duckdb

Submodule duckdb updated 1427 files

src/from_substrait.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -795,9 +795,10 @@ shared_ptr<Relation> SubstraitToDuckDB::TransformReadOp(const substrait::Rel &so
795795
if (snapshot_id <= 0 || snapshot_id == std::numeric_limits<int64_t>::max()) {
796796
throw InvalidInputException("Invalid snapshot id: " + sget.iceberg_table().direct().snapshot_id());
797797
}
798-
parameters.push_back(Value::UBIGINT(snapshot_id));
798+
named_parameters.emplace("snapshot_from_id", Value::UBIGINT(snapshot_id));
799799
} else if (sget.iceberg_table().direct().has_snapshot_timestamp()) {
800-
parameters.push_back(Value::TIMESTAMP(timestamp_t(sget.iceberg_table().direct().snapshot_timestamp())));
800+
named_parameters.emplace("snapshot_from_timestamp",
801+
Value::TIMESTAMP(timestamp_t(sget.iceberg_table().direct().snapshot_timestamp())));
801802
}
802803
shared_ptr<TableFunctionRelation> scan_rel;
803804
if (acquire_lock) {

src/include/to_substrait.hpp

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -113,20 +113,28 @@ class DuckDBToSubstrait {
113113
void TransformNotExpression(Expression &dexpr, substrait::Expression &sexpr, uint64_t col_offset);
114114
void TransformCaseExpression(Expression &dexpr, substrait::Expression &sexpr);
115115
void TransformInExpression(Expression &dexpr, substrait::Expression &sexpr);
116-
117116
//! Transforms a DuckDB Logical Type into a Substrait Type
118117
static substrait::Type DuckToSubstraitType(const LogicalType &type, BaseStatistics *column_statistics = nullptr,
119118
bool not_null = false);
120119

121120
//! Methods to transform DuckDB Filters to Substrait Expression
122-
substrait::Expression *TransformFilter(uint64_t col_idx, LogicalType &column_type, TableFilter &dfilter,
123-
LogicalType &return_type);
121+
substrait::Expression *TransformFilter(uint64_t col_idx, const LogicalType &column_type, const TableFilter &dfilter,
122+
const LogicalType &return_type);
124123
substrait::Expression *TransformIsNotNullFilter(uint64_t col_idx, const LogicalType &column_type,
125-
TableFilter &dfilter, const LogicalType &return_type);
126-
substrait::Expression *TransformConjuctionAndFilter(uint64_t col_idx, LogicalType &column_type,
127-
TableFilter &dfilter, LogicalType &return_type);
128-
substrait::Expression *TransformConstantComparisonFilter(uint64_t col_idx, const LogicalType &column_type,
129-
TableFilter &dfilter, const LogicalType &return_type);
124+
const TableFilter &dfilter, const LogicalType &return_type);
125+
substrait::Expression *TransformIsNullFilter(uint64_t col_idx, const LogicalType &column_type,
126+
const TableFilter &dfilter, const LogicalType &return_type);
127+
substrait::Expression *TransformConjunctionAndFilter(uint64_t col_idx, const LogicalType &column_type,
128+
const TableFilter &dfilter, const LogicalType &return_type);
129+
substrait::Expression *TransformConjunctionOrFilter(uint64_t col_idx, const LogicalType &column_type,
130+
const TableFilter &dfilter, const LogicalType &return_type);
131+
substrait::Expression *TransformConstantComparisonFilter(uint64_t col_idx, const LogicalType &column_type,
132+
const TableFilter &dfilter, const LogicalType &return_type);
133+
substrait::Expression *TransformStructExtractFilter(uint64_t col_idx, const LogicalType &column_type, const TableFilter &dfilter, const LogicalType &return_type);
134+
substrait::Expression *TransformExpressionFilter(uint64_t col_idx, const LogicalType &column_type, const TableFilter &dfilter, const LogicalType &return_type);
135+
substrait::Expression *TransformInFilter(uint64_t col_idx, const LogicalType &column_type, const TableFilter &dfilter, const LogicalType &return_type);
136+
substrait::Expression *TransformDynamicFilter(uint64_t col_idx, const LogicalType &column_type, const TableFilter &dfilter, const LogicalType &return_type);
137+
130138

131139
//! Transforms DuckDB Join Conditions to Substrait Expression
132140
substrait::Expression *TransformJoinCond(const JoinCondition &dcond, uint64_t left_ncol);
@@ -140,10 +148,14 @@ class DuckDBToSubstrait {
140148

141149
//! Creates a Conjunction
142150
template <typename T, typename FUNC>
143-
substrait::Expression *CreateConjunction(T &source, const FUNC f) {
151+
substrait::Expression *CreateConjunction(T &source, const FUNC f, const string &name = "and") {
144152
substrait::Expression *res = nullptr;
145153
for (auto &ele : source) {
146154
auto child_expression = f(ele);
155+
// Skip null expressions (filters that cannot be pushed down)
156+
if (!child_expression) {
157+
continue;
158+
}
147159
if (!res) {
148160
res = child_expression;
149161
} else {
@@ -155,7 +167,7 @@ class DuckDBToSubstrait {
155167
vector<::substrait::Type> args_types {DuckToSubstraitType(boolean_type),
156168
DuckToSubstraitType(boolean_type)};
157169

158-
scalar_fun->set_function_reference(RegisterFunction("and", args_types));
170+
scalar_fun->set_function_reference(RegisterFunction(name, args_types));
159171
*scalar_fun->mutable_output_type() = DuckToSubstraitType(boolean_type);
160172
AllocateFunctionArgument(scalar_fun, res);
161173
AllocateFunctionArgument(scalar_fun, child_expression);

src/to_substrait.cpp

Lines changed: 112 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,27 @@
11
#include "to_substrait.hpp"
22

3+
#include "duckdb/catalog/catalog_entry/duck_table_entry.hpp"
34
#include "duckdb/common/constants.hpp"
45
#include "duckdb/common/enums/expression_type.hpp"
56
#include "duckdb/common/types/value.hpp"
7+
#include "duckdb/execution/index/art/art_key.hpp"
68
#include "duckdb/function/table/table_scan.hpp"
9+
#include "duckdb/parser/constraints/not_null_constraint.hpp"
710
#include "duckdb/planner/expression/list.hpp"
811
#include "duckdb/planner/filter/conjunction_filter.hpp"
912
#include "duckdb/planner/filter/constant_filter.hpp"
13+
#include "duckdb/planner/filter/expression_filter.hpp"
14+
#include "duckdb/planner/filter/in_filter.hpp"
15+
#include "duckdb/planner/filter/dynamic_filter.hpp"
16+
#include "duckdb/planner/filter/struct_filter.hpp"
1017
#include "duckdb/planner/joinside.hpp"
1118
#include "duckdb/planner/operator/list.hpp"
19+
#include "duckdb/planner/operator/logical_set_operation.hpp"
1220
#include "duckdb/planner/table_filter.hpp"
1321
#include "duckdb/storage/statistics/base_statistics.hpp"
14-
#include "duckdb/catalog/catalog_entry/duck_table_entry.hpp"
15-
#include "duckdb/planner/operator/logical_set_operation.hpp"
1622
#include "google/protobuf/util/json_util.h"
1723
#include "substrait/algebra.pb.h"
1824
#include "substrait/plan.pb.h"
19-
#include "duckdb/parser/constraints/not_null_constraint.hpp"
20-
#include "duckdb/execution/index/art/art_key.hpp"
2125

2226
namespace duckdb {
2327
const std::unordered_map<std::string, std::string> DuckDBToSubstrait::function_names_remap = {
@@ -668,7 +672,7 @@ void DuckDBToSubstrait::DepthFirstNamesRecurse(vector<string> &names, const Logi
668672
}
669673

670674
substrait::Expression *DuckDBToSubstrait::TransformIsNotNullFilter(uint64_t col_idx, const LogicalType &column_type,
671-
TableFilter &dfilter,
675+
const TableFilter &dfilter,
672676
const LogicalType &return_type) {
673677
auto s_expr = new substrait::Expression();
674678
auto scalar_fun = s_expr->mutable_scalar_function();
@@ -683,17 +687,58 @@ substrait::Expression *DuckDBToSubstrait::TransformIsNotNullFilter(uint64_t col_
683687
return s_expr;
684688
}
685689

686-
substrait::Expression *DuckDBToSubstrait::TransformConjuctionAndFilter(uint64_t col_idx, LogicalType &column_type,
687-
TableFilter &dfilter, LogicalType &return_type) {
690+
//! Builds the Substrait scalar function `is_null(<column>)` for an IS NULL table filter.
//! Only the column index and the two types are read; `dfilter` is accepted to keep the
//! signature parallel with the other filter transforms.
//! Returns a heap-allocated expression that this function does not free.
substrait::Expression *DuckDBToSubstrait::TransformIsNullFilter(uint64_t col_idx, const LogicalType &column_type,
                                                                const TableFilter &dfilter,
                                                                const LogicalType &return_type) {
	auto is_null_expr = new substrait::Expression();
	auto is_null_fun = is_null_expr->mutable_scalar_function();

	// The function is registered with exactly one argument type: the type of the filtered column
	vector<substrait::Type> arg_types {DuckToSubstraitType(column_type)};
	const auto function_ref = RegisterFunction("is_null", arg_types);
	is_null_fun->set_function_reference(function_ref);

	// The single argument is a reference to the filtered column
	auto column_arg = is_null_fun->add_arguments();
	CreateFieldRef(column_arg->mutable_value(), col_idx);

	*is_null_fun->mutable_output_type() = DuckToSubstraitType(return_type);
	return is_null_expr;
}
705+
706+
//! Transforms a STRUCT_EXTRACT table filter by delegating to the transform of its child filter.
//! - column_type: type of the struct column; the child's type is looked up through struct_filter.child_idx
//! - dfilter: cast to StructFilter; its child_filter is the filter that gets transformed
//! - return_type: forwarded unchanged to the child filter transform
//! Returns whatever TransformFilter produces for the child filter, which can be nullptr
//! (TransformFilter returns nullptr for OPTIONAL_FILTER).
substrait::Expression *DuckDBToSubstrait::TransformStructExtractFilter(uint64_t col_idx,
                                                                       const LogicalType &column_type,
                                                                       const TableFilter &dfilter,
                                                                       const LogicalType &return_type) {
	auto &struct_filter = dfilter.Cast<StructFilter>();

	// A field-reference expression used to be heap-allocated here and then never attached to the
	// result or freed, so every call leaked it. It had no effect on the returned expression and
	// has been removed.
	//
	// NOTE(review): the child filter is transformed against column index 0 and neither col_idx nor
	// struct_filter.child_idx ends up in the produced expression. This looks like it references the
	// first root column instead of the struct member - confirm against CreateFieldRef and, if so,
	// emit a nested struct_field reference (col_idx -> child_idx) instead.
	return TransformFilter(0, StructType::GetChildType(column_type, struct_filter.child_idx),
	                       *struct_filter.child_filter, return_type);
}
722+
723+
//! Transforms a CONJUNCTION_AND table filter.
//! Each child filter is transformed against the same column, and the results are combined by
//! CreateConjunction using its default function name.
substrait::Expression *DuckDBToSubstrait::TransformConjunctionAndFilter(uint64_t col_idx,
                                                                        const LogicalType &column_type,
                                                                        const TableFilter &dfilter,
                                                                        const LogicalType &return_type) {
	// Applied to every child of the conjunction; column and types are shared by all children
	auto transform_child = [&](const unique_ptr<TableFilter> &child) {
		return TransformFilter(col_idx, column_type, *child, return_type);
	};

	auto &and_filter = dfilter.Cast<ConjunctionAndFilter>();
	return CreateConjunction(and_filter.child_filters, transform_child);
}
693730

731+
//! Transforms a CONJUNCTION_OR table filter.
//! Each child filter is transformed against the same column, and the results are combined by
//! CreateConjunction with the function name "or".
substrait::Expression *DuckDBToSubstrait::TransformConjunctionOrFilter(uint64_t col_idx,
                                                                       const LogicalType &column_type,
                                                                       const TableFilter &dfilter,
                                                                       const LogicalType &return_type) {
	// Applied to every child of the disjunction; column and types are shared by all children
	auto transform_child = [&](const unique_ptr<TableFilter> &child) {
		return TransformFilter(col_idx, column_type, *child, return_type);
	};

	auto &or_filter = dfilter.Cast<ConjunctionOrFilter>();
	return CreateConjunction(or_filter.child_filters, transform_child, "or");
}
738+
694739
substrait::Expression *DuckDBToSubstrait::TransformConstantComparisonFilter(uint64_t col_idx,
695740
const LogicalType &column_type,
696-
TableFilter &dfilter,
741+
const TableFilter &dfilter,
697742
const LogicalType &return_type) {
698743
auto s_expr = new substrait::Expression();
699744
auto s_scalar = s_expr->mutable_scalar_function();
@@ -737,19 +782,71 @@ substrait::Expression *DuckDBToSubstrait::TransformConstantComparisonFilter(uint
737782
return s_expr;
738783
}
739784

740-
substrait::Expression *DuckDBToSubstrait::TransformFilter(uint64_t col_idx, LogicalType &column_type,
741-
TableFilter &dfilter, LogicalType &return_type) {
785+
//! Transforms an IN table filter into a Substrait SingularOrList expression.
//! - col_idx: index of the filtered column; becomes the `value` of the list expression
//! - dfilter: cast to InFilter; each entry of its `values` becomes one option of the list
//! - column_type / return_type: not read by this transform
//! Returns a heap-allocated expression that this function does not free.
substrait::Expression *DuckDBToSubstrait::TransformInFilter(uint64_t col_idx, const LogicalType &column_type,
                                                            const TableFilter &dfilter,
                                                            const LogicalType &return_type) {
	// Resolve the concrete filter before anything is allocated
	auto &in_filter = dfilter.Cast<InFilter>();

	// Keep the expression in an owning pointer while it is being populated, so it is freed
	// automatically if one of the transforms below throws
	auto s_expr = make_uniq<substrait::Expression>();
	auto singular_or_list = s_expr->mutable_singular_or_list();

	// Set the input expression (the column being filtered)
	CreateFieldRef(singular_or_list->mutable_value(), col_idx);

	// Add the options (the values in the IN list)
	for (auto &constant_value : in_filter.values) {
		TransformConstant(constant_value, *singular_or_list->add_options());
	}

	// Hand the finished expression over as a raw pointer, as the other filter transforms do
	return s_expr.release();
}
802+
//! Transforms a DYNAMIC_FILTER table filter by transforming the filter stored in its filter_data
//! as a constant comparison.
//! Throws InternalException when either filter_data or the filter inside it is not set.
substrait::Expression *DuckDBToSubstrait::TransformDynamicFilter(uint64_t col_idx, const LogicalType &column_type,
                                                                 const TableFilter &dfilter,
                                                                 const LogicalType &return_type) {
	auto &dynamic_filter = dfilter.Cast<DynamicFilter>();

	// Both the data holder and the filter inside it must be present before dereferencing
	const bool has_inner_filter = dynamic_filter.filter_data && dynamic_filter.filter_data->filter;
	if (!has_inner_filter) {
		throw InternalException("Dynamic filter data or inner filter is null");
	}

	// The wrapped filter is handed straight to the constant-comparison transform
	auto &inner_filter = *dynamic_filter.filter_data->filter;
	return TransformConstantComparisonFilter(col_idx, column_type, inner_filter, return_type);
}
810+
811+
//! Transforms an EXPRESSION_FILTER table filter.
//! The filter is turned back into a bound expression over a reference to the filtered column,
//! and that expression is transformed with TransformExpr.
//! - col_idx / column_type: used to build the BoundReferenceExpression handed to ToExpression
//! - dfilter: cast to ExpressionFilter
//! - return_type: not read by this transform
//! Returns a heap-allocated expression that this function does not free.
substrait::Expression *DuckDBToSubstrait::TransformExpressionFilter(uint64_t col_idx,
                                                                    const LogicalType &column_type,
                                                                    const TableFilter &dfilter,
                                                                    const LogicalType &return_type) {
	// Resolve the concrete filter before anything is allocated
	auto &expr_filter = dfilter.Cast<ExpressionFilter>();

	// Create a proper column reference for the ToExpression method
	auto column_ref = make_uniq<BoundReferenceExpression>(column_type, col_idx);
	auto bound_expr = expr_filter.ToExpression(*column_ref);

	// Build the result under an owning pointer so it is freed if TransformExpr throws
	auto s_expr = make_uniq<substrait::Expression>();
	TransformExpr(*bound_expr, *s_expr);

	// Hand the finished expression over as a raw pointer, as the other filter transforms do
	return s_expr.release();
}
}
823+
824+
//! Dispatches a DuckDB table filter to the transform that matches its filter type.
//! Returns the Substrait expression produced by that transform, or nullptr for OPTIONAL_FILTER.
//! Throws NotImplementedException for any filter type without a transform.
substrait::Expression *DuckDBToSubstrait::TransformFilter(uint64_t col_idx, const LogicalType &column_type,
                                                          const TableFilter &dfilter,
                                                          const LogicalType &return_type) {
	// Stays nullptr for filter types that are deliberately not translated
	substrait::Expression *result = nullptr;

	switch (dfilter.filter_type) {
	// Null checks
	case TableFilterType::IS_NULL:
		result = TransformIsNullFilter(col_idx, column_type, dfilter, return_type);
		break;
	case TableFilterType::IS_NOT_NULL:
		result = TransformIsNotNullFilter(col_idx, column_type, dfilter, return_type);
		break;

	// Conjunctions over child filters
	case TableFilterType::CONJUNCTION_AND:
		result = TransformConjunctionAndFilter(col_idx, column_type, dfilter, return_type);
		break;
	case TableFilterType::CONJUNCTION_OR:
		result = TransformConjunctionOrFilter(col_idx, column_type, dfilter, return_type);
		break;

	// Value-based filters
	case TableFilterType::CONSTANT_COMPARISON:
		result = TransformConstantComparisonFilter(col_idx, column_type, dfilter, return_type);
		break;
	case TableFilterType::IN_FILTER:
		result = TransformInFilter(col_idx, column_type, dfilter, return_type);
		break;
	case TableFilterType::DYNAMIC_FILTER:
		result = TransformDynamicFilter(col_idx, column_type, dfilter, return_type);
		break;
	case TableFilterType::EXPRESSION_FILTER:
		result = TransformExpressionFilter(col_idx, column_type, dfilter, return_type);
		break;

	// Filters on struct members
	case TableFilterType::STRUCT_EXTRACT:
		result = TransformStructExtractFilter(col_idx, column_type, dfilter, return_type);
		break;

	// Optional filters produce no expression
	case TableFilterType::OPTIONAL_FILTER:
		break;

	default:
		throw NotImplementedException("Unsupported table filter type: %s",
		                              EnumUtil::ToString(dfilter.filter_type));
	}

	return result;
}
755852

test/c/test_substrait_c_api.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,9 @@ TEST_CASE_METHOD(DataDirectoryFixture, "Test C Function Varchar Literal", "[subs
291291
DuckDB db(nullptr);
292292
Connection con(db);
293293

294+
// Iceberg requires AVRO. Remove after AVRO auto-loads.
295+
REQUIRE_NO_FAIL(con.Query("INSTALL avro;"));
296+
REQUIRE_NO_FAIL(con.Query("LOAD avro;"));
294297
REQUIRE_NO_FAIL(con.Query("INSTALL iceberg;"));
295298
REQUIRE_NO_FAIL(con.Query("LOAD iceberg;"));
296299

@@ -397,6 +400,8 @@ TEST_CASE_METHOD(DataDirectoryFixture, "Test C Iceberg Substrait with Substrait
397400
DuckDB db(nullptr);
398401
Connection con(db);
399402

403+
REQUIRE_NO_FAIL(con.Query("INSTALL avro;"));
404+
REQUIRE_NO_FAIL(con.Query("LOAD avro;"));
400405
REQUIRE_NO_FAIL(con.Query("INSTALL iceberg;"));
401406
REQUIRE_NO_FAIL(con.Query("LOAD iceberg;"));
402407

@@ -476,6 +481,8 @@ TEST_CASE_METHOD(DataDirectoryFixture, "Test C Iceberg Substrait Snapshot ID wit
476481
DuckDB db(nullptr);
477482
Connection con(db);
478483

484+
REQUIRE_NO_FAIL(con.Query("INSTALL avro;"));
485+
REQUIRE_NO_FAIL(con.Query("LOAD avro;"));
479486
REQUIRE_NO_FAIL(con.Query("INSTALL iceberg;"));
480487
REQUIRE_NO_FAIL(con.Query("LOAD iceberg;"));
481488

@@ -556,6 +563,8 @@ TEST_CASE_METHOD(DataDirectoryFixture, "Test C Iceberg Substrait Snapshot Timest
556563
DuckDB db(nullptr);
557564
Connection con(db);
558565

566+
REQUIRE_NO_FAIL(con.Query("INSTALL avro;"));
567+
REQUIRE_NO_FAIL(con.Query("LOAD avro;"));
559568
REQUIRE_NO_FAIL(con.Query("INSTALL iceberg;"));
560569
REQUIRE_NO_FAIL(con.Query("LOAD iceberg;"));
561570

0 commit comments

Comments
 (0)