Skip to content

Commit 800be49

Browse files
authored
Merge pull request #105 from pdet/read_without_projection
Accept table scan as top root node in from_substrait
2 parents 55922a3 + 98e5931 commit 800be49

File tree

5 files changed

+123
-33
lines changed

5 files changed

+123
-33
lines changed

duckdb

Submodule duckdb updated 2003 files

src/from_substrait.cpp

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,11 @@
2121

2222
#include "duckdb/parser/expression/comparison_expression.hpp"
2323

24-
#include "substrait/plan.pb.h"
25-
#include "google/protobuf/util/json_util.h"
2624
#include "duckdb/main/client_data.hpp"
25+
#include "google/protobuf/util/json_util.h"
26+
#include "substrait/plan.pb.h"
27+
28+
#include "duckdb/main/relation/table_relation.hpp"
2729

2830
namespace duckdb {
2931
const std::unordered_map<std::string, std::string> SubstraitToDuckDB::function_names_remap = {
@@ -661,21 +663,22 @@ int32_t SkipColumnNames(const LogicalType &type) {
661663
return columns_to_skip;
662664
}
663665

664-
Relation *GetProjectionRelation(Relation &relation, string &error) {
666+
Relation *GetProjectionOrTableRelation(Relation &relation, string &error) {
665667
error += RelationTypeToString(relation.type);
666668
switch (relation.type) {
669+
case RelationType::TABLE_RELATION:
667670
case RelationType::PROJECTION_RELATION:
668671
error += " -> ";
669672
return &relation;
670673
case RelationType::LIMIT_RELATION:
671674
error += " -> ";
672-
return GetProjectionRelation(*relation.Cast<LimitRelation>().child, error);
675+
return GetProjectionOrTableRelation(*relation.Cast<LimitRelation>().child, error);
673676
case RelationType::ORDER_RELATION:
674677
error += " -> ";
675-
return GetProjectionRelation(*relation.Cast<OrderRelation>().child, error);
678+
return GetProjectionOrTableRelation(*relation.Cast<OrderRelation>().child, error);
676679
case RelationType::SET_OPERATION_RELATION:
677680
error += " -> ";
678-
return GetProjectionRelation(*relation.Cast<SetOpRelation>().right, error);
681+
return GetProjectionOrTableRelation(*relation.Cast<SetOpRelation>().right, error);
679682
default:
680683
throw NotImplementedException(
681684
"Relation %s is not yet implemented as a possible root chain type of from_substrait function", error);
@@ -684,15 +687,20 @@ Relation *GetProjectionRelation(Relation &relation, string &error) {
684687

685688
shared_ptr<Relation> SubstraitToDuckDB::TransformRootOp(const substrait::RelRoot &sop) {
686689
vector<string> aliases;
687-
auto column_names = sop.names();
690+
const auto &column_names = sop.names();
688691
vector<unique_ptr<ParsedExpression>> expressions;
689692
int id = 1;
690693
auto child = TransformOp(sop.input());
691694
string error;
692-
auto first_projection = GetProjectionRelation(*child, error);
693-
auto &columns = first_projection->Cast<ProjectionRelation>().columns;
695+
auto first_projection_or_table = GetProjectionOrTableRelation(*child, error);
696+
vector<ColumnDefinition> *column_definitions;
697+
if (first_projection_or_table->type == RelationType::PROJECTION_RELATION) {
698+
column_definitions = &first_projection_or_table->Cast<ProjectionRelation>().columns;
699+
} else {
700+
column_definitions = &first_projection_or_table->Cast<TableRelation>().description->columns;
701+
}
694702
int32_t i = 0;
695-
for (auto &column : columns) {
703+
for (auto &column : *column_definitions) {
696704
aliases.push_back(column_names[i++]);
697705
auto column_type = column.GetType();
698706
i += SkipColumnNames(column.GetType());

src/substrait_extension.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,8 @@ static void ToSubFunction(ClientContext &context, TableFunctionInput &data_p, Da
163163
return;
164164
}
165165
auto new_conn = Connection(*context.db);
166+
// Disable the multiple-rows scalar subquery error for this connection; this can be removed once error(varchar) is implemented in Substrait
167+
new_conn.Query("SET scalar_subquery_error_on_multiple_rows=false;");
166168

167169
unique_ptr<LogicalOperator> query_plan;
168170
string serialized;
@@ -195,6 +197,8 @@ static void ToJsonFunction(ClientContext &context, TableFunctionInput &data_p, D
195197
return;
196198
}
197199
auto new_conn = Connection(*context.db);
200+
// Disable the multiple-rows scalar subquery error for this connection; this can be removed once error(varchar) is implemented in Substrait
201+
new_conn.Query("SET scalar_subquery_error_on_multiple_rows=false;");
198202

199203
unique_ptr<LogicalOperator> query_plan;
200204
string serialized;

test/sql/test_direct_scan.test

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# name: test/sql/test_direct_scan.test
2+
# description: Test that from_substrait accepts a plan whose root input is a direct table scan (read without a projection)
3+
# group: [sql]
4+
5+
require substrait
6+
7+
statement ok
8+
PRAGMA enable_verification
9+
10+
statement ok
11+
create table users (user_id varchar, name varchar, paid_for_service bool);
12+
13+
statement ok
14+
insert into users values ('1', 'Pedro', false);
15+
16+
statement ok
17+
CALL get_substrait('FROM users')
18+
19+
query III
20+
CALL from_substrait_json('{
21+
"relations": [
22+
{
23+
"root": {
24+
"input": {
25+
"read": {
26+
"common": {
27+
"direct": {}
28+
},
29+
"baseSchema": {
30+
"names": [
31+
"user_id",
32+
"name",
33+
"paid_for_service"
34+
],
35+
"struct": {
36+
"types": [
37+
{
38+
"string": {
39+
"nullability": "NULLABILITY_NULLABLE"
40+
}
41+
},
42+
{
43+
"string": {
44+
"nullability": "NULLABILITY_NULLABLE"
45+
}
46+
},
47+
{
48+
"bool": {
49+
"nullability": "NULLABILITY_NULLABLE"
50+
}
51+
}
52+
],
53+
"nullability": "NULLABILITY_REQUIRED"
54+
}
55+
},
56+
"namedTable": {
57+
"names": [
58+
"users"
59+
]
60+
}
61+
}
62+
},
63+
"names": [
64+
"user_id",
65+
"name",
66+
"paid_for_service"
67+
]
68+
}
69+
}
70+
],
71+
"version": {
72+
"minorNumber": 52,
73+
"producer": "spark-substrait-gateway"
74+
}
75+
}
76+
')
77+
----
78+
1 Pedro false

0 commit comments

Comments
 (0)