Skip to content

Commit e661cda

Browse files
authored
Fix ORDER BY missing column (#7240)
1 parent ce9e302 commit e661cda

File tree

9 files changed

+148
-131
lines changed

9 files changed

+148
-131
lines changed

ydb/library/yql/parser/pg_wrapper/postgresql/src/include/port/win32.h.orig

Lines changed: 0 additions & 98 deletions
This file was deleted.

ydb/library/yql/sql/v1/node.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1333,7 +1333,7 @@ bool MaybeAutogenerated(const TString& name) {
13331333

13341334
}
13351335

1336-
bool TColumns::IsColumnPossible(TContext& ctx, const TString& name) {
1336+
bool TColumns::IsColumnPossible(TContext& ctx, const TString& name) const {
13371337
if (All || Real.contains(name) || Artificial.contains(name)) {
13381338
return true;
13391339
}
@@ -1536,6 +1536,10 @@ bool TColumnNode::IsUseSourceAsColumn() const {
15361536
return UseSourceAsColumn;
15371537
}
15381538

1539+
bool TColumnNode::IsUseSource() const {
1540+
return UseSource;
1541+
}
1542+
15391543
bool TColumnNode::IsReliable() const {
15401544
return Reliable;
15411545
}

ydb/library/yql/sql/v1/node.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -723,7 +723,7 @@ namespace NSQLTranslationV1 {
723723
void Merge(const TColumns& columns);
724724
void SetPrefix(const TString& prefix);
725725
void SetAll();
726-
bool IsColumnPossible(TContext& ctx, const TString& column);
726+
bool IsColumnPossible(TContext& ctx, const TString& column) const;
727727
};
728728

729729
class TSortSpecification: public TSimpleRefCount<TSortSpecification> {
@@ -838,6 +838,7 @@ namespace NSQLTranslationV1 {
838838
void SetAsNotReliable();
839839
bool IsReliable() const;
840840
bool IsUseSourceAsColumn() const;
841+
bool IsUseSource() const;
841842
bool CanBeType() const;
842843

843844
private:

ydb/library/yql/sql/v1/select.cpp

Lines changed: 54 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1455,6 +1455,17 @@ class TCompositeSelect: public IRealSource {
14551455
TSet<TString> GroupingCols;
14561456
};
14571457

1458+
namespace {
1459+
TString FullColumnName(const TColumnNode& column) {
1460+
YQL_ENSURE(column.GetColumnName());
1461+
TString columnName = *column.GetColumnName();
1462+
if (column.IsUseSource()) {
1463+
columnName = DotJoin(*column.GetSourceName(), columnName);
1464+
}
1465+
return columnName;
1466+
}
1467+
}
1468+
14581469
/// \todo simplify class
14591470
class TSelectCore: public IRealSource, public IComposableSource {
14601471
public:
@@ -1848,42 +1859,54 @@ class TSelectCore: public IRealSource, public IComposableSource {
18481859
}
18491860

18501861
TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
1851-
if (OrderByInit && Source->GetJoin()) {
1852-
column.SetAsNotReliable();
1853-
auto maybeExist = IRealSource::AddColumn(ctx, column);
1854-
if (maybeExist && maybeExist.GetRef()) {
1855-
return true;
1856-
}
1857-
return Source->AddColumn(ctx, column);
1858-
}
1859-
1860-
if (OrderByInit && !Distinct && !GroupBy) {
1861-
bool reliable = column.IsReliable();
1862+
const bool aggregated = Source->HasAggregations() || Distinct;
1863+
if (OrderByInit && (Source->GetJoin() || !aggregated)) {
1864+
// ORDER BY will try to find column not only in projection items, but also in Source.
1865+
// ```SELECT a, b FROM T ORDER BY c``` should work if c is present in T
1866+
const bool reliable = column.IsReliable();
18621867
column.SetAsNotReliable();
18631868
auto maybeExist = IRealSource::AddColumn(ctx, column);
1864-
if (reliable) {
1869+
if (reliable && !Source->GetJoin()) {
18651870
column.ResetAsReliable();
18661871
}
1867-
if (maybeExist && maybeExist.GetRef()) {
1868-
return true;
1872+
if (!maybeExist || !maybeExist.GetRef()) {
1873+
maybeExist = Source->AddColumn(ctx, column);
18691874
}
1870-
1871-
auto maybeSourceExist = Source->AddColumn(ctx, column);
1872-
if (!maybeSourceExist.Defined()) {
1873-
return maybeSourceExist;
1875+
if (!maybeExist.Defined()) {
1876+
return maybeExist;
18741877
}
1875-
1876-
// order by references column which is missing in projection, but may exists in source
1877-
const auto columnName = column.GetColumnName();
1878-
if (columnName) {
1879-
ExtraSortColumns.emplace(*columnName, TNodePtr(&column));
1878+
if (!aggregated && column.GetColumnName() && IsMissingInProjection(ctx, column)) {
1879+
ExtraSortColumns[FullColumnName(column)] = &column;
18801880
}
1881-
return true;
1881+
return maybeExist;
18821882
}
18831883

18841884
return IRealSource::AddColumn(ctx, column);
18851885
}
18861886

1887+
bool IsMissingInProjection(TContext& ctx, const TColumnNode& column) const {
1888+
TString columnName = FullColumnName(column);
1889+
if (Columns.Real.contains(columnName) || Columns.Artificial.contains(columnName)) {
1890+
return false;
1891+
}
1892+
1893+
if (!Columns.IsColumnPossible(ctx, columnName)) {
1894+
return true;
1895+
}
1896+
1897+
for (auto without: Without) {
1898+
auto name = *without->GetColumnName();
1899+
if (Source && Source->GetJoin()) {
1900+
name = DotJoin(*without->GetSourceName(), name);
1901+
}
1902+
if (name == columnName) {
1903+
return true;
1904+
}
1905+
}
1906+
1907+
return false;
1908+
}
1909+
18871910
TNodePtr PrepareWithout(const TNodePtr& base) {
18881911
auto terms = base;
18891912
if (Without) {
@@ -2211,14 +2234,14 @@ class TSelectCore: public IRealSource, public IComposableSource {
22112234
++column;
22122235
++isNamedColumn;
22132236
}
2237+
}
22142238

2215-
for (const auto& [columnName, column]: ExtraSortColumns) {
2216-
auto body = Y();
2217-
body = L(body, Y("let", "res", column));
2218-
TPosition pos = column->GetPos();
2219-
auto projectItem = Y("SqlProjectItem", "projectCoreType", BuildQuotedAtom(pos, columnName), BuildLambda(pos, Y("row"), body, "res"));
2220-
sqlProjectArgs = L(sqlProjectArgs, projectItem);
2221-
}
2239+
for (const auto& [columnName, column]: ExtraSortColumns) {
2240+
auto body = Y();
2241+
body = L(body, Y("let", "res", column));
2242+
TPosition pos = column->GetPos();
2243+
auto projectItem = Y("SqlProjectItem", "projectCoreType", BuildQuotedAtom(pos, columnName), BuildLambda(pos, Y("row"), body, "res"));
2244+
sqlProjectArgs = L(sqlProjectArgs, projectItem);
22222245
}
22232246

22242247
auto block(Y(Y("let", "projectCoreType", Y("TypeOf", "core"))));

ydb/library/yql/sql/v1/sql_ut.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3315,6 +3315,10 @@ Y_UNIT_TEST_SUITE(SqlToYQLErrors) {
33153315
Y_UNIT_TEST(ErrorsInOrderByWhenColumnIsMissingInProjection) {
33163316
ExpectFailWithError("select subkey from (select 1 as subkey) order by key", "<main>:1:50: Error: Column key is not in source column set\n");
33173317
ExpectFailWithError("select subkey from plato.Input as a order by x.key", "<main>:1:46: Error: Unknown correlation name: x\n");
3318+
ExpectFailWithError("select distinct a, b from plato.Input order by c", "<main>:1:48: Error: Column c is not in source column set. Did you mean a?\n");
3319+
ExpectFailWithError("select count(*) as a from plato.Input order by c", "<main>:1:48: Error: Column c is not in source column set. Did you mean a?\n");
3320+
ExpectFailWithError("select count(*) as a, b, from plato.Input group by b order by c", "<main>:1:63: Error: Column c is not in source column set. Did you mean a?\n");
3321+
UNIT_ASSERT(SqlToYql("select a, b from plato.Input order by c").IsOk());
33183322
}
33193323

33203324
Y_UNIT_TEST(SelectAggregatedWhere) {

ydb/library/yql/tests/sql/sql2yql/canondata/result.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11535,6 +11535,13 @@
1153511535
"uri": "https://{canondata_backend}/1937429/114961b26476797b95763679349a6ea2e9f8cea8/resource.tar.gz#test_sql2yql.test_order_by-order_by_missing_project_column_/sql.yql"
1153611536
}
1153711537
],
11538+
"test_sql2yql.test[order_by-order_by_missing_project_column_join_types]": [
11539+
{
11540+
"checksum": "9ea8f0a0e4052e5ab683483361fd7533",
11541+
"size": 20772,
11542+
"uri": "https://{canondata_backend}/1781765/c04cbb08c79f2b2bfd260dde1ea706c5a9450568/resource.tar.gz#test_sql2yql.test_order_by-order_by_missing_project_column_join_types_/sql.yql"
11543+
}
11544+
],
1153811545
"test_sql2yql.test[order_by-order_by_mul_columns]": [
1153911546
{
1154011547
"checksum": "3e69e4405b6dff02d1127352fe46de57",
@@ -30925,6 +30932,13 @@
3092530932
"uri": "https://{canondata_backend}/1937429/114961b26476797b95763679349a6ea2e9f8cea8/resource.tar.gz#test_sql_format.test_order_by-order_by_missing_project_column_/formatted.sql"
3092630933
}
3092730934
],
30935+
"test_sql_format.test[order_by-order_by_missing_project_column_join_types]": [
30936+
{
30937+
"checksum": "a56231421da19446c15656d1b5f6b9fc",
30938+
"size": 2115,
30939+
"uri": "https://{canondata_backend}/1781765/c04cbb08c79f2b2bfd260dde1ea706c5a9450568/resource.tar.gz#test_sql_format.test_order_by-order_by_missing_project_column_join_types_/formatted.sql"
30940+
}
30941+
],
3092830942
"test_sql_format.test[order_by-order_by_mul_columns]": [
3092930943
{
3093030944
"checksum": "ecd62b80c49799026e8275d8c3daeaef",
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
providers yt
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/* syntax version 1 */
2+
/* postgres can not */
3+
/* dq can not */
4+
/* dqfile can not */
5+
/* yt can not */
6+
7+
$src = [
8+
<|a:5, b:50, date:500|>,
9+
<|a:4, b:40, date:400|>,
10+
<|a:3, b:30, date:300|>,
11+
<|a:2, b:20, date:200|>,
12+
<|a:1, b:10, date:100|>,
13+
];
14+
15+
$src1 = [
16+
<|e:5, f:50|>,
17+
<|e:4, f:40|>,
18+
<|e:3, f:30|>,
19+
<|e:2, f:20|>,
20+
<|e:1, f:10|>,
21+
];
22+
23+
24+
$src = select * from as_table($src);
25+
$src1 = select * from as_table($src1);
26+
27+
select a, b from $src order by date + 1;
28+
select x.a, b from $src as x order by x.date + 1;
29+
30+
select * without b, a from $src order by date + 1;
31+
select * without b, a, date from $src order by date + 1;
32+
33+
select * without x.b, x.a from $src as x order by date + 1;
34+
select * without x.b, x.a, date from $src as x order by date + 1;
35+
36+
select a, b, x.* without b, a from $src as x order by date + 1;
37+
select a, b, x.* without b, a, x.date from $src as x order by date + 1;
38+
select a, b, x.* without b, a, x.date from $src as x order by x.date + 1;
39+
40+
select y.e, y.f from $src as x join $src1 as y on x.a = y.e order by x.date;
41+
select * without x.a, x.b, from $src as x join $src1 as y on x.a = y.e order by date;
42+
select x.* without x.date from $src as x join $src1 as y on x.a = y.e order by x.date;
43+
44+
select x.*, unwrap(x.date) as date, without x.a, x.date from $src as x order by date;
45+
select x.*, unwrap(x.date) as date, without x.a, x.date from $src as x join $src1 as y on x.a = y.e order by x.date;
46+
select x.*, unwrap(x.date) as date, without x.a, x.date from $src as x join $src1 as y on x.a = y.e order by date;
47+

ydb/library/yql/tests/sql/yt_native_file/part16/canondata/result.json

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1980,6 +1980,27 @@
19801980
"uri": "https://{canondata_backend}/937458/4ef5c22fa3a7e5fd4e0ef2192cee531d67d22319/resource.tar.gz#test.test_order_by-order_by_expr_mul_cols--Results_/results.txt"
19811981
}
19821982
],
1983+
"test.test[order_by-order_by_missing_project_column_join_types--Debug]": [
1984+
{
1985+
"checksum": "b6f3b88cce441fbd773a50ec73c14130",
1986+
"size": 3721,
1987+
"uri": "https://{canondata_backend}/1781765/6a39fa2f2df8e36985edf9da7373ed2eee1c9ce1/resource.tar.gz#test.test_order_by-order_by_missing_project_column_join_types--Debug_/opt.yql"
1988+
}
1989+
],
1990+
"test.test[order_by-order_by_missing_project_column_join_types--Plan]": [
1991+
{
1992+
"checksum": "b3ea53bbce5fdb1641500c3310d88f50",
1993+
"size": 6300,
1994+
"uri": "https://{canondata_backend}/1781765/6a39fa2f2df8e36985edf9da7373ed2eee1c9ce1/resource.tar.gz#test.test_order_by-order_by_missing_project_column_join_types--Plan_/plan.txt"
1995+
}
1996+
],
1997+
"test.test[order_by-order_by_missing_project_column_join_types--Results]": [
1998+
{
1999+
"checksum": "2954a28d160061ff35176059a3b2a682",
2000+
"size": 19140,
2001+
"uri": "https://{canondata_backend}/1781765/6a39fa2f2df8e36985edf9da7373ed2eee1c9ce1/resource.tar.gz#test.test_order_by-order_by_missing_project_column_join_types--Results_/results.txt"
2002+
}
2003+
],
19832004
"test.test[pg-cbo_pragma2-default.txt-Debug]": [
19842005
{
19852006
"checksum": "5472f386c7c0e0c9165fa37393053203",

0 commit comments

Comments
 (0)