Skip to content

Commit 0cf6cba

Browse files
nepal and Kamil Khamitov
authored and committed
Rework equality filter over EquiJoin
commit_hash:10510b8bb813fc6d43f7269501390d20c65ab52f
1 parent e08d17f commit 0cf6cba

File tree

11 files changed

+760
-10
lines changed

11 files changed

+760
-10
lines changed

yql/essentials/core/common_opt/yql_flatmap_over_join.cpp

Lines changed: 386 additions & 10 deletions
Large diffs are not rendered by default.

yql/essentials/tests/sql/minirun/part0/canondata/result.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,20 @@
698698
"uri": "https://{canondata_backend}/995452/c22e7d9867fa56e2ee0c270fded39566aaa63a48/resource.tar.gz#test.test_flexible_types-group_by2-default.txt-Results_/results.txt"
699699
}
700700
],
701+
"test.test[join-eq_over_join_bad_rotate-default.txt-Debug]": [
702+
{
703+
"checksum": "75bc473eee49f48848cfac6902f99607",
704+
"size": 1784,
705+
"uri": "https://{canondata_backend}/1946324/fbb1b986a8af3f4a5932aa11a3d24263ba935543/resource.tar.gz#test.test_join-eq_over_join_bad_rotate-default.txt-Debug_/opt.yql"
706+
}
707+
],
708+
"test.test[join-eq_over_join_bad_rotate-default.txt-Results]": [
709+
{
710+
"checksum": "e8ae895d664f93e239570274b7b66d8d",
711+
"size": 7624,
712+
"uri": "https://{canondata_backend}/1946324/fbb1b986a8af3f4a5932aa11a3d24263ba935543/resource.tar.gz#test.test_join-eq_over_join_bad_rotate-default.txt-Results_/results.txt"
713+
}
714+
],
701715
"test.test[json-json_query/common_syntax-default.txt-Debug]": [
702716
{
703717
"checksum": "f9ce44fdf704adf735b1895d221411f0",

yql/essentials/tests/sql/minirun/part7/canondata/result.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -555,6 +555,20 @@
555555
"uri": "https://{canondata_backend}/1937429/ab4dd66771a60c21f698c24d93aafbe26098e494/resource.tar.gz#test.test_in-in_with_nulls_and_optionals_extra_ansi-default.txt-Results_/results.txt"
556556
}
557557
],
558+
"test.test[join-eq_over_join_basic-default.txt-Debug]": [
559+
{
560+
"checksum": "da1603d7d463a99c9b81d07ab562f3c7",
561+
"size": 1625,
562+
"uri": "https://{canondata_backend}/937458/0493b86f10f9e96f10e8955e2d365f91a1c2a439/resource.tar.gz#test.test_join-eq_over_join_basic-default.txt-Debug_/opt.yql"
563+
}
564+
],
565+
"test.test[join-eq_over_join_basic-default.txt-Results]": [
566+
{
567+
"checksum": "ce622e48816b3c6eb5f8255906683831",
568+
"size": 5679,
569+
"uri": "https://{canondata_backend}/937458/0493b86f10f9e96f10e8955e2d365f91a1c2a439/resource.tar.gz#test.test_join-eq_over_join_basic-default.txt-Results_/results.txt"
570+
}
571+
],
558572
"test.test[join-inmem_with_set_key-default.txt-Debug]": [
559573
{
560574
"checksum": "847a008013add2841f187dbf17a8dc0f",

yql/essentials/tests/sql/minirun/part9/canondata/result.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,20 @@
751751
"uri": "https://{canondata_backend}/1942173/f37f53f2bdf2e81f0ffdb8cf146faecaff60e7af/resource.tar.gz#test.test_in-large_in_YQL-19183--Results_/results.txt"
752752
}
753753
],
754+
"test.test[join-eq_over_join_same_keys-default.txt-Debug]": [
755+
{
756+
"checksum": "d333803a811e7a49abcfacfde8ce6a45",
757+
"size": 1023,
758+
"uri": "https://{canondata_backend}/1937424/b341ff273ddff0681036174e754c5aaffb3b2e18/resource.tar.gz#test.test_join-eq_over_join_same_keys-default.txt-Debug_/opt.yql"
759+
}
760+
],
761+
"test.test[join-eq_over_join_same_keys-default.txt-Results]": [
762+
{
763+
"checksum": "206c052c946dbbcaa9a7fb357139610a",
764+
"size": 2220,
765+
"uri": "https://{canondata_backend}/1942415/88908f3f1c2c172f76ce2e0c2ad414ba2d95573e/resource.tar.gz#test.test_join-eq_over_join_same_keys-default.txt-Results_/results.txt"
766+
}
767+
],
754768
"test.test[join-left_join_with_self_aggr-default.txt-Debug]": [
755769
{
756770
"checksum": "270003dd9cfab29ddd670dfc824c2915",

yql/essentials/tests/sql/sql2yql/canondata/result.json

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3793,6 +3793,27 @@
37933793
"uri": "https://{canondata_backend}/1942173/99e88108149e222741552e7e6cddef041d6a2846/resource.tar.gz#test_sql2yql.test_join-cross_join_with_lazy_list_/sql.yql"
37943794
}
37953795
],
3796+
"test_sql2yql.test[join-eq_over_join_bad_rotate]": [
3797+
{
3798+
"checksum": "47d27e638402a2dc26e78efb0e097629",
3799+
"size": 7441,
3800+
"uri": "https://{canondata_backend}/1946324/891082d4c661d16090f3d65c259aac9e885ad06c/resource.tar.gz#test_sql2yql.test_join-eq_over_join_bad_rotate_/sql.yql"
3801+
}
3802+
],
3803+
"test_sql2yql.test[join-eq_over_join_basic]": [
3804+
{
3805+
"checksum": "91b591c1976c48ea0a66269257975354",
3806+
"size": 5943,
3807+
"uri": "https://{canondata_backend}/937458/9c1b5511bc814fa8de5eebef1b58eb00227de013/resource.tar.gz#test_sql2yql.test_join-eq_over_join_basic_/sql.yql"
3808+
}
3809+
],
3810+
"test_sql2yql.test[join-eq_over_join_same_keys]": [
3811+
{
3812+
"checksum": "19517c88b9123deeb88f95937f20b46d",
3813+
"size": 3745,
3814+
"uri": "https://{canondata_backend}/1130705/cce783534f6c1bc1ef0dac74b138f4dd17bb6df8/resource.tar.gz#test_sql2yql.test_join-eq_over_join_same_keys_/sql.yql"
3815+
}
3816+
],
37963817
"test_sql2yql.test[join-inmem_by_uncomparable_structs]": [
37973818
{
37983819
"checksum": "800f3ffe362c85dc001eb5237220bfd7",
@@ -10003,6 +10024,21 @@
1000310024
"uri": "file://test_sql_format.test_join-cross_join_with_lazy_list_/formatted.sql"
1000410025
}
1000510026
],
10027+
"test_sql_format.test[join-eq_over_join_bad_rotate]": [
10028+
{
10029+
"uri": "file://test_sql_format.test_join-eq_over_join_bad_rotate_/formatted.sql"
10030+
}
10031+
],
10032+
"test_sql_format.test[join-eq_over_join_basic]": [
10033+
{
10034+
"uri": "file://test_sql_format.test_join-eq_over_join_basic_/formatted.sql"
10035+
}
10036+
],
10037+
"test_sql_format.test[join-eq_over_join_same_keys]": [
10038+
{
10039+
"uri": "file://test_sql_format.test_join-eq_over_join_same_keys_/formatted.sql"
10040+
}
10041+
],
1000610042
"test_sql_format.test[join-inmem_by_uncomparable_structs]": [
1000710043
{
1000810044
"uri": "file://test_sql_format.test_join-inmem_by_uncomparable_structs_/formatted.sql"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
PRAGMA config.flags('OptimizerFlags', 'EqualityFilterOverJoin');
2+
PRAGMA AnsiOptionalAs;
3+
4+
-- part of tpcds-6
5+
$item = (
6+
SELECT
7+
*
8+
FROM
9+
as_table([
10+
<|i_current_price: Just(1.0f), i_category: Just('aaa'), i_item_sk: Just(125l)|>,
11+
<|i_current_price: Just(2.0f), i_category: Just('bbb'), i_item_sk: Just(999l)|>,
12+
])
13+
);
14+
15+
$sub2 = (
16+
SELECT
17+
i_current_price,
18+
i_category
19+
FROM
20+
$item
21+
);
22+
23+
$customer_address = (
24+
SELECT
25+
*
26+
FROM
27+
as_table([
28+
<|ca_address_sk: Just(120l)|>,
29+
<|ca_address_sk: Just(150l)|>,
30+
])
31+
);
32+
33+
$customer = (
34+
SELECT
35+
*
36+
FROM
37+
as_table([
38+
<|c_current_addr_sk: Just(150l), c_customer_sk: Just(4l)|>,
39+
<|c_current_addr_sk: Just(120l), c_customer_sk: Just(2l)|>,
40+
])
41+
);
42+
43+
$store_sales = (
44+
SELECT
45+
*
46+
FROM
47+
as_table([
48+
<|ss_sold_date_sk: Just(1l), ss_customer_sk: Just(2l), ss_item_sk: Just(3l)|>,
49+
<|ss_sold_date_sk: Just(3l), ss_customer_sk: Just(4l), ss_item_sk: Just(5l)|>,
50+
])
51+
);
52+
53+
$date_dim = (
54+
SELECT
55+
*
56+
FROM
57+
as_table([
58+
<|d_date_sk: Just(1l)|>,
59+
<|d_date_sk: Just(2l)|>,
60+
])
61+
);
62+
63+
$item = (
64+
SELECT
65+
*
66+
FROM
67+
as_table([
68+
<|i_category: Just('aaa'), i_item_sk: Just(3l)|>,
69+
<|i_category: Just('bbb'), i_item_sk: Just(5l)|>,
70+
])
71+
);
72+
73+
SELECT
74+
JoinTableRow() cnt
75+
FROM
76+
$customer_address a
77+
CROSS JOIN
78+
$customer c
79+
CROSS JOIN
80+
$store_sales s
81+
CROSS JOIN
82+
$date_dim d
83+
CROSS JOIN
84+
$item i
85+
LEFT JOIN
86+
$sub2 AS j
87+
ON
88+
i.i_category == j.i_category
89+
WHERE
90+
s.ss_sold_date_sk == d.d_date_sk
91+
AND a.ca_address_sk == c.c_current_addr_sk
92+
AND c.c_customer_sk == s.ss_customer_sk
93+
AND s.ss_item_sk == i.i_item_sk
94+
;
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
PRAGMA config.flags('OptimizerFlags', 'EqualityFilterOverJoin');
2+
3+
$a = (
4+
SELECT
5+
*
6+
FROM
7+
as_table([
8+
<|x: Just(1), t: 1, u: 1, extra: 1|>,
9+
<|x: 1, t: 1, u: 5, extra: 2|>,
10+
])
11+
);
12+
13+
$b = (
14+
SELECT
15+
*
16+
FROM
17+
as_table([
18+
<|y: 1|>,
19+
<|y: 1|>,
20+
])
21+
);
22+
23+
$c = (
24+
SELECT
25+
*
26+
FROM
27+
as_table([
28+
<|z: 1|>,
29+
<|z: 1|>,
30+
])
31+
);
32+
33+
$d = (
34+
SELECT
35+
*
36+
FROM
37+
as_table([
38+
<|c: 2, d: 3|>,
39+
<|c: 3, d: 3|>,
40+
])
41+
);
42+
43+
SELECT
44+
*
45+
FROM (
46+
SELECT
47+
c.z AS cz,
48+
b.y AS by,
49+
a.u AS au,
50+
a.t AS at,
51+
a.x AS ax,
52+
d.c AS dc,
53+
d.d AS dd
54+
FROM
55+
$a AS a
56+
RIGHT JOIN
57+
$b AS b
58+
ON
59+
a.x == b.y
60+
CROSS JOIN
61+
$d AS d
62+
FULL JOIN
63+
$c AS c
64+
ON
65+
b.y == c.z
66+
)
67+
WHERE
68+
cz == at AND by == au AND ax == by AND dc == dd
69+
;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
PRAGMA config.flags('OptimizerFlags', 'EqualityFilterOverJoin');
2+
3+
$p = 1;
4+
5+
$simpleKey = (
6+
SELECT
7+
*
8+
FROM
9+
as_table([<|Key: Just(1), Value: 'qqq'|>, <|Key: Just(2), Value: 'aaa'|>])
10+
);
11+
12+
$complexKey = (
13+
SELECT
14+
*
15+
FROM
16+
as_table([<|Key: Just(2), Fk: 2, Value: 'zzz'|>, <|Key: Just(2), Fk: 3, Value: 'ttt'|>])
17+
);
18+
19+
SELECT
20+
l.Key,
21+
l.Fk,
22+
l.Value,
23+
r.Key,
24+
r.Value
25+
FROM
26+
$simpleKey AS r
27+
INNER JOIN
28+
$complexKey AS l
29+
ON
30+
l.Fk == r.Key
31+
WHERE
32+
l.Key == 1 + $p AND l.Key == l.Key
33+
ORDER BY
34+
r.Value
35+
;
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
pragma config.flags("OptimizerFlags", "EqualityFilterOverJoin");
2+
3+
pragma AnsiOptionalAs;
4+
5+
-- part of tpcds-6
6+
7+
$item = select * from as_table([
8+
<|i_current_price:Just(1.0f), i_category:Just("aaa"), i_item_sk:Just(125l)|>,
9+
<|i_current_price:Just(2.0f), i_category:Just("bbb"), i_item_sk:Just(999l)|>,
10+
]);
11+
12+
$sub2 = (select i_current_price, i_category from $item);
13+
14+
$customer_address = select * from as_table([
15+
<|ca_address_sk:Just(120l)|>,
16+
<|ca_address_sk:Just(150l)|>,
17+
]);
18+
19+
$customer = select * from as_table([
20+
<|c_current_addr_sk:Just(150l), c_customer_sk:Just(4l)|>,
21+
<|c_current_addr_sk:Just(120l), c_customer_sk:Just(2l)|>,
22+
]);
23+
24+
$store_sales = select * from as_table([
25+
<|ss_sold_date_sk:Just(1l), ss_customer_sk:Just(2l), ss_item_sk:Just(3l)|>,
26+
<|ss_sold_date_sk:Just(3l), ss_customer_sk:Just(4l), ss_item_sk:Just(5l)|>,
27+
]);
28+
29+
$date_dim = select * from as_table([
30+
<|d_date_sk:Just(1l)|>,
31+
<|d_date_sk:Just(2l)|>,
32+
]);
33+
34+
$item = select * from as_table([
35+
<|i_category:Just("aaa"), i_item_sk:Just(3l)|>,
36+
<|i_category:Just("bbb"), i_item_sk:Just(5l)|>,
37+
]);
38+
39+
select JoinTableRow() cnt
40+
from $customer_address a
41+
cross join $customer c
42+
cross join $store_sales s
43+
cross join $date_dim d
44+
cross join $item i
45+
left join $sub2 as j on i.i_category = j.i_category
46+
where
47+
s.ss_sold_date_sk = d.d_date_sk
48+
and a.ca_address_sk = c.c_current_addr_sk
49+
and c.c_customer_sk = s.ss_customer_sk
50+
and s.ss_item_sk = i.i_item_sk
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
pragma config.flags("OptimizerFlags", "EqualityFilterOverJoin");
2+
3+
$a = select * from as_table([
4+
<|x:Just(1), t:1, u:1, extra:1|>,
5+
<|x:1, t:1, u:5, extra:2|>,
6+
]);
7+
8+
$b = select * from as_table([
9+
<|y:1|>,
10+
<|y:1|>,
11+
]);
12+
13+
$c = select * from as_table([
14+
<|z:1|>,
15+
<|z:1|>,
16+
]);
17+
18+
$d = select * from as_table([
19+
<|c:2, d:3|>,
20+
<|c:3, d:3|>,
21+
]);
22+
23+
24+
select * from (
25+
select c.z as cz, b.y as by, a.u as au, a.t as at, a.x as ax, d.c as dc, d.d as dd from
26+
$a as a right join $b as b on a.x=b.y
27+
cross join $d as d
28+
full join $c as c on b.y = c.z
29+
)
30+
where cz = at and by = au and ax = by and dc = dd;

0 commit comments

Comments
 (0)