Skip to content

Commit e923efd

Browse files
authored
fix streamlookup keys join on unmatched rows (#8422)
1 parent 1b2baab commit e923efd

File tree

2 files changed

+59
-11
lines changed

2 files changed

+59
-11
lines changed

ydb/library/yql/dq/actors/input_transforms/dq_input_transform_lookup.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ class TInputTransformStreamLookupBase
111111
outputRowItems[i] = wideInputRow[index];
112112
break;
113113
case EOutputRowItemSource::LookupKey:
114-
outputRowItems[i] = lookupKey.GetElement(index);
114+
outputRowItems[i] = lookupPayload && *lookupPayload ? lookupKey.GetElement(index) : NUdf::TUnboxedValue {};
115115
break;
116116
case EOutputRowItemSource::LookupOther:
117117
if (lookupPayload && *lookupPayload) {

ydb/tests/fq/generic/test_streaming_join.py

Lines changed: 58 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from ydb.tests.tools.datastreams_helpers.test_yds_base import TestYdsBase
1111
from ydb.tests.fq.generic.utils.settings import Settings
1212

13+
DEBUG = 0
1314
TESTCASES = [
1415
# 0
1516
(
@@ -187,6 +188,7 @@
187188
$enriched = select e.id as id,
188189
$formatTime(DateTime::ParseIso8601(e.ts)) as ts,
189190
e.user as user_id,
191+
u.id as uid,
190192
u.name as name,
191193
u.age as age
192194
from
@@ -201,35 +203,80 @@
201203
[
202204
(
203205
'{"id":1,"ts":"20240701T113344","ev_type":"foo1","user":2}',
204-
'{"id":1,"ts":"11:33:44","user_id":2,"name":"Petr","age":25}',
206+
'{"id":1,"ts":"11:33:44","uid":2,"user_id":2,"name":"Petr","age":25}',
205207
),
206208
(
207209
'{"id":2,"ts":"20240701T112233","ev_type":"foo2","user":1}',
208-
'{"id":2,"ts":"11:22:33","user_id":1,"name":"Anya","age":15}',
210+
'{"id":2,"ts":"11:22:33","uid":1,"user_id":1,"name":"Anya","age":15}',
209211
),
210212
(
211213
'{"id":3,"ts":"20240701T113355","ev_type":"foo3","user":100}',
212-
'{"id":3,"ts":"11:33:55","user_id":100,"name":null,"age":null}',
214+
'{"id":3,"ts":"11:33:55","uid":null,"user_id":100,"name":null,"age":null}',
213215
),
214216
(
215217
'{"id":4,"ts":"20240701T113356","ev_type":"foo4","user":3}',
216-
'{"id":4,"ts":"11:33:56","user_id":3,"name":"Masha","age":17}',
218+
'{"id":4,"ts":"11:33:56","uid":3,"user_id":3,"name":"Masha","age":17}',
217219
),
218220
(
219221
'{"id":5,"ts":"20240701T113357","ev_type":"foo5","user":3}',
220-
'{"id":5,"ts":"11:33:57","user_id":3,"name":"Masha","age":17}',
222+
'{"id":5,"ts":"11:33:57","uid":3,"user_id":3,"name":"Masha","age":17}',
221223
),
222224
(
223225
'{"id":6,"ts":"20240701T112238","ev_type":"foo6","user":1}',
224-
'{"id":6,"ts":"11:22:38","user_id":1,"name":"Anya","age":15}',
226+
'{"id":6,"ts":"11:22:38","uid":1,"user_id":1,"name":"Anya","age":15}',
225227
),
226228
(
227229
'{"id":7,"ts":"20240701T113349","ev_type":"foo7","user":2}',
228-
'{"id":7,"ts":"11:33:49","user_id":2,"name":"Petr","age":25}',
230+
'{"id":7,"ts":"11:33:49","uid":2,"user_id":2,"name":"Petr","age":25}',
229231
),
230232
]
231233
* 1000,
232234
),
235+
# 5
236+
(
237+
R'''
238+
$input = SELECT * FROM myyds.`{input_topic}`
239+
WITH (
240+
FORMAT=json_each_row,
241+
SCHEMA (
242+
id Int32,
243+
ts String,
244+
ev_type String,
245+
user Int32,
246+
)
247+
) ;
248+
249+
$enriched = select e.id as id,
250+
e.user as user_id,
251+
u.id as uid
252+
from
253+
$input as e
254+
left join {streamlookup} ydb_conn_{table_name}.`users` as u
255+
on(e.user = u.id)
256+
;
257+
258+
insert into myyds.`{output_topic}`
259+
select Unwrap(Yson::SerializeJson(Yson::From(TableRow()))) from $enriched;
260+
''',
261+
[
262+
(
263+
'{"id":1,"ts":"20240701T113344","ev_type":"foo1","user":2}',
264+
'{"id":1,"uid":2,"user_id":2}',
265+
),
266+
(
267+
'{"id":2,"ts":"20240701T112233","ev_type":"foo2","user":1}',
268+
'{"id":2,"uid":1,"user_id":1}',
269+
),
270+
(
271+
'{"id":3,"ts":"20240701T113355","ev_type":"foo3","user":100}',
272+
'{"id":3,"uid":null,"user_id":100}',
273+
),
274+
(
275+
'{"id":4,"ts":"20240701T113356","ev_type":"foo4","user":3}',
276+
'{"id":4,"uid":3,"user_id":3}',
277+
),
278+
],
279+
),
233280
]
234281

235282

@@ -324,9 +371,10 @@ def test_streamlookup(
324371
offset += 500
325372

326373
read_data = self.read_stream(len(messages))
327-
print(streamlookup, testcase, file=sys.stderr)
328-
print(sql, file=sys.stderr)
329-
print(*zip(messages, read_data), file=sys.stderr, sep="\n")
374+
if DEBUG:
375+
print(streamlookup, testcase, file=sys.stderr)
376+
print(sql, file=sys.stderr)
377+
print(*zip(messages, read_data), file=sys.stderr, sep="\n")
330378
for r, exp in zip(read_data, messages):
331379
r = json.loads(r)
332380
exp = json.loads(exp[1])

0 commit comments

Comments
 (0)