Skip to content

Commit d44f3d4

Browse files
committed
YQL-19715 optional int support
commit_hash:31b59370a1423bb42b78161908b85ef57b800e5c
1 parent 5bd08ba commit d44f3d4

File tree

9 files changed

+192
-45
lines changed

9 files changed

+192
-45
lines changed

yt/yql/providers/yt/gateway/native/yql_yt_native.cpp

Lines changed: 111 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
#include <util/stream/str.h>
6565
#include <util/stream/input.h>
6666
#include <util/stream/file.h>
67+
#include <util/string/type.h>
6768
#include <util/system/execpath.h>
6869
#include <util/system/guard.h>
6970
#include <util/system/shellcommand.h>
@@ -214,46 +215,50 @@ TString DebugPath(NYT::TRichYPath path) {
214215
return NYT::NodeToCanonicalYsonString(NYT::PathToNode(path), NYT::NYson::EYsonFormat::Text) + " (" + std::to_string(numColumns) + " columns)";
215216
}
216217

217-
void GetIntegerConstraints(const TExprNode::TPtr& column, bool& isSigned, ui64& minValueAbs, ui64& maxValueAbs) {
218-
EDataSlot toType = column->GetTypeAnn()->Cast<TDataExprType>()->GetSlot();
218+
void GetIntegerConstraints(const TExprNode::TPtr& column, bool& isSigned, ui64& minValueAbs, ui64& maxValueAbs, bool& isOptional) {
219+
const TDataExprType* dataType = nullptr;
220+
const bool columnHasDataType = IsDataOrOptionalOfData(column->GetTypeAnn(), isOptional, dataType);
221+
YQL_ENSURE(columnHasDataType, "YtQLFilter: unsupported type of column " << column->Dump());
222+
YQL_ENSURE(dataType);
223+
const EDataSlot dataSlot = dataType->Cast<TDataExprType>()->GetSlot();
219224

220-
// AllowIntegralConversion (may consider some refactoring)
221-
if (toType == EDataSlot::Uint8) {
225+
// looks like AllowIntegralConversion (may consider some refactoring)
226+
if (dataSlot == EDataSlot::Uint8) {
222227
isSigned = false;
223228
minValueAbs = 0;
224229
maxValueAbs = Max<ui8>();
225230
}
226-
else if (toType == EDataSlot::Uint16) {
231+
else if (dataSlot == EDataSlot::Uint16) {
227232
isSigned = false;
228233
minValueAbs = 0;
229234
maxValueAbs = Max<ui16>();
230235
}
231-
else if (toType == EDataSlot::Uint32) {
236+
else if (dataSlot == EDataSlot::Uint32) {
232237
isSigned = false;
233238
minValueAbs = 0;
234239
maxValueAbs = Max<ui32>();
235240
}
236-
else if (toType == EDataSlot::Uint64) {
241+
else if (dataSlot == EDataSlot::Uint64) {
237242
isSigned = false;
238243
minValueAbs = 0;
239244
maxValueAbs = Max<ui64>();
240245
}
241-
else if (toType == EDataSlot::Int8) {
246+
else if (dataSlot == EDataSlot::Int8) {
242247
isSigned = true;
243248
minValueAbs = (ui64)Max<i8>() + 1;
244249
maxValueAbs = (ui64)Max<i8>();
245250
}
246-
else if (toType == EDataSlot::Int16) {
251+
else if (dataSlot == EDataSlot::Int16) {
247252
isSigned = true;
248253
minValueAbs = (ui64)Max<i16>() + 1;
249254
maxValueAbs = (ui64)Max<i16>();
250255
}
251-
else if (toType == EDataSlot::Int32) {
256+
else if (dataSlot == EDataSlot::Int32) {
252257
isSigned = true;
253258
minValueAbs = (ui64)Max<i32>() + 1;
254259
maxValueAbs = (ui64)Max<i32>();
255260
}
256-
else if (toType == EDataSlot::Int64) {
261+
else if (dataSlot == EDataSlot::Int64) {
257262
isSigned = true;
258263
minValueAbs = (ui64)Max<i64>() + 1;
259264
maxValueAbs = (ui64)Max<i64>();
@@ -286,51 +291,102 @@ void ConvertComparisonForQL(const TStringBuf& opName, TStringBuilder& result) {
286291
}
287292
}
288293

289-
void GenerateInputQueryIntegerComparison(const TStringBuf& opName, const TExprNode::TPtr& intColumn, const TExprNode::TPtr& intValue, TStringBuilder& result) {
294+
// Appends to `result` the QL text for the comparison `intColumn <opName> intValue`.
//
// opName    - comparison operator; the code below tests it against "<", "<=", ">", ">=" and "==".
// intColumn - column node; its child 1 is read as the column name.
// intValue  - the constant side: a Null/Nothing node, an integral constructor, or Just(...) wrapping one.
// nullValue - when set, the boolean the comparison must yield if NULL is involved;
//             when empty, NULL constants and optional columns are rejected via YQL_ENSURE.
// result    - output builder the generated text is appended to.
void GenerateInputQueryIntegerComparison(const TStringBuf& opName, const TExprNode::TPtr& intColumn, const TExprNode::TPtr& intValue, const std::optional<bool>& nullValue, TStringBuilder& result) {
// A Null/Nothing constant makes the whole comparison collapse to the null default.
295+
if (TMaybeNode<TCoNull>(intValue) || TMaybeNode<TCoNothing>(intValue)) {
296+
YQL_ENSURE(nullValue.has_value(), "YtQLFilter: optional type without coalesce is not supported");
297+
if (nullValue.value()) {
298+
result << "TRUE";
299+
} else {
300+
result << "FALSE";
301+
}
302+
return;
303+
}
304+
// Unwrap Just(...) so that the integral constructor itself is inspected.
305+
TMaybeNode<TCoIntegralCtor> maybeIntValue;
306+
if (auto maybeJustValue = TMaybeNode<TCoJust>(intValue)) {
307+
maybeIntValue = TMaybeNode<TCoIntegralCtor>(maybeJustValue.Cast().Input().Ptr());
308+
} else {
309+
maybeIntValue = TMaybeNode<TCoIntegralCtor>(intValue);
310+
}
311+
YQL_ENSURE(maybeIntValue);
312+
// NOTE(review): columnsIsSigned is filled by GetIntegerConstraints but is not read afterwards in this function.
290313
bool columnsIsSigned;
291314
ui64 minValueAbs;
292315
ui64 maxValueAbs;
293-
GetIntegerConstraints(intColumn, columnsIsSigned, minValueAbs, maxValueAbs);
316+
bool columnIsOptional;
317+
GetIntegerConstraints(intColumn, columnsIsSigned, minValueAbs, maxValueAbs, columnIsOptional);
// NOTE(review): the condition below is logically the same as
// `!columnIsOptional || nullValue.has_value()`; mixing && inside || without parentheses
// may trigger a parentheses warning on some compilers - consider simplifying.
// Every later nullValue.value() call is guarded by columnIsOptional and relies on this check.
318+
YQL_ENSURE(!columnIsOptional || columnIsOptional && nullValue.has_value(), "YtQLFilter: optional type without coalesce is not supported");
294319

295-
const auto maybeInt = TMaybeNode<TCoIntegralCtor>(intValue);
296-
YQL_ENSURE(maybeInt);
// presumably ExtractIntegralValue splits the literal into a sign flag and an absolute value - TODO confirm.
// NOTE(review): isSigned is filled here but is not read afterwards in this function.
297320
bool hasSign;
298321
bool isSigned;
299322
ui64 valueAbs;
300-
ExtractIntegralValue(maybeInt.Ref(), false, hasSign, isSigned, valueAbs);
323+
ExtractIntegralValue(maybeIntValue.Ref(), false, hasSign, isSigned, valueAbs);
301324

// constantFilter stays empty when the value fits the column range; otherwise it holds
// the constant outcome of the comparison for non-NULL column values.
325+
std::optional<bool> constantFilter;
302326
if (!hasSign && valueAbs > maxValueAbs) {
303-
// value is greater than maximum
327+
// Value is greater than maximum.
304328
if (opName == ">" || opName == ">=" || opName == "==") {
305-
result << "FALSE";
329+
constantFilter = false;
306330
} else {
307-
result << "TRUE";
331+
constantFilter = true;
308332
}
309333
} else if (hasSign && valueAbs > minValueAbs) {
310-
// value is less than minimum
334+
// Value is less than minimum.
311335
if (opName == "<" || opName == "<=" || opName == "==") {
312-
result << "FALSE";
336+
constantFilter = false;
313337
} else {
314-
result << "TRUE";
338+
constantFilter = true;
339+
}
340+
}
341+
342+
const auto columnName = intColumn->ChildPtr(1)->Content();
343+
if (!constantFilter.has_value()) {
344+
// Value is in the range, comparison is not constant.
// For an optional column a NULL guard is prepended so that NULL rows follow nullValue.
// NOTE(review): "==" and "!=" both count as !isLess here. For "!=" with a FALSE null
// default no guard is emitted - confirm how QL evaluates `x != value` when x is NULL.
// NOTE(review): this path emits "= NULL" / "!= NULL" while the constant paths below emit
// "IS NULL" / "IS NOT NULL" - confirm both spellings are accepted and equivalent in QL.
345+
if (columnIsOptional) {
346+
const bool isLess = opName == "<" || opName == "<=";
347+
if (isLess && !nullValue.value()) {
348+
// QL will handle 'x [operation] NULL' as TRUE here, but we need FALSE.
349+
QuoteColumnForQL(columnName, result);
350+
result << " != NULL AND ";
351+
} else if (!isLess && nullValue.value()) {
352+
// QL will handle 'x [operation] NULL' as FALSE here, but we need TRUE.
353+
QuoteColumnForQL(columnName, result);
354+
result << " = NULL OR ";
355+
}
315356
}
316-
} else {
317-
// value is in the range
318-
const auto columnName = intColumn->ChildPtr(1)->Content();
319-
const auto valueStr = maybeInt.Cast().Literal().Value();
// Emit "<column> <operator> <literal>"; the literal text is taken verbatim from the constructor node.
320357
QuoteColumnForQL(columnName, result);
321358
result << " ";
322359
ConvertComparisonForQL(opName, result);
360+
const auto valueStr = maybeIntValue.Cast().Literal().Value();
323361
result << " " << valueStr;
362+
} else if (constantFilter.value()) {
363+
// Value is out of the range, comparison is always TRUE.
364+
if (columnIsOptional && !nullValue.value()) {
365+
// Handle comparison with NULL as FALSE.
366+
QuoteColumnForQL(columnName, result);
367+
result << " IS NOT NULL";
368+
} else {
369+
result << "TRUE";
370+
}
371+
} else {
372+
// Value is out of the range, comparison is always FALSE.
373+
if (columnIsOptional && nullValue.value()) {
374+
// Handle comparison with NULL as TRUE.
375+
QuoteColumnForQL(columnName, result);
376+
result << " IS NULL";
377+
} else {
378+
result << "FALSE";
379+
}
324380
}
325381
}
326382

327-
void GenerateInputQueryComparison(const TCoCompare& op, TStringBuilder& result) {
383+
void GenerateInputQueryComparison(const TCoCompare& op, const std::optional<bool>& nullValue, TStringBuilder& result) {
328384
YQL_ENSURE(op.Ref().IsCallable({"<", "<=", ">", ">=", "==", "!="}));
329385
const auto left = op.Left().Ptr();
330386
const auto right = op.Right().Ptr();
331387

332388
if (left->IsCallable("Member")) {
333-
GenerateInputQueryIntegerComparison(op.CallableName(), left, right, result);
389+
GenerateInputQueryIntegerComparison(op.CallableName(), left, right, nullValue, result);
334390
} else {
335391
YQL_ENSURE(right->IsCallable("Member"));
336392
auto invertedOp = op.CallableName();
@@ -343,17 +399,29 @@ void GenerateInputQueryComparison(const TCoCompare& op, TStringBuilder& result)
343399
} else if (invertedOp == ">=") {
344400
invertedOp = "<=";
345401
}
346-
GenerateInputQueryIntegerComparison(invertedOp, right, left, result);
402+
GenerateInputQueryIntegerComparison(invertedOp, right, left, nullValue, result);
347403
}
348404
}
349405

350406
void GenerateInputQueryWhereExpression(const TExprNode::TPtr& node, TStringBuilder& result) {
351407
if (const auto maybeCompare = TMaybeNode<TCoCompare>(node)) {
352-
GenerateInputQueryComparison(maybeCompare.Cast(), result);
408+
GenerateInputQueryComparison(maybeCompare.Cast(), {}, result);
353409
} else if (node->IsCallable("Not")) {
354-
result << "NOT (";
410+
const auto child = node->ChildPtr(0);
411+
if (child->IsCallable("Exists")) {
412+
// Do not generate NOT (x IS NOT NULL).
413+
result << "(";
414+
GenerateInputQueryWhereExpression(child->ChildPtr(0), result);
415+
result << ") IS NULL";
416+
} else {
417+
result << "NOT (";
418+
GenerateInputQueryWhereExpression(child, result);
419+
result << ")";
420+
}
421+
} else if (node->IsCallable("Exists")) {
422+
result << "(";
355423
GenerateInputQueryWhereExpression(node->ChildPtr(0), result);
356-
result << ")";
424+
result << ") IS NOT NULL";
357425
} else if (node->IsCallable({"And", "Or"})) {
358426
const TStringBuf op = node->IsCallable("And") ? "AND" : "OR";
359427

@@ -367,6 +435,17 @@ void GenerateInputQueryWhereExpression(const TExprNode::TPtr& node, TStringBuild
367435
GenerateInputQueryWhereExpression(node->Child(i), result);
368436
result << ")";
369437
};
438+
} else if (node->IsCallable("Coalesce")) {
439+
YQL_ENSURE(node->ChildrenSize() == 2);
440+
const auto op = TMaybeNode<TCoCompare>(node->Child(0)).Cast();
441+
const auto nullValueStr = TMaybeNode<TCoBool>(node->Child(1)).Cast().Literal().Value();
442+
const std::optional<bool> nullValue(IsTrue(nullValueStr));
443+
GenerateInputQueryComparison(op, nullValue, result);
444+
} else if (const auto maybeBool = TMaybeNode<TCoBool>(node)) {
445+
result << maybeBool.Cast().Literal().Value();
446+
} else if (node->IsCallable("Member")) {
447+
const auto columnName = node->ChildPtr(1)->Content();
448+
QuoteColumnForQL(columnName, result);
370449
} else {
371450
YQL_ENSURE(false, "unexpected node type");
372451
}

yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_ytql.cpp

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@ using namespace NPrivate;
1515

1616
namespace {
1717

18-
bool NodeHasQLCompatibleType(const TExprNode::TPtr& node) {
18+
bool NodeHasQLCompatibleType(const TExprNode::TPtr& node, bool allowOptional) {
1919
bool isOptional = false;
2020
const TDataExprType* dataType = nullptr;
2121
if (!IsDataOrOptionalOfData(node->GetTypeAnn(), isOptional, dataType)) {
2222
return false;
2323
}
24-
if (isOptional) {
24+
if (!allowOptional && isOptional) {
2525
return false;
2626
}
2727
if (!dataType) {
@@ -33,8 +33,8 @@ bool NodeHasQLCompatibleType(const TExprNode::TPtr& node) {
3333
return true;
3434
}
3535

36-
TExprNode::TPtr CheckQLConst(const TExprNode::TPtr& node, const TExprNode::TPtr& rowArg) {
37-
if (!NodeHasQLCompatibleType(node)) {
36+
TExprNode::TPtr CheckQLConst(const TExprNode::TPtr& node, const TExprNode::TPtr& rowArg, bool allowOptional) {
37+
if (!NodeHasQLCompatibleType(node, allowOptional)) {
3838
return nullptr;
3939
}
4040
if (IsDepended(*node, *rowArg)) {
@@ -43,7 +43,7 @@ TExprNode::TPtr CheckQLConst(const TExprNode::TPtr& node, const TExprNode::TPtr&
4343
return node;
4444
}
4545

46-
TExprNode::TPtr ConvertQLMember(const TExprNode::TPtr& node, const TExprNode::TPtr& rowArg, const TExprNode::TPtr& newRowArg, TExprContext& ctx) {
46+
TExprNode::TPtr ConvertQLMember(const TExprNode::TPtr& node, const TExprNode::TPtr& rowArg, const TExprNode::TPtr& newRowArg, TExprContext& ctx, bool allowOptional = false) {
4747
if (!node->IsCallable("Member")) {
4848
return nullptr;
4949
}
@@ -55,22 +55,22 @@ TExprNode::TPtr ConvertQLMember(const TExprNode::TPtr& node, const TExprNode::TP
5555
if (memberName.StartsWith("_yql_sys_")) {
5656
return nullptr;
5757
}
58-
if (!NodeHasQLCompatibleType(node)) {
58+
if (!NodeHasQLCompatibleType(node, allowOptional)) {
5959
return nullptr;
6060
}
6161
auto arg = newRowArg;
6262
return ctx.ChangeChild(*node, 0, std::move(arg));
6363
}
6464

65-
TExprNode::TPtr ConvertQLComparison(const TExprNode::TPtr& node, const TExprNode::TPtr& rowArg, const TExprNode::TPtr& newRowArg, TExprContext& ctx) {
65+
TExprNode::TPtr ConvertQLComparison(const TExprNode::TPtr& node, const TExprNode::TPtr& rowArg, const TExprNode::TPtr& newRowArg, TExprContext& ctx, bool allowOptional = false) {
6666
YQL_ENSURE(node->ChildrenSize() == 2);
6767
TExprNode::TPtr childLeft;
6868
TExprNode::TPtr childRight;
69-
if (childLeft = ConvertQLMember(node->ChildPtr(0), rowArg, newRowArg, ctx)) {
70-
childRight = CheckQLConst(node->ChildPtr(1), rowArg);
69+
if (childLeft = ConvertQLMember(node->ChildPtr(0), rowArg, newRowArg, ctx, allowOptional)) {
70+
childRight = CheckQLConst(node->ChildPtr(1), rowArg, allowOptional);
7171
}
72-
else if (childRight = ConvertQLMember(node->ChildPtr(1), rowArg, newRowArg, ctx)) {
73-
childLeft = CheckQLConst(node->ChildPtr(0), rowArg);
72+
else if (childRight = ConvertQLMember(node->ChildPtr(1), rowArg, newRowArg, ctx, allowOptional)) {
73+
childLeft = CheckQLConst(node->ChildPtr(0), rowArg, allowOptional);
7474
}
7575
if (!childLeft || !childRight) {
7676
return nullptr;
@@ -79,7 +79,7 @@ TExprNode::TPtr ConvertQLComparison(const TExprNode::TPtr& node, const TExprNode
7979
}
8080

8181
TExprNode::TPtr ConvertQLSubTree(const TExprNode::TPtr& node, const TExprNode::TPtr& rowArg, const TExprNode::TPtr& newRowArg, TExprContext& ctx) {
82-
if (node->IsCallable({"And", "Or", "Not"})) {
82+
if (node->IsCallable({"And", "Or", "Not", "Exists"})) {
8383
TExprNode::TListType convertedChildren;
8484
for (const auto& child : node->ChildrenList()) {
8585
const auto converted = ConvertQLSubTree(child, rowArg, newRowArg, ctx);
@@ -90,9 +90,33 @@ TExprNode::TPtr ConvertQLSubTree(const TExprNode::TPtr& node, const TExprNode::T
9090
};
9191
return ctx.ChangeChildren(*node, std::move(convertedChildren));
9292
}
93+
if (node->IsCallable("Coalesce")) {
94+
if (node->ChildrenSize() != 2) {
95+
return nullptr;
96+
}
97+
const auto comparison = node->Child(0);
98+
if (!comparison->IsCallable({"<", "<=", ">", ">=", "==", "!="})) {
99+
return nullptr;
100+
}
101+
const auto nullValue = node->Child(1);
102+
if (!nullValue->IsCallable("Bool")) {
103+
return nullptr;
104+
}
105+
const auto convertedComparison = ConvertQLComparison(comparison, rowArg, newRowArg, ctx, /*allowOptional*/ true);
106+
if (!convertedComparison) {
107+
return nullptr;
108+
}
109+
return ctx.ChangeChildren(*node, {convertedComparison, nullValue});
110+
}
93111
if (node->IsCallable({"<", "<=", ">", ">=", "==", "!="})) {
94112
return ConvertQLComparison(node, rowArg, newRowArg, ctx);
95113
}
114+
if (node->IsCallable("Bool")) {
115+
return node;
116+
}
117+
if (node->IsCallable("Member")) {
118+
return ConvertQLMember(node, rowArg, newRowArg, ctx);
119+
}
96120
return nullptr;
97121
}
98122

yt/yql/providers/yt/provider/yql_yt_helpers.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -714,7 +714,7 @@ void GetNodesToCalculateFromQLFilter(const TExprNode& qlFilter, TExprNode::TList
714714
YQL_ENSURE(qlFilter.IsCallable("YtQLFilter"));
715715
const auto lambdaBody = qlFilter.Child(1)->Child(1);
716716
VisitExpr(lambdaBody, [&needCalc, &uniqNodes](const TExprNode::TPtr& node) {
717-
if (node->IsCallable({"And", "Or", "Not", "<", "<=", ">", ">=", "==", "!="})) {
717+
if (node->IsCallable({"And", "Or", "Not", "Coalesce", "Exists", "<", "<=", ">", ">=", "==", "!="})) {
718718
return true;
719719
}
720720
if (node->IsCallable("Member")) {
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
in Input integer_optional.txt
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
-- Test query: enables the yt.UseQLFilter pragma and filters plato.Input
-- with three integer comparisons joined by AND.
-- NOTE(review): columns a, b and c appear to be optional integers according to the
-- table attributes added in the same commit - confirm which of these predicates are
-- expected to be pushed into the QL filter.
pragma yt.UseQLFilter;
2+
3+
select *
4+
from plato.Input
5+
where
6+
not (a <= 5)
7+
and
8+
b < 0
9+
and
-- The constant is wrapped in Just(), i.e. it is written as an optional value.
10+
c > Just(5);
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{"a"=1;"b"=1;"c"=1u;"d"=1;"e"=1u};
2+
{"a"=#;"b"=-2;"c"=2u;"d"=10;"e"=10u};
3+
{"a"=-3;"b"=3;"c"=3u;"d"=-100;"e"=100u};
4+
{"a"=4;"b"=#;"c"=4u;"d"=1000;"e"=1000u};
5+
{"a"=5;"b"=-5;"c"=#;"d"=10000;"e"=10000u};
6+
{"a"=-6;"b"=6;"c"=6u;"d"=-100000;"e"=100000u};
7+
{"a"=#;"b"=7;"c"=7u;"d"=1000000;"e"=1000000u};
8+
{"a"=8;"b"=-8;"c"=8u;"d"=#;"e"=10000000u};
9+
{"a"=-9;"b"=9;"c"=#;"d"=-100000000;"e"=100000000u};
10+
{"a"=10;"b"=#;"c"=10u;"d"=1000000000;"e"=#};
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"_yql_row_spec"={
3+
"Type"=["StructType";[
4+
["a";["OptionalType";["DataType";"Int32"]]];
5+
["b";["OptionalType";["DataType";"Int32"]]];
6+
["c";["OptionalType";["DataType";"Uint32"]]];
7+
["d";["OptionalType";["DataType";"Int64"]]];
8+
["e";["OptionalType";["DataType";"Uint64"]]]
9+
]]
10+
}
11+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
in Input integer_optional.txt

0 commit comments

Comments
 (0)