Skip to content

Commit ccd1e97

Browse files
committed
Pushdown ilike (#18540)
1 parent 206b1d0 commit ccd1e97

File tree

9 files changed

+101
-34
lines changed

9 files changed

+101
-34
lines changed

ydb/core/kqp/expr_nodes/kqp_expr_nodes.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -697,7 +697,8 @@
697697
"Match": {"Type": "Callable", "Name": "KqpOlapApply"},
698698
"Children": [
699699
{"Index": 0, "Name": "Lambda", "Type": "TCoLambda"},
700-
{"Index": 1, "Name": "Args", "Type": "TExprList"}
700+
{"Index": 1, "Name": "Args", "Type": "TExprList"},
701+
{"Index": 2, "Name": "KernelName", "Type": "TCoAtom"}
701702
]
702703
},
703704
{

ydb/core/kqp/host/kqp_type_ann.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1153,7 +1153,7 @@ TStatus AnnotateOlapApplyColumnArg(const TExprNode::TPtr& node, TExprContext& ct
11531153
}
11541154

11551155
TStatus AnnotateOlapApply(const TExprNode::TPtr& node, TExprContext& ctx) {
1156-
if (!EnsureArgsCount(*node, 2U, ctx)) {
1156+
if (!EnsureArgsCount(*node, 3U, ctx)) {
11571157
return TStatus::Error;
11581158
}
11591159

@@ -1177,6 +1177,10 @@ TStatus AnnotateOlapApply(const TExprNode::TPtr& node, TExprContext& ctx) {
11771177
return TStatus::Repeat;
11781178
}
11791179

1180+
if (!EnsureAtom(*node->Child(TKqpOlapApply::idx_KernelName), ctx)) {
1181+
return TStatus::Error;
1182+
}
1183+
11801184
node->SetTypeAnn(lambda->GetTypeAnn());
11811185
return TStatus::Ok;
11821186
}

ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,7 @@ TMaybeNode<TExprBase> YqlApplyPushdown(const TExprBase& apply, const TExprNode&
253253
return Build<TKqpOlapApply>(ctx, apply.Pos())
254254
.Lambda(ctx.NewLambda(apply.Pos(), ctx.NewArguments(argument.Pos(), std::move(lambdaArgs)), ctx.ReplaceNodes(apply.Ptr(), replacements)))
255255
.Args().Add(std::move(realArgs)).Build()
256+
.KernelName(ctx.NewAtom(apply.Pos(), ""))
256257
.Done();
257258
}
258259

@@ -466,6 +467,32 @@ TExprBase BuildOneElementComparison(const std::pair<TExprBase, TExprBase>& param
466467
.Done();
467468
}
468469

470+
if (const auto* stringUdfFunction = IgnoreCaseSubstringMatchFunctions.FindPtr(predicate.CallableName())) {
471+
const auto& leftArg = ctx.NewArgument(pos, "left");
472+
const auto& rightArg = ctx.NewArgument(pos, "right");
473+
474+
const auto& callUdfLambda = ctx.NewLambda(pos, ctx.NewArguments(pos, {leftArg, rightArg}),
475+
ctx.Builder(pos)
476+
.Callable("Apply")
477+
.Callable(0, "Udf")
478+
.Atom(0, *stringUdfFunction)
479+
.Seal()
480+
.Add(1, leftArg)
481+
.Add(2, rightArg)
482+
.Seal()
483+
.Build()
484+
);
485+
486+
return Build<TKqpOlapApply>(ctx, pos)
487+
.Lambda(callUdfLambda)
488+
.Args()
489+
.Add(parameter.first)
490+
.Add(parameter.second)
491+
.Build()
492+
.KernelName(ctx.NewAtom(pos, *stringUdfFunction))
493+
.Done();
494+
}
495+
469496
std::string compareOperator = "";
470497

471498
if (predicate.Maybe<TCoCmpEqual>()) {

ydb/core/kqp/opt/physical/predicate_collector.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,13 @@ namespace NKikimr::NKqp::NOpt {
99
using namespace NYql;
1010
using namespace NYql::NNodes;
1111

12+
THashMap<TString, TString> IgnoreCaseSubstringMatchFunctions = {
13+
{"EqualsIgnoreCase", "String.AsciiEqualsIgnoreCase"},
14+
{"StartsWithIgnoreCase", "String.AsciiStartsWithIgnoreCase"},
15+
{"EndsWithIgnoreCase", "String.AsciiEndsWithIgnoreCase"},
16+
{"StringContainsIgnoreCase", "String.AsciiContainsIgnoreCase"}
17+
};
18+
1219
namespace {
1320

1421
bool IsSupportedPredicate(const TCoCompare& predicate) {
@@ -295,6 +302,18 @@ bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExpr
295302
}
296303
}
297304

305+
if (options.PushdownSubstring) { //EnableSimpleIlikePushdown FF
306+
if (IgnoreCaseSubstringMatchFunctions.contains(compare.CallableName())) {
307+
const auto& right = compare.Right().Ref();
308+
YQL_ENSURE(right.IsCallable("String") || right.IsCallable("Utf8"));
309+
const auto pattern = right.Child(0);
310+
YQL_ENSURE(pattern->IsAtom());
311+
if (UTF8Detect(pattern->Content()) != ASCII) {
312+
return false;
313+
}
314+
}
315+
}
316+
298317
return true;
299318
}
300319

ydb/core/kqp/opt/physical/predicate_collector.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ struct TPushdownOptions {
2121
bool PushdownSubstring;
2222
};
2323

24+
extern THashMap<TString, TString> IgnoreCaseSubstringMatchFunctions;
25+
2426
void CollectPredicates(const NNodes::TExprBase& predicate, TOLAPPredicateNode& predicateTree, const TExprNode* lambdaArg, const NNodes::TExprBase& lambdaBody, const TPushdownOptions& options);
2527

2628
}

ydb/core/kqp/query_compiler/kqp_olap_compiler.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -608,6 +608,7 @@ TTypedColumn CompileYqlKernelScalarApply(const TKqpOlapApply& apply, TKqpOlapCom
608608
const auto idx = ctx.GetKernelRequestBuilder().AddScalarApply(apply.Lambda().Ref(), argTypes, ctx.ExprCtx());
609609
function->SetKernelIdx(idx);
610610
function->SetFunctionType(TProgram::YQL_KERNEL);
611+
function->SetKernelName(apply.KernelName().StringValue());
611612
std::for_each(ids.cbegin(), ids.cend(), [function] (ui64 id) { function->AddArguments()->SetId(id); });
612613
return {command->GetColumn().GetId(), ctx.ExprCtx().MakeType<TBlockExprType>(apply.Lambda().Body().Ref().GetTypeAnn())};
613614
}

ydb/core/kqp/ut/olap/combinatory/select.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,10 @@ TConclusionStatus TSelectCommand::DoExecute(TKikimrRunner& kikimr) {
1616
const i64 headerApproveStart = controller->GetHeadersApprovedOnSelect().Val();
1717
const i64 headerNoDataStart = controller->GetHeadersSkippedNoData().Val();
1818

19-
Cerr << "EXECUTE: " << Command << Endl;
19+
const auto command = "PRAGMA OptimizeSimpleILIKE; PRAGMA AnsiLIke;" + Command;
20+
Cerr << "EXECUTE: " << command << Endl;
2021
auto session = kikimr.GetTableClient().CreateSession().GetValueSync().GetSession();
21-
auto it = kikimr.GetQueryClient().StreamExecuteQuery(Command, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync();
22+
auto it = kikimr.GetQueryClient().StreamExecuteQuery(command, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync();
2223
UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), NYdb::EStatus::SUCCESS, it.GetIssues().ToString());
2324
TString output = StreamResultToYson(it);
2425
if (Compare) {

ydb/core/kqp/ut/olap/json_ut.cpp

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -633,22 +633,22 @@ Y_UNIT_TEST_SUITE(KqpOlapJson) {
633633
READ: SELECT * FROM `/Root/ColumnTable` WHERE JSON_VALUE(Col2, "$.\"b.c.d\"") like "%1b4%" ORDER BY Col1;
634634
EXPECTED: [[14u;["{\"a\":\"a4\",\"b.c.d\":\"1b4\"}"]]]
635635
IDX_ND_SKIP_APPROVE: 0, 4, 1
636-
# ------
637-
# READ: SELECT * FROM `/Root/ColumnTable` WHERE JSON_VALUE(Col2, "$.\"b.c.d\"") ilike "%1b4%" ORDER BY Col1;
638-
# EXPECTED: [[14u;["{\"a\":\"a4\",\"b.c.d\":\"1b4\"}"]]]
639-
# IDX_ND_SKIP_APPROVE: 0, 4, 1
640-
# ------
641-
# READ: SELECT * FROM `/Root/ColumnTable` WHERE JSON_VALUE(Col2, "$.\"b.c.d\"") ilike "%1B4" ORDER BY Col1;
642-
# EXPECTED: [[14u;["{\"a\":\"a4\",\"b.c.d\":\"1b4\"}"]]]
643-
# IDX_ND_SKIP_APPROVE: 0, 4, 1
644-
# ------
645-
# READ: SELECT * FROM `/Root/ColumnTable` WHERE JSON_VALUE(Col2, "$.\"b.c.d\"") ilike "1b5" ORDER BY Col1;
646-
# EXPECTED: []
647-
# IDX_ND_SKIP_APPROVE: 0, 5, 0
648-
# ------
649-
# READ: SELECT * FROM `/Root/ColumnTable` WHERE JSON_VALUE(Col2, "$.a") = "1b5" ORDER BY Col1;
650-
# EXPECTED: []
651-
# IDX_ND_SKIP_APPROVE: 0, 5, 0
636+
------
637+
READ: SELECT * FROM `/Root/ColumnTable` WHERE JSON_VALUE(Col2, "$.\"b.c.d\"") ilike "%1b4%" ORDER BY Col1;
638+
EXPECTED: [[14u;["{\"a\":\"a4\",\"b.c.d\":\"1b4\"}"]]]
639+
IDX_ND_SKIP_APPROVE: 0, 4, 1
640+
------
641+
READ: SELECT * FROM `/Root/ColumnTable` WHERE JSON_VALUE(Col2, "$.\"b.c.d\"") ilike "%1B4" ORDER BY Col1;
642+
EXPECTED: [[14u;["{\"a\":\"a4\",\"b.c.d\":\"1b4\"}"]]]
643+
IDX_ND_SKIP_APPROVE: 0, 4, 1
644+
------
645+
READ: SELECT * FROM `/Root/ColumnTable` WHERE JSON_VALUE(Col2, "$.\"b.c.d\"") ilike "1b5" ORDER BY Col1;
646+
EXPECTED: []
647+
IDX_ND_SKIP_APPROVE: 0, 5, 0
648+
------
649+
READ: SELECT * FROM `/Root/ColumnTable` WHERE JSON_VALUE(Col2, "$.a") = "1b5" ORDER BY Col1;
650+
EXPECTED: []
651+
IDX_ND_SKIP_APPROVE: 0, 5, 0
652652
------
653653
READ: SELECT * FROM `/Root/ColumnTable` WHERE JSON_VALUE(Col2, "$.a") = "a4" ORDER BY Col1;
654654
EXPECTED: [[4u;["{\"a\":\"a4\",\"b.c.d\":\"b4\"}"]];[14u;["{\"a\":\"a4\",\"b.c.d\":\"1b4\"}"]]]
@@ -800,18 +800,18 @@ Y_UNIT_TEST_SUITE(KqpOlapJson) {
800800
READ: SELECT * FROM `/Root/ColumnTable` WHERE JSON_VALUE(Col2, "$.\"b.c.d\"") like "%1b4%" ORDER BY Col1;
801801
EXPECTED: [[14u;["{\"a\":\"a4\",\"b.c.d\":\"1b4\"}"]]]
802802
IDX_ND_SKIP_APPROVE: 0, 4, 1
803-
# ------
804-
# READ: SELECT * FROM `/Root/ColumnTable` WHERE JSON_VALUE(Col2, "$.\"b.c.d\"") ilike "%1b4%" ORDER BY Col1;
805-
# EXPECTED: [[14u;["{\"a\":\"a4\",\"b.c.d\":\"1b4\"}"]]]
806-
# IDX_ND_SKIP_APPROVE: 0, 4, 1
807-
# ------
808-
# READ: SELECT * FROM `/Root/ColumnTable` WHERE JSON_VALUE(Col2, "$.\"b.c.d\"") ilike "%1B4" ORDER BY Col1;
809-
# EXPECTED: [[14u;["{\"a\":\"a4\",\"b.c.d\":\"1b4\"}"]]]
810-
# IDX_ND_SKIP_APPROVE: 0, 4, 1
811-
# ------
812-
# READ: SELECT * FROM `/Root/ColumnTable` WHERE JSON_VALUE(Col2, "$.\"b.c.d\"") ilike "1b5" ORDER BY Col1;
813-
# EXPECTED: []
814-
# IDX_ND_SKIP_APPROVE: 0, 5, 0
803+
------
804+
READ: SELECT * FROM `/Root/ColumnTable` WHERE JSON_VALUE(Col2, "$.\"b.c.d\"") ilike "%1b4%" ORDER BY Col1;
805+
EXPECTED: [[14u;["{\"a\":\"a4\",\"b.c.d\":\"1b4\"}"]]]
806+
IDX_ND_SKIP_APPROVE: 0, 4, 1
807+
------
808+
READ: SELECT * FROM `/Root/ColumnTable` WHERE JSON_VALUE(Col2, "$.\"b.c.d\"") ilike "%1B4" ORDER BY Col1;
809+
EXPECTED: [[14u;["{\"a\":\"a4\",\"b.c.d\":\"1b4\"}"]]]
810+
IDX_ND_SKIP_APPROVE: 0, 4, 1
811+
------
812+
READ: SELECT * FROM `/Root/ColumnTable` WHERE JSON_VALUE(Col2, "$.\"b.c.d\"") ilike "1b5" ORDER BY Col1;
813+
EXPECTED: []
814+
IDX_ND_SKIP_APPROVE: 0, 5, 0
815815
------
816816
READ: SELECT * FROM `/Root/ColumnTable` WHERE JSON_VALUE(Col2, "$.a") = "1b5" ORDER BY Col1;
817817
EXPECTED: []

ydb/core/tx/program/builder.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,20 @@ namespace NKikimr::NArrow::NSSA {
2020
TConclusion<std::shared_ptr<IStepFunction>> TProgramBuilder::MakeFunction(const TColumnInfo& name,
2121
const NKikimrSSA::TProgram::TAssignment::TFunction& func, std::shared_ptr<NArrow::NSSA::IKernelLogic>& kernelLogic,
2222
std::vector<TColumnChainInfo>& arguments) const {
23-
if (func.GetKernelName()) {
24-
kernelLogic.reset(IKernelLogic::TFactory::Construct(func.GetKernelName()));
23+
if (const auto& kernelName = func.GetKernelName(); !kernelName.empty()) {
24+
if (kernelName == "String.AsciiEqualsIgnoreCase") {
25+
kernelLogic = std::make_shared<TLogicMatchString>(TIndexCheckOperation::EOperation::Contains, false, false);
26+
} else if (kernelName == "String.AsciiContainsIgnoreCase") {
27+
kernelLogic = std::make_shared<TLogicMatchString>(TIndexCheckOperation::EOperation::Contains, false, false);
28+
} else if (kernelName == "String.AsciiContainsIgnoreCase") {
29+
kernelLogic = std::make_shared<TLogicMatchString>(TIndexCheckOperation::EOperation::Contains, false, false);
30+
} else if (kernelName == "String.AsciiStartsWithIgnoreCase") {
31+
kernelLogic = std::make_shared<TLogicMatchString>(TIndexCheckOperation::EOperation::StartsWith, false, false);
32+
} else if (kernelName == "String.AsciiEndsWithIgnoreCase") {
33+
kernelLogic = std::make_shared<TLogicMatchString>(TIndexCheckOperation::EOperation::EndsWith, false, false);
34+
} else {
35+
kernelLogic.reset(IKernelLogic::TFactory::Construct(kernelName));
36+
}
2537
} else if (func.HasYqlOperationId()) {
2638
kernelLogic = std::make_shared<TSimpleKernelLogic>(func.GetYqlOperationId());
2739
} else {

0 commit comments

Comments
 (0)