Skip to content

Commit 91dd3b3

Browse files
authored
YQ-3847 RD support regexp pushdown (#11626)
1 parent 8f7b4e4 commit 91dd3b3

File tree

11 files changed: 318 additions and 76 deletions.

ydb/library/yql/providers/common/pushdown/collection.cpp

Lines changed: 105 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -325,18 +325,18 @@ std::vector<TExprBase> GetComparisonNodes(const TExprBase& node) {
325325
return res;
326326
}
327327

328-
bool IsMemberColumn(const TCoMember& member, const TExprNode* lambdaArg) {
329-
return member.Struct().Raw() == lambdaArg;
328+
bool IsMemberColumn(const TCoMember& member, const TExprBase& lambdaArg) {
329+
return member.Struct().Raw() == lambdaArg.Raw();
330330
}
331331

332-
bool IsMemberColumn(const TExprBase& node, const TExprNode* lambdaArg) {
332+
bool IsMemberColumn(const TExprBase& node, const TExprBase& lambdaArg) {
333333
if (auto member = node.Maybe<TCoMember>()) {
334334
return IsMemberColumn(member.Cast(), lambdaArg);
335335
}
336336
return false;
337337
}
338338

339-
bool CheckExpressionNodeForPushdown(const TExprBase& node, const TExprNode* lambdaArg, const TSettings& settings) {
339+
bool CheckExpressionNodeForPushdown(const TExprBase& node, const TExprBase& lambdaArg, const TExprBase& lambdaBody, const TSettings& settings) {
340340
if (auto maybeSafeCast = node.Maybe<TCoSafeCast>()) {
341341
return IsSupportedCast(maybeSafeCast.Cast(), settings);
342342
} else if (auto maybeData = node.Maybe<TCoDataCtor>()) {
@@ -358,17 +358,40 @@ bool CheckExpressionNodeForPushdown(const TExprBase& node, const TExprNode* lamb
358358
} else if (settings.IsEnabled(TSettings::EFeatureFlag::ParameterExpression) && node.Maybe<TCoParameter>()) {
359359
return true;
360360
} else if (const auto op = node.Maybe<TCoUnaryArithmetic>(); op && settings.IsEnabled(TSettings::EFeatureFlag::UnaryOperators)) {
361-
return CheckExpressionNodeForPushdown(op.Cast().Arg(), lambdaArg, settings);
361+
return CheckExpressionNodeForPushdown(op.Cast().Arg(), lambdaArg, lambdaBody, settings);
362362
} else if (const auto op = node.Maybe<TCoBinaryArithmetic>(); op && settings.IsEnabled(TSettings::EFeatureFlag::ArithmeticalExpressions)) {
363363
if (!settings.IsEnabled(TSettings::EFeatureFlag::DivisionExpressions) && (op.Maybe<TCoDiv>() || op.Maybe<TCoMod>())) {
364364
return false;
365365
}
366-
return CheckExpressionNodeForPushdown(op.Cast().Left(), lambdaArg, settings) && CheckExpressionNodeForPushdown(op.Cast().Right(), lambdaArg, settings);
366+
return CheckExpressionNodeForPushdown(op.Cast().Left(), lambdaArg, lambdaBody, settings) && CheckExpressionNodeForPushdown(op.Cast().Right(), lambdaArg, lambdaBody, settings);
367+
} else if (settings.IsEnabled(TSettings::EFeatureFlag::JustPassthroughOperators) && (node.Maybe<TCoCoalesce>() || node.Maybe<TCoJust>())) {
368+
for (const auto& childNodePtr : node.Ref().Children()) {
369+
if (!CheckExpressionNodeForPushdown(TExprBase(childNodePtr), lambdaArg, lambdaBody, settings)) {
370+
return false;
371+
}
372+
}
373+
return true;
374+
} else if (auto maybeIf = node.Maybe<TCoIf>()) {
375+
if (!settings.IsEnabled(TSettings::EFeatureFlag::JustPassthroughOperators)) {
376+
return false;
377+
}
378+
379+
const auto& sqlIf = maybeIf.Cast();
380+
const auto& predicate = sqlIf.Predicate();
381+
382+
// Check if predicate pushdown
383+
TPredicateNode ifPredicate(predicate);
384+
CollectPredicates(TExprBase(predicate), ifPredicate, lambdaArg, lambdaBody, settings);
385+
386+
// Check if expressions pushdown
387+
return ifPredicate.CanBePushed
388+
&& CheckExpressionNodeForPushdown(sqlIf.ThenValue(), lambdaArg, lambdaBody, settings)
389+
&& CheckExpressionNodeForPushdown(sqlIf.ElseValue(), lambdaArg, lambdaBody, settings);
367390
}
368391
return false;
369392
}
370393

371-
bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExprNode* lambdaArg, const TExprBase& input, const TSettings& settings) {
394+
bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExprBase& lambdaArg, const TExprBase& input, const TSettings& settings) {
372395
const TTypeAnnotationNode* inputType = input.Ptr()->GetTypeAnn();
373396
switch (inputType->GetKind()) {
374397
case ETypeAnnotationKind::Flow:
@@ -396,7 +419,7 @@ bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExpr
396419
YQL_ENSURE(leftList.size() == rightList.size(), "Different sizes of lists in comparison!");
397420

398421
for (size_t i = 0; i < leftList.size(); ++i) {
399-
if (!CheckExpressionNodeForPushdown(leftList[i], lambdaArg, settings) || !CheckExpressionNodeForPushdown(rightList[i], lambdaArg, settings)) {
422+
if (!CheckExpressionNodeForPushdown(leftList[i], lambdaArg, input, settings) || !CheckExpressionNodeForPushdown(rightList[i], lambdaArg, input, settings)) {
400423
return false;
401424
}
402425

@@ -415,7 +438,7 @@ bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExpr
415438
return true;
416439
}
417440

418-
bool CompareCanBePushed(const TCoCompare& compare, const TExprNode* lambdaArg, const TExprBase& lambdaBody, const TSettings& settings) {
441+
bool CompareCanBePushed(const TCoCompare& compare, const TExprBase& lambdaArg, const TExprBase& lambdaBody, const TSettings& settings) {
419442
if (!IsSupportedPredicate(compare, settings)) {
420443
return false;
421444
}
@@ -427,11 +450,11 @@ bool CompareCanBePushed(const TCoCompare& compare, const TExprNode* lambdaArg, c
427450
return true;
428451
}
429452

430-
bool SqlInCanBePushed(const TCoSqlIn& sqlIn, const TExprNode* lambdaArg, const TExprBase& lambdaBody, const TSettings& settings) {
453+
bool SqlInCanBePushed(const TCoSqlIn& sqlIn, const TExprBase& lambdaArg, const TExprBase& lambdaBody, const TSettings& settings) {
431454
const TExprBase& expr = sqlIn.Collection();
432455
const TExprBase& lookup = sqlIn.Lookup();
433456

434-
if (!CheckExpressionNodeForPushdown(lookup, lambdaArg, settings)) {
457+
if (!CheckExpressionNodeForPushdown(lookup, lambdaArg, lambdaBody, settings)) {
435458
return false;
436459
}
437460

@@ -446,7 +469,7 @@ bool SqlInCanBePushed(const TCoSqlIn& sqlIn, const TExprNode* lambdaArg, const T
446469

447470
const TTypeAnnotationNode* inputType = lambdaBody.Ptr()->GetTypeAnn();
448471
for (auto& child : collection->Children()) {
449-
if (!CheckExpressionNodeForPushdown(TExprBase(child), lambdaArg, settings)) {
472+
if (!CheckExpressionNodeForPushdown(TExprBase(child), lambdaArg, lambdaBody, settings)) {
450473
return false;
451474
}
452475

@@ -459,14 +482,14 @@ bool SqlInCanBePushed(const TCoSqlIn& sqlIn, const TExprNode* lambdaArg, const T
459482
return true;
460483
}
461484

462-
bool IsDistinctCanBePushed(const TExprBase& predicate, const TExprNode* lambdaArg, const TExprBase& lambdaBody, const TSettings& settings) {
485+
bool IsDistinctCanBePushed(const TExprBase& predicate, const TExprBase& lambdaArg, const TExprBase& lambdaBody, const TSettings& settings) {
463486
if (predicate.Ref().ChildrenSize() != 2 ) {
464487
return false;
465488
}
466489
auto expr1 = TExprBase(predicate.Ref().Child(0));
467490
auto expr2 = TExprBase(predicate.Ref().Child(1));
468-
if (!CheckExpressionNodeForPushdown(expr1, lambdaArg, settings)
469-
|| !CheckExpressionNodeForPushdown(expr2, lambdaArg, settings)) {
491+
if (!CheckExpressionNodeForPushdown(expr1, lambdaArg, lambdaBody, settings)
492+
|| !CheckExpressionNodeForPushdown(expr2, lambdaArg, lambdaBody, settings)) {
470493
return false;
471494
}
472495
if (!settings.IsEnabled(TSettings::EFeatureFlag::DoNotCheckCompareArgumentsTypes)
@@ -476,7 +499,7 @@ bool IsDistinctCanBePushed(const TExprBase& predicate, const TExprNode* lambdaAr
476499
return true;
477500
}
478501

479-
bool SafeCastCanBePushed(const TCoFlatMap& flatmap, const TExprNode* lambdaArg, const TSettings& settings) {
502+
bool SafeCastCanBePushed(const TCoFlatMap& flatmap, const TExprBase& lambdaArg, const TExprBase& lambdaBody, const TSettings& settings) {
480503
/*
481504
* There are three ways of comparison in following format:
482505
*
@@ -497,7 +520,7 @@ bool SafeCastCanBePushed(const TCoFlatMap& flatmap, const TExprNode* lambdaArg,
497520
YQL_ENSURE(leftList.size() == rightList.size(), "Different sizes of lists in comparison!");
498521

499522
for (size_t i = 0; i < leftList.size(); ++i) {
500-
if (!CheckExpressionNodeForPushdown(leftList[i], lambdaArg, settings) || !CheckExpressionNodeForPushdown(rightList[i], lambdaArg, settings)) {
523+
if (!CheckExpressionNodeForPushdown(leftList[i], lambdaArg, lambdaBody, settings) || !CheckExpressionNodeForPushdown(rightList[i], lambdaArg, lambdaBody, settings)) {
501524
return false;
502525
}
503526
}
@@ -520,7 +543,7 @@ bool SafeCastCanBePushed(const TCoFlatMap& flatmap, const TExprNode* lambdaArg,
520543
return true;
521544
}
522545

523-
bool JsonExistsCanBePushed(const TCoJsonExists& jsonExists, const TExprNode* lambdaArg) {
546+
bool JsonExistsCanBePushed(const TCoJsonExists& jsonExists, const TExprBase& lambdaArg) {
524547
auto maybeMember = jsonExists.Json().Maybe<TCoMember>();
525548
if (!maybeMember || !jsonExists.JsonPath().Maybe<TCoUtf8>()) {
526549
// Currently we support only simple columns in pushdown
@@ -532,7 +555,7 @@ bool JsonExistsCanBePushed(const TCoJsonExists& jsonExists, const TExprNode* lam
532555
return true;
533556
}
534557

535-
bool CoalesceCanBePushed(const TCoCoalesce& coalesce, const TExprNode* lambdaArg, const TExprBase& lambdaBody, const TSettings& settings) {
558+
bool CoalesceCanBePushed(const TCoCoalesce& coalesce, const TExprBase& lambdaArg, const TExprBase& lambdaBody, const TSettings& settings) {
536559
if (!coalesce.Value().Maybe<TCoBool>()) {
537560
return false;
538561
}
@@ -541,7 +564,7 @@ bool CoalesceCanBePushed(const TCoCoalesce& coalesce, const TExprNode* lambdaArg
541564
if (auto maybeCompare = predicate.Maybe<TCoCompare>()) {
542565
return CompareCanBePushed(maybeCompare.Cast(), lambdaArg, lambdaBody, settings);
543566
} else if (auto maybeFlatmap = predicate.Maybe<TCoFlatMap>()) {
544-
return SafeCastCanBePushed(maybeFlatmap.Cast(), lambdaArg, settings);
567+
return SafeCastCanBePushed(maybeFlatmap.Cast(), lambdaArg, lambdaBody, settings);
545568
} else if (settings.IsEnabled(TSettings::EFeatureFlag::JsonExistsOperator) && predicate.Maybe<TCoJsonExists>()) {
546569
auto jsonExists = predicate.Cast<TCoJsonExists>();
547570
return JsonExistsCanBePushed(jsonExists, lambdaArg);
@@ -550,11 +573,67 @@ bool CoalesceCanBePushed(const TCoCoalesce& coalesce, const TExprNode* lambdaArg
550573
return false;
551574
}
552575

553-
bool ExistsCanBePushed(const TCoExists& exists, const TExprNode* lambdaArg) {
576+
bool ExistsCanBePushed(const TCoExists& exists, const TExprBase& lambdaArg) {
554577
return IsMemberColumn(exists.Optional(), lambdaArg);
555578
}
556579

557-
void CollectChildrenPredicates(const TExprNode& opNode, TPredicateNode& predicateTree, const TExprNode* lambdaArg, const TExprBase& lambdaBody, const TSettings& settings) {
580+
bool UdfCanBePushed(const TCoUdf& udf, const TExprNode::TListType& children, const TExprBase& lambdaArg, const TExprBase& lambdaBody, const TSettings& settings) {
581+
const TString functionName(udf.MethodName());
582+
if (!settings.IsEnabledFunction(functionName)) {
583+
return false;
584+
}
585+
586+
if (functionName == "Re2.Grep") {
587+
if (children.size() != 2) {
588+
// Expected exactly one argument (first child of apply is callable)
589+
return false;
590+
}
591+
592+
const auto& udfSettings = udf.Settings();
593+
if (udfSettings && !udfSettings.Cast().Empty()) {
594+
// Expected empty udf settings
595+
return false;
596+
}
597+
598+
const auto& maybeRunConfig = udf.RunConfigValue();
599+
if (!maybeRunConfig) {
600+
// Expected non empty run config
601+
return false;
602+
}
603+
const auto& runConfig = maybeRunConfig.Cast().Ref();
604+
605+
if (runConfig.ChildrenSize() != 2) {
606+
// Expected exactly two run config settings
607+
return false;
608+
}
609+
if (!TExprBase(runConfig.Child(1)).Maybe<TCoNothing>()) {
610+
// Expected empty regexp settings
611+
return false;
612+
}
613+
614+
return CheckExpressionNodeForPushdown(TExprBase(runConfig.Child(0)), lambdaArg, lambdaBody, settings);
615+
}
616+
return false;
617+
}
618+
619+
bool ApplyCanBePushed(const TCoApply& apply, const TExprBase& lambdaArg, const TExprBase& lambdaBody, const TSettings& settings) {
620+
// Check callable
621+
if (auto udf = apply.Callable().Maybe<TCoUdf>()) {
622+
if (!UdfCanBePushed(udf.Cast(), apply.Ref().ChildrenList(), lambdaArg, lambdaBody, settings)) {
623+
return false;
624+
}
625+
}
626+
627+
// Check arguments
628+
for (size_t i = 1; i < apply.Ref().ChildrenSize(); ++i) {
629+
if (!CheckExpressionNodeForPushdown(TExprBase(apply.Ref().Child(i)), lambdaArg, lambdaBody, settings)) {
630+
return false;
631+
}
632+
}
633+
return true;
634+
}
635+
636+
void CollectChildrenPredicates(const TExprNode& opNode, TPredicateNode& predicateTree, const TExprBase& lambdaArg, const TExprBase& lambdaBody, const TSettings& settings) {
558637
predicateTree.Children.reserve(opNode.ChildrenSize());
559638
predicateTree.CanBePushed = true;
560639
for (const auto& childNodePtr: opNode.Children()) {
@@ -569,13 +648,13 @@ void CollectChildrenPredicates(const TExprNode& opNode, TPredicateNode& predicat
569648
}
570649
}
571650

572-
void CollectExpressionPredicate(TPredicateNode& predicateTree, const TCoMember& member, const TExprNode* lambdaArg) {
651+
void CollectExpressionPredicate(TPredicateNode& predicateTree, const TCoMember& member, const TExprBase& lambdaArg) {
573652
predicateTree.CanBePushed = IsMemberColumn(member, lambdaArg);
574653
}
575654

576655
} // anonymous namespace end
577656

578-
void CollectPredicates(const TExprBase& predicate, TPredicateNode& predicateTree, const TExprNode* lambdaArg, const TExprBase& lambdaBody, const TSettings& settings) {
657+
void CollectPredicates(const TExprBase& predicate, TPredicateNode& predicateTree, const TExprBase& lambdaArg, const TExprBase& lambdaBody, const TSettings& settings) {
579658
if (predicate.Maybe<TCoCoalesce>()) {
580659
if (settings.IsEnabled(TSettings::EFeatureFlag::JustPassthroughOperators))
581660
CollectChildrenPredicates(predicate.Ref(), predicateTree, lambdaArg, lambdaBody, settings);
@@ -618,6 +697,8 @@ void CollectPredicates(const TExprBase& predicate, TPredicateNode& predicateTree
618697
} else if (settings.IsEnabled(TSettings::EFeatureFlag::IsDistinctOperator) &&
619698
(predicate.Ref().IsCallable({"IsNotDistinctFrom", "IsDistinctFrom"}))) {
620699
predicateTree.CanBePushed = IsDistinctCanBePushed(predicate, lambdaArg, lambdaBody, settings);
700+
} else if (auto maybeApply = predicate.Maybe<TCoApply>()) {
701+
predicateTree.CanBePushed = ApplyCanBePushed(maybeApply.Cast(), lambdaArg, lambdaBody, settings);
621702
} else {
622703
predicateTree.CanBePushed = false;
623704
}

ydb/library/yql/providers/common/pushdown/collection.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ namespace NYql::NPushdown {
99

1010
// Collects subpredicate that we can then push down
1111
void CollectPredicates(const NNodes::TExprBase& predicate, TPredicateNode& predicateTree,
12-
const TExprNode* lambdaArg, const NNodes::TExprBase& lambdaBody,
12+
const NNodes::TExprBase& lambdaArg, const NNodes::TExprBase& lambdaBody,
1313
const TSettings& settings);
1414

1515
} // namespace NYql::NPushdown

ydb/library/yql/providers/common/pushdown/physical_opt.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ TMaybeNode<TCoLambda> MakePushdownPredicate(const TCoLambda& lambda, TExprContex
5151

5252
TCoOptionalIf optionalIf = maybeOptionalIf.Cast();
5353
NPushdown::TPredicateNode predicateTree(optionalIf.Predicate());
54-
NPushdown::CollectPredicates(optionalIf.Predicate(), predicateTree, lambdaArg.Get(), TExprBase(lambdaArg), settings);
54+
NPushdown::CollectPredicates(optionalIf.Predicate(), predicateTree, TExprBase(lambdaArg), TExprBase(lambdaArg), settings);
5555
YQL_ENSURE(predicateTree.IsValid(), "Collected filter predicates are invalid");
5656

5757
NPushdown::TPredicateNode predicateToPush = SplitForPartialPushdown(predicateTree, ctx, pos, settings);

ydb/library/yql/providers/common/pushdown/settings.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,16 @@ void TSettings::Enable(ui64 flagsMask, bool set) {
1010
}
1111
}
1212

13+
void TSettings::EnableFunction(const TString& functionName) {
14+
EnabledFunctions.insert(functionName);
15+
}
16+
1317
bool TSettings::IsEnabled(EFeatureFlag flagMask) const {
1418
return (FeatureFlags & flagMask) != 0;
1519
}
1620

21+
bool TSettings::IsEnabledFunction(const TString& functionName) const {
22+
return EnabledFunctions.contains(functionName);
23+
}
24+
1725
} // namespace NYql::NPushdown

ydb/library/yql/providers/common/pushdown/settings.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33

44
#include <util/system/types.h>
55

6+
#include <unordered_set>
7+
68
namespace NYql::NPushdown {
79

810
struct TSettings {
@@ -48,15 +50,20 @@ struct TSettings {
4850

4951
void Enable(ui64 flagsMask, bool set = true);
5052

53+
void EnableFunction(const TString& functionName);
54+
5155
bool IsEnabled(EFeatureFlag flagMask) const;
5256

57+
bool IsEnabledFunction(const TString& functionName) const;
58+
5359
NLog::EComponent GetLogComponent() const {
5460
return LogComponent;
5561
}
5662

5763
private:
5864
const NLog::EComponent LogComponent;
5965
ui64 FeatureFlags = 0;
66+
std::unordered_set<TString> EnabledFunctions;
6067
};
6168

6269
} // namespace NYql::NPushdown

0 commit comments

Comments
 (0)