@@ -782,7 +782,146 @@ std::pair<TVector<TOLAPPredicateNode>, TVector<TOLAPPredicateNode>> SplitForPart
782
782
return {pushable, remaining};
783
783
}
784
784
785
- } // anonymous namespace end
785
+ bool IsSuitableToCollectProjection (TExprBase node) {
786
+ // Currently support only `JsonDocument`.
787
+ if (auto maybeJsonValue = node.Maybe <TCoJsonValue>()) {
788
+ auto jsonMember = maybeJsonValue.Cast ().Json ().Maybe <TCoMember>();
789
+ auto jsonPath = maybeJsonValue.Cast ().JsonPath ().Maybe <TCoUtf8>();
790
+ return jsonMember && jsonPath;
791
+ }
792
+ return false ;
793
+ }
794
+
795
+ // Collects all operations for projections and returns a vector of pair - [columName, olap operation].
796
+ TVector<std::pair<TString, TExprNode::TPtr>> CollectOlapOperationsForProjections (const TExprNode::TPtr& node, const TExprNode& arg,
797
+ TNodeOnNodeOwnedMap& replaces,
798
+ const THashSet<TString>& predicateMembers, TExprContext& ctx) {
799
+ auto asStructPred = [](const TExprNode::TPtr& node) -> bool { return !!TMaybeNode<TCoAsStruct>(node); };
800
+ auto memberPred = [](const TExprNode::TPtr& node) { return !!TMaybeNode<TCoMember>(node); };
801
+
802
+ TVector<std::pair<TString, TExprNode::TPtr>> olapOperationsForProjections;
803
+ // Expressions for projections are placed in `AsStruct` callable.
804
+ if (auto asStruct = FindNode (node, asStructPred)) {
805
+ // Process each child for `AsStruct` callable.
806
+ for (auto child : TExprBase (asStruct).Cast <TCoAsStruct>()) {
807
+ if (IsSuitableToCollectProjection (child.Item (1 ))) {
808
+ // Search for the `TCoMember` in expression, we need expression with only one `TCoMember`.
809
+ if (auto originalMembers = FindNodes (child.Item (1 ).Ptr (), memberPred); originalMembers.size () == 1 ) {
810
+ // Convert YQL op to OLAP op.
811
+ if (auto olapOperations = ConvertComparisonNode (TExprBase (child.Item (1 )), arg, ctx, node->Pos (), false );
812
+ olapOperations.size () == 1 ) {
813
+ auto originalMember = TExprBase (originalMembers.front ()).Cast <TCoMember>();
814
+
815
+ auto originalMemberName = TString (originalMember.Name ());
816
+ // We cannot push projection if some predicate for the same column still not pushed.
817
+ if (!predicateMembers.contains (originalMemberName)) {
818
+ auto newMember = Build<TCoMember>(ctx, node->Pos ())
819
+ .Struct (originalMember.Struct ())
820
+ .Name (originalMember.Name ())
821
+ .Done ();
822
+
823
+ auto olapOperation = olapOperations.front ();
824
+ // Replace full expression with only member.
825
+ replaces[child.Item (1 ).Raw ()] = newMember.Ptr ();
826
+ olapOperationsForProjections.emplace_back (TString (newMember.Name ()), olapOperation.Ptr ());
827
+
828
+ YQL_CLOG (TRACE, ProviderKqp)
829
+ << " [OLAP PROJECTION] Operation in olap dialect: " << KqpExprToPrettyString (olapOperation, ctx);
830
+ }
831
+ }
832
+ }
833
+ }
834
+ }
835
+ }
836
+
837
+ return olapOperationsForProjections;
838
+ }
839
+
840
+ void CollectPredicateMembers (TExprNode::TPtr predicate, THashSet<TString>& predicateMembers) {
841
+ auto memberPred = [](const TExprNode::TPtr& node) { return !!TMaybeNode<TCoMember>(node); };
842
+ auto members = FindNodes (predicate, memberPred);
843
+ for (const auto & member : members) {
844
+ predicateMembers.insert (TString (TExprBase (member).Cast <TCoMember>().Name ()));
845
+ }
846
+ }
847
+
848
+ } // anonymous namespace end
849
+
850
+ TExprBase KqpPushOlapProjections (TExprBase node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx,
851
+ TTypeAnnotationContext& typesCtx)
852
+ {
853
+ Y_UNUSED (typesCtx);
854
+ if (!(kqpCtx.Config ->HasOptEnableOlapPushdown () && kqpCtx.Config ->HasOptEnableOlapPushdownProjections ())) {
855
+ return node;
856
+ }
857
+
858
+ if (!node.Maybe <TCoFlatMap>().Input ().Maybe <TKqpReadOlapTableRanges>()) {
859
+ return node;
860
+ }
861
+
862
+ auto flatmap = node.Cast <TCoFlatMap>();
863
+ const auto & lambda = flatmap.Lambda ();
864
+
865
+ // Collect `TCoMembers` from predicate, we cannot push projection if some predicate for the same column still not pushed.
866
+ THashSet<TString> predicateMembers;
867
+ if (auto maybeOptionalIf = lambda.Body ().Maybe <TCoOptionalIf>()) {
868
+ CollectPredicateMembers (maybeOptionalIf.Cast ().Predicate ().Ptr (), predicateMembers);
869
+ }
870
+
871
+ // Combinations of `OlapAgg` and `OlapProjections` are not supported yet.
872
+ auto olapAggPred = [](const TExprNode::TPtr& node) -> bool { return !!TMaybeNode<TKqpOlapAgg>(node); };
873
+ if (auto maybeOlapAgg = FindNode (lambda.Body ().Ptr (), olapAggPred)) {
874
+ return node;
875
+ }
876
+
877
+ const auto & lambdaArg = lambda.Args ().Arg (0 ).Ref ();
878
+ auto read = flatmap.Input ().Cast <TKqpReadOlapTableRanges>();
879
+
880
+ TNodeOnNodeOwnedMap replaces;
881
+ auto olapOperationsForProjections = CollectOlapOperationsForProjections (flatmap.Ptr (), lambdaArg, replaces, predicateMembers, ctx);
882
+ if (olapOperationsForProjections.empty ()) {
883
+ return node;
884
+ }
885
+
886
+ TVector<TExprBase> projections;
887
+ for (const auto & [columnName, olapOperation] : olapOperationsForProjections) {
888
+ auto olapProjection = Build<TKqpOlapProjection>(ctx, node.Pos ())
889
+ .OlapOperation (olapOperation)
890
+ .ColumnName ().Build (columnName)
891
+ .Done ();
892
+ projections.push_back (olapProjection);
893
+ }
894
+
895
+ auto olapProjections = Build<TKqpOlapProjections>(ctx, node.Pos ())
896
+ .Input (read.Process ().Body ())
897
+ .Projections ()
898
+ .Add (projections)
899
+ .Build ()
900
+ .Done ();
901
+
902
+ auto newLambda = Build<TCoLambda>(ctx, node.Pos ())
903
+ .Args ({" arg" })
904
+ .Body <TExprApplier>()
905
+ .Apply (olapProjections)
906
+ .With (read.Process ().Args ().Arg (0 ), " arg" )
907
+ .Build ()
908
+ .Done ();
909
+
910
+ auto newRead = Build<TKqpReadOlapTableRanges>(ctx, node.Pos ())
911
+ .Table (read.Table ())
912
+ .Ranges (read.Ranges ())
913
+ .Columns (read.Columns ())
914
+ .Settings (read.Settings ())
915
+ .ExplainPrompt (read.ExplainPrompt ())
916
+ .Process (newLambda)
917
+ .Done ();
918
+
919
+ replaces[read.Raw ()] = newRead.Ptr ();
920
+ auto newFlatmap = TExprBase (TExprBase (ctx.ReplaceNodes (flatmap.Ptr (), replaces)).Cast <TCoFlatMap>());
921
+
922
+ YQL_CLOG (TRACE, ProviderKqp) << " [OLAP PROJECTION] After rewrite: " << KqpExprToPrettyString (newFlatmap, ctx);
923
+ return newFlatmap;
924
+ }
786
925
787
926
TExprBase KqpPushOlapFilter (TExprBase node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx,
788
927
TTypeAnnotationContext& typesCtx)
@@ -808,7 +947,6 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz
808
947
809
948
const auto & lambda = flatmap.Lambda ();
810
949
const auto & lambdaArg = lambda.Args ().Arg (0 ).Ref ();
811
-
812
950
YQL_CLOG (TRACE, ProviderKqp) << " Initial OLAP lambda: " << KqpExprToPrettyString (lambda, ctx);
813
951
814
952
const auto maybeOptionalIf = lambda.Body ().Maybe <TCoOptionalIf>();
@@ -819,6 +957,8 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz
819
957
const auto & optionaIf = maybeOptionalIf.Cast ();
820
958
auto predicate = optionaIf.Predicate ();
821
959
auto value = optionaIf.Value ();
960
+ // Use original value in final flatmap, because we need an original ast for the given value in `KqpPushOlapProjection`.
961
+ auto originalValue = value;
822
962
823
963
TOLAPPredicateNode predicateTree;
824
964
predicateTree.ExprNode = predicate.Ptr ();
@@ -938,7 +1078,7 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz
938
1078
.With (lambda.Args ().Arg (0 ), " new_arg" )
939
1079
.Build ()
940
1080
.Value <TExprApplier>()
941
- .Apply (value )
1081
+ .Apply (originalValue )
942
1082
.With (lambda.Args ().Arg (0 ), " new_arg" )
943
1083
.Build ()
944
1084
.Build ()
0 commit comments