@@ -49,7 +49,7 @@ bool IsSupportedDataType(const TCoDataCtor& node, bool allowOlapApply) {
49
49
return false ;
50
50
}
51
51
52
- bool IsSupportedCast (const TCoSafeCast& cast, bool allowOlapApply= false ) {
52
+ bool IsSupportedCast (const TCoSafeCast& cast, bool allowOlapApply) {
53
53
auto maybeDataType = cast.Type ().Maybe <TCoDataType>();
54
54
if (!maybeDataType) {
55
55
if (const auto maybeOptionalType = cast.Type ().Maybe <TCoOptionalType>()) {
@@ -111,8 +111,26 @@ bool IsGoodTypeForComparsionPushdown(const TTypeAnnotationNode& type, bool allow
111
111
NUdf::EDataTypeFeatures::TimeIntervalType) & features) && !(NUdf::EDataTypeFeatures::TzDateType & features)));
112
112
}
113
113
114
+ bool CanPushdownStringUdf (const TExprNode& udf, bool pushdownSubstring) { // Decides whether `udf` is one of the String UDFs accepted for pushdown; always false when pushdownSubstring is false.
115
+ if (!pushdownSubstring) { // pushdownSubstring disabled: reject every UDF without inspecting its name
116
+ return false ;
117
+ }
118
+ const auto & name = udf.Head ().Content (); // the name checked below is the content of udf's head child
119
+ static const THashSet<TString> substringMatchUdfs = { // fixed allow-list of UDF names, held in a function-local static
120
+ " String.AsciiEqualsIgnoreCase" ,
121
+ // remaining entries: contains / starts-with / ends-with matchers and their Ascii*IgnoreCase variants
122
+ " String.Contains" ,
123
+ " String.AsciiContainsIgnoreCase" ,
124
+ " String.StartsWith" ,
125
+ " String.AsciiStartsWithIgnoreCase" ,
126
+ " String.EndsWith" ,
127
+ " String.AsciiEndsWithIgnoreCase"
128
+ };
129
+ return substringMatchUdfs.contains (name); // accepted only if the name is a member of the allow-list
130
+ }
131
+
114
132
[[maybe_unused]]
115
- bool AbstractTreeCanBePushed (const TExprBase& expr, const TExprNode* ) {
133
+ bool AbstractTreeCanBePushed (const TExprBase& expr, const TExprNode*, bool pushdownSubstring ) {
116
134
if (!expr.Ref ().IsCallable ({" Apply" , " Coalesce" , " NamedApply" , " IfPresent" , " Visit" })) {
117
135
return false ;
118
136
}
@@ -132,7 +150,7 @@ bool AbstractTreeCanBePushed(const TExprBase& expr, const TExprNode* ) {
132
150
for (const auto & apply : applies) {
133
151
const auto & udf = SkipCallables (apply->Head (), {" AssumeStrict" });
134
152
const auto & udfName = udf.Head ();
135
- if (!(udfName.Content ().starts_with (" Json2." ) || udfName.Content ().starts_with (" Re2." ))) {
153
+ if (!(udfName.Content ().starts_with (" Json2." ) || udfName.Content ().starts_with (" Re2." ) || CanPushdownStringUdf (udf, pushdownSubstring) )) {
136
154
return false ;
137
155
}
138
156
@@ -159,8 +177,8 @@ bool IfPresentCanBePushed(const TCoIfPresent& ifPresent, const TExprNode* lambda
159
177
return allowOlapApply;
160
178
}
161
179
162
- bool CheckExpressionNodeForPushdown (const TExprBase& node, const TExprNode* lambdaArg, bool allowOlapApply ) {
163
- if (allowOlapApply ) {
180
+ bool CheckExpressionNodeForPushdown (const TExprBase& node, const TExprNode* lambdaArg, const TPushdownOptions& options ) {
181
+ if (options. AllowOlapApply ) {
164
182
if (node.Maybe <TCoJust>() || node.Maybe <TCoCoalesce>()) {
165
183
return true ;
166
184
}
@@ -174,9 +192,9 @@ bool CheckExpressionNodeForPushdown(const TExprBase& node, const TExprNode* lamb
174
192
}
175
193
176
194
if (const auto maybeSafeCast = node.Maybe <TCoSafeCast>()) {
177
- return IsSupportedCast (maybeSafeCast.Cast (), allowOlapApply );
195
+ return IsSupportedCast (maybeSafeCast.Cast (), options. AllowOlapApply );
178
196
} else if (const auto maybeData = node.Maybe <TCoDataCtor>()) {
179
- return IsSupportedDataType (maybeData.Cast (), allowOlapApply );
197
+ return IsSupportedDataType (maybeData.Cast (), options. AllowOlapApply );
180
198
} else if (const auto maybeMember = node.Maybe <TCoMember>()) {
181
199
return IsMemberColumn (maybeMember.Cast (), lambdaArg);
182
200
} else if (const auto maybeJsonValue = node.Maybe <TCoJsonValue>()) {
@@ -187,23 +205,23 @@ bool CheckExpressionNodeForPushdown(const TExprBase& node, const TExprNode* lamb
187
205
}
188
206
189
207
if (const auto op = node.Maybe <TCoUnaryArithmetic>()) {
190
- return CheckExpressionNodeForPushdown (op.Cast ().Arg (), lambdaArg, allowOlapApply ) && IsGoodTypeForArithmeticPushdown (*op.Cast ().Ref ().GetTypeAnn (), allowOlapApply );
208
+ return CheckExpressionNodeForPushdown (op.Cast ().Arg (), lambdaArg, options ) && IsGoodTypeForArithmeticPushdown (*op.Cast ().Ref ().GetTypeAnn (), options. AllowOlapApply );
191
209
} else if (const auto op = node.Maybe <TCoBinaryArithmetic>()) {
192
- return CheckExpressionNodeForPushdown (op.Cast ().Left (), lambdaArg, allowOlapApply ) && CheckExpressionNodeForPushdown (op.Cast ().Right (), lambdaArg, allowOlapApply )
193
- && IsGoodTypeForArithmeticPushdown (*op.Cast ().Ref ().GetTypeAnn (), allowOlapApply ) && !op.Cast ().Maybe <TCoAggrAdd>();
210
+ return CheckExpressionNodeForPushdown (op.Cast ().Left (), lambdaArg, options ) && CheckExpressionNodeForPushdown (op.Cast ().Right (), lambdaArg, options )
211
+ && IsGoodTypeForArithmeticPushdown (*op.Cast ().Ref ().GetTypeAnn (), options. AllowOlapApply ) && !op.Cast ().Maybe <TCoAggrAdd>();
194
212
}
195
213
196
- if (allowOlapApply ) {
214
+ if (options. AllowOlapApply ) {
197
215
if (const auto maybeIfPresent = node.Maybe <TCoIfPresent>()) {
198
- return IfPresentCanBePushed (maybeIfPresent.Cast (), lambdaArg, allowOlapApply );
216
+ return IfPresentCanBePushed (maybeIfPresent.Cast (), lambdaArg, options. AllowOlapApply );
199
217
}
200
- return AbstractTreeCanBePushed (node, lambdaArg);
218
+ return AbstractTreeCanBePushed (node, lambdaArg, options. PushdownSubstring );
201
219
}
202
220
203
221
return false ;
204
222
}
205
223
206
- bool IsGoodTypesForPushdownCompare (const TTypeAnnotationNode& typeOne, const TTypeAnnotationNode& typeTwo, bool allowOlapApply ) {
224
+ bool IsGoodTypesForPushdownCompare (const TTypeAnnotationNode& typeOne, const TTypeAnnotationNode& typeTwo, const TPushdownOptions& options ) {
207
225
const auto & rawOne = RemoveOptionality (typeOne);
208
226
const auto & rawTwo = RemoveOptionality (typeTwo);
209
227
if (IsSameAnnotation (rawOne, rawTwo))
@@ -225,22 +243,22 @@ bool IsGoodTypesForPushdownCompare(const TTypeAnnotationNode& typeOne, const TTy
225
243
if (size != itemsTwo.size ())
226
244
return false ;
227
245
for (auto i = 0U ; i < size; ++i) {
228
- if (!IsGoodTypesForPushdownCompare (*itemsOne[i], *itemsTwo[i], allowOlapApply )) {
246
+ if (!IsGoodTypesForPushdownCompare (*itemsOne[i], *itemsTwo[i], options )) {
229
247
return false ;
230
248
}
231
249
}
232
250
return true ;
233
251
}
234
252
case ETypeAnnotationKind::Data: {
235
- return IsGoodTypeForComparsionPushdown (typeOne, allowOlapApply ) && IsGoodTypeForComparsionPushdown (typeTwo, allowOlapApply );
253
+ return IsGoodTypeForComparsionPushdown (typeOne, options. AllowOlapApply ) && IsGoodTypeForComparsionPushdown (typeTwo, options. AllowOlapApply );
236
254
}
237
255
default :
238
256
break ;
239
257
}
240
258
return false ;
241
259
}
242
260
243
- bool CheckComparisonParametersForPushdown (const TCoCompare& compare, const TExprNode* lambdaArg, const TExprBase& input, bool allowOlapApply ) {
261
+ bool CheckComparisonParametersForPushdown (const TCoCompare& compare, const TExprNode* lambdaArg, const TExprBase& input, const TPushdownOptions& options ) {
244
262
const auto * inputType = input.Ref ().GetTypeAnn ();
245
263
switch (inputType->GetKind ()) {
246
264
case ETypeAnnotationKind::Flow:
@@ -263,7 +281,7 @@ bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExpr
263
281
return false ;
264
282
}
265
283
266
- if (!IsGoodTypesForPushdownCompare (*compare.Left ().Ref ().GetTypeAnn (), *compare.Right ().Ref ().GetTypeAnn (), allowOlapApply )) {
284
+ if (!IsGoodTypesForPushdownCompare (*compare.Left ().Ref ().GetTypeAnn (), *compare.Right ().Ref ().GetTypeAnn (), options )) {
267
285
return false ;
268
286
}
269
287
@@ -272,19 +290,19 @@ bool CheckComparisonParametersForPushdown(const TCoCompare& compare, const TExpr
272
290
YQL_ENSURE (leftList.size () == rightList.size (), " Different sizes of lists in comparison!" );
273
291
274
292
for (size_t i = 0 ; i < leftList.size (); ++i) {
275
- if (!CheckExpressionNodeForPushdown (leftList[i], lambdaArg, allowOlapApply ) || !CheckExpressionNodeForPushdown (rightList[i], lambdaArg, allowOlapApply )) {
293
+ if (!CheckExpressionNodeForPushdown (leftList[i], lambdaArg, options ) || !CheckExpressionNodeForPushdown (rightList[i], lambdaArg, options )) {
276
294
return false ;
277
295
}
278
296
}
279
297
280
298
return true ;
281
299
}
282
300
283
- bool CompareCanBePushed (const TCoCompare& compare, const TExprNode* lambdaArg, const TExprBase& lambdaBody, bool allowOlapApply ) {
284
- return IsSupportedPredicate (compare) && CheckComparisonParametersForPushdown (compare, lambdaArg, lambdaBody, allowOlapApply );
301
+ bool CompareCanBePushed (const TCoCompare& compare, const TExprNode* lambdaArg, const TExprBase& lambdaBody, const TPushdownOptions& options ) { // A comparison can be pushed only if both checks below succeed.
302
+ return IsSupportedPredicate (compare) && CheckComparisonParametersForPushdown (compare, lambdaArg, lambdaBody, options ); // short-circuits: parameters are checked only when IsSupportedPredicate(compare) is true; `options` is forwarded unchanged
285
303
}
286
304
287
- bool SafeCastCanBePushed (const TCoFlatMap& flatmap, const TExprNode* lambdaArg, bool allowOlapApply ) {
305
+ bool SafeCastCanBePushed (const TCoFlatMap& flatmap, const TExprNode* lambdaArg, const TPushdownOptions& options ) {
288
306
/*
289
307
* There are three ways of comparison in following format:
290
308
*
@@ -305,7 +323,7 @@ bool SafeCastCanBePushed(const TCoFlatMap& flatmap, const TExprNode* lambdaArg,
305
323
YQL_ENSURE (leftList.size () == rightList.size (), " Different sizes of lists in comparison!" );
306
324
307
325
for (size_t i = 0 ; i < leftList.size (); ++i) {
308
- if (!CheckExpressionNodeForPushdown (leftList[i], lambdaArg, allowOlapApply ) || !CheckExpressionNodeForPushdown (rightList[i], lambdaArg, allowOlapApply )) {
326
+ if (!CheckExpressionNodeForPushdown (leftList[i], lambdaArg, options ) || !CheckExpressionNodeForPushdown (rightList[i], lambdaArg, options )) {
309
327
return false ;
310
328
}
311
329
}
@@ -339,20 +357,20 @@ bool JsonExistsCanBePushed(const TCoJsonExists& jsonExists, const TExprNode* lam
339
357
return true ;
340
358
}
341
359
342
- bool CoalesceCanBePushed (const TCoCoalesce& coalesce, const TExprNode* lambdaArg, const TExprBase& lambdaBody, bool allowOlapApply ) {
360
+ bool CoalesceCanBePushed (const TCoCoalesce& coalesce, const TExprNode* lambdaArg, const TExprBase& lambdaBody, const TPushdownOptions& options ) {
343
361
if (!coalesce.Value ().Maybe <TCoBool>()) {
344
362
return false ;
345
363
}
346
364
347
365
const auto predicate = coalesce.Predicate ();
348
366
if (const auto maybeCompare = predicate.Maybe <TCoCompare>()) {
349
- return CompareCanBePushed (maybeCompare.Cast (), lambdaArg, lambdaBody, allowOlapApply );
367
+ return CompareCanBePushed (maybeCompare.Cast (), lambdaArg, lambdaBody, options );
350
368
} else if (const auto maybeFlatmap = predicate.Maybe <TCoFlatMap>()) {
351
- return SafeCastCanBePushed (maybeFlatmap.Cast (), lambdaArg, allowOlapApply );
369
+ return SafeCastCanBePushed (maybeFlatmap.Cast (), lambdaArg, options );
352
370
} else if (const auto maybeJsonExists = predicate.Maybe <TCoJsonExists>()) {
353
371
return JsonExistsCanBePushed (maybeJsonExists.Cast (), lambdaArg);
354
372
} else if (const auto maybeIfPresent = predicate.Maybe <TCoIfPresent>()) {
355
- return IfPresentCanBePushed (maybeIfPresent.Cast (), lambdaArg, allowOlapApply );
373
+ return IfPresentCanBePushed (maybeIfPresent.Cast (), lambdaArg, options. AllowOlapApply );
356
374
}
357
375
358
376
return false ;
@@ -362,7 +380,7 @@ bool ExistsCanBePushed(const TCoExists& exists, const TExprNode* lambdaArg) {
362
380
return IsMemberColumn (exists.Optional (), lambdaArg);
363
381
}
364
382
365
- void CollectChildrenPredicates (const TExprNode& opNode, TOLAPPredicateNode& predicateTree, const TExprNode* lambdaArg, const TExprBase& lambdaBody, bool allowOlapApply ) {
383
+ void CollectChildrenPredicates (const TExprNode& opNode, TOLAPPredicateNode& predicateTree, const TExprNode* lambdaArg, const TExprBase& lambdaBody, const TPushdownOptions& options ) {
366
384
predicateTree.Children .reserve (opNode.ChildrenSize ());
367
385
predicateTree.CanBePushed = true ;
368
386
predicateTree.CanBePushedApply = true ;
@@ -374,8 +392,9 @@ void CollectChildrenPredicates(const TExprNode& opNode, TOLAPPredicateNode& pred
374
392
child.CanBePushed = IsSupportedDataType (maybeCtor.Cast (), false );
375
393
child.CanBePushedApply = IsSupportedDataType (maybeCtor.Cast (), true );
376
394
}
377
- else
378
- CollectPredicates (TExprBase (child.ExprNode ), child, lambdaArg, lambdaBody, allowOlapApply);
395
+ else {
396
+ CollectPredicates (TExprBase (child.ExprNode ), child, lambdaArg, lambdaBody, options);
397
+ }
379
398
predicateTree.Children .emplace_back (child);
380
399
predicateTree.CanBePushed &= child.CanBePushed ;
381
400
predicateTree.CanBePushedApply &= child.CanBePushedApply ;
@@ -384,15 +403,15 @@ void CollectChildrenPredicates(const TExprNode& opNode, TOLAPPredicateNode& pred
384
403
385
404
} // namespace
386
405
387
- void CollectPredicates (const TExprBase& predicate, TOLAPPredicateNode& predicateTree, const TExprNode* lambdaArg, const TExprBase& lambdaBody, bool allowOlapApply ) {
406
+ void CollectPredicates (const TExprBase& predicate, TOLAPPredicateNode& predicateTree, const TExprNode* lambdaArg, const TExprBase& lambdaBody, const TPushdownOptions& options ) {
388
407
if (predicate.Maybe <TCoNot>() || predicate.Maybe <TCoAnd>() || predicate.Maybe <TCoOr>() || predicate.Maybe <TCoXor>()) {
389
- CollectChildrenPredicates (predicate.Ref (), predicateTree, lambdaArg, lambdaBody, allowOlapApply );
408
+ CollectChildrenPredicates (predicate.Ref (), predicateTree, lambdaArg, lambdaBody, options );
390
409
} else if (const auto maybeCoalesce = predicate.Maybe <TCoCoalesce>()) {
391
- predicateTree.CanBePushed = CoalesceCanBePushed (maybeCoalesce.Cast (), lambdaArg, lambdaBody, false );
392
- predicateTree.CanBePushedApply = CoalesceCanBePushed (maybeCoalesce.Cast (), lambdaArg, lambdaBody, true );
410
+ predicateTree.CanBePushed = CoalesceCanBePushed (maybeCoalesce.Cast (), lambdaArg, lambdaBody, { false , options. PushdownSubstring } );
411
+ predicateTree.CanBePushedApply = CoalesceCanBePushed (maybeCoalesce.Cast (), lambdaArg, lambdaBody, { true , options. PushdownSubstring } );
393
412
} else if (const auto maybeCompare = predicate.Maybe <TCoCompare>()) {
394
- predicateTree.CanBePushed = CompareCanBePushed (maybeCompare.Cast (), lambdaArg, lambdaBody, false );
395
- predicateTree.CanBePushedApply = CompareCanBePushed (maybeCompare.Cast (), lambdaArg, lambdaBody, true );
413
+ predicateTree.CanBePushed = CompareCanBePushed (maybeCompare.Cast (), lambdaArg, lambdaBody, { false , options. PushdownSubstring } );
414
+ predicateTree.CanBePushedApply = CompareCanBePushed (maybeCompare.Cast (), lambdaArg, lambdaBody, { true , options. PushdownSubstring } ); // Apply variant must be evaluated with AllowOlapApply = true (as in the Coalesce branch above); passing false here just duplicated CanBePushed.
396
415
} else if (const auto maybeExists = predicate.Maybe <TCoExists>()) {
397
416
predicateTree.CanBePushed = ExistsCanBePushed (maybeExists.Cast (), lambdaArg);
398
417
predicateTree.CanBePushedApply = predicateTree.CanBePushed ;
@@ -401,12 +420,12 @@ void CollectPredicates(const TExprBase& predicate, TOLAPPredicateNode& predicate
401
420
predicateTree.CanBePushedApply = predicateTree.CanBePushed ;
402
421
}
403
422
404
- if (allowOlapApply && !predicateTree.CanBePushedApply ){
423
+ if (options. AllowOlapApply && !predicateTree.CanBePushedApply ){
405
424
if (predicate.Maybe <TCoIf>() || predicate.Maybe <TCoJust>() || predicate.Maybe <TCoCoalesce>()) {
406
- CollectChildrenPredicates (predicate.Ref (), predicateTree, lambdaArg, lambdaBody, true );
425
+ CollectChildrenPredicates (predicate.Ref (), predicateTree, lambdaArg, lambdaBody, { true , options. PushdownSubstring } );
407
426
}
408
427
if (!predicateTree.CanBePushedApply ) {
409
- predicateTree.CanBePushedApply = AbstractTreeCanBePushed (predicate, lambdaArg);
428
+ predicateTree.CanBePushedApply = AbstractTreeCanBePushed (predicate, lambdaArg, options. PushdownSubstring );
410
429
}
411
430
}
412
431
}
0 commit comments