@@ -163,6 +163,52 @@ namespace {
163
163
} \
164
164
}
165
165
166
// Generates everything needed for one ASCII case-insensitive comparison UDF:
//   * udfName##Impl          - scalar helper working on the raw argument array;
//   * T##udfName##KernelExec - binary kernel that feeds its result to a sink;
//   * T##udfName and T_yql_##udfName - two UDF classes with the signature
//     bool(TOptional<char*>, char*) that share the helper and the kernel.
// `function` is invoked as function(first, second) on TString copies of the
// two arguments. An empty first argument yields false without calling it.
#define STRING_ASCII_CMP_IGNORE_CASE_UDF(udfName, function) \
    TUnboxedValuePod udfName##Impl(const TUnboxedValuePod* args) { \
        if (!args[0]) { \
            return TUnboxedValuePod(false); \
        } \
        const TString subject(args[0].AsStringRef()); \
        const TString pattern(args[1].AsStringRef()); \
        return TUnboxedValuePod(function(subject, pattern)); \
    } \
    \
    struct T##udfName##KernelExec \
        : public TBinaryKernelExec<T##udfName##KernelExec> \
    { \
        template <typename TSink> \
        static void Process(const IValueBuilder*, TBlockItem arg1, \
                            TBlockItem arg2, const TSink& sink) \
        { \
            if (!arg1) { \
                sink(TBlockItem(false)); \
                return; \
            } \
            const TString subject(arg1.AsStringRef()); \
            const TString pattern(arg2.AsStringRef()); \
            sink(TBlockItem(function(subject, pattern))); \
        } \
    }; \
    \
    BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, \
                                  bool(TOptional<char*>, char*)) \
    { \
        Y_UNUSED(valueBuilder); \
        return udfName##Impl(args); \
    } \
    \
    END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) \
    \
    BEGIN_SIMPLE_STRICT_ARROW_UDF(T_yql_##udfName, \
                                  bool(TOptional<char*>, char*)) \
    { \
        Y_UNUSED(valueBuilder); \
        return udfName##Impl(args); \
    } \
    \
    END_SIMPLE_ARROW_UDF(T_yql_##udfName, T##udfName##KernelExec::Do)
211
+
166
212
#define IS_ASCII_UDF (function ) \
167
213
BEGIN_SIMPLE_STRICT_ARROW_UDF (T##function, bool (TOptional<char *>)) { \
168
214
Y_UNUSED (valueBuilder); \
@@ -368,6 +414,11 @@ namespace {
368
414
XX(HasPrefixIgnoreCase, AsciiHasPrefixIgnoreCase) \
369
415
XX(HasSuffixIgnoreCase, AsciiHasSuffixIgnoreCase)
370
416
417
// X-macro list of the ASCII case-insensitive comparison UDFs. Every entry is
// XX(udfName, function): the UDF name and the function it is generated from.
// The list is expanded elsewhere in this file with
// STRING_ASCII_CMP_IGNORE_CASE_UDF and with STRING_OPT_REGISTER_UDF.
+ #define STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP (XX ) \
418
+ XX (AsciiStartsWithIgnoreCase, AsciiHasPrefixIgnoreCase) \
419
+ XX(AsciiEndsWithIgnoreCase, AsciiHasSuffixIgnoreCase) \
420
+ XX(AsciiEqualsIgnoreCase, AsciiEqualsIgnoreCase)
421
+
371
422
// NOTE: The functions below are marked as deprecated, so block implementation
372
423
// is not required for them. Hence, STROKA_UDF provides only the scalar one at
373
424
// the moment.
@@ -449,6 +500,60 @@ namespace {
449
500
450
501
END_SIMPLE_ARROW_UDF (TContains, TContainsKernelExec::Do);
451
502
503
+ static bool IgnoreCaseComparator (char a, char b) {
504
+ return AsciiToUpper (a) == AsciiToUpper (b);
505
+ }
506
+
507
+ struct TAsciiContainsIgnoreCaseKernelExec
508
+ : public TBinaryKernelExec<TAsciiContainsIgnoreCaseKernelExec>
509
+ {
510
+ template <typename TSink>
511
+ static void Process (const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
512
+ if (!arg1) {
513
+ return sink (TBlockItem (arg2 ? false : true ));
514
+ }
515
+
516
+ const TString haystack (arg1.AsStringRef ());
517
+ const TString needle (arg2.AsStringRef ());
518
+ if (haystack.empty ()) {
519
+ return sink (TBlockItem ((needle.empty ())));
520
+ }
521
+ const auto found = std::search (haystack.cbegin (), haystack.cend (),
522
+ needle.cbegin (), needle.cend (), IgnoreCaseComparator);
523
+ sink (TBlockItem (found != haystack.cend ()));
524
+ }
525
+ };
526
+
527
+ TUnboxedValuePod AsciiContainsIgnoreCaseImpl (const TUnboxedValuePod* args) {
528
+ if (!args[0 ]) {
529
+ return TUnboxedValuePod (false );
530
+ }
531
+
532
+ const TString haystack (args[0 ].AsStringRef ());
533
+ const TString needle (args[1 ].AsStringRef ());
534
+ if (haystack.empty ()) {
535
+ return TUnboxedValuePod (needle.empty ());
536
+ }
537
+ const auto found = std::search (haystack.cbegin (), haystack.cend (),
538
+ needle.cbegin (), needle.cend (), IgnoreCaseComparator);
539
+ return TUnboxedValuePod (found != haystack.cend ());
540
+ }
541
+
542
// UDF class TAsciiContainsIgnoreCase with the signature
// bool(TOptional<char*>, char*). The body delegates to
// AsciiContainsIgnoreCaseImpl; TAsciiContainsIgnoreCaseKernelExec::Do is
// handed to END_SIMPLE_ARROW_UDF (presumably as the block implementation -
// confirm against the macro definition).
+ BEGIN_SIMPLE_STRICT_ARROW_UDF (TAsciiContainsIgnoreCase, bool (TOptional<char *>, char *))
543
+ {
544
+ Y_UNUSED (valueBuilder);
545
+ return AsciiContainsIgnoreCaseImpl (args);
546
+ }
547
+
548
+ END_SIMPLE_ARROW_UDF (TAsciiContainsIgnoreCase, TAsciiContainsIgnoreCaseKernelExec::Do);
549
+
550
// T_yql_-prefixed twin of TAsciiContainsIgnoreCase: same signature, same
// scalar helper (AsciiContainsIgnoreCaseImpl) and same kernel
// (TAsciiContainsIgnoreCaseKernelExec::Do). This is the class name that
// appears in the registration list further down in this file.
+ BEGIN_SIMPLE_STRICT_ARROW_UDF (T_yql_AsciiContainsIgnoreCase, bool (TOptional<char *>, char *))
551
+ {
552
+ Y_UNUSED (valueBuilder);
553
+ return AsciiContainsIgnoreCaseImpl (args);
554
+ }
555
+
556
+ END_SIMPLE_ARROW_UDF (T_yql_AsciiContainsIgnoreCase, TAsciiContainsIgnoreCaseKernelExec::Do);
452
557
453
558
BEGIN_SIMPLE_STRICT_ARROW_UDF (TReplaceAll, char *(TAutoMap<char *>, char *, char *)) {
454
559
if (TString result (args[0 ].AsStringRef ()); SubstGlobal (result, args[1 ].AsStringRef (), args[2 ].AsStringRef ()))
@@ -874,6 +979,7 @@ namespace {
874
979
}
875
980
876
981
// Expander for the *_UDF_MAP lists: turns an entry into its UDF class name
// (T<udfName>) followed by a comma, for use inside a comma-separated list.
#define STRING_REGISTER_UDF (udfName, ...) T##udfName,
982
// Expander for the *_UDF_MAP lists: turns an entry into the T_yql_-prefixed
// UDF class name (T_yql_<udfName>) followed by a comma.
+ #define STRING_OPT_REGISTER_UDF (udfName, ...) T_yql_##udfName,
877
983
878
984
STRING_UDF_MAP (STRING_UDF)
879
985
STRING_UNSAFE_UDF_MAP(STRING_UNSAFE_UDF)
@@ -882,6 +988,7 @@ namespace {
882
988
STROKA_ASCII_CASE_UDF_MAP(STROKA_ASCII_CASE_UDF)
883
989
STROKA_FIND_UDF_MAP(STROKA_FIND_UDF)
884
990
STRING_TWO_ARGS_UDF_MAP(STRING_TWO_ARGS_UDF)
991
+ STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP(STRING_ASCII_CMP_IGNORE_CASE_UDF)
885
992
IS_ASCII_UDF_MAP(IS_ASCII_UDF)
886
993
887
994
static constexpr ui64 padLim = 1000000;
@@ -898,6 +1005,8 @@ namespace {
898
1005
STROKA_ASCII_CASE_UDF_MAP(STRING_REGISTER_UDF)
899
1006
STROKA_FIND_UDF_MAP(STRING_REGISTER_UDF)
900
1007
STRING_TWO_ARGS_UDF_MAP(STRING_REGISTER_UDF)
1008
+ // STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP(STRING_REGISTER_UDF) is not going to be exposed until 2025.2
1009
+ STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP(STRING_OPT_REGISTER_UDF)
901
1010
IS_ASCII_UDF_MAP(STRING_REGISTER_UDF)
902
1011
STRING_STREAM_PAD_FORMATTER_UDF_MAP(STRING_REGISTER_UDF)
903
1012
STRING_STREAM_NUM_FORMATTER_UDF_MAP(STRING_REGISTER_UDF)
@@ -911,6 +1020,8 @@ namespace {
911
1020
TRemoveFirst,
912
1021
TRemoveLast,
913
1022
TContains,
1023
+ // TAsciiContainsIgnoreCase, not going to be exposed until 2025.2
1024
+ T_yql_AsciiContainsIgnoreCase,
914
1025
TFind,
915
1026
TReverseFind,
916
1027
TSubstring,
0 commit comments