Skip to content

Commit b6cfc35

Browse files
igormunkin authored and zverevgeny committed
YQL-19923: Add Ascii{Contains,Equals}IgnoreCase functions to String UDF
commit_hash:6717ca10951c933df9cf8757763cfbae15facbbf Conflicts: yql/essentials/docs/en/changelog/2025.02.md yql/essentials/docs/ru/changelog/2025.02.md yql/essentials/udfs/common/string/string_udf.cpp
1 parent 13611d0 commit b6cfc35

File tree

7 files changed

+147
-4
lines changed

7 files changed

+147
-4
lines changed

yql/essentials/docs/en/udf/list/string.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,12 @@ Functions for ASCII strings:
3232

3333
* `String::CollapseText(String{Flags:AutoMap}, Uint64) -> String`
3434

35+
* `String::AsciiEqualsIgnoreCase(String?, String) -> Bool`: Added in version [2025.02](../../changelog/2025.02.md#string-module)
36+
3537
* `String::Contains(String?, String) -> Bool`
3638

39+
* `String::AsciiContainsIgnoreCase(String?, String) -> Bool`: Added in version [2025.02](../../changelog/2025.02.md#string-module)
40+
3741
* `String::Find(String{Flags:AutoMap}, String, [Uint64?]) -> Int64`: Returns the first position found or -1. The optional argument is the offset from the beginning of the string.
3842

3943
* `String::ReverseFind(String{Flags:AutoMap}, String, [Uint64?]) -> Int64`: Returns the last position found or -1. The optional argument is the offset from the beginning of the string.

yql/essentials/docs/ru/udf/list/string.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@ SELECT String::Strip("YQL "); -- "YQL"
4747

4848
Проверяет наличие подстроки в строке.
4949

50+
* `String::AsciiContainsIgnoreCase(string:String?, substring:String) -> Bool`
51+
* `String::AsciiEqualsIgnoreCase(left:String?, right:String) -> Bool`
52+
Проверяют наличие подстроки или полное равенство строк без учета регистра символов.
53+
5054
* `String::Find(string:String{Flags:AutoMap}, String, [Uint64?]) -> Int64` - Устаревшая: используйте встроенную функцию [Find](../../builtins/basic.md#find)
5155
* `String::ReverseFind(string:String{Flags:AutoMap}, String, [Uint64?]) -> Int64` - Устаревшая: используйте встроенную функцию [RFind](../../builtins/basic.md#rfind)
5256
* `String::Substring(string:String{Flags:AutoMap}, [Uint64?, Uint64?]) -> String` - Устаревшая: используйте встроенную функцию [Substring](../../builtins/basic.md#substring)

yql/essentials/udfs/common/string/string_udf.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,12 @@ namespace {
404404
XX(HasPrefixIgnoreCase, AsciiHasPrefixIgnoreCase) \
405405
XX(HasSuffixIgnoreCase, AsciiHasSuffixIgnoreCase)
406406

407+
// X-macro list of ASCII case-insensitive comparison UDFs. Each XX(...) entry
// pairs a UDF name (first argument) with the comparison function named in the
// second argument.
// NOTE(review): the macro passed as XX is not visible in this hunk; presumably
// it expands every entry into a UDF definition -- confirm against its use site.
#define STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP(XX) \
408+
XX(AsciiStartsWithIgnoreCase, AsciiHasPrefixIgnoreCase) \
409+
XX(AsciiEndsWithIgnoreCase, AsciiHasSuffixIgnoreCase) \
410+
XX(AsciiEqualsIgnoreCase, AsciiEqualsIgnoreCase)
411+
412+
// NOTE: The functions below are marked as deprecated, so block implementation
407413
// is not required for them. Hence, STROKA_UDF provides only the scalar one at
408414
// the moment.
409415
#define STROKA_UDF_MAP(XX) \
@@ -483,6 +489,50 @@ namespace {
483489

484490
END_SIMPLE_ARROW_UDF(TContains, TContainsKernelExec::Do);
485491

492+
static bool IgnoreCaseComparator(char a, char b) {
493+
return AsciiToUpper(a) == AsciiToUpper(b);
494+
}
495+
496+
// Scalar implementation of String::AsciiContainsIgnoreCase(String?, String) -> Bool.
//
// args[0] is the optional haystack, args[1] is the needle. Returns:
//   * false when the haystack is NULL;
//   * whether the needle is empty when the haystack is empty;
//   * otherwise, whether the needle occurs in the haystack, comparing
//     characters with IgnoreCaseComparator.
BEGIN_SIMPLE_STRICT_ARROW_UDF_OPTIONS(TAsciiContainsIgnoreCase, bool(TOptional<char*>, char*),
                                      builder.SetMinLangVer(NYql::MakeLangVersion(2025, 2)))
{
    Y_UNUSED(valueBuilder);
    if (!args[0]) {
        return TUnboxedValuePod(false);
    }

    // Both strings are only read, so search directly over the argument bytes
    // instead of copying each of them into an owning TString on every call.
    const auto haystack = args[0].AsStringRef();
    const auto needle = args[1].AsStringRef();
    const char* const haystackBegin = haystack.Data();
    const char* const haystackEnd = haystackBegin + haystack.Size();
    const char* const needleBegin = needle.Data();
    const char* const needleEnd = needleBegin + needle.Size();

    if (haystackBegin == haystackEnd) {
        // std::search returns `last` for an empty haystack whatever the needle
        // is, which would read as "not found"; an empty needle must still match.
        return TUnboxedValuePod(needleBegin == needleEnd);
    }

    const auto found = std::search(haystackBegin, haystackEnd,
                                   needleBegin, needleEnd, IgnoreCaseComparator);
    return TUnboxedValuePod(found != haystackEnd);
}
513+
514+
struct TAsciiContainsIgnoreCaseKernelExec
515+
: public TBinaryKernelExec<TAsciiContainsIgnoreCaseKernelExec>
516+
{
517+
template <typename TSink>
518+
static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) {
519+
if (!arg1) {
520+
return sink(TBlockItem(arg2 ? false : true));
521+
}
522+
523+
const TString haystack(arg1.AsStringRef());
524+
const TString needle(arg2.AsStringRef());
525+
if (haystack.empty()) {
526+
return sink(TBlockItem((needle.empty())));
527+
}
528+
const auto found = std::search(haystack.cbegin(), haystack.cend(),
529+
needle.cbegin(), needle.cend(), IgnoreCaseComparator);
530+
sink(TBlockItem(found != haystack.cend()));
531+
}
532+
};
533+
534+
END_SIMPLE_ARROW_UDF(TAsciiContainsIgnoreCase, TAsciiContainsIgnoreCaseKernelExec::Do);
535+
486536

487537
BEGIN_SIMPLE_STRICT_ARROW_UDF(TReplaceAll, char*(TAutoMap<char*>, char*, char*)) {
488538
if (TString result(args[0].AsStringRef()); SubstGlobal(result, args[1].AsStringRef(), args[2].AsStringRef()))
@@ -947,6 +997,7 @@ namespace {
947997
TRemoveFirst,
948998
TRemoveLast,
949999
TContains,
1000+
TAsciiContainsIgnoreCase,
9501001
TFind,
9511002
TReverseFind,
9521003
TSubstring,

yql/essentials/udfs/common/string/test/canondata/test.test_AsciiCmpIgnoreCase_/results.txt

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,20 @@
1414
"String"
1515
]
1616
];
17+
[
18+
"iccontains";
19+
[
20+
"DataType";
21+
"Bool"
22+
]
23+
];
24+
[
25+
"icempty";
26+
[
27+
"DataType";
28+
"Bool"
29+
]
30+
];
1731
[
1832
"icstarts";
1933
[
@@ -27,6 +41,13 @@
2741
"DataType";
2842
"Bool"
2943
]
44+
];
45+
[
46+
"icequals";
47+
[
48+
"DataType";
49+
"Bool"
50+
]
3051
]
3152
]
3253
]
@@ -35,31 +56,49 @@
3556
[
3657
"fdsa";
3758
%false;
38-
%false
59+
%true;
60+
%false;
61+
%false;
62+
%true
3963
];
4064
[
4165
"aswedfg";
4266
%true;
67+
%true;
68+
%true;
69+
%false;
4370
%false
4471
];
4572
[
4673
"asdadsaasd";
4774
%true;
75+
%true;
76+
%true;
77+
%false;
4878
%false
4979
];
5080
[
5181
"gdsfsassas";
82+
%true;
83+
%true;
5284
%false;
53-
%true
85+
%true;
86+
%false
5487
];
5588
[
5689
"";
5790
%false;
91+
%true;
92+
%false;
93+
%false;
5894
%false
5995
];
6096
[
6197
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
6298
%false;
99+
%true;
100+
%false;
101+
%false;
63102
%false
64103
]
65104
]

yql/essentials/udfs/common/string/test/canondata/test.test_BlockAsciiCmpIgnoreCase_/results.txt

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,20 @@
1414
"String"
1515
]
1616
];
17+
[
18+
"iccontains";
19+
[
20+
"DataType";
21+
"Bool"
22+
]
23+
];
24+
[
25+
"icempty";
26+
[
27+
"DataType";
28+
"Bool"
29+
]
30+
];
1731
[
1832
"icstarts";
1933
[
@@ -27,6 +41,13 @@
2741
"DataType";
2842
"Bool"
2943
]
44+
];
45+
[
46+
"icequals";
47+
[
48+
"DataType";
49+
"Bool"
50+
]
3051
]
3152
]
3253
]
@@ -35,31 +56,49 @@
3556
[
3657
"fdsa";
3758
%false;
38-
%false
59+
%true;
60+
%false;
61+
%false;
62+
%true
3963
];
4064
[
4165
"aswedfg";
4266
%true;
67+
%true;
68+
%true;
69+
%false;
4370
%false
4471
];
4572
[
4673
"asdadsaasd";
4774
%true;
75+
%true;
76+
%true;
77+
%false;
4878
%false
4979
];
5080
[
5181
"gdsfsassas";
82+
%true;
83+
%true;
5284
%false;
53-
%true
85+
%true;
86+
%false
5487
];
5588
[
5689
"";
5790
%false;
91+
%true;
92+
%false;
93+
%false;
5894
%false
5995
];
6096
[
6197
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
6298
%false;
99+
%true;
100+
%false;
101+
%false;
63102
%false
64103
]
65104
]
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
SELECT
22
value,
3+
String::AsciiContainsIgnoreCase(value, "AS") AS iccontains,
4+
String::AsciiContainsIgnoreCase(value, "") AS icempty,
35
String::AsciiStartsWithIgnoreCase(value, "AS") AS icstarts,
46
String::AsciiEndsWithIgnoreCase(value, "AS") AS icends,
7+
String::AsciiEqualsIgnoreCase(value, "FDSA") AS icequals,
58
FROM Input;

yql/essentials/udfs/common/string/test/cases/BlockAsciiCmpIgnoreCase.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@ PRAGMA UseBlocks;
33

44
SELECT
55
value,
6+
String::AsciiContainsIgnoreCase(value, "AS") AS iccontains,
7+
String::AsciiContainsIgnoreCase(value, "") AS icempty,
68
String::AsciiStartsWithIgnoreCase(value, "AS") AS icstarts,
79
String::AsciiEndsWithIgnoreCase(value, "AS") AS icends,
10+
String::AsciiEqualsIgnoreCase(value, "FDSA") AS icequals,
811
FROM Input;

0 commit comments

Comments
 (0)