Skip to content

Commit 13611d0

Browse files
igormunkin authored and zverevgeny committed
YQL-19884: Add Ascii{Starts,Ends}WithIgnoreCase functions to String UDF
commit_hash:4b86982498876ec14632c0a018a940c3393bb5d6

Conflicts:
- yql/essentials/docs/en/changelog/2025.02.md
- yql/essentials/docs/ru/changelog/2025.02.md
- yql/essentials/udfs/common/string/string_udf.cpp
1 parent 22f38c2 commit 13611d0

File tree

14 files changed

+298
-30
lines changed

14 files changed

+298
-30
lines changed

yql/essentials/docs/en/udf/list/string.md

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,21 +38,25 @@ Functions for ASCII strings:
3838

3939
* `String::ReverseFind(String{Flags:AutoMap}, String, [Uint64?]) -> Int64`: Returns the last position found or -1. The optional argument is the offset from the beginning of the string.
4040

41+
* `String::AsciiStartsWithIgnoreCase(String?, String) -> Bool` — added in version [2025.02](../../changelog/2025.02.md#string-module)
42+
4143
* `String::HasPrefix(String?, String) -> Bool`
4244

43-
* `String::HasPrefixIgnoreCase(String?, String) -> Bool`
45+
* `String::HasPrefixIgnoreCase(String?, String) -> Bool` — removed in version [2025.02](../../changelog/2025.02.md#string-module)
4446

4547
* `String::StartsWith(String?, String) -> Bool`
4648

47-
* `String::StartsWithIgnoreCase(String?, String) -> Bool`
49+
* `String::StartsWithIgnoreCase(String?, String) -> Bool` — removed in version [2025.02](../../changelog/2025.02.md#string-module)
50+
51+
* `String::AsciiEndsWithIgnoreCase(String?, String) -> Bool` — added in version [2025.02](../../changelog/2025.02.md#string-module)
4852

4953
* `String::HasSuffix(String?, String) -> Bool`
5054

51-
* `String::HasSuffixIgnoreCase(String?, String) -> Bool`
55+
* `String::HasSuffixIgnoreCase(String?, String) -> Bool` — removed in version [2025.02](../../changelog/2025.02.md#string-module)
5256

5357
* `String::EndsWith(String?, String) -> Bool`
5458

55-
* `String::EndsWithIgnoreCase(String?, String) -> Bool`
59+
* `String::EndsWithIgnoreCase(String?, String) -> Bool` — removed in version [2025.02](../../changelog/2025.02.md#string-module)
5660

5761
* `String::Substring(String{Flags:AutoMap}, [Uint64?, Uint64?]) -> String`
5862

yql/essentials/docs/ru/udf/list/string.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,12 @@ SELECT String::Strip("YQL "); -- "YQL"
5858

5959
Устаревшие функции, к использованию не рекомендуются.
6060

61-
* `String::HasPrefixIgnoreCase(string:String?, prefix:String) -> Bool`
62-
* `String::StartsWithIgnoreCase(string:String?, prefix:String) -> Bool`
63-
* `String::HasSuffixIgnoreCase(string:String?, suffix:String) -> Bool`
64-
* `String::EndsWithIgnoreCase(string:String?, suffix:String) -> Bool`
61+
* `String::AsciiStartsWithIgnoreCase(string:String?, prefix:String) -> Bool` - добавлена в версии [2025.02](../../changelog/2025.02.md#string-module)
62+
* `String::AsciiEndsWithIgnoreCase(string:String?, suffix:String) -> Bool` - добавлена в версии [2025.02](../../changelog/2025.02.md#string-module)
63+
* `String::HasPrefixIgnoreCase(string:String?, prefix:String) -> Bool` - удалена в версии [2025.02](../../changelog/2025.02.md#string-module)
64+
* `String::StartsWithIgnoreCase(string:String?, prefix:String) -> Bool` - удалена в версии [2025.02](../../changelog/2025.02.md#string-module)
65+
* `String::HasSuffixIgnoreCase(string:String?, suffix:String) -> Bool` - удалена в версии [2025.02](../../changelog/2025.02.md#string-module)
66+
* `String::EndsWithIgnoreCase(string:String?, suffix:String) -> Bool` - удалена в версии [2025.02](../../changelog/2025.02.md#string-module)
6567

6668
Проверяют наличие префикса или суффикса в строке без учёта регистра символов.
6769

yql/essentials/public/udf/arrow/udf_arrow_helpers.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ class TUdfKernelState : public arrow::compute::KernelState {
6262

6363
return *ScalarBuilder_;
6464
}
65-
65+
6666
const IValueBuilder& GetValueBuilder() {
6767
Y_ENSURE(ValueBuilder_);
6868
return *ValueBuilder_;
@@ -348,7 +348,7 @@ TScalarBuilderImpl* CastToScalarBuilderImpl(IScalarBuilder& builder) {
348348
template<typename TReader>
349349
TReader* CastToBlockReaderImpl(IBlockReader& reader) {
350350
static_assert(std::is_base_of_v<IBlockReader, TReader>);
351-
351+
352352
auto* readerImpl = dynamic_cast<TReader*>(&reader);
353353
Y_ENSURE(readerImpl, TStringBuilder() << "Got " << typeid(reader).name() << " as BlockReader");
354354
return readerImpl;
@@ -444,7 +444,7 @@ struct TBinaryKernelExec {
444444

445445
*res = MakeArray(outputArrays);
446446
} else if (arg1.is_array() && arg2.is_scalar()) {
447-
auto& array1 = *arg1.array();
447+
auto& array1 = *arg1.array();
448448
auto item2 = reader2Impl->GetScalarItem(*arg2.scalar());
449449
auto& builder = state.GetArrayBuilder();
450450
auto* builderImpl = CastToArrayBuilderImpl<TArrayBuilderImpl>(builder);
@@ -644,7 +644,7 @@ struct TUnaryUnsafeFixedSizeFilterKernel {
644644
}
645645
auto validMask = nullBuilder.Finish();
646646
validMask = MakeDenseBitmap(validMask->data(), length, GetYqlMemoryPool());
647-
647+
648648
auto inMask = inArray->buffers[0];
649649
if (inMask) {
650650
outArray->buffers[0] = AllocateBitmapWithReserve(length, GetYqlMemoryPool());
@@ -703,6 +703,10 @@ class TUnaryOverOptionalImpl : public TBoxedValue {
703703
BEGIN_ARROW_UDF_IMPL(udfName##_BlocksImpl, signatureFunc, 0, true) \
704704
UDF_IMPL(udfName, builder.SimpleSignature<signatureFunc>().SupportsBlocks().IsStrict();, ;, ;, "", "", udfName##_BlocksImpl)
705705

706+
#define BEGIN_SIMPLE_STRICT_ARROW_UDF_OPTIONS(udfName, signatureFunc, options) \
707+
BEGIN_ARROW_UDF_IMPL(udfName##_BlocksImpl, signatureFunc, 0, true) \
708+
UDF_IMPL(udfName, builder.SimpleSignature<signatureFunc>().SupportsBlocks().IsStrict(); options;, ;, ;, "", "", udfName##_BlocksImpl)
709+
706710
#define BEGIN_SIMPLE_ARROW_UDF_WITH_OPTIONAL_ARGS(udfName, signatureFunc, optArgc) \
707711
BEGIN_ARROW_UDF_IMPL(udfName##_BlocksImpl, signatureFunc, optArgc, false) \
708712
UDF_IMPL(udfName, builder.SimpleSignature<signatureFunc>().SupportsBlocks().OptionalArgs(optArgc);, ;, ;, "", "", udfName##_BlocksImpl)
@@ -744,4 +748,4 @@ class TUnaryOverOptionalImpl : public TBoxedValue {
744748
END_ARROW_UDF(udfName##_BlocksImpl, exec)
745749

746750
#define END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(udfName, exec, nullHandling) \
747-
END_ARROW_UDF_WITH_NULL_HANDLING(udfName##_BlocksImpl, exec, nullHandling)
751+
END_ARROW_UDF_WITH_NULL_HANDLING(udfName##_BlocksImpl, exec, nullHandling)

yql/essentials/udfs/common/string/string_udf.cpp

Lines changed: 53 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -151,18 +151,54 @@ namespace {
151151
} \
152152
}
153153

154-
#define STRING_TWO_ARGS_UDF(udfName, function) \
155-
SIMPLE_STRICT_UDF(T##udfName, bool(TOptional<char*>, char*)) { \
156-
Y_UNUSED(valueBuilder); \
157-
if (args[0]) { \
158-
const TString haystack(args[0].AsStringRef()); \
159-
const TString needle(args[1].AsStringRef()); \
160-
return TUnboxedValuePod(function(haystack, needle)); \
161-
} else { \
162-
return TUnboxedValuePod(false); \
163-
} \
154+
#define STRING_TWO_ARGS_UDF_DEPRECATED_2025_02(udfName, function) \
155+
SIMPLE_STRICT_UDF_OPTIONS(T##udfName, bool(TOptional<char*>, char*), \
156+
builder.SetMaxLangVer(NYql::MakeLangVersion(2025, 1))) \
157+
{ \
158+
Y_UNUSED(valueBuilder); \
159+
if (args[0]) { \
160+
const TString haystack(args[0].AsStringRef()); \
161+
const TString needle(args[1].AsStringRef()); \
162+
return TUnboxedValuePod(function(haystack, needle)); \
163+
} else { \
164+
return TUnboxedValuePod(false); \
165+
} \
164166
}
165167

168+
#define STRING_ASCII_CMP_IGNORE_CASE_UDF(udfName, function) \
169+
BEGIN_SIMPLE_STRICT_ARROW_UDF_OPTIONS(T##udfName, \
170+
bool(TOptional<char*>, char*), \
171+
builder.SetMinLangVer(NYql::MakeLangVersion(2025, 2))) \
172+
{ \
173+
Y_UNUSED(valueBuilder); \
174+
if (args[0]) { \
175+
const TString haystack(args[0].AsStringRef()); \
176+
const TString needle(args[1].AsStringRef()); \
177+
return TUnboxedValuePod(function(haystack, needle)); \
178+
} else { \
179+
return TUnboxedValuePod(false); \
180+
} \
181+
} \
182+
\
183+
struct T##udfName##KernelExec \
184+
: public TBinaryKernelExec<T##udfName##KernelExec> \
185+
{ \
186+
template <typename TSink> \
187+
static void Process(const IValueBuilder*, TBlockItem arg1, \
188+
TBlockItem arg2, const TSink& sink) \
189+
{ \
190+
if (arg1) { \
191+
const TString haystack(arg1.AsStringRef()); \
192+
const TString needle(arg2.AsStringRef()); \
193+
sink(TBlockItem(function(haystack, needle))); \
194+
} else { \
195+
sink(TBlockItem(false)); \
196+
} \
197+
} \
198+
}; \
199+
\
200+
END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do)
201+
166202
#define IS_ASCII_UDF(function) \
167203
BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, bool(TOptional<char*>)) { \
168204
Y_UNUSED(valueBuilder); \
@@ -360,19 +396,17 @@ namespace {
360396
XX(HasSuffix, EndsWith)
361397

362398
// NOTE: The functions below are marked as deprecated, so block implementation
363-
// is not required for them. Hence, STRING_TWO_ARGS_UDF provides only the
364-
// scalar one at the moment.
365-
#define STRING_TWO_ARGS_UDF_MAP(XX) \
399+
// is not required for them. Hence, STRING_TWO_ARGS_UDF_DEPRECATED_2025_02
400+
// provides only the scalar one at the moment.
401+
#define STRING_TWO_ARGS_UDF_MAP_DEPRECATED_2025_02(XX) \
366402
XX(StartsWithIgnoreCase, AsciiHasPrefixIgnoreCase) \
367403
XX(EndsWithIgnoreCase, AsciiHasSuffixIgnoreCase) \
368404
XX(HasPrefixIgnoreCase, AsciiHasPrefixIgnoreCase) \
369405
XX(HasSuffixIgnoreCase, AsciiHasSuffixIgnoreCase)
370406

371-
// NOTE: The functions below are marked as deprecated, so block implementation
372407
// is not required for them. Hence, STROKA_UDF provides only the scalar one at
373408
// the moment.
374409
#define STROKA_UDF_MAP(XX) \
375-
XX(Reverse, ReverseInPlace)
376410

377411
#define IS_ASCII_UDF_MAP(XX) \
378412
XX(IsAscii) \
@@ -881,7 +915,8 @@ namespace {
881915
STROKA_CASE_UDF_MAP(STROKA_CASE_UDF)
882916
STROKA_ASCII_CASE_UDF_MAP(STROKA_ASCII_CASE_UDF)
883917
STROKA_FIND_UDF_MAP(STROKA_FIND_UDF)
884-
STRING_TWO_ARGS_UDF_MAP(STRING_TWO_ARGS_UDF)
918+
STRING_TWO_ARGS_UDF_MAP_DEPRECATED_2025_02(STRING_TWO_ARGS_UDF_DEPRECATED_2025_02)
919+
STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP(STRING_ASCII_CMP_IGNORE_CASE_UDF)
885920
IS_ASCII_UDF_MAP(IS_ASCII_UDF)
886921

887922
static constexpr ui64 padLim = 1000000;
@@ -897,7 +932,8 @@ namespace {
897932
STROKA_CASE_UDF_MAP(STRING_REGISTER_UDF)
898933
STROKA_ASCII_CASE_UDF_MAP(STRING_REGISTER_UDF)
899934
STROKA_FIND_UDF_MAP(STRING_REGISTER_UDF)
900-
STRING_TWO_ARGS_UDF_MAP(STRING_REGISTER_UDF)
935+
STRING_TWO_ARGS_UDF_MAP_DEPRECATED_2025_02(STRING_REGISTER_UDF)
936+
STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP(STRING_REGISTER_UDF)
901937
IS_ASCII_UDF_MAP(STRING_REGISTER_UDF)
902938
STRING_STREAM_PAD_FORMATTER_UDF_MAP(STRING_REGISTER_UDF)
903939
STRING_STREAM_NUM_FORMATTER_UDF_MAP(STRING_REGISTER_UDF)

yql/essentials/udfs/common/string/test/canondata/result.json

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,16 @@
44
"uri": "file://test.test_AsciiChecks_/results.txt"
55
}
66
],
7+
"test.test[AsciiCmpIgnoreCase]": [
8+
{
9+
"uri": "file://test.test_AsciiCmpIgnoreCase_/results.txt"
10+
}
11+
],
12+
"test.test[AsciiCmpIgnoreCase_2025_02]": [
13+
{
14+
"uri": "file://test.test_AsciiCmpIgnoreCase_2025_02_/extracted"
15+
}
16+
],
717
"test.test[Base32Decode]": [
818
{
919
"uri": "file://test.test_Base32Decode_/results.txt"
@@ -19,6 +29,11 @@
1929
"uri": "file://test.test_BlockAsciiChecks_/results.txt"
2030
}
2131
],
32+
"test.test[BlockAsciiCmpIgnoreCase]": [
33+
{
34+
"uri": "file://test.test_BlockAsciiCmpIgnoreCase_/results.txt"
35+
}
36+
],
2237
"test.test[BlockFind]": [
2338
{
2439
"uri": "file://test.test_BlockFind_/results.txt"
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
[
2+
{
3+
"Write" = [
4+
{
5+
"Type" = [
6+
"ListType";
7+
[
8+
"StructType";
9+
[
10+
[
11+
"value";
12+
[
13+
"DataType";
14+
"String"
15+
]
16+
];
17+
[
18+
"icstarts";
19+
[
20+
"DataType";
21+
"Bool"
22+
]
23+
];
24+
[
25+
"icends";
26+
[
27+
"DataType";
28+
"Bool"
29+
]
30+
]
31+
]
32+
]
33+
];
34+
"Data" = [
35+
[
36+
"fdsa";
37+
%false;
38+
%false
39+
];
40+
[
41+
"aswedfg";
42+
%true;
43+
%false
44+
];
45+
[
46+
"asdadsaasd";
47+
%true;
48+
%false
49+
];
50+
[
51+
"gdsfsassas";
52+
%false;
53+
%true
54+
];
55+
[
56+
"";
57+
%false;
58+
%false
59+
];
60+
[
61+
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
62+
%false;
63+
%false
64+
]
65+
]
66+
}
67+
]
68+
}
69+
]
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
<tmp_path>/program.sql:<main>: Error: Type annotation
2+
3+
<tmp_path>/program.sql:<main>:4:1: Error: At function: RemovePrefixMembers, At function: Unordered, At function: PersistableRepr, At function: OrderedSqlProject
4+
SELECT
5+
^
6+
<tmp_path>/program.sql:<main>:4:1: Error: At function: SqlProjectItem
7+
SELECT
8+
^
9+
<tmp_path>/program.sql:<main>:8:13: Error: At function: Apply, At function: Udf
10+
String::HasPrefixIgnoreCase(value, "AS") AS icprefix,
11+
^
12+
<tmp_path>/program.sql:<main>:8:13: Error: UDF 'String.HasPrefixIgnoreCase' is not available after version 2025.01
13+
String::HasPrefixIgnoreCase(value, "AS") AS icprefix,
14+
^
15+
<tmp_path>/program.sql:<main>:4:1: Error: At function: SqlProjectItem
16+
SELECT
17+
^
18+
<tmp_path>/program.sql:<main>:10:13: Error: At function: Apply, At function: Udf
19+
String::StartsWithIgnoreCase(value, "AS") AS icstarts,
20+
^
21+
<tmp_path>/program.sql:<main>:10:13: Error: UDF 'String.StartsWithIgnoreCase' is not available after version 2025.01
22+
String::StartsWithIgnoreCase(value, "AS") AS icstarts,
23+
^
24+
<tmp_path>/program.sql:<main>:4:1: Error: At function: SqlProjectItem
25+
SELECT
26+
^
27+
<tmp_path>/program.sql:<main>:12:13: Error: At function: Apply, At function: Udf
28+
String::HasSuffixIgnoreCase(value, "AS") AS icsuffix,
29+
^
30+
<tmp_path>/program.sql:<main>:12:13: Error: UDF 'String.HasSuffixIgnoreCase' is not available after version 2025.01
31+
String::HasSuffixIgnoreCase(value, "AS") AS icsuffix,
32+
^
33+
<tmp_path>/program.sql:<main>:4:1: Error: At function: SqlProjectItem
34+
SELECT
35+
^
36+
<tmp_path>/program.sql:<main>:14:13: Error: At function: Apply, At function: Udf
37+
String::EndsWithIgnoreCase(value, "AS") AS icends,
38+
^
39+
<tmp_path>/program.sql:<main>:14:13: Error: UDF 'String.EndsWithIgnoreCase' is not available after version 2025.01
40+
String::EndsWithIgnoreCase(value, "AS") AS icends,
41+
^

0 commit comments

Comments
 (0)