Skip to content

Commit 07cda51

Browse files
committed
YQL-19884: Add block implementation for STRING_TWO_ARGS_UDF
commit_hash:22ad01050569869e2e75f89e65b06abf113791e4
1 parent 57c5606 commit 07cda51

File tree

5 files changed

+142
-14
lines changed

5 files changed

+142
-14
lines changed

yql/essentials/udfs/common/string/string_udf.cpp

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -153,17 +153,36 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>),
153153
} \
154154
}
155155

156-
#define STRING_TWO_ARGS_UDF(udfName, function) \
157-
SIMPLE_STRICT_UDF(T##udfName, bool(TOptional<char*>, char*)) { \
158-
Y_UNUSED(valueBuilder); \
159-
if (args[0]) { \
160-
const TString haystack(args[0].AsStringRef()); \
161-
const TString needle(args[1].AsStringRef()); \
162-
return TUnboxedValuePod(function(haystack, needle)); \
163-
} else { \
164-
return TUnboxedValuePod(false); \
165-
} \
166-
}
156+
#define STRING_TWO_ARGS_UDF(udfName, function) \
157+
BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, bool(TOptional<char*>, char*)) { \
158+
Y_UNUSED(valueBuilder); \
159+
if (args[0]) { \
160+
const TString haystack(args[0].AsStringRef()); \
161+
const TString needle(args[1].AsStringRef()); \
162+
return TUnboxedValuePod(function(haystack, needle)); \
163+
} else { \
164+
return TUnboxedValuePod(false); \
165+
} \
166+
} \
167+
\
168+
struct T##udfName##KernelExec \
169+
: public TBinaryKernelExec<T##udfName##KernelExec> \
170+
{ \
171+
template <typename TSink> \
172+
static void Process(const IValueBuilder*, TBlockItem arg1, \
173+
TBlockItem arg2, const TSink& sink) \
174+
{ \
175+
if (arg1) { \
176+
const TString haystack(arg1.AsStringRef()); \
177+
const TString needle(arg2.AsStringRef()); \
178+
sink(TBlockItem(function(haystack, needle))); \
179+
} else { \
180+
sink(TBlockItem(false)); \
181+
} \
182+
} \
183+
}; \
184+
\
185+
END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do)
167186

168187
#define IS_ASCII_UDF(function) \
169188
BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, bool(TOptional<char*>)) { \
@@ -361,9 +380,6 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>),
361380
XX(HasPrefix, StartsWith) \
362381
XX(HasSuffix, EndsWith)
363382

364-
// NOTE: The functions below are marked as deprecated, so block implementation
365-
// is not required for them. Hence, STRING_TWO_ARGS_UDF provides only the
366-
// scalar one at the moment.
367383
#define STRING_TWO_ARGS_UDF_MAP(XX) \
368384
XX(StartsWithIgnoreCase, AsciiHasPrefixIgnoreCase) \
369385
XX(EndsWithIgnoreCase, AsciiHasSuffixIgnoreCase) \

yql/essentials/udfs/common/string/test/canondata/test.test_BlockFind_/results.txt

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,34 @@
2121
"Bool"
2222
]
2323
];
24+
[
25+
"icprefix";
26+
[
27+
"DataType";
28+
"Bool"
29+
]
30+
];
31+
[
32+
"icstarts";
33+
[
34+
"DataType";
35+
"Bool"
36+
]
37+
];
38+
[
39+
"icsuffix";
40+
[
41+
"DataType";
42+
"Bool"
43+
]
44+
];
45+
[
46+
"icends";
47+
[
48+
"DataType";
49+
"Bool"
50+
]
51+
];
2452
[
2553
"levenstein";
2654
[
@@ -35,31 +63,55 @@
3563
[
3664
"fdsa";
3765
%false;
66+
%false;
67+
%false;
68+
%false;
69+
%false;
3870
"3"
3971
];
4072
[
4173
"aswedfg";
4274
%true;
75+
%true;
76+
%true;
77+
%false;
78+
%false;
4379
"5"
4480
];
4581
[
4682
"asdadsaasd";
4783
%true;
84+
%true;
85+
%true;
86+
%false;
87+
%false;
4888
"8"
4989
];
5090
[
5191
"gdsfsassas";
5292
%true;
93+
%false;
94+
%false;
95+
%true;
96+
%true;
5397
"8"
5498
];
5599
[
56100
"";
57101
%false;
102+
%false;
103+
%false;
104+
%false;
105+
%false;
58106
"2"
59107
];
60108
[
61109
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
62110
%false;
111+
%false;
112+
%false;
113+
%false;
114+
%false;
63115
"23"
64116
]
65117
]

yql/essentials/udfs/common/string/test/canondata/test.test_Find_/results.txt

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,34 @@
4949
"Bool"
5050
]
5151
];
52+
[
53+
"icprefix";
54+
[
55+
"DataType";
56+
"Bool"
57+
]
58+
];
59+
[
60+
"icstarts";
61+
[
62+
"DataType";
63+
"Bool"
64+
]
65+
];
66+
[
67+
"icsuffix";
68+
[
69+
"DataType";
70+
"Bool"
71+
]
72+
];
73+
[
74+
"icends";
75+
[
76+
"DataType";
77+
"Bool"
78+
]
79+
];
5280
[
5381
"find";
5482
[
@@ -81,6 +109,10 @@
81109
%false;
82110
%false;
83111
%false;
112+
%false;
113+
%false;
114+
%false;
115+
%false;
84116
"-1";
85117
"-1";
86118
"3"
@@ -92,6 +124,10 @@
92124
%true;
93125
%false;
94126
%false;
127+
%true;
128+
%true;
129+
%false;
130+
%false;
95131
"0";
96132
"0";
97133
"5"
@@ -103,6 +139,10 @@
103139
%true;
104140
%false;
105141
%false;
142+
%true;
143+
%true;
144+
%false;
145+
%false;
106146
"0";
107147
"7";
108148
"8"
@@ -114,6 +154,10 @@
114154
%false;
115155
%true;
116156
%true;
157+
%false;
158+
%false;
159+
%true;
160+
%true;
117161
"5";
118162
"8";
119163
"8"
@@ -125,6 +169,10 @@
125169
%false;
126170
%false;
127171
%false;
172+
%false;
173+
%false;
174+
%false;
175+
%false;
128176
"-1";
129177
"-1";
130178
"2"
@@ -136,6 +184,10 @@
136184
%false;
137185
%false;
138186
%false;
187+
%false;
188+
%false;
189+
%false;
190+
%false;
139191
"-1";
140192
"-1";
141193
"23"

yql/essentials/udfs/common/string/test/cases/BlockFind.sql

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,9 @@ pragma UseBlocks;
33
SELECT
44
value,
55
String::Contains(value, "as") AS contains,
6+
String::HasPrefixIgnoreCase(value, "AS") AS icprefix,
7+
String::StartsWithIgnoreCase(value, "AS") AS icstarts,
8+
String::HasSuffixIgnoreCase(value, "AS") AS icsuffix,
9+
String::EndsWithIgnoreCase(value, "AS") AS icends,
610
String::LevensteinDistance(value, "as") AS levenstein
711
FROM Input;

yql/essentials/udfs/common/string/test/cases/Find.sql

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ SELECT
66
String::StartsWith(value, "as") AS starts,
77
String::HasSuffix(value, "as") AS suffix,
88
String::EndsWith(value, "as") AS ends,
9+
String::HasPrefixIgnoreCase(value, "AS") AS icprefix,
10+
String::StartsWithIgnoreCase(value, "AS") AS icstarts,
11+
String::HasSuffixIgnoreCase(value, "AS") AS icsuffix,
12+
String::EndsWithIgnoreCase(value, "AS") AS icends,
913
String::Find(value, "as") AS find,
1014
String::ReverseFind(value, "as") AS rfind,
1115
String::LevensteinDistance(value, "as") AS levenstein

0 commit comments

Comments
 (0)