Skip to content

Commit a9058c8

Browse files
authored
Case-insensitive mode for searching modules and functions (#10842)
1 parent 95f4df7 commit a9058c8

File tree

21 files changed

+439
-61
lines changed

21 files changed

+439
-61
lines changed

ydb/library/yql/core/qplayer/udf_resolver/yql_qplayer_udf_resolver.cpp

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -109,6 +109,7 @@ class TResolver : public IUdfResolver {
109109

110110
TString SaveValue(const TFunction* f) const {
111111
auto node = NYT::TNode()
112+
("NormalizedName", f->NormalizedName)
112113
("CallableType", TypeToYsonNode(f->CallableType));
113114
if (f->NormalizedUserType && f->NormalizedUserType->GetKind() != ETypeAnnotationKind::Void) {
114115
node("NormalizedUserType", TypeToYsonNode(f->NormalizedUserType));
@@ -131,6 +132,12 @@ class TResolver : public IUdfResolver {
131132

132133
void LoadValue(TFunction* f, const TString& value, TExprContext& ctx) const {
133134
auto node = NYT::NodeFromYsonString(value);
135+
if (node.HasKey("NormalizedName")) {
136+
f->NormalizedName = node["NormalizedName"].AsString();
137+
} else {
138+
f->NormalizedName = f->Name;
139+
}
140+
134141
f->CallableType = ParseTypeFromYson(node["CallableType"], ctx);
135142
if (node.HasKey("NormalizedUserType")) {
136143
f->NormalizedUserType = ParseTypeFromYson(node["NormalizedUserType"], ctx);

ydb/library/yql/core/type_ann/type_ann_core.cpp

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7590,11 +7590,16 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
75907590
return IGraphTransformer::TStatus::Error;
75917591
}
75927592

7593+
cached.NormalizedName = description.NormalizedName;
75937594
cached.FunctionType = description.CallableType;
75947595
cached.RunConfigType = description.RunConfigType ? description.RunConfigType : ctx.Expr.MakeType<TVoidExprType>();
75957596
cached.NormalizedUserType = description.NormalizedUserType ? description.NormalizedUserType : ctx.Expr.MakeType<TVoidExprType>();
75967597
cached.SupportsBlocks = description.SupportsBlocks;
75977598
cached.IsStrict = description.IsStrict;
7599+
7600+
if (name != cached.NormalizedName) {
7601+
ctx.Types.UdfTypeCache[std::make_tuple(cached.NormalizedName, TString(typeConfig), userType)] = cached;
7602+
}
75987603
}
75997604

76007605
TStringBuf typeConfig = "";
@@ -7623,7 +7628,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
76237628
TStringBuf fileAlias = udfInfo ? udfInfo->FileAlias : ""_sb;
76247629
auto ret = ctx.Expr.Builder(input->Pos())
76257630
.Callable("Udf")
7626-
.Add(0, input->HeadPtr())
7631+
.Atom(0, cached.NormalizedName)
76277632
.Add(1, runConfigValue)
76287633
.Add(2, ExpandType(input->Pos(), *cached.NormalizedUserType, ctx.Expr))
76297634
.Atom(3, typeConfig)

ydb/library/yql/core/ut/yql_udf_index_ut.cpp

Lines changed: 81 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ void EnsureLinksEqual(const TDownloadLink& link1, const TDownloadLink& link2) {
4343

4444
void EnsureContainsFunction(TUdfIndex::TPtr index, TString module, const TFunctionInfo& f) {
4545
TFunctionInfo existingFunc;
46-
UNIT_ASSERT(index->FindFunction(module, f.Name, existingFunc));
46+
UNIT_ASSERT(index->FindFunction(module, f.Name, existingFunc) == TUdfIndex::EStatus::Found);
4747
EnsureFunctionsEqual(f, existingFunc);
4848
}
4949
}
@@ -52,15 +52,15 @@ Y_UNIT_TEST_SUITE(TUdfIndexTests) {
5252
Y_UNIT_TEST(Empty) {
5353
auto index1 = MakeIntrusive<TUdfIndex>();
5454

55-
UNIT_ASSERT(!index1->ContainsModule("M1"));
55+
UNIT_ASSERT_EQUAL(index1->ContainsModule("M1"), TUdfIndex::EStatus::NotFound);
5656
UNIT_ASSERT(index1->FindResourceByModule("M1") == nullptr);
5757
TFunctionInfo f1;
58-
UNIT_ASSERT(!index1->FindFunction("M1", "M1.F1", f1));
58+
UNIT_ASSERT_EQUAL(index1->FindFunction("M1", "M1.F1", f1), TUdfIndex::EStatus::NotFound);
5959

6060
auto index2 = index1->Clone();
61-
UNIT_ASSERT(!index2->ContainsModule("M1"));
61+
UNIT_ASSERT_EQUAL(index2->ContainsModule("M1"), TUdfIndex::EStatus::NotFound);
6262
UNIT_ASSERT(index2->FindResourceByModule("M1") == nullptr);
63-
UNIT_ASSERT(!index2->FindFunction("M1", "M1.F1", f1));
63+
UNIT_ASSERT_EQUAL(index2->FindFunction("M1", "M1.F1", f1), TUdfIndex::EStatus::NotFound);
6464
}
6565

6666
Y_UNIT_TEST(SingleModuleAndFunction) {
@@ -72,28 +72,28 @@ Y_UNIT_TEST_SUITE(TUdfIndexTests) {
7272
b.AddFunction(func1);
7373

7474
index1->RegisterResource(b.Build(), TUdfIndex::EOverrideMode::RaiseError);
75-
UNIT_ASSERT(index1->ContainsModule("M1"));
76-
UNIT_ASSERT(!index1->ContainsModule("M2"));
75+
UNIT_ASSERT_EQUAL(index1->ContainsModule("M1"), TUdfIndex::EStatus::Found);
76+
UNIT_ASSERT_EQUAL(index1->ContainsModule("M2"), TUdfIndex::EStatus::NotFound);
7777

7878
UNIT_ASSERT(index1->FindResourceByModule("M2") == nullptr);
7979
auto resource1 = index1->FindResourceByModule("M1");
8080
UNIT_ASSERT(resource1 != nullptr);
8181
EnsureLinksEqual(resource1->Link, link1);
8282

8383
TFunctionInfo f1;
84-
UNIT_ASSERT(!index1->FindFunction("M2", "M2.F1", f1));
84+
UNIT_ASSERT_EQUAL(index1->FindFunction("M2", "M2.F1", f1), TUdfIndex::EStatus::NotFound);
8585

86-
UNIT_ASSERT(index1->FindFunction("M1", "M1.F1", f1));
86+
UNIT_ASSERT_EQUAL(index1->FindFunction("M1", "M1.F1", f1), TUdfIndex::EStatus::Found);
8787
EnsureFunctionsEqual(f1, func1);
8888

8989
// ensure both indexes contain the same info
9090
auto index2 = index1->Clone();
9191

92-
UNIT_ASSERT(index1->ContainsModule("M1"));
93-
UNIT_ASSERT(index2->ContainsModule("M1"));
92+
UNIT_ASSERT_EQUAL(index1->ContainsModule("M1"), TUdfIndex::EStatus::Found);
93+
UNIT_ASSERT_EQUAL(index2->ContainsModule("M1"), TUdfIndex::EStatus::Found);
9494

9595
TFunctionInfo f2;
96-
UNIT_ASSERT(index2->FindFunction("M1", "M1.F1", f2));
96+
UNIT_ASSERT_EQUAL(index2->FindFunction("M1", "M1.F1", f2), TUdfIndex::EStatus::Found);
9797
EnsureFunctionsEqual(f1, f2);
9898

9999
auto resource2 = index2->FindResourceByModule("M1");
@@ -140,11 +140,11 @@ Y_UNIT_TEST_SUITE(TUdfIndexTests) {
140140
EnsureLinksEqual(r22->Link, link2);
141141

142142
// check modules
143-
UNIT_ASSERT(index1->ContainsModule("M1"));
144-
UNIT_ASSERT(index1->ContainsModule("M2"));
145-
UNIT_ASSERT(index1->ContainsModule("M3"));
146-
UNIT_ASSERT(index1->ContainsModule("M4"));
147-
UNIT_ASSERT(!index1->ContainsModule("M5"));
143+
UNIT_ASSERT_EQUAL(index1->ContainsModule("M1"), TUdfIndex::EStatus::Found);
144+
UNIT_ASSERT_EQUAL(index1->ContainsModule("M2"), TUdfIndex::EStatus::Found);
145+
UNIT_ASSERT_EQUAL(index1->ContainsModule("M3"), TUdfIndex::EStatus::Found);
146+
UNIT_ASSERT_EQUAL(index1->ContainsModule("M4"), TUdfIndex::EStatus::Found);
147+
UNIT_ASSERT_EQUAL(index1->ContainsModule("M5"), TUdfIndex::EStatus::NotFound);
148148

149149
EnsureContainsFunction(index1, "M1", func11);
150150
EnsureContainsFunction(index1, "M1", func12);
@@ -157,8 +157,8 @@ Y_UNIT_TEST_SUITE(TUdfIndexTests) {
157157

158158
TFunctionInfo f;
159159
// known func, but non-existent module
160-
UNIT_ASSERT(!index1->FindFunction("M5", "M1.F1", f));
161-
UNIT_ASSERT(!index1->FindFunction("M2", "M3.F1", f));
160+
UNIT_ASSERT_EQUAL(index1->FindFunction("M5", "M1.F1", f), TUdfIndex::EStatus::NotFound);
161+
UNIT_ASSERT_EQUAL(index1->FindFunction("M2", "M3.F1", f), TUdfIndex::EStatus::NotFound);
162162
}
163163

164164
Y_UNIT_TEST(ConflictRaiseError) {
@@ -199,7 +199,7 @@ Y_UNIT_TEST_SUITE(TUdfIndexTests) {
199199
EnsureContainsFunction(index1, "M2", func13);
200200

201201
TFunctionInfo f;
202-
UNIT_ASSERT(!index1->FindFunction("M3", "M3.F1", f));
202+
UNIT_ASSERT_EQUAL(index1->FindFunction("M3", "M3.F1", f), TUdfIndex::EStatus::NotFound);
203203
}
204204

205205
Y_UNIT_TEST(ConflictPreserveExisting) {
@@ -240,7 +240,7 @@ Y_UNIT_TEST_SUITE(TUdfIndexTests) {
240240
EnsureContainsFunction(index1, "M2", func13);
241241

242242
TFunctionInfo f;
243-
UNIT_ASSERT(!index1->FindFunction("M3", "M3.F1", f));
243+
UNIT_ASSERT_EQUAL(index1->FindFunction("M3", "M3.F1", f), TUdfIndex::EStatus::NotFound);
244244
}
245245

246246
Y_UNIT_TEST(ConflictReplace1WithNew) {
@@ -299,9 +299,9 @@ Y_UNIT_TEST_SUITE(TUdfIndexTests) {
299299

300300
// not here anymore
301301
TFunctionInfo f;
302-
UNIT_ASSERT(!index1->FindFunction("M1", "M1.F1", f));
303-
UNIT_ASSERT(!index1->FindFunction("M1", "M1.F2", f));
304-
UNIT_ASSERT(!index1->FindFunction("M2", "M2.F1", f));
302+
UNIT_ASSERT_EQUAL(index1->FindFunction("M1", "M1.F1", f), TUdfIndex::EStatus::NotFound);
303+
UNIT_ASSERT_EQUAL(index1->FindFunction("M1", "M1.F2", f), TUdfIndex::EStatus::NotFound);
304+
UNIT_ASSERT_EQUAL(index1->FindFunction("M2", "M2.F1", f), TUdfIndex::EStatus::NotFound);
305305
}
306306

307307
Y_UNIT_TEST(ConflictReplace2WithNew) {
@@ -359,10 +359,63 @@ Y_UNIT_TEST_SUITE(TUdfIndexTests) {
359359

360360
// not here anymore
361361
TFunctionInfo f;
362-
UNIT_ASSERT(!index1->FindFunction("M1", "M1.F2", f));
363-
UNIT_ASSERT(!index1->FindFunction("M2", "M2.F1", f));
362+
UNIT_ASSERT_EQUAL(index1->FindFunction("M1", "M1.F2", f), TUdfIndex::EStatus::NotFound);
363+
UNIT_ASSERT_EQUAL(index1->FindFunction("M2", "M2.F1", f), TUdfIndex::EStatus::NotFound);
364364

365-
UNIT_ASSERT(!index1->FindFunction("M3", "M3.F3", f));
366-
UNIT_ASSERT(!index1->FindFunction("M4", "M4.F4", f));
365+
UNIT_ASSERT_EQUAL(index1->FindFunction("M3", "M3.F3", f), TUdfIndex::EStatus::NotFound);
366+
UNIT_ASSERT_EQUAL(index1->FindFunction("M4", "M4.F4", f), TUdfIndex::EStatus::NotFound);
367+
}
368+
369+
Y_UNIT_TEST(SetInsensitiveSearch) {
370+
auto index1 = MakeIntrusive<TUdfIndex>();
371+
index1->SetCaseSentiveSearch(false);
372+
auto func1 = BuildFunctionInfo("M1.FA", 1);
373+
auto func2 = BuildFunctionInfo("M1.fa", 1);
374+
auto func3 = BuildFunctionInfo("M1.g", 1);
375+
auto func4 = BuildFunctionInfo("mx.h", 1);
376+
auto func5 = BuildFunctionInfo("MX.g", 1);
377+
auto link1 = TDownloadLink::File("file1");
378+
379+
TResourceBuilder b(link1);
380+
b.AddFunction(func1);
381+
b.AddFunction(func2);
382+
b.AddFunction(func3);
383+
b.AddFunction(func4);
384+
b.AddFunction(func5);
385+
386+
index1->RegisterResource(b.Build(), TUdfIndex::EOverrideMode::RaiseError);
387+
388+
auto checkIndex = [&](auto index) {
389+
UNIT_ASSERT_EQUAL(index->ContainsModule("M1"), TUdfIndex::EStatus::Found);
390+
UNIT_ASSERT_EQUAL(index->ContainsModule("m1"), TUdfIndex::EStatus::Found);
391+
UNIT_ASSERT_EQUAL(index->ContainsModule("mx"), TUdfIndex::EStatus::Found);
392+
UNIT_ASSERT_EQUAL(index->ContainsModule("MX"), TUdfIndex::EStatus::Found);
393+
UNIT_ASSERT_EQUAL(index->ContainsModule("mX"), TUdfIndex::EStatus::Ambigious);
394+
UNIT_ASSERT_EQUAL(index->ContainsModule("M3"), TUdfIndex::EStatus::NotFound);
395+
396+
UNIT_ASSERT(index->FindResourceByModule("M3") == nullptr);
397+
auto resource1 = index->FindResourceByModule("m1");
398+
UNIT_ASSERT(resource1 != nullptr);
399+
EnsureLinksEqual(resource1->Link, link1);
400+
401+
TFunctionInfo f;
402+
UNIT_ASSERT_EQUAL(index->FindFunction("m1", "M1.FA", f), TUdfIndex::EStatus::Found);
403+
EnsureFunctionsEqual(f, func1);
404+
UNIT_ASSERT_EQUAL(index->FindFunction("m1", "m1.Fa", f), TUdfIndex::EStatus::Ambigious);
405+
UNIT_ASSERT_EQUAL(index->FindFunction("m1", "M1.fa", f), TUdfIndex::EStatus::Found);
406+
EnsureFunctionsEqual(f, func2);
407+
UNIT_ASSERT_EQUAL(index->FindFunction("m1", "m1.g", f), TUdfIndex::EStatus::Found);
408+
EnsureFunctionsEqual(f, func3);
409+
UNIT_ASSERT_EQUAL(index->FindFunction("Mx", "mx.h", f), TUdfIndex::EStatus::Ambigious);
410+
UNIT_ASSERT_EQUAL(index->FindFunction("mx", "mx.H", f), TUdfIndex::EStatus::Found);
411+
EnsureFunctionsEqual(f, func4);
412+
UNIT_ASSERT_EQUAL(index->FindFunction("MX", "mx.g", f), TUdfIndex::EStatus::Found);
413+
EnsureFunctionsEqual(f, func5);
414+
};
415+
416+
checkIndex(index1);
417+
// ensure both indexes contain the same info
418+
auto index2 = index1->Clone();
419+
checkIndex(index2);
367420
}
368421
}

ydb/library/yql/core/yql_type_annotation.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -273,6 +273,7 @@ enum class EBlockEngineMode {
273273
};
274274

275275
struct TUdfCachedInfo {
276+
TString NormalizedName;
276277
const TTypeAnnotationNode* FunctionType = nullptr;
277278
const TTypeAnnotationNode* RunConfigType = nullptr;
278279
const TTypeAnnotationNode* NormalizedUserType = nullptr;

0 commit comments

Comments
 (0)