Skip to content

Commit 0d768e9

Browse files
authored
Optimize file downloading for requests with strong worker filters (#9452)
1 parent d8ee31d commit 0d768e9

File tree

4 files changed

+84
-7
lines changed

4 files changed

+84
-7
lines changed

ydb/library/yql/providers/dq/global_worker_manager/worker_filter.cpp

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,20 +23,29 @@ TWorkerFilter::TWorkerFilter(const Yql::DqsProto::TWorkerFilter& filter)
2323
}
2424
}
2525

26-
TWorkerFilter::EMatchStatus TWorkerFilter::Match(const TWorkerInfo::TPtr& workerInfo, int taskId, TStats* stats) const {
27-
bool allExists = true;
28-
bool partial = false;
// MatchHost: checks only the mandatory host-specific constraints of the filter.
// Returns false when FullMatch is set and any of the following holds:
//   - the filter names a cluster and the worker's ClusterName differs from it;
//   - Addresses is non-empty and does not contain the worker's Address;
//   - NodeIds is non-empty and does not contain the worker's NodeId.
// Returns true otherwise, including whenever FullMatch is not set.
// In this diff view, the "return EFAIL;" lines are the removed lines of the
// former Match() body; the "return false;" lines are their replacements.
26+
bool TWorkerFilter::MatchHost(const NDqs::TWorkerInfo::TPtr& workerInfo) const {
2927
if (FullMatch) {
3028
if (Filter.GetClusterName() && workerInfo->ClusterName != Filter.GetClusterName()) {
31-
return EFAIL;
29+
return false;
3230
}
3331
if (!Addresses.empty() && Addresses.find(workerInfo->Address) == Addresses.end()) {
34-
return EFAIL;
32+
return false;
3533
}
3634
if (!NodeIds.empty() && NodeIds.find(workerInfo->NodeId) == NodeIds.end()) {
37-
return EFAIL;
35+
return false;
3836
}
3937
}
38+
// No mandatory host constraint was violated (or FullMatch is off).
39+
return true;
40+
}
41+
42+
TWorkerFilter::EMatchStatus TWorkerFilter::Match(const TWorkerInfo::TPtr& workerInfo, int taskId, TStats* stats) const {
43+
bool allExists = true;
44+
bool partial = false;
45+
46+
if (!MatchHost(workerInfo)) {
47+
return EFAIL;
48+
}
4049
if (Filter.GetClusterNameHint() && workerInfo->ClusterName != Filter.GetClusterNameHint()) {
4150
partial = true;
4251
}
@@ -52,7 +61,10 @@ TWorkerFilter::EMatchStatus TWorkerFilter::Match(const TWorkerInfo::TPtr& worker
5261
(*stats->WaitingResources)[id].insert(taskId);
5362
} else {
5463
(*stats->WaitingResources)[id].erase(taskId);
55-
stats->Uploaded->find(id)->second.TryCount ++;
64+
auto maybeUploadedStats = stats->Uploaded->find(id);
65+
if (maybeUploadedStats != stats->Uploaded->end()) {
66+
maybeUploadedStats->second.TryCount ++;
67+
}
5668
}
5769
}
5870
}

ydb/library/yql/providers/dq/global_worker_manager/worker_filter.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ class TWorkerFilter {
4040
TWorkerFilter(const Yql::DqsProto::TWorkerFilter& filter);
4141

4242
EMatchStatus Match(const NDqs::TWorkerInfo::TPtr& workerInfo, int taskId, TStats* stats) const;
43+
// Returns true when the worker satisfies the mandatory host-specific constraints (ClusterName, Address, NodeId); these are checked only when FullMatch is set.
44+
bool MatchHost(const NDqs::TWorkerInfo::TPtr& workerInfo) const;
4345

4446
void Visit(const std::function<void(const Yql::DqsProto::TFile&)>& visitor) const;
4547

ydb/library/yql/providers/dq/global_worker_manager/workers_storage.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,9 @@ TVector<TWorkerInfo::TPtr> TWorkersStorage::TryAllocate(const NDq::IScheduler::T
437437
if (workerInfo->Stopping) {
438438
continue;
439439
}
440+
if (!filter.MatchHost(workerInfo)) {
441+
continue;
442+
}
440443
filter.Visit([&](const auto& file) {
441444
if (workerInfo->AddToDownloadList(file.GetObjectId(), file)) {
442445
YQL_CLOG(TRACE, ProviderDq) << "Added " << file.GetName() << "|" << file.GetObjectId() << " to worker's " << GetGuidAsString(workerInfo->WorkerId) << " download list" ;

ydb/library/yql/providers/dq/global_worker_manager/workers_storage_ut.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,4 +79,64 @@ Y_UNIT_TEST_SUITE(WorkersBenchmark) {
7979
UNIT_ASSERT_VALUES_EQUAL(all.size(), 100);
8080
UNIT_ASSERT_VALUES_EQUAL(0, storage.FreeSlots());
8181
}
82+
// ScheduleDownload: exercises TWorkersStorage::TryAllocate with and without
// per-task worker filters, then checks which workers ended up with a
// non-empty download list.
83+
Y_UNIT_TEST(ScheduleDownload) {
84+
int workers = 10;
85+
TWorkersStorage storage(1, new TSensorsGroup, new TSensorsGroup);
86+
storage.Clear();
// Register 10 workers, each with capacity 100.
// NOTE(review): the first CreateOrUpdate argument (100..109) is presumably the node id,
// since the final check compares workerInfo->NodeId with 102 - confirm against the signature.
87+
for (int i = 0; i < workers; i++) {
88+
TGUID guid;
89+
Yql::DqsProto::RegisterNodeRequest request;
90+
request.SetCapacity(100);
91+
request.AddKnownNodes(1);
92+
CreateGuid(&guid);
93+
storage.CreateOrUpdate(100+i, guid, request);
94+
}
95+
// Case 1: 10 workers requested with no per-task filters; all 10 are expected.
96+
{
97+
auto request = NDqProto::TAllocateWorkersRequest();
98+
request.SetCount(10);
99+
100+
auto waitInfo1 = IScheduler::TWaitInfo(request, NActors::TActorId());
101+
auto result = storage.TryAllocate(waitInfo1);
102+
103+
UNIT_ASSERT_VALUES_EQUAL(result.size(), 10);
104+
}
105+
// Case 2: 10 tasks, each filtered to node id 102, no files attached; 10 results are expected.
106+
{
107+
auto request = NDqProto::TAllocateWorkersRequest();
108+
auto workerFilter = Yql::DqsProto::TWorkerFilter();
109+
workerFilter.AddNodeId(102);
110+
111+
request.SetCount(10);
112+
for (ui32 i = 0; i < request.GetCount(); i++) {
113+
*request.AddWorkerFilterPerTask() = workerFilter;
114+
}
115+
auto waitInfo2 = IScheduler::TWaitInfo(request, NActors::TActorId());
116+
auto result = storage.TryAllocate(waitInfo2);
117+
UNIT_ASSERT_VALUES_EQUAL(result.size(), 10);
118+
}
119+
// Case 3: same node-id filter, but each filter also carries a file ("fileId").
// The test expects nothing to be allocated by this call.
// NOTE(review): presumably because the file is not yet present on the worker - confirm in TryAllocate.
120+
{
121+
auto request = NDqProto::TAllocateWorkersRequest();
122+
auto workerFilter = Yql::DqsProto::TWorkerFilter();
123+
workerFilter.AddNodeId(102);
124+
Yql::DqsProto::TFile file;
125+
file.SetObjectId("fileId");
126+
file.SetLocalPath("/tmp/test");
127+
*workerFilter.AddFile() = file;
128+
request.SetCount(10);
129+
for (ui32 i = 0; i < request.GetCount(); i++) {
130+
*request.AddWorkerFilterPerTask() = workerFilter;
131+
}
132+
133+
auto waitInfo3 = IScheduler::TWaitInfo(request, NActors::TActorId());
134+
auto result = storage.TryAllocate(waitInfo3);
135+
UNIT_ASSERT_VALUES_EQUAL(result.size(), 0);
136+
// Only the worker with NodeId 102 may have a non-empty download list;
// every other worker's list must be empty.
137+
storage.Visit([](const NDqs::TWorkerInfo::TPtr& workerInfo) {
138+
UNIT_ASSERT(workerInfo->GetDownloadList().size() == 0 || workerInfo->NodeId == 102);
139+
});
140+
}
141+
}
82142
}

0 commit comments

Comments
 (0)