Skip to content

Commit 4ee430b

Browse files
authored
Merge stable 24 3 (#8545)
1 parent 89e6dec commit 4ee430b

File tree

6 files changed

+117
-31
lines changed

6 files changed

+117
-31
lines changed

ydb/core/kqp/executer_actor/kqp_planner.cpp

Lines changed: 83 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ bool TKqpPlanner::UseMockEmptyPlanner = false;
6060
// Task can allocate extra memory during execution.
6161
// So we estimate the total amount of memory required for a task as its a priori size multiplied by this constant.
6262
constexpr ui32 MEMORY_ESTIMATION_OVERFLOW = 2;
63-
constexpr ui32 MAX_NON_PARALLEL_TASKS_EXECUTION_LIMIT = 8;
6463

6564
TKqpPlanner::TKqpPlanner(TKqpPlanner::TArgs&& args)
6665
: TxId(args.TxId)
@@ -256,9 +255,18 @@ std::unique_ptr<IEventHandle> TKqpPlanner::AssignTasksToNodes() {
256255

257256
auto localResources = ResourceManager_->GetLocalResources();
258257
Y_UNUSED(MEMORY_ESTIMATION_OVERFLOW);
258+
259+
auto placingOptions = ResourceManager_->GetPlacingOptions();
260+
261+
bool singleNodeExecutionMakeSence = (
262+
ResourceEstimations.size() <= placingOptions.MaxNonParallelTasksExecutionLimit ||
263+
// all readers are located on a single node.
264+
TasksPerNode.size() == 1
265+
);
266+
259267
if (LocalRunMemoryEst * MEMORY_ESTIMATION_OVERFLOW <= localResources.Memory[NRm::EKqpMemoryPool::ScanQuery] &&
260268
ResourceEstimations.size() <= localResources.ExecutionUnits &&
261-
ResourceEstimations.size() <= MAX_NON_PARALLEL_TASKS_EXECUTION_LIMIT)
269+
singleNodeExecutionMakeSence)
262270
{
263271
ui64 selfNodeId = ExecuterId.NodeId();
264272
for(ui64 taskId: ComputeTasks) {
@@ -293,47 +301,100 @@ std::unique_ptr<IEventHandle> TKqpPlanner::AssignTasksToNodes() {
293301
return std::make_unique<IEventHandle>(ExecuterId, ExecuterId, ev.Release());
294302
}
295303

304+
std::vector<ui64> deepestTasks;
305+
ui64 maxLevel = 0;
306+
for(auto& task: TasksGraph.GetTasks()) {
307+
// const auto& task = TasksGraph.GetTask(taskId);
308+
const auto& stageInfo = TasksGraph.GetStageInfo(task.StageId);
309+
const NKqpProto::TKqpPhyStage& stage = stageInfo.Meta.GetStage(stageInfo.Id);
310+
const ui64 stageLevel = stage.GetProgram().GetSettings().GetStageLevel();
311+
312+
if (stageLevel > maxLevel) {
313+
maxLevel = stageLevel;
314+
deepestTasks.clear();
315+
}
316+
317+
if (stageLevel == maxLevel) {
318+
deepestTasks.push_back(task.Id);
319+
}
320+
}
321+
322+
THashMap<ui64, ui64> alreadyAssigned;
323+
for(auto& [nodeId, tasks] : TasksPerNode) {
324+
for(ui64 taskId: tasks) {
325+
alreadyAssigned.emplace(taskId, nodeId);
326+
}
327+
}
328+
329+
if (deepestTasks.size() <= placingOptions.MaxNonParallelTopStageExecutionLimit) {
330+
// Few tasks at the highest stage level: this looks like a merge / union all connection, so place the ones not yet assigned on the executer node.
331+
for(ui64 taskId: deepestTasks) {
332+
auto [it, success] = alreadyAssigned.emplace(taskId, ExecuterId.NodeId());
333+
if (success) {
334+
TasksPerNode[ExecuterId.NodeId()].push_back(taskId);
335+
}
336+
}
337+
}
338+
296339
auto planner = (UseMockEmptyPlanner ? CreateKqpMockEmptyPlanner() : CreateKqpGreedyPlanner()); // KqpMockEmptyPlanner is a mock planner for tests
297340

298341
auto ctx = TlsActivationContext->AsActorContext();
299342
if (ctx.LoggerSettings() && ctx.LoggerSettings()->Satisfies(NActors::NLog::PRI_DEBUG, NKikimrServices::KQP_EXECUTER)) {
300343
planner->SetLogFunc([TxId = TxId, &UserRequestContext = UserRequestContext](TStringBuf msg) { LOG_D(msg); });
301344
}
302345

303-
THashMap<ui64, size_t> nodeIdtoIdx;
304-
for (size_t idx = 0; idx < ResourcesSnapshot.size(); ++idx) {
305-
nodeIdtoIdx[ResourcesSnapshot[idx].nodeid()] = idx;
306-
}
307-
308346
LogMemoryStatistics([TxId = TxId, &UserRequestContext = UserRequestContext](TStringBuf msg) { LOG_D(msg); });
309347

310-
auto plan = planner->Plan(ResourcesSnapshot, ResourceEstimations);
348+
ui64 selfNodeId = ExecuterId.NodeId();
349+
TString selfNodeDC;
311350

312-
THashMap<ui64, ui64> alreadyAssigned;
313-
for(auto& [nodeId, tasks] : TasksPerNode) {
314-
for(ui64 taskId: tasks) {
315-
alreadyAssigned.emplace(taskId, nodeId);
351+
TVector<const NKikimrKqp::TKqpNodeResources*> allNodes;
352+
TVector<const NKikimrKqp::TKqpNodeResources*> executerDcNodes;
353+
allNodes.reserve(ResourcesSnapshot.size());
354+
355+
for(auto& snapNode: ResourcesSnapshot) {
356+
const TString& dc = snapNode.GetKqpProxyNodeResources().GetDataCenterId();
357+
if (snapNode.GetNodeId() == selfNodeId) {
358+
selfNodeDC = dc;
359+
break;
316360
}
317361
}
318362

319-
if (!plan.empty()) {
320-
for (auto& group : plan) {
321-
for(ui64 taskId: group.TaskIds) {
322-
auto [it, success] = alreadyAssigned.emplace(taskId, group.NodeId);
323-
if (success) {
324-
TasksPerNode[group.NodeId].push_back(taskId);
325-
}
326-
}
363+
for(auto& snapNode: ResourcesSnapshot) {
364+
allNodes.push_back(&snapNode);
365+
if (selfNodeDC == snapNode.GetKqpProxyNodeResources().GetDataCenterId()) {
366+
executerDcNodes.push_back(&snapNode);
327367
}
368+
}
328369

329-
return nullptr;
330-
} else {
370+
TVector<IKqpPlannerStrategy::TResult> plan;
371+
372+
if (!executerDcNodes.empty() && placingOptions.PreferLocalDatacenterExecution) {
373+
plan = planner->Plan(executerDcNodes, ResourceEstimations);
374+
}
375+
376+
if (plan.empty()) {
377+
plan = planner->Plan(allNodes, ResourceEstimations);
378+
}
379+
380+
if (plan.empty()) {
331381
LogMemoryStatistics([TxId = TxId, &UserRequestContext = UserRequestContext](TStringBuf msg) { LOG_E(msg); });
332382

333383
auto ev = MakeHolder<TEvKqp::TEvAbortExecution>(NYql::NDqProto::StatusIds::PRECONDITION_FAILED,
334384
TStringBuilder() << "Not enough resources to execute query. " << "TraceId: " << UserRequestContext->TraceId);
335385
return std::make_unique<IEventHandle>(ExecuterId, ExecuterId, ev.Release());
336386
}
387+
388+
for (auto& group : plan) {
389+
for(ui64 taskId: group.TaskIds) {
390+
auto [it, success] = alreadyAssigned.emplace(taskId, group.NodeId);
391+
if (success) {
392+
TasksPerNode[group.NodeId].push_back(taskId);
393+
}
394+
}
395+
}
396+
397+
return nullptr;
337398
}
338399

339400
const IKqpGateway::TKqpSnapshot& TKqpPlanner::GetSnapshot() const {

ydb/core/kqp/executer_actor/kqp_planner_strategy.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -90,16 +90,16 @@ class TNodesManager {
9090
return result;
9191
}
9292

93-
TNodesManager(const TVector<NKikimrKqp::TKqpNodeResources>& nodeResources) {
93+
TNodesManager(const TVector<const NKikimrKqp::TKqpNodeResources*>& nodeResources) {
9494
for (auto& node : nodeResources) {
95-
if (!node.GetAvailableComputeActors()) {
95+
if (!node->GetAvailableComputeActors()) {
9696
continue;
9797
}
9898
Nodes.emplace_back(TNodeDesc{
99-
node.GetNodeId(),
100-
ActorIdFromProto(node.GetResourceManagerActorId()),
101-
node.GetTotalMemory() - node.GetUsedMemory(),
102-
node.GetAvailableComputeActors(),
99+
node->GetNodeId(),
100+
ActorIdFromProto(node->GetResourceManagerActorId()),
101+
node->GetTotalMemory() - node->GetUsedMemory(),
102+
node->GetAvailableComputeActors(),
103103
{}
104104
});
105105
}
@@ -111,7 +111,7 @@ class TKqpGreedyPlanner : public IKqpPlannerStrategy {
111111
public:
112112
~TKqpGreedyPlanner() override {}
113113

114-
TVector<TResult> Plan(const TVector<NKikimrKqp::TKqpNodeResources>& nodeResources,
114+
TVector<TResult> Plan(const TVector<const NKikimrKqp::TKqpNodeResources*>& nodeResources,
115115
const TVector<TTaskResourceEstimation>& tasks) override
116116
{
117117
TVector<TResult> result;
@@ -161,7 +161,7 @@ class TKqpMockEmptyPlanner : public IKqpPlannerStrategy {
161161
public:
162162
~TKqpMockEmptyPlanner() override {}
163163

164-
TVector<TResult> Plan(const TVector<NKikimrKqp::TKqpNodeResources>&,
164+
TVector<TResult> Plan(const TVector<const NKikimrKqp::TKqpNodeResources*>&,
165165
const TVector<TTaskResourceEstimation>&) override
166166
{
167167
return {};

ydb/core/kqp/executer_actor/kqp_planner_strategy.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ class IKqpPlannerStrategy {
2323
TVector<ui64> TaskIds;
2424
};
2525

26-
virtual TVector<TResult> Plan(const TVector<NKikimrKqp::TKqpNodeResources>& nodeResources,
26+
virtual TVector<TResult> Plan(const TVector<const NKikimrKqp::TKqpNodeResources*>& nodeResources,
2727
const TVector<TTaskResourceEstimation>& estimatedResources) = 0;
2828

2929
protected:

ydb/core/kqp/rm_service/kqp_rm_service.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,14 @@ class TKqpResourceManager : public IKqpResourceManager {
142142
return Counters;
143143
}
144144

145+
// Returns the current planner placement settings as a plain TPlannerPlacingOptions value.
// Each field is read with its own atomic load(), so the three values are not
// captured as a single atomic unit.
TPlannerPlacingOptions GetPlacingOptions() override {
146+
return TPlannerPlacingOptions{
147+
.MaxNonParallelTasksExecutionLimit = MaxNonParallelTasksExecutionLimit.load(),
148+
.MaxNonParallelTopStageExecutionLimit = MaxNonParallelTopStageExecutionLimit.load(),
149+
.PreferLocalDatacenterExecution = PreferLocalDatacenterExecution.load(),
150+
};
151+
}
152+
145153
void CreateResourceInfoExchanger(
146154
const NKikimrConfig::TTableServiceConfig::TResourceManager::TInfoExchangerSettings& settings) {
147155
PublishResourcesByExchanger = true;
@@ -414,6 +422,9 @@ class TKqpResourceManager : public IKqpResourceManager {
414422
MinChannelBufferSize.store(config.GetMinChannelBufferSize());
415423
MaxTotalChannelBuffersSize.store(config.GetMaxTotalChannelBuffersSize());
416424
QueryMemoryLimit.store(config.GetQueryMemoryLimit());
425+
MaxNonParallelTopStageExecutionLimit.store(config.GetMaxNonParallelTopStageExecutionLimit());
426+
MaxNonParallelTasksExecutionLimit.store(config.GetMaxNonParallelTasksExecutionLimit());
427+
PreferLocalDatacenterExecution.store(config.GetPreferLocalDatacenterExecution());
417428
}
418429

419430
ui32 GetNodeId() override {
@@ -460,6 +471,9 @@ class TKqpResourceManager : public IKqpResourceManager {
460471
std::atomic<i32> ExecutionUnitsLimit;
461472
TLimitedResource<ui64> ScanQueryMemoryResource;
462473
std::atomic<i64> ExternalDataQueryMemory = 0;
474+
std::atomic<ui64> MaxNonParallelTopStageExecutionLimit = 1;
475+
std::atomic<ui64> MaxNonParallelTasksExecutionLimit = 8;
476+
std::atomic<bool> PreferLocalDatacenterExecution = true;
463477

464478
// current state
465479
std::atomic<ui64> LastResourceBrokerTaskId = 0;

ydb/core/kqp/rm_service/kqp_rm_service.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,12 @@ struct TKqpLocalNodeResources {
202202
std::array<ui64, EKqpMemoryPool::Count> Memory;
203203
};
204204

205+
// Task placement settings read by TKqpPlanner::AssignTasksToNodes();
// obtained through IKqpResourceManager::GetPlacingOptions().
struct TPlannerPlacingOptions {
206+
// Single-node execution is considered when the number of task resource estimations
// does not exceed this limit (or when all tasks are already bound to one node).
ui64 MaxNonParallelTasksExecutionLimit = 8;
207+
// When the number of tasks at the highest stage level does not exceed this limit,
// those of them that are not yet assigned are placed on the executer's node.
ui64 MaxNonParallelTopStageExecutionLimit = 1;
208+
// When true and the resource snapshot contains nodes from the executer's data center,
// planning is first attempted on those nodes only; all nodes are used if that plan is empty.
bool PreferLocalDatacenterExecution = true;
209+
};
210+
205211
/// per node singleton with instant API
206212
class IKqpResourceManager : private TNonCopyable {
207213
public:
@@ -211,6 +217,7 @@ class IKqpResourceManager : private TNonCopyable {
211217

212218
virtual TKqpRMAllocateResult AllocateResources(TIntrusivePtr<TTxState>& tx, TIntrusivePtr<TTaskState>& task, const TKqpResourcesRequest& resources) = 0;
213219

220+
virtual TPlannerPlacingOptions GetPlacingOptions() = 0;
214221
virtual TTaskResourceEstimation EstimateTaskResources(const NYql::NDqProto::TDqTask& task, const ui32 tasksCount) = 0;
215222
virtual void EstimateTaskResources(TTaskResourceEstimation& result, const ui32 tasksCount) = 0;
216223

ydb/core/protos/table_service_config.proto

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ message TTableServiceConfig {
4646

4747
optional uint64 MinMemAllocSize = 23 [default = 8388608]; // 8 MiB
4848
optional uint64 MinMemFreeSize = 24 [default = 33554432]; // 32 MiB
49+
50+
optional uint64 MaxNonParallelTasksExecutionLimit = 25 [default = 8];
51+
optional uint64 MaxNonParallelTopStageExecutionLimit = 26 [default = 1];
52+
optional bool PreferLocalDatacenterExecution = 27 [ default = true ];
4953
}
5054

5155
message TSpillingServiceConfig {

0 commit comments

Comments
 (0)