Skip to content

Commit 84a2fb2

Browse files
authored
Better plans + bad plans handling (#10820)
1 parent 63903b0 commit 84a2fb2

File tree

8 files changed

+172
-34
lines changed

8 files changed

+172
-34
lines changed

ydb/core/fq/libs/compute/common/utils.cpp

Lines changed: 25 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <library/cpp/json/json_writer.h>
66
#include <library/cpp/json/yson/json2yson.h>
77
#include <ydb/library/yql/public/issue/yql_issue_message.h>
8+
#include <ydb/library/yql/public/issue/protos/issue_severity.pb.h>
89

910
namespace NFq {
1011

@@ -431,7 +432,7 @@ void EnumeratePlans(NYson::TYsonWriter& writer, NJson::TJsonValue& value, ui32&
431432
}
432433
}
433434

434-
TString GetV1StatFromV2Plan(const TString& plan, double* cpuUsage, TString* timeline, ui64 maxTimelineSize) {
435+
TString GetV1StatFromV2Plan(const TString& plan, double* cpuUsage, TString* timeline) {
435436
TStringStream out;
436437
NYson::TYsonWriter writer(&out);
437438
writer.OnBeginMap();
@@ -475,20 +476,7 @@ TString GetV1StatFromV2Plan(const TString& plan, double* cpuUsage, TString* time
475476
if (timeline) {
476477
TPlanVisualizer planViz;
477478
planViz.LoadPlans(plan);
478-
*timeline = planViz.PrintSvgSafe();
479-
if (maxTimelineSize && timeline->size() > maxTimelineSize) {
480-
TStringBuilder builder;
481-
builder
482-
<< "<svg width='600' height='200' xmlns='http://www.w3.org/2000/svg'>" << Endl
483-
<< " <text font-size='16px' x='20' y='40'>There is nothing wrong with the request.</text>" << Endl
484-
<< " <text font-size='16px' x='20' y='80'>Unfortunately, image size " << timeline->size() << " is too large.</text>" << Endl
485-
<< " <text font-size='16px' x='20' y='120'>It exceeds limit of " << maxTimelineSize << " and was discarded</text>" << Endl
486-
<< "</svg>" << Endl;
487-
*timeline = builder;
488-
}
489-
// remove json "timeline" field after migration
490-
writer.OnKeyedItem("timeline");
491-
writer.OnStringScalar(*timeline);
479+
*timeline = planViz.PrintSvg();
492480
}
493481
writer.OnEndMap();
494482
return NJson2Yson::ConvertYson2Json(out.Str());
@@ -1164,7 +1152,7 @@ struct TNoneStatProcessor : IPlanStatProcessor {
11641152
return plan;
11651153
}
11661154

1167-
TString GetQueryStat(const TString&, double& cpuUsage, TString*, ui64) override {
1155+
TString GetQueryStat(const TString&, double& cpuUsage, TString*) override {
11681156
cpuUsage = 0.0;
11691157
return "";
11701158
}
@@ -1197,8 +1185,8 @@ struct TPlanStatProcessor : IPlanStatProcessor {
11971185
return plan;
11981186
}
11991187

1200-
TString GetQueryStat(const TString& plan, double& cpuUsage, TString* timeline, ui64 maxtimelineSize) override {
1201-
return GetV1StatFromV2Plan(plan, &cpuUsage, timeline, maxtimelineSize);
1188+
TString GetQueryStat(const TString& plan, double& cpuUsage, TString* timeline) override {
1189+
return GetV1StatFromV2Plan(plan, &cpuUsage, timeline);
12021190
}
12031191

12041192
TPublicStat GetPublicStat(const TString& stat) override {
@@ -1229,8 +1217,8 @@ struct TProfileStatProcessor : TPlanStatProcessor {
12291217
};
12301218

12311219
struct TProdStatProcessor : TFullStatProcessor {
1232-
TString GetQueryStat(const TString& plan, double& cpuUsage, TString* timeline, ui64 maxtimelineSize) override {
1233-
return GetPrettyStatistics(GetV1StatFromV2Plan(plan, &cpuUsage, timeline, maxtimelineSize));
1220+
TString GetQueryStat(const TString& plan, double& cpuUsage, TString* timeline) override {
1221+
return GetPrettyStatistics(GetV1StatFromV2Plan(plan, &cpuUsage, timeline));
12341222
}
12351223
};
12361224

@@ -1263,10 +1251,18 @@ Fq::Private::PingTaskRequest PingTaskRequestBuilder::Build(
12631251
) {
12641252
Fq::Private::PingTaskRequest pingTaskRequest = Build(queryStats);
12651253

1254+
// Application-level issues, pass as is
12661255
if (issues) {
12671256
NYql::IssuesToMessage(issues, pingTaskRequest.mutable_issues());
12681257
}
12691258

1259+
// Builder own (internal) issues will be logged later, just warn the user
1260+
if (Issues) {
1261+
auto* issue = pingTaskRequest.add_issues();
1262+
issue->set_message("There are minor issues with query statistics processing. You can supply query ID and ask support for the information.");
1263+
issue->set_severity(NYql::TSeverityIds::S_WARNING);
1264+
}
1265+
12701266
if (computeStatus) {
12711267
pingTaskRequest.set_status(*computeStatus);
12721268
}
@@ -1318,7 +1314,13 @@ Fq::Private::PingTaskRequest PingTaskRequestBuilder::Build(const TString& queryP
13181314
CpuUsage = 0.0;
13191315
try {
13201316
TString timeline;
1321-
auto stat = Processor->GetQueryStat(plan, CpuUsage, ShowQueryTimeline ? &timeline : nullptr, MaxQueryTimelineSize);
1317+
auto stat = Processor->GetQueryStat(plan, CpuUsage, ShowQueryTimeline ? &timeline : nullptr);
1318+
1319+
if (MaxQueryTimelineSize && timeline.size() > MaxQueryTimelineSize) {
1320+
Issues.AddIssue(NYql::TIssue(TStringBuilder() << "Timeline size " << timeline.size() << " exceeds limit of " << MaxQueryTimelineSize));
1321+
timeline = "";
1322+
}
1323+
13221324
pingTaskRequest.set_statistics(stat);
13231325
pingTaskRequest.set_dump_raw_statistics(true);
13241326
if (timeline) {
@@ -1329,8 +1331,8 @@ Fq::Private::PingTaskRequest PingTaskRequestBuilder::Build(const TString& queryP
13291331
flatStat["ComputeTimeUs"] = computeTimeUs;
13301332
SerializeStats(*pingTaskRequest.mutable_flat_stats(), flatStat);
13311333
PublicStat = Processor->GetPublicStat(stat);
1332-
} catch(const NJson::TJsonException& ex) {
1333-
Issues.AddIssue(NYql::TIssue(TStringBuilder() << "Error stat conversion: " << ex.what()));
1334+
} catch (const std::exception& e) {
1335+
Issues.AddIssue(NYql::TIssue(TStringBuilder() << "Error stat processing: " << e.what()));
13341336
}
13351337

13361338
return pingTaskRequest;

ydb/core/fq/libs/compute/common/utils.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ inline std::shared_ptr<NYdb::NTable::TTableClient> CreateNewTableClient(const TS
2828
tableSettings);
2929
}
3030

31-
TString GetV1StatFromV2Plan(const TString& plan, double* cpuUsage = nullptr, TString* timeline = nullptr, ui64 maxTimelineSize = 0);
31+
TString GetV1StatFromV2Plan(const TString& plan, double* cpuUsage = nullptr, TString* timeline = nullptr);
3232
TString GetV1StatFromV2PlanV2(const TString& plan);
3333
TString GetPrettyStatistics(const TString& statistics);
3434
THashMap<TString, i64> AggregateStats(TStringBuf plan);
@@ -55,7 +55,7 @@ struct IPlanStatProcessor {
5555
virtual Ydb::Query::StatsMode GetStatsMode() = 0;
5656
virtual TString ConvertPlan(const TString& plan) = 0;
5757
virtual TString GetPlanVisualization(const TString& plan) = 0;
58-
virtual TString GetQueryStat(const TString& plan, double& cpuUsage, TString* timeline, ui64 maxtimelineSize) = 0;
58+
virtual TString GetQueryStat(const TString& plan, double& cpuUsage, TString* timeline) = 0;
5959
virtual TPublicStat GetPublicStat(const TString& stat) = 0;
6060
virtual THashMap<TString, i64> GetFlatStat(TStringBuf plan) = 0;
6161
};

ydb/core/fq/libs/compute/common/ya.make

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ PEERDIR(
1414
ydb/core/fq/libs/grpc
1515
ydb/core/fq/libs/shared_resources
1616
ydb/library/yql/public/issue
17+
ydb/library/yql/public/issue/protos
1718
ydb/library/yql/providers/common/http_gateway
1819
ydb/library/yql/providers/dq/provider
1920
ydb/library/yql/providers/generic/connector/api/service/protos

ydb/core/fq/libs/compute/ydb/status_tracker_actor.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,7 @@ class TStatusTrackerActor : public TBaseComputeActor<TStatusTrackerActor> {
228228
Fq::Private::PingTaskRequest pingTaskRequest = Builder.Build(QueryStats, Issues);
229229
if (Builder.Issues) {
230230
LOG_W(Builder.Issues.ToOneLineString());
231+
GetStepCountersSubgroup()->GetCounter("StatIssues", true)->Inc();
231232
}
232233
ReportPublicCounters(Builder.PublicStat);
233234
Send(Pinger, new TEvents::TEvForwardPingRequest(pingTaskRequest), 0, 1);
@@ -248,6 +249,7 @@ class TStatusTrackerActor : public TBaseComputeActor<TStatusTrackerActor> {
248249
Fq::Private::PingTaskRequest pingTaskRequest = Builder.Build(QueryStats, Issues, std::nullopt, StatusCode);
249250
if (Builder.Issues) {
250251
LOG_W(Builder.Issues.ToOneLineString());
252+
GetStepCountersSubgroup()->GetCounter("StatIssues", true)->Inc();
251253
}
252254
ReportPublicCounters(Builder.PublicStat);
253255
UpdateCpuQuota(Builder.CpuUsage);
@@ -263,6 +265,7 @@ class TStatusTrackerActor : public TBaseComputeActor<TStatusTrackerActor> {
263265
Fq::Private::PingTaskRequest pingTaskRequest = Builder.Build(QueryStats, Issues, ComputeStatus, std::nullopt);
264266
if (Builder.Issues) {
265267
LOG_W(Builder.Issues.ToOneLineString());
268+
GetStepCountersSubgroup()->GetCounter("StatIssues", true)->Inc();
266269
}
267270
ReportPublicCounters(Builder.PublicStat);
268271
UpdateCpuQuota(Builder.CpuUsage);

ydb/core/fq/libs/compute/ydb/stopper_actor.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ class TStopperActor : public TBaseComputeActor<TStopperActor> {
136136
Fq::Private::PingTaskRequest pingTaskRequest = Builder.Build(response.QueryStats, response.Issues, FederatedQuery::QueryMeta::ABORTING_BY_USER, statusCode);
137137
if (Builder.Issues) {
138138
LOG_W(Builder.Issues.ToOneLineString());
139+
GetStepCountersSubgroup()->GetCounter("StatIssues", true)->Inc();
139140
}
140141
Send(Pinger, new TEvents::TEvForwardPingRequest(pingTaskRequest));
141142
}

ydb/public/lib/ydb_cli/common/plan2svg.cpp

Lines changed: 138 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -451,7 +451,7 @@ void TPlan::LoadStage(std::shared_ptr<TStage> stage, const NJson::TJsonValue& no
451451

452452
TStringBuilder builder;
453453

454-
if (name == "Iterator" || name == "Member") {
454+
if (name == "Iterator" || name == "Member" || name == "ToFlow") {
455455
builder << "Reference";
456456
} else {
457457
builder << name;
@@ -461,11 +461,25 @@ void TPlan::LoadStage(std::shared_ptr<TStage> stage, const NJson::TJsonValue& no
461461
if (auto* limitNode = subNode.GetValueByPath("Limit")) {
462462
builder << ": " << limitNode->GetStringSafe();
463463
}
464+
} else if (name == "Sort") {
465+
if (auto* sortByNode = subNode.GetValueByPath("SortBy")) {
466+
auto sortBy = sortByNode->GetStringSafe();
467+
while (true) {
468+
auto p = sortBy.find("row.");
469+
if (p == sortBy.npos) {
470+
break;
471+
}
472+
sortBy.erase(p, 4);
473+
}
474+
if (sortBy) {
475+
builder << " by " << sortBy;
476+
}
477+
}
464478
} else if (name == "Filter") {
465479
if (auto* predicateNode = subNode.GetValueByPath("Predicate")) {
466480
auto filter = predicateNode->GetStringSafe();
467481
prevFilter = filter;
468-
while(true) {
482+
while (true) {
469483
auto p = filter.find("item.");
470484
if (p == filter.npos) {
471485
break;
@@ -482,14 +496,110 @@ void TPlan::LoadStage(std::shared_ptr<TStage> stage, const NJson::TJsonValue& no
482496
}
483497
builder << ": " << filter;
484498
}
485-
} else if (name == "TopSort") {
499+
} else if (name == "Aggregate") {
500+
if (auto* aggregationNode = subNode.GetValueByPath("Aggregation")) {
501+
auto aggr = aggregationNode->GetStringSafe();
502+
if (aggr) {
503+
if (aggr.StartsWith("{")) {
504+
aggr.erase(aggr.begin());
505+
}
506+
if (aggr.EndsWith("}")) {
507+
aggr.erase(aggr.end() - 1);
508+
}
509+
while (true) {
510+
auto p = aggr.find("_yql_agg_");
511+
if (p == aggr.npos) {
512+
break;
513+
}
514+
auto l = 9;
515+
auto p1 = aggr.begin() + p + l;
516+
while (p1 != aggr.end() && *p1 >= '0' && *p1 <= '9') {
517+
p1++;
518+
l++;
519+
}
520+
auto yqlAgg = aggr.substr(p, l);
521+
if (p1 != aggr.end() && *p1 == ':') {
522+
p1++;
523+
l++;
524+
if (p1 != aggr.end() && *p1 == ' ') {
525+
p1++;
526+
l++;
527+
}
528+
}
529+
aggr.erase(p, l);
530+
531+
auto extraChars = 7;
532+
p = aggr.find(",state." + yqlAgg);
533+
if (p == aggr.npos) {
534+
p = aggr.find("state." + yqlAgg + ",");
535+
}
536+
if (p == aggr.npos) {
537+
p = aggr.find("state." + yqlAgg);
538+
extraChars = 6;
539+
}
540+
if (p != aggr.npos) {
541+
aggr.erase(p, yqlAgg.size() + extraChars);
542+
}
543+
}
544+
while (true) {
545+
auto p = aggr.find("item.");
546+
if (p == aggr.npos) {
547+
break;
548+
}
549+
aggr.erase(p, 5);
550+
}
551+
builder << " " << aggr;
552+
}
553+
}
554+
if (auto* groupByNode = subNode.GetValueByPath("GroupBy")) {
555+
auto groupBy = groupByNode->GetStringSafe();
556+
while (true) {
557+
auto p = groupBy.find("item.");
558+
if (p == groupBy.npos) {
559+
break;
560+
}
561+
groupBy.erase(p, 5);
562+
}
563+
if (groupBy) {
564+
builder << ", Group By: " << groupBy;
565+
}
566+
}
567+
} else if (name == "TableFullScan") {
568+
if (auto* tableNode = subNode.GetValueByPath("Table")) {
569+
auto table = tableNode->GetStringSafe();
570+
auto n = table.find_last_of('/');
571+
if (n != table.npos) {
572+
table = table.substr(n + 1);
573+
}
574+
builder << " " << table;
575+
}
576+
builder << "(";
577+
if (auto* readColumnsNode = subNode.GetValueByPath("ReadColumns")) {
578+
bool firstColumn = true;
579+
for (const auto& subNode : readColumnsNode->GetArray()) {
580+
if (firstColumn) {
581+
firstColumn = false;
582+
} else {
583+
builder << ", ";
584+
}
585+
builder << subNode.GetStringSafe();
586+
}
587+
}
588+
builder << ")";
589+
} else if (name == "TopSort" || name == "Top") {
486590
if (auto* limitNode = subNode.GetValueByPath("Limit")) {
487-
builder << ", Limit: " << limitNode->GetStringSafe();
591+
auto limit = limitNode->GetStringSafe();
592+
if (limit) {
593+
builder << ", Limit: " << limit;
594+
}
488595
}
489596
if (auto* topSortByNode = subNode.GetValueByPath("TopSortBy")) {
490-
builder << ", TopSortBy: " << topSortByNode->GetStringSafe();
597+
auto topSortBy = topSortByNode->GetStringSafe();
598+
if (topSortBy) {
599+
builder << ", TopSortBy: " << topSortBy;
600+
}
491601
}
492-
} else if (name == "Iterator" || name == "Member") {
602+
} else if (name == "Iterator" || name == "Member" || name == "ToFlow") {
493603
if (auto* referenceNode = subNode.GetValueByPath(name)) {
494604
auto referenceName = referenceNode->GetStringSafe();
495605
references.insert(referenceName);
@@ -640,6 +750,7 @@ void TPlan::LoadStage(std::shared_ptr<TStage> stage, const NJson::TJsonValue& no
640750
}
641751
if (planNodeType == "Connection") {
642752
auto* keyColumnsNode = plan.GetValueByPath("KeyColumns");
753+
auto* sortColumnsNode = plan.GetValueByPath("SortColumns");
643754
if (auto* subNode = plan.GetValueByPath("Plans")) {
644755
for (auto& plan : subNode->GetArray()) {
645756
TString nodeType;
@@ -659,6 +770,11 @@ void TPlan::LoadStage(std::shared_ptr<TStage> stage, const NJson::TJsonValue& no
659770
stage->Connections.back()->KeyColumns.push_back(keyColumn.GetStringSafe());
660771
}
661772
}
773+
if (sortColumnsNode) {
774+
for (auto& sortColumn : sortColumnsNode->GetArray()) {
775+
stage->Connections.back()->SortColumns.push_back(sortColumn.GetStringSafe());
776+
}
777+
}
662778

663779
if (auto* planNodeIdNode = plan.GetValueByPath("PlanNodeId")) {
664780
auto planNodeId = planNodeIdNode->GetStringRobust();
@@ -758,8 +874,9 @@ void TPlan::LoadSource(std::shared_ptr<TSource> source, const NJson::TJsonValue&
758874
builder << " " << sourceTypeNode->GetStringSafe();
759875
}
760876
if (auto* nameNode = subNode.GetValueByPath("Name")) {
761-
builder << " " << nameNode->GetStringSafe() << "(";
877+
builder << " " << nameNode->GetStringSafe();
762878
}
879+
builder << "(";
763880
if (auto* readColumnsNode = subNode.GetValueByPath("ReadColumns")) {
764881
bool firstColumn = true;
765882
for (const auto& subNode : readColumnsNode->GetArray()) {
@@ -1038,8 +1155,9 @@ void TPlan::PrintSvg(ui64 maxTime, ui32& offsetY, TStringBuilder& background, TS
10381155
if (!s->Info.empty()) {
10391156
for (auto text : s->Info) {
10401157
canvas
1158+
<< "<g><title>" << text << "</title>"
10411159
<< "<text clip-path='url(#clipTextPath)' font-family='Verdana' font-size='" << INTERNAL_TEXT_HEIGHT << "px' fill='" << Config.Palette.StageText << "' x='" << s->IndentX + INTERNAL_WIDTH + 2
1042-
<< "' y='" << y0 << "'>" << text << "</text>" << Endl;
1160+
<< "' y='" << y0 << "'>" << text << "</text>" << "</g>" << Endl;
10431161
y0 += (INTERNAL_TEXT_HEIGHT + INTERNAL_GAP_Y);
10441162
}
10451163
} else {
@@ -1295,6 +1413,18 @@ void TPlan::PrintSvg(ui64 maxTime, ui32& offsetY, TStringBuilder& background, TS
12951413
canvas << k;
12961414
}
12971415
}
1416+
if (!c->SortColumns.empty()) {
1417+
canvas << " SortColumns: ";
1418+
bool first = true;
1419+
for (auto s : c->SortColumns) {
1420+
if (first) {
1421+
first = false;
1422+
} else {
1423+
canvas << ", ";
1424+
}
1425+
canvas << s;
1426+
}
1427+
}
12981428
canvas
12991429
<< "</title>" << Endl
13001430
<< " <polygon points='" << x + INTERNAL_WIDTH << "," << y + INTERNAL_HEIGHT << " "

ydb/public/lib/ydb_cli/common/plan2svg.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ class TConnection {
9090
std::shared_ptr<TSingleMetric> InputBytes;
9191
std::shared_ptr<TSingleMetric> InputRows;
9292
std::vector<std::string> KeyColumns;
93+
std::vector<std::string> SortColumns;
9394
bool CteConnection = false;
9495
ui32 CteIndentX = 0;
9596
ui32 CteOffsetY = 0;

0 commit comments

Comments
 (0)