@@ -12,8 +12,12 @@ ui64 NonZeroMin(ui64 a, ui64 b) {
12
12
return (b == 0 ) ? a : ((a == 0 || a > b) ? b : a);
13
13
}
14
14
15
- void TTimeSeriesStats::ExportAggStats (ui64 baseTimeMs, NYql::NDqProto::TDqStatsAggr& stats) {
15
+ void TTimeSeriesStats::ExportAggStats (NYql::NDqProto::TDqStatsAggr& stats) {
16
16
NKikimr::NKqp::ExportAggStats (Values, stats);
17
+ }
18
+
19
+ void TTimeSeriesStats::ExportAggStats (ui64 baseTimeMs, NYql::NDqProto::TDqStatsAggr& stats) {
20
+ ExportAggStats (stats);
17
21
ExportHistory (baseTimeMs, stats);
18
22
}
19
23
@@ -28,16 +32,21 @@ void TTimeSeriesStats::ExportHistory(ui64 baseTimeMs, NYql::NDqProto::TDqStatsAg
28
32
}
29
33
}
30
34
31
- void TTimeSeriesStats::Resize (ui32 taskCount ) {
32
- Values.resize (taskCount );
35
+ void TTimeSeriesStats::Resize (ui32 count ) {
36
+ Values.resize (count );
33
37
}
34
38
35
- void TTimeSeriesStats::SetNonZero (ui32 taskIndex , ui64 value) {
39
+ void TTimeSeriesStats::SetNonZero (ui32 index , ui64 value) {
36
40
if (value) {
41
+ Y_ASSERT (index < Values.size ());
37
42
Sum += value;
38
- Sum -= Values[taskIndex];
39
- Values[taskIndex] = value;
43
+ Sum -= Values[index];
44
+ Values[index] = value;
45
+ AppendHistory ();
40
46
}
47
+ }
48
+
49
+ void TTimeSeriesStats::AppendHistory () {
41
50
if (HistorySampleCount) {
42
51
auto nowMs = Now ().MilliSeconds ();
43
52
@@ -97,6 +106,69 @@ void TTimeSeriesStats::Pack() {
97
106
}
98
107
}
99
108
109
+ void TPartitionedStats::ResizeByTasks (ui32 taskCount) {
110
+ for (auto & p : Parts) {
111
+ p.resize (taskCount);
112
+ }
113
+ }
114
+
115
+ void TPartitionedStats::ResizeByParts (ui32 partCount, ui32 taskCount) {
116
+ auto oldPartCount = Parts.size ();
117
+ Parts.resize (partCount);
118
+ for (auto i = oldPartCount; i < partCount; i++) {
119
+ Parts[i].resize (taskCount);
120
+ }
121
+ Resize (partCount);
122
+ }
123
+
124
+ void TPartitionedStats::SetNonZero (ui32 taskIndex, ui32 partIndex, ui64 value, bool recordTimeSeries) {
125
+ if (value) {
126
+ Y_ASSERT (partIndex < Parts.size ());
127
+ auto & part = Parts[partIndex];
128
+ auto delta = value - part[taskIndex];
129
+ Y_ASSERT (taskIndex < part.size ());
130
+ part[taskIndex] = value;
131
+ Y_ASSERT (partIndex < Values.size ());
132
+ Values[partIndex] += delta;
133
+ Sum += delta;
134
+ if (recordTimeSeries) {
135
+ AppendHistory ();
136
+ }
137
+ }
138
+ }
139
+
140
+ void TTimeMultiSeriesStats::SetNonZero (TPartitionedStats& stats, ui32 taskIndex, const TString& key, ui64 value, bool recordTimeSeries) {
141
+ auto [it, inserted] = Indices.try_emplace (key);
142
+ if (inserted) {
143
+ it->second = Indices.size () - 1 ;
144
+ if (PartCount < Indices.size ()) {
145
+ PartCount += 4 ;
146
+ }
147
+ }
148
+ if (stats.Parts .size () < PartCount) {
149
+ stats.ResizeByParts (PartCount, TaskCount);
150
+ }
151
+ stats.SetNonZero (taskIndex, it->second , value, recordTimeSeries);
152
+ }
153
+
154
+ void TExternalStats::Resize (ui32 taskCount) {
155
+ ExternalRows.ResizeByTasks (taskCount);
156
+ ExternalBytes.ResizeByTasks (taskCount);
157
+ FirstMessageMs.ResizeByTasks (taskCount);
158
+ LastMessageMs.ResizeByTasks (taskCount);
159
+ TaskCount = taskCount;
160
+ }
161
+
162
+ void TExternalStats::SetHistorySampleCount (ui32 historySampleCount) {
163
+ ExternalBytes.HistorySampleCount = historySampleCount;
164
+ }
165
+
166
+ void TExternalStats::ExportHistory (ui64 baseTimeMs, NDqProto::TDqExternalAggrStats& stats) {
167
+ if (stats.HasExternalBytes ()) {
168
+ ExternalBytes.ExportHistory (baseTimeMs, *stats.MutableExternalBytes ());
169
+ }
170
+ }
171
+
100
172
void TAsyncStats::Resize (ui32 taskCount) {
101
173
Bytes.Resize (taskCount);
102
174
DecompressedBytes.resize (taskCount);
@@ -127,20 +199,25 @@ void TAsyncStats::ExportHistory(ui64 baseTimeMs, NYql::NDqProto::TDqAsyncStatsAg
127
199
}
128
200
129
201
void TAsyncBufferStats::Resize (ui32 taskCount) {
202
+ External.Resize (taskCount);
130
203
Ingress.Resize (taskCount);
131
204
Push.Resize (taskCount);
132
205
Pop.Resize (taskCount);
133
206
Egress.Resize (taskCount);
134
207
}
135
208
136
209
void TAsyncBufferStats::SetHistorySampleCount (ui32 historySampleCount) {
210
+ External.SetHistorySampleCount (historySampleCount);
137
211
Ingress.SetHistorySampleCount (historySampleCount);
138
212
Push.SetHistorySampleCount (historySampleCount);
139
213
Pop.SetHistorySampleCount (historySampleCount);
140
214
Egress.SetHistorySampleCount (historySampleCount);
141
215
}
142
216
143
217
void TAsyncBufferStats::ExportHistory (ui64 baseTimeMs, NYql::NDqProto::TDqAsyncBufferStatsAggr& stats) {
218
+ if (stats.HasExternal ()) {
219
+ External.ExportHistory (baseTimeMs, *stats.MutableExternal ());
220
+ }
144
221
if (stats.HasIngress ()) {
145
222
Ingress.ExportHistory (baseTimeMs, *stats.MutableIngress ());
146
223
}
@@ -281,40 +358,46 @@ void TStageExecutionStats::ExportHistory(ui64 baseTimeMs, NYql::NDqProto::TDqSta
281
358
}
282
359
}
283
360
284
- void SetNonZero (ui64& target, ui64 source) {
361
+ inline void SetNonZero (ui64& target, ui64 source) {
285
362
if (source) {
286
363
target = source;
287
364
}
288
365
}
289
366
367
+ inline void SetNonZero (std::vector<ui64>& vector, ui32 index, ui64 value) {
368
+ Y_ASSERT (index < vector.size ());
369
+ SetNonZero (vector[index], value);
370
+ }
371
+
290
372
ui64 TStageExecutionStats::UpdateAsyncStats (ui32 index, TAsyncStats& aggrAsyncStats, const NYql::NDqProto::TDqAsyncBufferStats& asyncStats) {
291
373
ui64 baseTimeMs = 0 ;
292
374
293
375
aggrAsyncStats.Bytes .SetNonZero (index, asyncStats.GetBytes ());
294
- SetNonZero (aggrAsyncStats.DecompressedBytes [ index] , asyncStats.GetDecompressedBytes ());
295
- SetNonZero (aggrAsyncStats.Rows [ index] , asyncStats.GetRows ());
296
- SetNonZero (aggrAsyncStats.Chunks [ index] , asyncStats.GetChunks ());
297
- SetNonZero (aggrAsyncStats.Splits [ index] , asyncStats.GetSplits ());
376
+ SetNonZero (aggrAsyncStats.DecompressedBytes , index, asyncStats.GetDecompressedBytes ());
377
+ SetNonZero (aggrAsyncStats.Rows , index, asyncStats.GetRows ());
378
+ SetNonZero (aggrAsyncStats.Chunks , index, asyncStats.GetChunks ());
379
+ SetNonZero (aggrAsyncStats.Splits , index, asyncStats.GetSplits ());
298
380
299
381
auto firstMessageMs = asyncStats.GetFirstMessageMs ();
300
- SetNonZero (aggrAsyncStats.FirstMessageMs [ index] , firstMessageMs);
382
+ SetNonZero (aggrAsyncStats.FirstMessageMs , index, firstMessageMs);
301
383
baseTimeMs = NonZeroMin (baseTimeMs, firstMessageMs);
302
384
303
385
auto pauseMessageMs = asyncStats.GetPauseMessageMs ();
304
- SetNonZero (aggrAsyncStats.PauseMessageMs [ index] , pauseMessageMs);
386
+ SetNonZero (aggrAsyncStats.PauseMessageMs , index, pauseMessageMs);
305
387
baseTimeMs = NonZeroMin (baseTimeMs, pauseMessageMs);
306
388
307
389
auto resumeMessageMs = asyncStats.GetResumeMessageMs ();
308
- SetNonZero (aggrAsyncStats.ResumeMessageMs [ index] , resumeMessageMs);
390
+ SetNonZero (aggrAsyncStats.ResumeMessageMs , index, resumeMessageMs);
309
391
baseTimeMs = NonZeroMin (baseTimeMs, resumeMessageMs);
310
392
311
393
auto lastMessageMs = asyncStats.GetLastMessageMs ();
312
- SetNonZero (aggrAsyncStats.LastMessageMs [ index] , lastMessageMs);
394
+ SetNonZero (aggrAsyncStats.LastMessageMs , index, lastMessageMs);
313
395
baseTimeMs = NonZeroMin (baseTimeMs, lastMessageMs);
314
396
315
397
aggrAsyncStats.WaitTimeUs .SetNonZero (index, asyncStats.GetWaitTimeUs ());
316
- SetNonZero (aggrAsyncStats.WaitPeriods [ index] , asyncStats.GetWaitPeriods ());
398
+ SetNonZero (aggrAsyncStats.WaitPeriods , index, asyncStats.GetWaitPeriods ());
317
399
if (firstMessageMs && lastMessageMs > firstMessageMs) {
400
+ Y_ASSERT (index < aggrAsyncStats.ActiveTimeUs .size ());
318
401
aggrAsyncStats.ActiveTimeUs [index] = lastMessageMs - firstMessageMs;
319
402
}
320
403
@@ -348,29 +431,29 @@ ui64 TStageExecutionStats::UpdateStats(const NYql::NDqProto::TDqTaskStats& taskS
348
431
}
349
432
350
433
CpuTimeUs.SetNonZero (index, taskStats.GetCpuTimeUs ());
351
- SetNonZero (SourceCpuTimeUs[ index] , taskStats.GetSourceCpuTimeUs ());
352
-
353
- SetNonZero (InputRows[ index] , taskStats.GetInputRows ());
354
- SetNonZero (InputBytes[ index] , taskStats.GetInputBytes ());
355
- SetNonZero (OutputRows[ index] , taskStats.GetOutputRows ());
356
- SetNonZero (OutputBytes[ index] , taskStats.GetOutputBytes ());
357
- SetNonZero (ResultRows[ index] , taskStats.GetResultRows ());
358
- SetNonZero (ResultBytes[ index] , taskStats.GetResultBytes ());
359
- SetNonZero (IngressRows[ index] , taskStats.GetIngressRows ());
360
- SetNonZero (IngressBytes[ index] , taskStats.GetIngressBytes ());
361
- SetNonZero (IngressDecompressedBytes[ index] , taskStats.GetIngressDecompressedBytes ());
362
- SetNonZero (EgressRows[ index] , taskStats.GetEgressRows ());
363
- SetNonZero (EgressBytes[ index] , taskStats.GetEgressBytes ());
434
+ SetNonZero (SourceCpuTimeUs, index, taskStats.GetSourceCpuTimeUs ());
435
+
436
+ SetNonZero (InputRows, index, taskStats.GetInputRows ());
437
+ SetNonZero (InputBytes, index, taskStats.GetInputBytes ());
438
+ SetNonZero (OutputRows, index, taskStats.GetOutputRows ());
439
+ SetNonZero (OutputBytes, index, taskStats.GetOutputBytes ());
440
+ SetNonZero (ResultRows, index, taskStats.GetResultRows ());
441
+ SetNonZero (ResultBytes, index, taskStats.GetResultBytes ());
442
+ SetNonZero (IngressRows, index, taskStats.GetIngressRows ());
443
+ SetNonZero (IngressBytes, index, taskStats.GetIngressBytes ());
444
+ SetNonZero (IngressDecompressedBytes, index, taskStats.GetIngressDecompressedBytes ());
445
+ SetNonZero (EgressRows, index, taskStats.GetEgressRows ());
446
+ SetNonZero (EgressBytes, index, taskStats.GetEgressBytes ());
364
447
365
448
auto startTimeMs = taskStats.GetStartTimeMs ();
366
- SetNonZero (StartTimeMs[ index] , startTimeMs);
449
+ SetNonZero (StartTimeMs, index, startTimeMs);
367
450
baseTimeMs = NonZeroMin (baseTimeMs, startTimeMs);
368
451
369
452
auto finishTimeMs = taskStats.GetFinishTimeMs ();
370
- SetNonZero (FinishTimeMs[ index] , finishTimeMs);
453
+ SetNonZero (FinishTimeMs, index, finishTimeMs);
371
454
baseTimeMs = NonZeroMin (baseTimeMs, finishTimeMs);
372
455
373
- SetNonZero (DurationUs[ index] , durationUs);
456
+ SetNonZero (DurationUs, index, durationUs);
374
457
WaitInputTimeUs.SetNonZero (index, taskStats.GetWaitInputTimeUs ());
375
458
WaitOutputTimeUs.SetNonZero (index, taskStats.GetWaitOutputTimeUs ());
376
459
@@ -383,13 +466,13 @@ ui64 TStageExecutionStats::UpdateStats(const NYql::NDqProto::TDqTaskStats& taskS
383
466
auto tablePath = tableStat.GetTablePath ();
384
467
auto [it, inserted] = Tables.try_emplace (tablePath, TaskCount);
385
468
auto & aggrTableStats = it->second ;
386
- SetNonZero (aggrTableStats.ReadRows [ index] , tableStat.GetReadRows ());
387
- SetNonZero (aggrTableStats.ReadBytes [ index] , tableStat.GetReadBytes ());
388
- SetNonZero (aggrTableStats.WriteRows [ index] , tableStat.GetWriteRows ());
389
- SetNonZero (aggrTableStats.WriteBytes [ index] , tableStat.GetWriteBytes ());
390
- SetNonZero (aggrTableStats.EraseRows [ index] , tableStat.GetEraseRows ());
391
- SetNonZero (aggrTableStats.EraseBytes [ index] , tableStat.GetEraseBytes ());
392
- SetNonZero (aggrTableStats.AffectedPartitions [ index] , tableStat.GetAffectedPartitions ());
469
+ SetNonZero (aggrTableStats.ReadRows , index, tableStat.GetReadRows ());
470
+ SetNonZero (aggrTableStats.ReadBytes , index, tableStat.GetReadBytes ());
471
+ SetNonZero (aggrTableStats.WriteRows , index, tableStat.GetWriteRows ());
472
+ SetNonZero (aggrTableStats.WriteBytes , index, tableStat.GetWriteBytes ());
473
+ SetNonZero (aggrTableStats.EraseRows , index, tableStat.GetEraseRows ());
474
+ SetNonZero (aggrTableStats.EraseBytes , index, tableStat.GetEraseBytes ());
475
+ SetNonZero (aggrTableStats.AffectedPartitions , index, tableStat.GetAffectedPartitions ());
393
476
}
394
477
395
478
for (auto & sourceStat : taskStats.GetSources ()) {
@@ -403,6 +486,17 @@ ui64 TStageExecutionStats::UpdateStats(const NYql::NDqProto::TDqTaskStats& taskS
403
486
baseTimeMs = NonZeroMin (baseTimeMs, UpdateAsyncStats (index, asyncBufferStats.Ingress , sourceStat.GetIngress ()));
404
487
baseTimeMs = NonZeroMin (baseTimeMs, UpdateAsyncStats (index, asyncBufferStats.Push , sourceStat.GetPush ()));
405
488
baseTimeMs = NonZeroMin (baseTimeMs, UpdateAsyncStats (index, asyncBufferStats.Pop , sourceStat.GetPop ()));
489
+ for (auto & partitionStat : sourceStat.GetExternalPartitions ()) {
490
+ auto key = partitionStat.GetPartitionId ();
491
+ asyncBufferStats.External .SetNonZero (asyncBufferStats.External .ExternalRows ,
492
+ index, key, partitionStat.GetExternalRows (), false );
493
+ asyncBufferStats.External .SetNonZero (asyncBufferStats.External .ExternalBytes ,
494
+ index, key, partitionStat.GetExternalBytes (), true );
495
+ asyncBufferStats.External .SetNonZero (asyncBufferStats.External .FirstMessageMs ,
496
+ index, key, partitionStat.GetFirstMessageMs (), false );
497
+ asyncBufferStats.External .SetNonZero (asyncBufferStats.External .LastMessageMs ,
498
+ index, key, partitionStat.GetLastMessageMs (), false );
499
+ }
406
500
}
407
501
}
408
502
@@ -449,22 +543,22 @@ ui64 TStageExecutionStats::UpdateStats(const NYql::NDqProto::TDqTaskStats& taskS
449
543
case NYql::NDqProto::TDqOperatorStats::kJoin : {
450
544
auto [it, inserted] = Joins.try_emplace (operatorId, TaskCount);
451
545
auto & joinStats = it->second ;
452
- SetNonZero (joinStats.Rows [ index] , operatorStat.GetRows ());
453
- SetNonZero (joinStats.Bytes [ index] , operatorStat.GetBytes ());
546
+ SetNonZero (joinStats.Rows , index, operatorStat.GetRows ());
547
+ SetNonZero (joinStats.Bytes , index, operatorStat.GetBytes ());
454
548
break ;
455
549
}
456
550
case NYql::NDqProto::TDqOperatorStats::kFilter : {
457
551
auto [it, inserted] = Filters.try_emplace (operatorId, TaskCount);
458
552
auto & filterStats = it->second ;
459
- SetNonZero (filterStats.Rows [ index] , operatorStat.GetRows ());
460
- SetNonZero (filterStats.Bytes [ index] , operatorStat.GetBytes ());
553
+ SetNonZero (filterStats.Rows , index, operatorStat.GetRows ());
554
+ SetNonZero (filterStats.Bytes , index, operatorStat.GetBytes ());
461
555
break ;
462
556
}
463
557
case NYql::NDqProto::TDqOperatorStats::kAggregation : {
464
558
auto [it, inserted] = Aggregations.try_emplace (operatorId, TaskCount);
465
559
auto & aggStats = it->second ;
466
- SetNonZero (aggStats.Rows [ index] , operatorStat.GetRows ());
467
- SetNonZero (aggStats.Bytes [ index] , operatorStat.GetBytes ());
560
+ SetNonZero (aggStats.Rows , index, operatorStat.GetRows ());
561
+ SetNonZero (aggStats.Bytes , index, operatorStat.GetBytes ());
468
562
break ;
469
563
}
470
564
default :
@@ -1074,6 +1168,8 @@ void TQueryExecutionStats::UpdateTaskStats(ui64 taskId, const NYql::NDqProto::TD
1074
1168
BaseTimeMs = NonZeroMin (BaseTimeMs, it->second .UpdateStats (taskStats, state, stats.GetMaxMemoryUsage (), stats.GetDurationUs ()));
1075
1169
}
1076
1170
1171
+ // SIMD-friendly aggregations are below. Compiler is able to vectorize sum/count, but needs help with min/max
1172
+
1077
1173
void ExportAggStats (std::vector<ui64>& data, NYql::NDqProto::TDqStatsMinMax& stats) {
1078
1174
1079
1175
Y_DEBUG_ABORT_UNLESS ((data.size () & 3 ) == 0 );
@@ -1215,6 +1311,12 @@ void TQueryExecutionStats::ExportAggAsyncStats(TAsyncStats& data, NYql::NDqProto
1215
1311
}
1216
1312
1217
1313
void TQueryExecutionStats::ExportAggAsyncBufferStats (TAsyncBufferStats& data, NYql::NDqProto::TDqAsyncBufferStatsAggr& stats) {
1314
+ auto & external = *stats.MutableExternal ();
1315
+ data.External .ExternalRows .ExportAggStats (*external.MutableExternalRows ());
1316
+ data.External .ExternalBytes .ExportAggStats (BaseTimeMs, *external.MutableExternalBytes ());
1317
+ ExportOffsetAggStats (data.External .FirstMessageMs .Values , *external.MutableFirstMessageMs (), BaseTimeMs);
1318
+ ExportOffsetAggStats (data.External .LastMessageMs .Values , *external.MutableLastMessageMs (), BaseTimeMs);
1319
+ external.SetPartitionCount (data.External .Indices .size ());
1218
1320
ExportAggAsyncStats (data.Ingress , *stats.MutableIngress ());
1219
1321
ExportAggAsyncStats (data.Push , *stats.MutablePush ());
1220
1322
ExportAggAsyncStats (data.Pop , *stats.MutablePop ());
0 commit comments