@@ -12,8 +12,12 @@ ui64 NonZeroMin(ui64 a, ui64 b) {
12
12
return (b == 0 ) ? a : ((a == 0 || a > b) ? b : a);
13
13
}
14
14
15
- void TTimeSeriesStats::ExportAggStats (ui64 baseTimeMs, NYql::NDqProto::TDqStatsAggr& stats) {
15
+ void TTimeSeriesStats::ExportAggStats (NYql::NDqProto::TDqStatsAggr& stats) {
16
16
NKikimr::NKqp::ExportAggStats (Values, stats);
17
+ }
18
+
19
+ void TTimeSeriesStats::ExportAggStats (ui64 baseTimeMs, NYql::NDqProto::TDqStatsAggr& stats) {
20
+ ExportAggStats (stats);
17
21
ExportHistory (baseTimeMs, stats);
18
22
}
19
23
@@ -28,16 +32,20 @@ void TTimeSeriesStats::ExportHistory(ui64 baseTimeMs, NYql::NDqProto::TDqStatsAg
28
32
}
29
33
}
30
34
31
- void TTimeSeriesStats::Resize (ui32 taskCount ) {
32
- Values.resize (taskCount );
35
+ void TTimeSeriesStats::Resize (ui32 count ) {
36
+ Values.resize (count );
33
37
}
34
38
35
- void TTimeSeriesStats::SetNonZero (ui32 taskIndex , ui64 value) {
39
+ void TTimeSeriesStats::SetNonZero (ui32 index , ui64 value) {
36
40
if (value) {
37
41
Sum += value;
38
- Sum -= Values[taskIndex ];
39
- Values[taskIndex ] = value;
42
+ Sum -= Values[index ];
43
+ Values[index ] = value;
40
44
}
45
+ AppendHistory ();
46
+ }
47
+
48
+ void TTimeSeriesStats::AppendHistory () {
41
49
if (HistorySampleCount) {
42
50
auto nowMs = Now ().MilliSeconds ();
43
51
@@ -97,6 +105,62 @@ void TTimeSeriesStats::Pack() {
97
105
}
98
106
}
99
107
108
+ void TPartitionedStats::ResizeByTasks (ui32 taskCount) {
109
+ for (auto & p : Parts) {
110
+ p.resize (taskCount);
111
+ }
112
+ }
113
+
114
+ void TPartitionedStats::ResizeByParts (ui32 partCount, ui32 taskCount) {
115
+ auto oldPartCount = Parts.size ();
116
+ Parts.resize (partCount);
117
+ for (auto i = oldPartCount; i < partCount; i++) {
118
+ Parts[i].resize (taskCount);
119
+ }
120
+ Resize (partCount);
121
+ }
122
+
123
+ void TPartitionedStats::SetNonZero (ui32 taskIndex, ui32 partIndex, ui64 value, bool recordTimeSeries) {
124
+ auto & part = Parts[partIndex];
125
+ auto delta = value - part[taskIndex];
126
+ part[taskIndex] = value;
127
+ Values[partIndex] += delta;
128
+ Sum += delta;
129
+ if (recordTimeSeries) {
130
+ AppendHistory ();
131
+ }
132
+ }
133
+
134
+ void TTimeMultiSeriesStats::SetNonZero (TPartitionedStats& stats, ui32 taskIndex, const TString& key, ui64 value, bool recordTimeSeries) {
135
+ auto [it, inserted] = Indices.try_emplace (key);
136
+ if (inserted) {
137
+ it->second = Indices.size () - 1 ;
138
+ if (PartCount < Indices.size ()) {
139
+ PartCount += 4 ;
140
+ }
141
+ }
142
+ if (stats.Parts .size () < PartCount) {
143
+ stats.ResizeByParts (PartCount, TaskCount);
144
+ }
145
+ stats.SetNonZero (taskIndex, it->second , value, recordTimeSeries);
146
+ }
147
+
148
+ void TExternalStats::Resize (ui32 taskCount) {
149
+ ExternalRows.ResizeByTasks (taskCount);
150
+ ExternalBytes.ResizeByTasks (taskCount);
151
+ TaskCount = taskCount;
152
+ }
153
+
154
+ void TExternalStats::SetHistorySampleCount (ui32 historySampleCount) {
155
+ ExternalBytes.HistorySampleCount = historySampleCount;
156
+ }
157
+
158
+ void TExternalStats::ExportHistory (ui64 baseTimeMs, NDqProto::TDqExternalAggrStats& stats) {
159
+ if (stats.HasExternalBytes ()) {
160
+ ExternalBytes.ExportHistory (baseTimeMs, *stats.MutableExternalBytes ());
161
+ }
162
+ }
163
+
100
164
void TAsyncStats::Resize (ui32 taskCount) {
101
165
Bytes.Resize (taskCount);
102
166
DecompressedBytes.resize (taskCount);
@@ -127,20 +191,25 @@ void TAsyncStats::ExportHistory(ui64 baseTimeMs, NYql::NDqProto::TDqAsyncStatsAg
127
191
}
128
192
129
193
void TAsyncBufferStats::Resize (ui32 taskCount) {
194
+ External.Resize (taskCount);
130
195
Ingress.Resize (taskCount);
131
196
Push.Resize (taskCount);
132
197
Pop.Resize (taskCount);
133
198
Egress.Resize (taskCount);
134
199
}
135
200
136
201
void TAsyncBufferStats::SetHistorySampleCount (ui32 historySampleCount) {
202
+ External.SetHistorySampleCount (historySampleCount);
137
203
Ingress.SetHistorySampleCount (historySampleCount);
138
204
Push.SetHistorySampleCount (historySampleCount);
139
205
Pop.SetHistorySampleCount (historySampleCount);
140
206
Egress.SetHistorySampleCount (historySampleCount);
141
207
}
142
208
143
209
void TAsyncBufferStats::ExportHistory (ui64 baseTimeMs, NYql::NDqProto::TDqAsyncBufferStatsAggr& stats) {
210
+ if (stats.HasExternal ()) {
211
+ External.ExportHistory (baseTimeMs, *stats.MutableExternal ());
212
+ }
144
213
if (stats.HasIngress ()) {
145
214
Ingress.ExportHistory (baseTimeMs, *stats.MutableIngress ());
146
215
}
@@ -403,6 +472,17 @@ ui64 TStageExecutionStats::UpdateStats(const NYql::NDqProto::TDqTaskStats& taskS
403
472
baseTimeMs = NonZeroMin (baseTimeMs, UpdateAsyncStats (index, asyncBufferStats.Ingress , sourceStat.GetIngress ()));
404
473
baseTimeMs = NonZeroMin (baseTimeMs, UpdateAsyncStats (index, asyncBufferStats.Push , sourceStat.GetPush ()));
405
474
baseTimeMs = NonZeroMin (baseTimeMs, UpdateAsyncStats (index, asyncBufferStats.Pop , sourceStat.GetPop ()));
475
+ for (auto & partitionStat : sourceStat.GetExternalPartitions ()) {
476
+ auto key = partitionStat.GetPartitionId ();
477
+ asyncBufferStats.External .SetNonZero (asyncBufferStats.External .ExternalRows ,
478
+ index, key, partitionStat.GetExternalRows (), false );
479
+ asyncBufferStats.External .SetNonZero (asyncBufferStats.External .ExternalBytes ,
480
+ index, key, partitionStat.GetExternalBytes (), true );
481
+ asyncBufferStats.External .SetNonZero (asyncBufferStats.External .FirstMessageMs ,
482
+ index, key, partitionStat.GetFirstMessageMs (), false );
483
+ asyncBufferStats.External .SetNonZero (asyncBufferStats.External .LastMessageMs ,
484
+ index, key, partitionStat.GetLastMessageMs (), false );
485
+ }
406
486
}
407
487
}
408
488
@@ -1074,6 +1154,8 @@ void TQueryExecutionStats::UpdateTaskStats(ui64 taskId, const NYql::NDqProto::TD
1074
1154
BaseTimeMs = NonZeroMin (BaseTimeMs, it->second .UpdateStats (taskStats, state, stats.GetMaxMemoryUsage (), stats.GetDurationUs ()));
1075
1155
}
1076
1156
1157
+ // SIMD-friendly aggregations are below. Compiler is able to vectorize sum/count, but needs help with min/max
1158
+
1077
1159
void ExportAggStats (std::vector<ui64>& data, NYql::NDqProto::TDqStatsMinMax& stats) {
1078
1160
1079
1161
Y_DEBUG_ABORT_UNLESS ((data.size () & 3 ) == 0 );
@@ -1215,6 +1297,12 @@ void TQueryExecutionStats::ExportAggAsyncStats(TAsyncStats& data, NYql::NDqProto
1215
1297
}
1216
1298
1217
1299
void TQueryExecutionStats::ExportAggAsyncBufferStats (TAsyncBufferStats& data, NYql::NDqProto::TDqAsyncBufferStatsAggr& stats) {
1300
+ auto & external = *stats.MutableExternal ();
1301
+ data.External .ExternalRows .ExportAggStats (*external.MutableExternalRows ());
1302
+ data.External .ExternalBytes .ExportAggStats (BaseTimeMs, *external.MutableExternalBytes ());
1303
+ ExportOffsetAggStats (data.External .FirstMessageMs .Values , *external.MutableFirstMessageMs (), BaseTimeMs);
1304
+ ExportOffsetAggStats (data.External .LastMessageMs .Values , *external.MutableLastMessageMs (), BaseTimeMs);
1305
+ external.SetPartitionCount (data.External .Indices .size ());
1218
1306
ExportAggAsyncStats (data.Ingress , *stats.MutableIngress ());
1219
1307
ExportAggAsyncStats (data.Push , *stats.MutablePush ());
1220
1308
ExportAggAsyncStats (data.Pop , *stats.MutablePop ());
0 commit comments