Skip to content

Commit 7ad9e1e

Browse files
authored
add missing monitoring metrics for column shards (#7314)
1 parent 437bf85 commit 7ad9e1e

File tree

7 files changed

+72
-15
lines changed

7 files changed

+72
-15
lines changed

ydb/core/protos/counters_columnshard.proto

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@ enum ECumulativeCounters {
6565
COUNTER_PLAN_STEP_ACCEPTED = 9 [(CounterOpts) = {Name: "PlanStepAccepted"}];
6666
COUNTER_SCANNED_ROWS = 10 [(CounterOpts) = {Name: "ScannedRows"}];
6767
COUNTER_SCANNED_BYTES = 11 [(CounterOpts) = {Name: "ScannedBytes"}];
68-
COUNTER_UPSERT_BLOBS_WRITTEN = 12 [(CounterOpts) = {Name: "UpsertBlobsWritten"}];
69-
COUNTER_UPSERT_BYTES_WRITTEN = 13 [(CounterOpts) = {Name: "UpsertBytesWritten"}];
68+
COUNTER_OPERATIONS_BLOBS_WRITTEN = 12 [(CounterOpts) = {Name: "OperationsBlobsWritten"}];
69+
COUNTER_OPERATIONS_BYTES_WRITTEN = 13 [(CounterOpts) = {Name: "OperationsBytesWritten"}];
7070
COUNTER_INDEXING_BLOBS_WRITTEN = 14 [(CounterOpts) = {Name: "IndexingBlobsWritten"}];
7171
COUNTER_INDEXING_BYTES_WRITTEN = 15 [(CounterOpts) = {Name: "IndexingBytesWritten"}];
7272
COUNTER_COMPACTION_BLOBS_WRITTEN = 16 [(CounterOpts) = {Name: "CompactionBlobsWritten"}];
@@ -137,6 +137,8 @@ enum ECumulativeCounters {
137137
COUNTER_READING_EXPORTED_RANGES = 81 [(CounterOpts) = {Name: "ReadingExportedRanges"}];
138138
COUNTER_PLANNED_TX_COMPLETED = 82 [(CounterOpts) = {Name: "PlannedTxCompleted"}];
139139
COUNTER_IMMEDIATE_TX_COMPLETED = 83 [(CounterOpts) = {Name: "ImmediateTxCompleted"}];
140+
COUNTER_ROWS_ERASED = 84 [(CounterOpts) = {Name: "RowsErased"}];
141+
COUNTER_OPERATIONS_ROWS_WRITTEN = 85 [(CounterOpts) = {Name: "OperationsRowsWritten"}];
140142
}
141143

142144
enum EPercentileCounters {

ydb/core/tablet/tablet_counters_aggregator.cpp

Lines changed: 43 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -765,10 +765,16 @@ class TTabletMon {
765765
TCounterPtr DatashardSizeBytes;
766766
TCounterPtr DatashardCacheHitBytes;
767767
TCounterPtr DatashardCacheMissBytes;
768+
TCounterPtr ColumnShardReadRows_;
769+
TCounterPtr ColumnShardReadBytes_;
768770
TCounterPtr ColumnShardScanRows_;
769771
TCounterPtr ColumnShardScanBytes_;
772+
TCounterPtr ColumnShardWriteRows_;
773+
TCounterPtr ColumnShardWriteBytes_;
770774
TCounterPtr ColumnShardBulkUpsertRows_;
771775
TCounterPtr ColumnShardBulkUpsertBytes_;
776+
TCounterPtr ColumnShardEraseRows_;
777+
TCounterPtr ColumnShardEraseBytes_;
772778
TCounterPtr ResourcesStorageUsedBytes;
773779
TCounterPtr ResourcesStorageUsedBytesOnSsd;
774780
TCounterPtr ResourcesStorageUsedBytesOnHdd;
@@ -787,6 +793,7 @@ class TTabletMon {
787793
TCounterPtr ResourcesStreamReservedStorageLimit;
788794

789795
THistogramPtr ShardCpuUtilization;
796+
THistogramPtr ColumnShardCpuUtilization;
790797

791798
TCounterPtr RowUpdates;
792799
TCounterPtr RowUpdateBytes;
@@ -808,8 +815,11 @@ class TTabletMon {
808815

809816
TCounterPtr ColumnShardScannedBytes_;
810817
TCounterPtr ColumnShardScannedRows_;
811-
TCounterPtr ColumnShardUpsertBlobsWritten_;
812-
TCounterPtr ColumnShardUpsertBytesWritten_;
818+
TCounterPtr ColumnShardOperationsRowsWritten_;
819+
TCounterPtr ColumnShardOperationsBytesWritten_;
820+
TCounterPtr ColumnShardErasedBytes_;
821+
TCounterPtr ColumnShardErasedRows_;
822+
THistogramPtr ColumnShardConsumedCpuHistogram;
813823

814824
TCounterPtr DiskSpaceTablesTotalBytes;
815825
TCounterPtr DiskSpaceTablesTotalBytesOnSsd;
@@ -859,14 +869,26 @@ class TTabletMon {
859869
DatashardCacheMissBytes = ydbGroup->GetNamedCounter("name",
860870
"table.datashard.cache_miss.bytes", true);
861871

872+
ColumnShardReadRows_ = ydbGroup->GetNamedCounter("name",
873+
"table.columnshard.read.rows", true);
874+
ColumnShardReadBytes_ = ydbGroup->GetNamedCounter("name",
875+
"table.columnshard.read.bytes", true);
862876
ColumnShardScanRows_ = ydbGroup->GetNamedCounter("name",
863877
"table.columnshard.scan.rows", true);
864878
ColumnShardScanBytes_ = ydbGroup->GetNamedCounter("name",
865879
"table.columnshard.scan.bytes", true);
880+
ColumnShardWriteRows_ = ydbGroup->GetNamedCounter("name",
881+
"table.columnshard.write.rows", true);
882+
ColumnShardWriteBytes_ = ydbGroup->GetNamedCounter("name",
883+
"table.columnshard.write.bytes", true);
866884
ColumnShardBulkUpsertRows_ = ydbGroup->GetNamedCounter("name",
867885
"table.columnshard.bulk_upsert.rows", true);
868886
ColumnShardBulkUpsertBytes_ = ydbGroup->GetNamedCounter("name",
869887
"table.columnshard.bulk_upsert.bytes", true);
888+
ColumnShardEraseRows_ = ydbGroup->GetNamedCounter("name",
889+
"table.columnshard.erase.rows", true);
890+
ColumnShardEraseBytes_ = ydbGroup->GetNamedCounter("name",
891+
"table.columnshard.erase.bytes", true);
870892

871893
ResourcesStorageUsedBytes = ydbGroup->GetNamedCounter("name",
872894
"resources.storage.used_bytes", false);
@@ -908,6 +930,8 @@ class TTabletMon {
908930

909931
ShardCpuUtilization = ydbGroup->GetNamedHistogram("name",
910932
"table.datashard.used_core_percents", NMonitoring::LinearHistogram(12, 0, 10), false);
933+
ColumnShardCpuUtilization = ydbGroup->GetNamedHistogram("name",
934+
"table.columnshard.used_core_percents", NMonitoring::LinearHistogram(12, 0, 10), false);
911935
};
912936

913937
void Initialize(::NMonitoring::TDynamicCounterPtr counters, bool hasDatashard, bool hasSchemeshard, bool hasColumnShard) {
@@ -943,8 +967,11 @@ class TTabletMon {
943967

944968
ColumnShardScannedBytes_ = appGroup->GetCounter("ColumnShard/ScannedBytes");
945969
ColumnShardScannedRows_ = appGroup->GetCounter("ColumnShard/ScannedRows");
946-
ColumnShardUpsertBlobsWritten_ = appGroup->GetCounter("ColumnShard/UpsertBlobsWritten");
947-
ColumnShardUpsertBytesWritten_ = appGroup->GetCounter("ColumnShard/UpsertBytesWritten");
970+
ColumnShardOperationsRowsWritten_ = appGroup->GetCounter("ColumnShard/OperationsRowsWritten");
971+
ColumnShardOperationsBytesWritten_ = appGroup->GetCounter("ColumnShard/OperationsBytesWritten");
972+
ColumnShardErasedBytes_ = appGroup->GetCounter("ColumnShard/BytesErased");
973+
ColumnShardErasedRows_ = appGroup->GetCounter("ColumnShard/RowsErased");
974+
ColumnShardConsumedCpuHistogram = appGroup->FindHistogram("HIST(ConsumedCPU)");
948975
}
949976

950977
if (hasSchemeshard && !DiskSpaceTablesTotalBytes) {
@@ -990,10 +1017,20 @@ class TTabletMon {
9901017
}
9911018

9921019
if (ColumnShardScannedBytes_) {
1020+
ColumnShardReadRows_->Set(0);
1021+
ColumnShardReadBytes_->Set(0);
9931022
ColumnShardScanRows_->Set(ColumnShardScannedRows_->Val());
9941023
ColumnShardScanBytes_->Set(ColumnShardScannedBytes_->Val());
995-
ColumnShardBulkUpsertRows_->Set(ColumnShardUpsertBlobsWritten_->Val());
996-
ColumnShardBulkUpsertBytes_->Set(ColumnShardUpsertBytesWritten_->Val());
1024+
ColumnShardWriteRows_->Set(ColumnShardOperationsRowsWritten_->Val());
1025+
ColumnShardWriteBytes_->Set(ColumnShardOperationsBytesWritten_->Val());
1026+
ColumnShardBulkUpsertRows_->Set(ColumnShardOperationsRowsWritten_->Val());
1027+
ColumnShardBulkUpsertBytes_->Set(ColumnShardOperationsBytesWritten_->Val());
1028+
ColumnShardEraseRows_->Set(ColumnShardErasedRows_->Val());
1029+
ColumnShardEraseBytes_->Set(ColumnShardErasedBytes_->Val());
1030+
1031+
if (ColumnShardConsumedCpuHistogram) {
1032+
TransferBuckets(ColumnShardCpuUtilization, ColumnShardConsumedCpuHistogram);
1033+
}
9971034
}
9981035

9991036
if (DiskSpaceTablesTotalBytes) {

ydb/core/tx/columnshard/columnshard__write.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ void TColumnShard::Handle(TEvPrivate::TEvWriteBlobsResult::TPtr& ev, const TActo
129129
wBuffer.RemoveData(aggr, StoragesManager->GetInsertOperator());
130130
} else {
131131
const TMonotonic now = TMonotonic::Now();
132-
Counters.GetCSCounters().OnWritePutBlobsSuccess(now - writeMeta.GetWriteStartInstant());
132+
Counters.OnWritePutBlobsSuccess(now - writeMeta.GetWriteStartInstant(), aggr->GetRows());
133133
Counters.GetCSCounters().OnWriteMiddle1PutBlobsSuccess(now - writeMeta.GetWriteMiddle1StartInstant());
134134
Counters.GetCSCounters().OnWriteMiddle2PutBlobsSuccess(now - writeMeta.GetWriteMiddle2StartInstant());
135135
Counters.GetCSCounters().OnWriteMiddle3PutBlobsSuccess(now - writeMeta.GetWriteMiddle3StartInstant());

ydb/core/tx/columnshard/counters/counters_manager.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,11 @@ class TCountersManager {
8787
BackgroundControllerCounters->FillTotalStats(tableStats);
8888
ScanCounters.FillStats(tableStats);
8989
}
90+
91+
// Records a successful blob put for a write in both counter sets:
// the row count goes to the tablet counters, the elapsed duration `d`
// goes to the column-shard (CS) counters.
void OnWritePutBlobsSuccess(const TDuration d, const ui64 rowsWritten) const {
92+
TabletCounters->OnWritePutBlobsSuccess(rowsWritten);
93+
CSCounters.OnWritePutBlobsSuccess(d);
94+
}
9095
};
9196

9297
} // namespace NKikimr::NColumnShard

ydb/core/tx/columnshard/counters/tablet_counters.h

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,8 @@ class TTabletCountersHandle {
7171
}
7272

7373
// Accounts a successful write: adds `blobsWritten` and `bytesWritten` to the
// blob/byte write counters and bumps COUNTER_WRITE_SUCCESS.
// Diff note: the '-' lines are the pre-commit body using COUNTER_UPSERT_*;
// the '+' lines are the post-commit body using COUNTER_OPERATIONS_*.
void OnWriteSuccess(const ui64 blobsWritten, const ui64 bytesWritten) const {
74-
IncCounter(NColumnShard::COUNTER_UPSERT_BLOBS_WRITTEN, blobsWritten);
75-
IncCounter(NColumnShard::COUNTER_UPSERT_BYTES_WRITTEN, bytesWritten);
76-
// self.Stats.GetTabletCounters().IncCounter(NColumnShard::COUNTER_RAW_BYTES_UPSERTED, insertedBytes);
74+
IncCounter(NColumnShard::COUNTER_OPERATIONS_BLOBS_WRITTEN, blobsWritten);
75+
IncCounter(NColumnShard::COUNTER_OPERATIONS_BYTES_WRITTEN, bytesWritten);
7776
// Called without an explicit amount — presumably increments by one; confirm against IncCounter.
IncCounter(NColumnShard::COUNTER_WRITE_SUCCESS);
7877
}
7978

@@ -106,9 +105,19 @@ class TTabletCountersHandle {
106105
IncCounter(NColumnShard::COUNTER_INDEXING_TIME, duration.MilliSeconds());
107106
}
108107

108+
// Adds `rowsWritten` to the cumulative COUNTER_OPERATIONS_ROWS_WRITTEN counter.
void OnWritePutBlobsSuccess(const ui64 rowsWritten) const {
109+
IncCounter(NColumnShard::COUNTER_OPERATIONS_ROWS_WRITTEN, rowsWritten);
110+
}
111+
112+
// Accounts one dropped portion: adds its raw byte size, blob byte size and
// row count to the corresponding *_ERASED cumulative counters.
void OnDropPortionEvent(const ui64 rawBytes, const ui64 blobBytes, const ui64 rows) const {
113+
IncCounter(NColumnShard::COUNTER_RAW_BYTES_ERASED, rawBytes);
114+
IncCounter(NColumnShard::COUNTER_BYTES_ERASED, blobBytes);
115+
IncCounter(NColumnShard::COUNTER_ROWS_ERASED, rows);
116+
}
117+
109118
void FillStats(::NKikimrTableStats::TTableStats& output) const {
110-
output.SetRowUpdates(GetValue(COUNTER_WRITE_SUCCESS));
111-
output.SetRowDeletes(0); // manual deletes are not supported
119+
output.SetRowUpdates(GetValue(COUNTER_OPERATIONS_ROWS_WRITTEN));
120+
output.SetRowDeletes(GetValue(COUNTER_ROWS_ERASED));
112121
output.SetRowReads(0); // all reads are range reads
113122
output.SetRangeReadRows(GetValue(COUNTER_READ_INDEX_ROWS));
114123

ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ void TCleanupPortionsColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::T
4343
if (self) {
4444
self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_PORTIONS_ERASED, PortionsToDrop.size());
4545
for (auto&& p : PortionsToDrop) {
46-
self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_RAW_BYTES_ERASED, p.GetTotalRawBytes());
46+
self->Counters.GetTabletCounters()->OnDropPortionEvent(p.GetTotalRawBytes(), p.GetTotalBlobBytes(), p.NumRows());
4747
}
4848
}
4949
}

ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ class TWriteAggregation {
8989
NEvWrite::TWriteMeta WriteMeta;
9090
YDB_READONLY(ui64, SchemaVersion, 0);
9191
YDB_READONLY(ui64, Size, 0);
92+
YDB_READONLY(ui64, Rows, 0);
9293
YDB_ACCESSOR_DEF(std::vector<TWideSerializedBatch>, SplittedBlobs);
9394
YDB_READONLY_DEF(TVector<TWriteId>, WriteIds);
9495
YDB_READONLY_DEF(std::shared_ptr<NOlap::IBlobsWritingAction>, BlobsAction);
@@ -117,6 +118,9 @@ class TWriteAggregation {
117118
for (auto&& s : splittedBlobs) {
118119
SplittedBlobs.emplace_back(std::move(s), *this);
119120
}
121+
for (const auto& batch : SplittedBlobs) {
122+
Rows += batch->GetRowsCount();
123+
}
120124
}
121125

122126
TWriteAggregation(const NEvWrite::TWriteData& writeData)

0 commit comments

Comments
 (0)