Skip to content

Commit 3d89711

Browse files
authored
Report group statuses in SysView (#18541)
1 parent 7ece2a1 commit 3d89711

File tree

10 files changed

+45 -10 lines changed

ydb/core/mind/bscontroller/bsc.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@ TBlobStorageController::TVSlotInfo::TVSlotInfo(TVSlotId vSlotId, TPDiskInfo *pdi
6262
}
6363
}
6464

65-
void TBlobStorageController::TGroupInfo::CalculateGroupStatus() {
65+
bool TBlobStorageController::TGroupInfo::CalculateGroupStatus() {
66+
const TGroupStatus prev = Status;
6667
Status = {NKikimrBlobStorage::TGroupStatus::FULL, NKikimrBlobStorage::TGroupStatus::FULL};
6768

6869
if ((VirtualGroupState == NKikimrBlobStorage::EVirtualGroupState::CREATE_FAILED ||
@@ -82,6 +83,8 @@ void TBlobStorageController::TGroupInfo::CalculateGroupStatus() {
8283
}
8384
Status.MakeWorst(DeriveStatus(Topology.get(), failed), DeriveStatus(Topology.get(), failed | failedByPDisk));
8485
}
86+
87+
return Status != prev;
8588
}
8689

8790
void TBlobStorageController::TGroupInfo::CalculateLayoutStatus(TBlobStorageController *self,

ydb/core/mind/bscontroller/impl.h

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,8 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
136136
bool OnlyPhantomsRemain = false;
137137

138138
public:
139-
void SetStatus(NKikimrBlobStorage::EVDiskStatus status, TMonotonic now, TInstant instant, bool onlyPhantomsRemain) {
139+
void SetStatus(NKikimrBlobStorage::EVDiskStatus status, TMonotonic now, TInstant instant, bool onlyPhantomsRemain,
140+
TBlobStorageController *controller) {
140141
if (status != VDiskStatus) {
141142
if (status == NKikimrBlobStorage::EVDiskStatus::REPLICATING) { // became "replicating"
142143
LastGotReplicating = instant;
@@ -159,7 +160,9 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
159160
} else {
160161
DropFromVSlotReadyTimestampQ();
161162
}
162-
const_cast<TGroupInfo&>(*Group).CalculateGroupStatus();
163+
if (const_cast<TGroupInfo&>(*Group).CalculateGroupStatus()) {
164+
controller->SysViewChangedGroups.insert(Group->ID);
165+
}
163166
}
164167
if (status == NKikimrBlobStorage::EVDiskStatus::REPLICATING) {
165168
OnlyPhantomsRemain = onlyPhantomsRemain;
@@ -616,6 +619,8 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
616619
OperatingStatus = Max(OperatingStatus, operating);
617620
ExpectedStatus = Max(ExpectedStatus, expected);
618621
}
622+
623+
friend std::strong_ordering operator <=>(const TGroupStatus&, const TGroupStatus&) = default;
619624
} Status;
620625

621626
// group status depends on the IsReady value for every VDisk; so it has to be updated every time there is possible
@@ -627,7 +632,7 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
627632
//
628633
// also it depends on the Status of underlying PDisks, so every time their status change, group status has to
629634
// be recalculated too
630-
void CalculateGroupStatus();
635+
bool CalculateGroupStatus();
631636

632637
// group layout status: whether it is positioned correctly
633638
bool LayoutCorrect = false;
@@ -2295,7 +2300,9 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
22952300
ScrubState.UpdateVDiskState(&*it->second);
22962301
}
22972302
for (TGroupInfo *group : groups) {
2298-
group->CalculateGroupStatus();
2303+
if (group->CalculateGroupStatus()) {
2304+
SysViewChangedGroups.insert(group->ID);
2305+
}
22992306
}
23002307
ScheduleVSlotReadyUpdate();
23012308
if (!timingQ.empty()) {

ydb/core/mind/bscontroller/load_everything.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,17 @@ class TBlobStorageController::TTxLoadEverything : public TTransactionBase<TBlobS
539539
db.Table<Schema::VDiskMetrics>().Key(key).Delete();
540540
}
541541

542+
// issue all sys view updates just after the start
543+
for (const auto& [pdiskId, _] : Self->PDisks) {
544+
Self->SysViewChangedPDisks.insert(pdiskId);
545+
}
546+
for (const auto& [vdiskId, _] : Self->VSlots) {
547+
Self->SysViewChangedVSlots.insert(vdiskId);
548+
}
549+
for (const auto& [groupId, _] : Self->GroupMap) {
550+
Self->SysViewChangedGroups.insert(groupId);
551+
}
552+
542553
return true;
543554
}
544555

ydb/core/mind/bscontroller/register_node.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -620,7 +620,7 @@ void TBlobStorageController::OnWardenDisconnected(TNodeId nodeId, TActorId serve
620620
if (it->second->IsReady) {
621621
NotReadyVSlotIds.insert(it->second->VSlotId);
622622
}
623-
it->second->SetStatus(NKikimrBlobStorage::EVDiskStatus::ERROR, mono, now, false);
623+
it->second->SetStatus(NKikimrBlobStorage::EVDiskStatus::ERROR, mono, now, false, this);
624624
timingQ.emplace_back(*it->second);
625625
updates.push_back({
626626
.VDiskId = it->second->GetVDiskId(),

ydb/core/mind/bscontroller/self_heal.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1108,7 +1108,7 @@ namespace NKikimr::NBsController {
11081108
if (const TGroupInfo *group = slot->Group) {
11091109
const bool wasReady = slot->IsReady;
11101110
if (slot->GetStatus() != m.GetStatus() || slot->OnlyPhantomsRemain != m.GetOnlyPhantomsRemain()) {
1111-
slot->SetStatus(m.GetStatus(), mono, now, m.GetOnlyPhantomsRemain());
1111+
slot->SetStatus(m.GetStatus(), mono, now, m.GetOnlyPhantomsRemain(), this);
11121112
if (slot->IsReady != wasReady) {
11131113
ScrubState.UpdateVDiskState(slot);
11141114
if (wasReady) {

ydb/core/mind/bscontroller/sys_view.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,8 @@ void CopyInfo(NKikimrSysView::TGroupInfo* info, const THolder<TBlobStorageContro
400400
}
401401

402402
info->SetLayoutCorrect(groupInfo->LayoutCorrect);
403+
info->SetOperatingStatus(NKikimrBlobStorage::TGroupStatus::E_Name(groupInfo->Status.OperatingStatus));
404+
info->SetExpectedStatus(NKikimrBlobStorage::TGroupStatus::E_Name(groupInfo->Status.ExpectedStatus));
403405
}
404406

405407
void CopyInfo(NKikimrSysView::TStoragePoolInfo* info, const TBlobStorageController::TStoragePoolInfo& poolInfo) {

ydb/core/protos/sys_view.proto

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,8 @@ message TGroupInfo {
278278
// down/persisted down ?
279279
// metrics ?
280280
optional bool LayoutCorrect = 16 [default = true]; // is the group layout correct?
281+
optional string OperatingStatus = 17;
282+
optional string ExpectedStatus = 18;
281283
}
282284

283285
message TGroupEntry {

ydb/core/sys_view/common/schema.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,8 @@ struct Schema : NIceDb::Schema {
321321
struct PutUserDataLatency : Column<14, NScheme::NTypeIds::Interval> {};
322322
struct GetFastLatency : Column<15, NScheme::NTypeIds::Interval> {};
323323
struct LayoutCorrect : Column<16, NScheme::NTypeIds::Bool> {};
324+
struct OperatingStatus : Column<17, NScheme::NTypeIds::Utf8> {};
325+
struct ExpectedStatus : Column<18, NScheme::NTypeIds::Utf8> {};
324326

325327
using TKey = TableKey<GroupId>;
326328
using TColumns = TableColumns<
@@ -337,7 +339,9 @@ struct Schema : NIceDb::Schema {
337339
PutTabletLogLatency,
338340
PutUserDataLatency,
339341
GetFastLatency,
340-
LayoutCorrect>;
342+
LayoutCorrect,
343+
OperatingStatus,
344+
ExpectedStatus>;
341345
};
342346

343347
struct StoragePools : Table<7> {

ydb/core/sys_view/storage/groups.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ class TGroupsScan : public TStorageScanBase<TGroupsScan, TEvSysView::TEvGetGroup
3737
{T::PutUserDataLatency::ColumnId, {E::kInfoFieldNumber, V::kPutUserDataLatencyFieldNumber}},
3838
{T::GetFastLatency::ColumnId, {E::kInfoFieldNumber, V::kGetFastLatencyFieldNumber}},
3939
{T::LayoutCorrect::ColumnId, {E::kInfoFieldNumber, V::kLayoutCorrectFieldNumber}},
40+
{T::OperatingStatus::ColumnId, {E::kInfoFieldNumber, V::kOperatingStatusFieldNumber}},
41+
{T::ExpectedStatus::ColumnId, {E::kInfoFieldNumber, V::kExpectedStatusFieldNumber}},
4042
};
4143
return fieldMap;
4244
}

ydb/core/sys_view/ut_kqp.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2954,7 +2954,9 @@ ALTER OBJECT `/Root/test_show_create` (TYPE TABLE) SET (ACTION = UPSERT_OPTIONS,
29542954
PutTabletLogLatency,
29552955
PutUserDataLatency,
29562956
StoragePoolId,
2957-
LayoutCorrect
2957+
LayoutCorrect,
2958+
OperatingStatus,
2959+
ExpectedStatus
29582960
FROM `/Root/.sys/ds_groups` WHERE GroupId >= 0x80000000;
29592961
)").GetValueSync();
29602962

@@ -2970,7 +2972,7 @@ ALTER OBJECT `/Root/test_show_create` (TYPE TABLE) SET (ACTION = UPSERT_OPTIONS,
29702972
}
29712973
}
29722974

2973-
TYsonFieldChecker check(ysonString, 13);
2975+
TYsonFieldChecker check(ysonString, 15);
29742976

29752977
check.Uint64(0u); // AllocatedSize
29762978
check.Uint64GreaterOrEquals(0u); // AvailableSize
@@ -2985,6 +2987,8 @@ ALTER OBJECT `/Root/test_show_create` (TYPE TABLE) SET (ACTION = UPSERT_OPTIONS,
29852987
check.Null(); // PutUserDataLatency
29862988
check.Uint64(2u); // StoragePoolId
29872989
check.Bool(true); // LayoutCorrect
2990+
check.String("DISINTEGRATED"); // OperatingStatus
2991+
check.String("DISINTEGRATED"); // ExpectedStatus
29882992
}
29892993

29902994
Y_UNIT_TEST(StoragePoolsFields) {

0 commit comments

Comments
 (0)