@@ -43,14 +43,44 @@ namespace NSentinel {
43
43
44
44
/// TPDiskStatusComputer
45
45
46
- TPDiskStatusComputer::TPDiskStatusComputer (const ui32& defaultStateLimit, const TLimitsMap& stateLimits)
46
+ TPDiskStatusComputer::TPDiskStatusComputer (const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits)
47
47
: DefaultStateLimit(defaultStateLimit)
48
+ , GoodStateLimit(goodStateLimit)
48
49
, StateLimits(stateLimits)
49
50
, StateCounter(0 )
50
51
{
51
52
}
52
53
53
- void TPDiskStatusComputer::AddState (EPDiskState state) {
54
+ bool IsGoodState (EPDiskState state) {
55
+ switch (state) {
56
+ case NKikimrBlobStorage::TPDiskState::Unknown:
57
+ return false ;
58
+ case NKikimrBlobStorage::TPDiskState::Initial:
59
+ case NKikimrBlobStorage::TPDiskState::InitialFormatRead:
60
+ case NKikimrBlobStorage::TPDiskState::InitialSysLogRead:
61
+ case NKikimrBlobStorage::TPDiskState::InitialCommonLogRead:
62
+ case NKikimrBlobStorage::TPDiskState::Normal:
63
+ return true ;
64
+ case NKikimrBlobStorage::TPDiskState::InitialFormatReadError:
65
+ case NKikimrBlobStorage::TPDiskState::InitialSysLogReadError:
66
+ case NKikimrBlobStorage::TPDiskState::InitialSysLogParseError:
67
+ case NKikimrBlobStorage::TPDiskState::InitialCommonLogReadError:
68
+ case NKikimrBlobStorage::TPDiskState::InitialCommonLogParseError:
69
+ case NKikimrBlobStorage::TPDiskState::CommonLoggerInitError:
70
+ case NKikimrBlobStorage::TPDiskState::OpenFileError:
71
+ case NKikimrBlobStorage::TPDiskState::ChunkQuotaError:
72
+ case NKikimrBlobStorage::TPDiskState::DeviceIoError:
73
+ case NKikimrBlobStorage::TPDiskState::Reserved14:
74
+ case NKikimrBlobStorage::TPDiskState::Reserved15:
75
+ case NKikimrBlobStorage::TPDiskState::Reserved16:
76
+ case NKikimrBlobStorage::TPDiskState::Missing:
77
+ case NKikimrBlobStorage::TPDiskState::Timeout:
78
+ case NKikimrBlobStorage::TPDiskState::NodeDisconnected:
79
+ return false ;
80
+ }
81
+ }
82
+
83
+ void TPDiskStatusComputer::AddState (EPDiskState state, bool isNodeLocked) {
54
84
if (StateCounter && state == State) {
55
85
if (StateCounter != Max<ui64>()) {
56
86
++StateCounter;
@@ -59,6 +89,12 @@ void TPDiskStatusComputer::AddState(EPDiskState state) {
59
89
PrevState = std::exchange (State, state);
60
90
StateCounter = 1 ;
61
91
}
92
+
93
+ if (!isNodeLocked && !IsGoodState (state)) {
94
+ // If node is not locked (i.e. it is not in maintenance mode),
95
+ // then we should remember that we had a bad state recently
96
+ HadBadStateRecently = true ;
97
+ }
62
98
}
63
99
64
100
EPDiskStatus TPDiskStatusComputer::Compute (EPDiskStatus current, TString& reason) const {
@@ -81,12 +117,18 @@ EPDiskStatus TPDiskStatusComputer::Compute(EPDiskStatus current, TString& reason
81
117
<< " State# " << State
82
118
<< " StateCounter# " << StateCounter
83
119
<< " current# " << current;
84
- switch (PrevState) {
85
- case NKikimrBlobStorage::TPDiskState::Unknown:
86
- return current;
87
- default :
88
- return EPDiskStatus::INACTIVE;
120
+
121
+ if (PrevState == NKikimrBlobStorage::TPDiskState::Unknown) {
122
+ return current;
89
123
}
124
+
125
+ if (IsGoodState (PrevState) && State == NKikimrBlobStorage::TPDiskState::Normal) {
126
+ if (!HadBadStateRecently && (StateCounter >= GoodStateLimit)) {
127
+ return EPDiskStatus::ACTIVE;
128
+ }
129
+ }
130
+
131
+ return EPDiskStatus::INACTIVE;
90
132
}
91
133
92
134
reason = TStringBuilder ()
@@ -99,6 +141,7 @@ EPDiskStatus TPDiskStatusComputer::Compute(EPDiskStatus current, TString& reason
99
141
100
142
switch (State) {
101
143
case NKikimrBlobStorage::TPDiskState::Normal:
144
+ HadBadStateRecently = false ;
102
145
return EPDiskStatus::ACTIVE;
103
146
default :
104
147
return EPDiskStatus::FAULTY;
@@ -135,15 +178,15 @@ void TPDiskStatusComputer::ResetForcedStatus() {
135
178
136
179
/// TPDiskStatus
137
180
138
- TPDiskStatus::TPDiskStatus (EPDiskStatus initialStatus, const ui32& defaultStateLimit, const TLimitsMap& stateLimits)
139
- : TPDiskStatusComputer(defaultStateLimit, stateLimits)
181
+ TPDiskStatus::TPDiskStatus (EPDiskStatus initialStatus, const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits)
182
+ : TPDiskStatusComputer(defaultStateLimit, goodStateLimit, stateLimits)
140
183
, Current(initialStatus)
141
184
, ChangingAllowed(true )
142
185
{
143
186
}
144
187
145
- void TPDiskStatus::AddState (EPDiskState state) {
146
- TPDiskStatusComputer::AddState (state);
188
+ void TPDiskStatus::AddState (EPDiskState state, bool isNodeLocked ) {
189
+ TPDiskStatusComputer::AddState (state, isNodeLocked );
147
190
}
148
191
149
192
bool TPDiskStatus::IsChanged () const {
@@ -198,15 +241,15 @@ void TPDiskStatus::DisallowChanging() {
198
241
199
242
/// TPDiskInfo
200
243
201
- TPDiskInfo::TPDiskInfo (EPDiskStatus initialStatus, const ui32& defaultStateLimit, const TLimitsMap& stateLimits)
202
- : TPDiskStatus(initialStatus, defaultStateLimit, stateLimits)
244
+ TPDiskInfo::TPDiskInfo (EPDiskStatus initialStatus, const ui32& defaultStateLimit, const ui32& goodStateLimit, const TLimitsMap& stateLimits)
245
+ : TPDiskStatus(initialStatus, defaultStateLimit, goodStateLimit, stateLimits)
203
246
, ActualStatus(initialStatus)
204
247
{
205
248
Touch ();
206
249
}
207
250
208
- void TPDiskInfo::AddState (EPDiskState state) {
209
- TPDiskStatus::AddState (state);
251
+ void TPDiskInfo::AddState (EPDiskState state, bool isNodeLocked ) {
252
+ TPDiskStatus::AddState (state, isNodeLocked );
210
253
Touch ();
211
254
}
212
255
@@ -476,7 +519,7 @@ class TConfigUpdater: public TUpdaterBase<TEvSentinel::TEvConfigUpdated, TConfig
476
519
continue ;
477
520
}
478
521
479
- pdisks.emplace (id, new TPDiskInfo (pdisk.GetDriveStatus (), Config.DefaultStateLimit , Config.StateLimits ));
522
+ pdisks.emplace (id, new TPDiskInfo (pdisk.GetDriveStatus (), Config.DefaultStateLimit , Config.GoodStateLimit , Config. StateLimits ));
480
523
}
481
524
482
525
SentinelState->ConfigUpdaterState .GotBSCResponse = true ;
@@ -570,16 +613,31 @@ class TStateUpdater: public TUpdaterBase<TEvSentinel::TEvStateUpdated, TStateUpd
570
613
Reply ();
571
614
}
572
615
616
+ bool IsNodeLocked (ui32 nodeId) const {
617
+ const auto & clusterInfo = CmsState->ClusterInfo ;
618
+
619
+ if (clusterInfo && clusterInfo->HasNode (nodeId)) {
620
+ const auto & node = clusterInfo->Node (nodeId);
621
+ TErrorInfo unused;
622
+ if (node.IsLocked (unused, TDuration::Zero (), TInstant::Zero (), TDuration::Zero ())) {
623
+ return true ;
624
+ }
625
+ }
626
+
627
+ return false ;
628
+ }
629
+
573
630
// Applies `state` to the PDisks registered for `nodeId`.
//
// nodeId      - node whose PDisks are visited (entries starting at TPDiskID(nodeId, 0))
// state       - state to add to each visited PDisk
// skipTouched - when true, PDisks for which IsTouched() holds are left alone;
//               when false, hitting such a PDisk trips Y_ABORT_UNLESS
void MarkNodePDisks(ui32 nodeId, EPDiskState state, bool skipTouched = false) {
    // The lock status is a per-node property, so it is resolved once up front.
    const bool isNodeLocked = IsNodeLocked(nodeId);

    auto& pdisks = SentinelState->PDisks;
    for (auto it = pdisks.lower_bound(TPDiskID(nodeId, 0));
         it != pdisks.end() && it->first.NodeId == nodeId;
         ++it)
    {
        auto& pdisk = it->second;
        if (skipTouched && pdisk->IsTouched()) {
            continue;
        }

        Y_ABORT_UNLESS(!pdisk->IsTouched());
        pdisk->AddState(state, isNodeLocked);
    }
}
@@ -613,6 +671,7 @@ class TStateUpdater: public TUpdaterBase<TEvSentinel::TEvStateUpdated, TStateUpd
613
671
<< " : nodeId# " << nodeId);
614
672
MarkNodePDisks (nodeId, NKikimrBlobStorage::TPDiskState::Missing);
615
673
} else {
674
+ const bool isNodeLocked = IsNodeLocked (nodeId);
616
675
for (const auto & info : record.GetPDiskStateInfo ()) {
617
676
auto it = SentinelState->PDisks .find (TPDiskID (nodeId, info.GetPDiskId ()));
618
677
if (it == SentinelState->PDisks .end ()) {
@@ -625,7 +684,7 @@ class TStateUpdater: public TUpdaterBase<TEvSentinel::TEvStateUpdated, TStateUpd
625
684
<< " , original# " << (ui32)info.GetState ()
626
685
<< " , safeState# " << safeState);
627
686
628
- it->second ->AddState (safeState);
687
+ it->second ->AddState (safeState, isNodeLocked );
629
688
}
630
689
631
690
MarkNodePDisks (nodeId, NKikimrBlobStorage::TPDiskState::Missing, true );
0 commit comments