Skip to content

Commit b7e09a0

Browse files
committed
Automatically recreate pvc when sts pod is stuck in pending
1 parent 6a832aa commit b7e09a0

File tree

4 files changed

+86
-4
lines changed

4 files changed

+86
-4
lines changed

pkg/controller/statefulset/stateful_pod_control.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ type StatefulPodControlInterface interface {
5050
// DeleteStatefulPod deletes a Pod in a StatefulSet. The pod's PVCs are not deleted. If the delete is successful,
5151
// the returned error is nil.
5252
DeleteStatefulPod(set *apps.StatefulSet, pod *v1.Pod) error
53+
// CreatePersistentVolumeClaims creates all of the required PersistentVolumeClaims for pod
54+
CreatePersistentVolumeClaims(set *apps.StatefulSet, pod *v1.Pod) error
5355
}
5456

5557
func NewRealStatefulPodControl(
@@ -74,7 +76,7 @@ type realStatefulPodControl struct {
7476

7577
func (spc *realStatefulPodControl) CreateStatefulPod(set *apps.StatefulSet, pod *v1.Pod) error {
7678
// Create the Pod's PVCs prior to creating the Pod
77-
if err := spc.createPersistentVolumeClaims(set, pod); err != nil {
79+
if err := spc.CreatePersistentVolumeClaims(set, pod); err != nil {
7880
spc.recordPodEvent("create", set, pod, err)
7981
return err
8082
}
@@ -103,7 +105,7 @@ func (spc *realStatefulPodControl) UpdateStatefulPod(set *apps.StatefulSet, pod
103105
if !storageMatches(set, pod) {
104106
updateStorage(set, pod)
105107
consistent = false
106-
if err := spc.createPersistentVolumeClaims(set, pod); err != nil {
108+
if err := spc.CreatePersistentVolumeClaims(set, pod); err != nil {
107109
spc.recordPodEvent("update", set, pod, err)
108110
return err
109111
}
@@ -174,11 +176,11 @@ func (spc *realStatefulPodControl) recordClaimEvent(verb string, set *apps.State
174176
}
175177
}
176178

177-
// createPersistentVolumeClaims creates all of the required PersistentVolumeClaims for pod, which must be a member of
179+
// CreatePersistentVolumeClaims creates all of the required PersistentVolumeClaims for pod, which must be a member of
178180
// set. If all of the claims for Pod are successfully created, the returned error is nil. If creation fails, this method
179181
// may be called again until no error is returned, indicating the PersistentVolumeClaims for pod are consistent with
180182
// set's Spec.
181-
func (spc *realStatefulPodControl) createPersistentVolumeClaims(set *apps.StatefulSet, pod *v1.Pod) error {
183+
func (spc *realStatefulPodControl) CreatePersistentVolumeClaims(set *apps.StatefulSet, pod *v1.Pod) error {
182184
var errs []error
183185
for _, claim := range getPersistentVolumeClaims(set, pod) {
184186
pvc, err := spc.pvcLister.PersistentVolumeClaims(claim.Namespace).Get(claim.Name)

pkg/controller/statefulset/stateful_set_control.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,18 @@ func (ssc *defaultStatefulSetControl) updateStatefulSet(
426426
// pod created, no more work possible for this round
427427
continue
428428
}
429+
430+
// If the Pod is stuck in pending state then trigger PVC creation to create missing PVCs
431+
if isStuckPending(replicas[i]) {
432+
klog.V(4).Infof(
433+
"StatefulSet %s/%s is triggering PVC creation for pending Pod %s",
434+
set.Namespace,
435+
set.Name,
436+
replicas[i].Name)
437+
if err := ssc.podControl.CreatePersistentVolumeClaims(set, replicas[i]); err != nil {
438+
return &status, err
439+
}
440+
}
429441
// If we find a Pod that is currently terminating, we must wait until graceful deletion
430442
// completes before we continue to make progress.
431443
if isTerminating(replicas[i]) && monotonic {

pkg/controller/statefulset/stateful_set_control_test.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ func TestStatefulSetControl(t *testing.T) {
9090
{UpdatePodFailure, simpleSetFn},
9191
{UpdateSetStatusFailure, simpleSetFn},
9292
{PodRecreateDeleteFailure, simpleSetFn},
93+
{RecreatesPVCForPendingPod, simpleSetFn},
9394
}
9495

9596
for _, testCase := range testCases {
@@ -443,6 +444,58 @@ func PodRecreateDeleteFailure(t *testing.T, set *apps.StatefulSet, invariants in
443444
}
444445
}
445446

447+
func RecreatesPVCForPendingPod(t *testing.T, set *apps.StatefulSet, invariants invariantFunc) {
448+
client := fake.NewSimpleClientset()
449+
spc, _, ssc, stop := setupController(client)
450+
defer close(stop)
451+
selector, err := metav1.LabelSelectorAsSelector(set.Spec.Selector)
452+
if err != nil {
453+
t.Error(err)
454+
}
455+
pods, err := spc.podsLister.Pods(set.Namespace).List(selector)
456+
if err != nil {
457+
t.Error(err)
458+
}
459+
if err := ssc.UpdateStatefulSet(set, pods); err != nil {
460+
t.Errorf("Error updating StatefulSet %s", err)
461+
}
462+
if err := invariants(set, spc); err != nil {
463+
t.Error(err)
464+
}
465+
pods, err = spc.podsLister.Pods(set.Namespace).List(selector)
466+
if err != nil {
467+
t.Error(err)
468+
}
469+
for _, claim := range getPersistentVolumeClaims(set, pods[0]) {
470+
spc.claimsIndexer.Delete(&claim)
471+
}
472+
pods[0].Status.Phase = v1.PodPending
473+
oldCreationTimestamp := pods[0].CreationTimestamp
474+
pods[0].CreationTimestamp = metav1.Now()
475+
spc.podsIndexer.Update(pods[0])
476+
if err := ssc.UpdateStatefulSet(set, pods); err != nil {
477+
t.Errorf("Error updating StatefulSet %s", err)
478+
}
479+
// The pod is still new, so the pvc should not be recreated yet
480+
if err := invariants(set, spc); err == nil {
481+
t.Error("Statefulset pod should be missing pvc")
482+
}
483+
484+
pods[0].CreationTimestamp = oldCreationTimestamp
485+
spc.podsIndexer.Update(pods[0])
486+
if err := ssc.UpdateStatefulSet(set, pods); err != nil {
487+
t.Errorf("Error updating StatefulSet %s", err)
488+
}
489+
// invariants check if there any missing PVCs for the Pods
490+
if err := invariants(set, spc); err != nil {
491+
t.Error(err)
492+
}
493+
pods, err = spc.podsLister.Pods(set.Namespace).List(selector)
494+
if err != nil {
495+
t.Error(err)
496+
}
497+
}
498+
446499
func TestStatefulSetControlScaleDownDeleteError(t *testing.T) {
447500
invariants := assertMonotonicInvariants
448501
set := newStatefulSet(3)
@@ -1767,6 +1820,13 @@ func (spc *fakeStatefulPodControl) DeleteStatefulPod(set *apps.StatefulSet, pod
17671820
return nil
17681821
}
17691822

1823+
func (spc *fakeStatefulPodControl) CreatePersistentVolumeClaims(set *apps.StatefulSet, pod *v1.Pod) error {
1824+
for _, claim := range getPersistentVolumeClaims(set, pod) {
1825+
spc.claimsIndexer.Update(&claim)
1826+
}
1827+
return nil
1828+
}
1829+
17701830
var _ StatefulPodControlInterface = &fakeStatefulPodControl{}
17711831

17721832
type fakeStatefulSetStatusUpdater struct {

pkg/controller/statefulset/stateful_set_utils.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"fmt"
2323
"regexp"
2424
"strconv"
25+
"time"
2526

2627
apps "k8s.io/api/apps/v1"
2728
"k8s.io/api/core/v1"
@@ -34,6 +35,8 @@ import (
3435
"k8s.io/kubernetes/pkg/controller/history"
3536
)
3637

38+
// pendingPodDelay is the minimum time since a Pod's creation before a Pod in the PodPending phase is treated as
// stuck rather than transiently pending (see isStuckPending).
const pendingPodDelay = 30 * time.Second
39+
3740
var patchCodec = scheme.Codecs.LegacyCodec(apps.SchemeGroupVersion)
3841

3942
// overlappingStatefulSets sorts a list of StatefulSets by creation timestamp, using their names as a tie breaker.
@@ -209,6 +212,11 @@ func isCreated(pod *v1.Pod) bool {
209212
return pod.Status.Phase != ""
210213
}
211214

215+
// isStuckPending returns true if pod has a Phase of PodPending and has been pending for pendingPodDelay time to ignore transient pending pods
216+
func isStuckPending(pod *v1.Pod) bool {
217+
return pod.Status.Phase == v1.PodPending && time.Now().Sub(pod.CreationTimestamp.Time) > pendingPodDelay
218+
}
219+
212220
// isFailed returns true if pod has a Phase of PodFailed
213221
func isFailed(pod *v1.Pod) bool {
214222
return pod.Status.Phase == v1.PodFailed

0 commit comments

Comments
 (0)