From 10571c56d48f693864190278d7806d861d4918d3 Mon Sep 17 00:00:00 2001 From: Yi Jin Date: Fri, 14 Mar 2025 11:53:17 -0700 Subject: [PATCH 1/2] [PLAT-129166] fix parallel db update Signed-off-by: Yi Jin --- pkg/controller/controller.go | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index c9a73462..92ef51ae 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "net/http" + "sort" "strings" "time" @@ -427,7 +428,10 @@ func (c *RolloutController) hasStatefulSetNotReadyPods(sts *v1.StatefulSet) (boo // We can quickly check the number of ready replicas reported by the StatefulSet. // If they don't match the total number of replicas, then we're sure there are some // not ready pods. - if sts.Status.Replicas != sts.Status.ReadyReplicas { + // This is causing issues when enable parallel db update (delete multiple pods at the same time): + // 1. use Spec.Replicas instead of Status.Replicas because of deleting multiple pods at the same time will cause Status.Replicas < Spec.Replicas + // 2. use Status.AvailableReplicas instead of Status.ReadyReplicas because of minReadySeconds > 0 & stability + if *sts.Spec.Replicas != sts.Status.AvailableReplicas { return true, nil } @@ -626,6 +630,16 @@ func (c *RolloutController) podsNotMatchingUpdateRevision(sts *v1.StatefulSet) ( // Sort pods in order to provide a deterministic behaviour. util.SortPods(pods) + // Sort pods so not running pods will be updated first. + sort.Slice(pods, func(i, j int) bool { + rank := func(p *corev1.Pod) int { + if p.Status.Phase == corev1.PodRunning { + return 1 // Running pods are ranked higher and will be updated last. + } + return 0 // Running pods are ranked lower and will be updated first. 
+ } + return rank(pods[i]) < rank(pods[j]) + }) return pods, nil } From 71ce581a38161c848ad3b9f27e6d91f1cc0b25da Mon Sep 17 00:00:00 2001 From: Yi Jin Date: Fri, 14 Mar 2025 12:25:11 -0700 Subject: [PATCH 2/2] fall back to old behavior when maxUnavailable <= 1 Signed-off-by: Yi Jin --- pkg/controller/controller.go | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 92ef51ae..941376cc 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -425,13 +425,17 @@ func (c *RolloutController) listStatefulSetsWithRolloutGroup() ([]*v1.StatefulSe } func (c *RolloutController) hasStatefulSetNotReadyPods(sts *v1.StatefulSet) (bool, error) { + if getMaxUnavailableForStatefulSet(sts, c.logger) > 1 && *sts.Spec.Replicas != sts.Status.AvailableReplicas { + // The old checks below caused issues when parallel db updates are enabled (multiple pods deleted at the same time): + // 1. use Spec.Replicas instead of Status.Replicas, because deleting multiple pods at once makes Status.Replicas < Spec.Replicas + // 2. use Status.AvailableReplicas instead of Status.ReadyReplicas, to account for minReadySeconds > 0 & stability + return true, nil + } + // Fall back to the old behavior if maxUnavailable is <= 1. // We can quickly check the number of ready replicas reported by the StatefulSet. // If they don't match the total number of replicas, then we're sure there are some // not ready pods. - // This is causing issues when enable parallel db update (delete multiple pods at the same time): - // 1. use Spec.Replicas instead of Status.Replicas because of deleting multiple pods at the same time will cause Status.Replicas < Spec.Replicas - // 2. 
use Status.AvailableReplicas instead of Status.ReadyReplicas because of minReadySeconds > 0 & stability - if *sts.Spec.Replicas != sts.Status.AvailableReplicas { + if sts.Status.Replicas != sts.Status.ReadyReplicas { return true, nil } @@ -513,7 +517,7 @@ func (c *RolloutController) listPods(sel labels.Selector) ([]*corev1.Pod, error) } func (c *RolloutController) updateStatefulSetPods(ctx context.Context, sts *v1.StatefulSet) (bool, error) { - level.Debug(c.logger).Log("msg", "reconciling StatefulSet", "statefulset", sts.Name) + level.Debug(c.logger).Log("msg", "reconciling StatefulSet", "statefulset", sts.Name) podsToUpdate, err := c.podsNotMatchingUpdateRevision(sts) if err != nil { @@ -524,11 +528,14 @@ func (c *RolloutController) updateStatefulSetPods(ctx context.Context, sts *v1.S maxUnavailable := getMaxUnavailableForStatefulSet(sts, c.logger) var numNotAvailable int if sts.Spec.MinReadySeconds > 0 { - level.Info(c.logger).Log("msg", "StatefulSet has minReadySeconds set, waiting before terminating pods", "statefulset", sts.Name, "min_ready_seconds", sts.Spec.MinReadySeconds) numNotAvailable = int(sts.Status.Replicas - sts.Status.AvailableReplicas) } else { numNotAvailable = int(sts.Status.Replicas - sts.Status.ReadyReplicas) } + if maxUnavailable > 1 { + // when deleting multiple pods at the same time, the number of not-available pods should include pods that haven't been managed by the controller yet. + numNotAvailable += int(*sts.Spec.Replicas - sts.Status.Replicas) + } // Compute the number of pods we should update, honoring the configured maxUnavailable. 
numPods := max(0, min( @@ -541,11 +548,13 @@ func (c *RolloutController) updateStatefulSetPods(ctx context.Context, sts *v1.S "msg", "StatefulSet has some pods to be updated but maxUnavailable pods has been reached", "statefulset", sts.Name, "pods_to_update", len(podsToUpdate), + "first_pod_to_update", podsToUpdate[0].Name, + "expected_replicas", *sts.Spec.Replicas, "replicas", sts.Status.Replicas, "ready_replicas", sts.Status.ReadyReplicas, "available_replicas", sts.Status.AvailableReplicas, + "num_not_available", numNotAvailable, "max_unavailable", maxUnavailable) - return true, nil } @@ -636,7 +645,7 @@ func (c *RolloutController) podsNotMatchingUpdateRevision(sts *v1.StatefulSet) ( if p.Status.Phase == corev1.PodRunning { return 1 // Running pods are ranked higher and will be updated last. } - return 0 // Running pods are ranked lower and will be updated first. + return 0 // Non-running pods are ranked lower and will be updated first. } return rank(pods[i]) < rank(pods[j]) })