Skip to content

Commit 5d9e8e9

Browse files
authored
Use ExponentialBackoff when updating SparkApplication status (#638)
1 parent d2f1d75 commit 5d9e8e9

File tree

1 file changed

+25
-24
lines changed

1 file changed

+25
-24
lines changed

pkg/controller/sparkapplication/controller.go

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ import (
3838
v1 "k8s.io/client-go/listers/core/v1"
3939
"k8s.io/client-go/tools/cache"
4040
"k8s.io/client-go/tools/record"
41+
"k8s.io/client-go/util/retry"
4142
"k8s.io/client-go/util/workqueue"
4243

4344
"github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/apis/sparkoperator.k8s.io/v1beta2"
@@ -56,7 +57,6 @@ const (
5657
podAlreadyExistsErrorCode = "code=409"
5758
queueTokenRefillRate = 50
5859
queueTokenBucketSize = 500
59-
maximumUpdateRetries = 3
6060
)
6161

6262
var (
@@ -704,46 +704,47 @@ func (c *Controller) shouldDoBatchScheduling(app *v1beta2.SparkApplication) (boo
704704
if c.batchSchedulerMgr == nil || app.Spec.BatchScheduler == nil || *app.Spec.BatchScheduler == "" {
705705
return false, nil
706706
}
707-
if scheduler, err := c.batchSchedulerMgr.GetScheduler(*app.Spec.BatchScheduler); err != nil {
708-
glog.Errorf("failed to get batch scheduler from name %s", *app.Spec.BatchScheduler)
707+
708+
scheduler, err := c.batchSchedulerMgr.GetScheduler(*app.Spec.BatchScheduler)
709+
if err != nil {
710+
glog.Errorf("failed to get batch scheduler for name %s", *app.Spec.BatchScheduler)
709711
return false, nil
710-
} else {
711-
return scheduler.ShouldSchedule(app), scheduler
712712
}
713+
return scheduler.ShouldSchedule(app), scheduler
713714
}
714715

715716
func (c *Controller) updateApplicationStatusWithRetries(
716717
original *v1beta2.SparkApplication,
717718
updateFunc func(status *v1beta2.SparkApplicationStatus)) (*v1beta2.SparkApplication, error) {
718719
toUpdate := original.DeepCopy()
719-
720-
var lastUpdateErr error
721-
for i := 0; i < maximumUpdateRetries; i++ {
720+
updateErr := wait.ExponentialBackoff(retry.DefaultBackoff, func() (ok bool, err error) {
722721
updateFunc(&toUpdate.Status)
723722
if equality.Semantic.DeepEqual(original.Status, toUpdate.Status) {
724-
return toUpdate, nil
723+
return true, nil
725724
}
726-
_, err := c.crdClient.SparkoperatorV1beta2().SparkApplications(toUpdate.Namespace).Update(toUpdate)
725+
726+
toUpdate, err = c.crdClient.SparkoperatorV1beta2().SparkApplications(original.Namespace).Update(toUpdate)
727727
if err == nil {
728-
return toUpdate, nil
728+
return true, nil
729+
}
730+
if !errors.IsConflict(err) {
731+
return false, err
729732
}
730733

731-
lastUpdateErr = err
732-
733-
// Failed to update to the API server.
734-
// Get the latest version from the API server first and re-apply the update.
735-
name := toUpdate.Name
736-
toUpdate, err = c.crdClient.SparkoperatorV1beta2().SparkApplications(toUpdate.Namespace).Get(name,
737-
metav1.GetOptions{})
734+
// There was a conflict updating the SparkApplication, fetch the latest version from the API server.
735+
toUpdate, err = c.crdClient.SparkoperatorV1beta2().SparkApplications(original.Namespace).Get(original.Name, metav1.GetOptions{})
738736
if err != nil {
739-
glog.Errorf("failed to get SparkApplication %s/%s: %v", original.Namespace, name, err)
740-
return nil, err
737+
glog.Errorf("failed to get SparkApplication %s/%s: %v", original.Namespace, original.Name, err)
738+
return false, err
741739
}
742-
}
743740

744-
if lastUpdateErr != nil {
745-
glog.Errorf("failed to update SparkApplication %s/%s: %v", original.Namespace, original.Name, lastUpdateErr)
746-
return nil, lastUpdateErr
741+
// Retry with the latest version.
742+
return false, nil
743+
})
744+
745+
if updateErr != nil {
746+
glog.Errorf("failed to update SparkApplication %s/%s: %v", original.Namespace, original.Name, updateErr)
747+
return nil, updateErr
747748
}
748749

749750
return toUpdate, nil

0 commit comments

Comments
 (0)