Skip to content

Commit 03cba24

Browse files
authored
Fix: Replace exit code if the job succeeds (#624)
Co-authored-by: Yunus Olgun <yunuso@spotify.com>
1 parent badcf95 commit 03cba24

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

controllers/flinkcluster/flinkcluster_updater.go

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -671,6 +671,12 @@ func (updater *ClusterStatusUpdater) deriveJobStatus(ctx context.Context) *v1bet
671 671   case oldJob.IsActive() && observedSubmitter.job != nil && observedSubmitter.job.Status.Active == 0:
672 672   if observedSubmitter.job.Status.Succeeded == 1 {
673 673   newJobState = v1beta1.JobStateSucceeded
    674 + if newJob.SubmitterExitCode == -1 {
    675 + log.Info("Job succeeded but the exit code is -1. This is an edge case that may " +
    676 + "happen if the controller is down or busy for a long time and the submitter pod is deleted externally " +
    677 + "including by kube-system:pod-garbage-collector. Changing exit code to 0.")
    678 + newJob.SubmitterExitCode = 0
    679 + }
674 680   } else if observedSubmitter.job.Status.Failed == 1 {
675 681   newJobState = v1beta1.JobStateFailed
676 682   } else {

0 commit comments

Comments
 (0)