Skip to content

Commit 65db274

Browse files
fix: stuck hook issue when a Job resource has a ttlSecondsAfterFinished field set (argoproj#646)
Signed-off-by: Dejan Zele Pejchev <pejcev.dejan@gmail.com> Signed-off-by: Alexandre Gaudreault <alexandre_gaudreault@intuit.com> Co-authored-by: Alexandre Gaudreault <alexandre_gaudreault@intuit.com>
1 parent 11a5e25 commit 65db274

File tree

4 files changed

+166
-40
lines changed

4 files changed

+166
-40
lines changed

pkg/health/health.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
55
"k8s.io/apimachinery/pkg/runtime/schema"
66

7+
"github.com/argoproj/gitops-engine/pkg/sync/hook"
78
"github.com/argoproj/gitops-engine/pkg/utils/kube"
89
)
910

@@ -65,7 +66,7 @@ func IsWorse(current, new HealthStatusCode) bool {
6566

6667
// GetResourceHealth returns the health of a k8s resource
6768
func GetResourceHealth(obj *unstructured.Unstructured, healthOverride HealthOverride) (health *HealthStatus, err error) {
68-
if obj.GetDeletionTimestamp() != nil {
69+
if obj.GetDeletionTimestamp() != nil && !hook.HasHookFinalizer(obj) {
6970
return &HealthStatus{
7071
Status: HealthStatusProgressing,
7172
Message: "Pending deletion",

pkg/sync/hook/hook.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,21 @@ import (
88
resourceutil "github.com/argoproj/gitops-engine/pkg/sync/resource"
99
)
1010

11+
const (
12+
// HookFinalizer is the finalizer added to hooks to ensure they are deleted only after the sync phase is completed.
13+
HookFinalizer = "argocd.argoproj.io/hook-finalizer"
14+
)
15+
16+
func HasHookFinalizer(obj *unstructured.Unstructured) bool {
17+
finalizers := obj.GetFinalizers()
18+
for _, finalizer := range finalizers {
19+
if finalizer == HookFinalizer {
20+
return true
21+
}
22+
}
23+
return false
24+
}
25+
1126
func IsHook(obj *unstructured.Unstructured) bool {
1227
_, ok := obj.GetAnnotations()[common.AnnotationKeyHook]
1328
if ok {

pkg/sync/sync_context.go

Lines changed: 85 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -295,12 +295,12 @@ const (
295295
crdReadinessTimeout = time.Duration(3) * time.Second
296296
)
297297

298-
// getOperationPhase returns a hook status from an _live_ unstructured object
299-
func (sc *syncContext) getOperationPhase(hook *unstructured.Unstructured) (common.OperationPhase, string, error) {
298+
// getOperationPhase returns a health status from a _live_ unstructured object
299+
func (sc *syncContext) getOperationPhase(obj *unstructured.Unstructured) (common.OperationPhase, string, error) {
300300
phase := common.OperationSucceeded
301-
message := hook.GetName() + " created"
301+
message := obj.GetName() + " created"
302302

303-
resHealth, err := health.GetResourceHealth(hook, sc.healthOverride)
303+
resHealth, err := health.GetResourceHealth(obj, sc.healthOverride)
304304
if err != nil {
305305
return "", "", err
306306
}
@@ -475,6 +475,15 @@ func (sc *syncContext) Sync() {
475475
return
476476
}
477477

478+
hooksCompleted := tasks.Filter(func(task *syncTask) bool {
479+
return task.isHook() && task.completed()
480+
})
481+
for _, task := range hooksCompleted {
482+
if err := sc.removeHookFinalizer(task); err != nil {
483+
sc.setResourceResult(task, task.syncStatus, common.OperationError, fmt.Sprintf("Failed to remove hook finalizer: %v", err))
484+
}
485+
}
486+
478487
// collect all completed hooks which have appropriate delete policy
479488
hooksPendingDeletionSuccessful := tasks.Filter(func(task *syncTask) bool {
480489
return task.isHook() && task.liveObj != nil && !task.running() && task.deleteOnPhaseSuccessful()
@@ -576,6 +585,64 @@ func (sc *syncContext) filterOutOfSyncTasks(tasks syncTasks) syncTasks {
576585
})
577586
}
578587

588+
func (sc *syncContext) removeHookFinalizer(task *syncTask) error {
589+
if task.liveObj == nil {
590+
return nil
591+
}
592+
removeFinalizerMutation := func(obj *unstructured.Unstructured) bool {
593+
finalizers := obj.GetFinalizers()
594+
for i, finalizer := range finalizers {
595+
if finalizer == hook.HookFinalizer {
596+
obj.SetFinalizers(append(finalizers[:i], finalizers[i+1:]...))
597+
return true
598+
}
599+
}
600+
return false
601+
}
602+
603+
// The cached live object may be stale in the controller cache, and the actual object may have been updated in the meantime,
604+
// and Kubernetes API will return a conflict error on the Update call.
605+
// In that case, we need to get the latest version of the object and retry the update.
606+
return retry.RetryOnConflict(retry.DefaultRetry, func() error {
607+
mutated := removeFinalizerMutation(task.liveObj)
608+
if !mutated {
609+
return nil
610+
}
611+
612+
updateErr := sc.updateResource(task)
613+
if apierrors.IsConflict(updateErr) {
614+
sc.log.WithValues("task", task).V(1).Info("Retrying hook finalizer removal due to conflict on update")
615+
resIf, err := sc.getResourceIf(task, "get")
616+
if err != nil {
617+
return err
618+
}
619+
liveObj, err := resIf.Get(context.TODO(), task.liveObj.GetName(), metav1.GetOptions{})
620+
if apierrors.IsNotFound(err) {
621+
sc.log.WithValues("task", task).V(1).Info("Resource is already deleted")
622+
return nil
623+
} else if err != nil {
624+
return err
625+
}
626+
task.liveObj = liveObj
627+
} else if apierrors.IsNotFound(updateErr) {
628+
// If the resource is already deleted, it is a no-op
629+
sc.log.WithValues("task", task).V(1).Info("Resource is already deleted")
630+
return nil
631+
}
632+
return updateErr
633+
})
634+
}
635+
636+
func (sc *syncContext) updateResource(task *syncTask) error {
637+
sc.log.WithValues("task", task).V(1).Info("Updating resource")
638+
resIf, err := sc.getResourceIf(task, "update")
639+
if err != nil {
640+
return err
641+
}
642+
_, err = resIf.Update(context.TODO(), task.liveObj, metav1.UpdateOptions{})
643+
return err
644+
}
645+
579646
func (sc *syncContext) deleteHooks(hooksPendingDeletion syncTasks) {
580647
for _, task := range hooksPendingDeletion {
581648
err := sc.deleteResource(task)
@@ -597,8 +664,8 @@ func (sc *syncContext) GetState() (common.OperationPhase, string, []common.Resou
597664
}
598665

599666
func (sc *syncContext) setOperationFailed(syncFailTasks, syncFailedTasks syncTasks, message string) {
600-
errorMessageFactory := func(_ []*syncTask, message string) string {
601-
messages := syncFailedTasks.Map(func(task *syncTask) string {
667+
errorMessageFactory := func(tasks syncTasks, message string) string {
668+
messages := tasks.Map(func(task *syncTask) string {
602669
return task.message
603670
})
604671
if len(messages) > 0 {
@@ -619,7 +686,9 @@ func (sc *syncContext) setOperationFailed(syncFailTasks, syncFailedTasks syncTas
619686
// the phase, so we make sure we have at least one more sync
620687
sc.log.WithValues("syncFailTasks", syncFailTasks).V(1).Info("Running sync fail tasks")
621688
if sc.runTasks(syncFailTasks, false) == failed {
622-
sc.setOperationPhase(common.OperationFailed, errorMessage)
689+
failedSyncFailTasks := syncFailTasks.Filter(func(t *syncTask) bool { return t.syncStatus == common.ResultCodeSyncFailed })
690+
syncFailTasksMessage := errorMessageFactory(failedSyncFailTasks, "one or more SyncFail hooks failed")
691+
sc.setOperationPhase(common.OperationFailed, fmt.Sprintf("%s\n%s", errorMessage, syncFailTasksMessage))
623692
}
624693
} else {
625694
sc.setOperationPhase(common.OperationFailed, errorMessage)
@@ -679,7 +748,9 @@ func (sc *syncContext) getSyncTasks() (_ syncTasks, successful bool) {
679748
generateName := obj.GetGenerateName()
680749
targetObj.SetName(fmt.Sprintf("%s%s", generateName, postfix))
681750
}
682-
751+
if !hook.HasHookFinalizer(targetObj) {
752+
targetObj.SetFinalizers(append(targetObj.GetFinalizers(), hook.HookFinalizer))
753+
}
683754
hookTasks = append(hookTasks, &syncTask{phase: phase, targetObj: targetObj})
684755
}
685756
}
@@ -1085,14 +1156,19 @@ func (sc *syncContext) Terminate() {
10851156
if !task.isHook() || task.liveObj == nil {
10861157
continue
10871158
}
1159+
if err := sc.removeHookFinalizer(task); err != nil {
1160+
sc.setResourceResult(task, task.syncStatus, common.OperationError, fmt.Sprintf("Failed to remove hook finalizer: %v", err))
1161+
terminateSuccessful = false
1162+
continue
1163+
}
10881164
phase, msg, err := sc.getOperationPhase(task.liveObj)
10891165
if err != nil {
10901166
sc.setOperationPhase(common.OperationError, fmt.Sprintf("Failed to get hook health: %v", err))
10911167
return
10921168
}
10931169
if phase == common.OperationRunning {
10941170
err := sc.deleteResource(task)
1095-
if err != nil {
1171+
if err != nil && !apierrors.IsNotFound(err) {
10961172
sc.setResourceResult(task, "", common.OperationFailed, fmt.Sprintf("Failed to delete: %v", err))
10971173
terminateSuccessful = false
10981174
} else {

pkg/sync/sync_context_test.go

Lines changed: 64 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"github.com/argoproj/gitops-engine/pkg/diff"
3030
"github.com/argoproj/gitops-engine/pkg/health"
3131
synccommon "github.com/argoproj/gitops-engine/pkg/sync/common"
32+
"github.com/argoproj/gitops-engine/pkg/sync/hook"
3233
"github.com/argoproj/gitops-engine/pkg/utils/kube"
3334
"github.com/argoproj/gitops-engine/pkg/utils/kube/kubetest"
3435
testingutils "github.com/argoproj/gitops-engine/pkg/utils/testing"
@@ -1285,15 +1286,19 @@ func (r resourceNameHealthOverride) GetResourceHealth(obj *unstructured.Unstruct
12851286
}
12861287

12871288
func TestRunSync_HooksNotDeletedIfPhaseNotCompleted(t *testing.T) {
1288-
completedHook := newHook(synccommon.HookTypePreSync)
1289-
completedHook.SetName("completed-hook")
1290-
completedHook.SetNamespace(testingutils.FakeArgoCDNamespace)
1291-
_ = testingutils.Annotate(completedHook, synccommon.AnnotationKeyHookDeletePolicy, "HookSucceeded")
1292-
1293-
inProgressHook := newHook(synccommon.HookTypePreSync)
1294-
inProgressHook.SetNamespace(testingutils.FakeArgoCDNamespace)
1295-
inProgressHook.SetName("in-progress-hook")
1296-
_ = testingutils.Annotate(inProgressHook, synccommon.AnnotationKeyHookDeletePolicy, "HookSucceeded")
1289+
hook1 := newHook(synccommon.HookTypePreSync)
1290+
hook1.SetName("completed-hook")
1291+
hook1.SetNamespace(testingutils.FakeArgoCDNamespace)
1292+
_ = testingutils.Annotate(hook1, synccommon.AnnotationKeyHookDeletePolicy, string(synccommon.HookDeletePolicyHookSucceeded))
1293+
completedHook := hook1.DeepCopy()
1294+
completedHook.SetFinalizers(append(completedHook.GetFinalizers(), hook.HookFinalizer))
1295+
1296+
hook2 := newHook(synccommon.HookTypePreSync)
1297+
hook2.SetNamespace(testingutils.FakeArgoCDNamespace)
1298+
hook2.SetName("in-progress-hook")
1299+
_ = testingutils.Annotate(hook2, synccommon.AnnotationKeyHookDeletePolicy, string(synccommon.HookDeletePolicyHookSucceeded))
1300+
inProgressHook := hook2.DeepCopy()
1301+
inProgressHook.SetFinalizers(append(inProgressHook.GetFinalizers(), hook.HookFinalizer))
12971302

12981303
syncCtx := newTestSyncCtx(nil,
12991304
WithHealthOverride(resourceNameHealthOverride(map[string]health.HealthStatusCode{
@@ -1312,6 +1317,12 @@ func TestRunSync_HooksNotDeletedIfPhaseNotCompleted(t *testing.T) {
13121317
))
13131318
fakeDynamicClient := fake.NewSimpleDynamicClient(runtime.NewScheme())
13141319
syncCtx.dynamicIf = fakeDynamicClient
1320+
updatedCount := 0
1321+
fakeDynamicClient.PrependReactor("update", "*", func(_ testcore.Action) (handled bool, ret runtime.Object, err error) {
1322+
// Removing the finalizers
1323+
updatedCount++
1324+
return true, nil, nil
1325+
})
13151326
deletedCount := 0
13161327
fakeDynamicClient.PrependReactor("delete", "*", func(_ testcore.Action) (handled bool, ret runtime.Object, err error) {
13171328
deletedCount++
@@ -1321,7 +1332,7 @@ func TestRunSync_HooksNotDeletedIfPhaseNotCompleted(t *testing.T) {
13211332
Live: []*unstructured.Unstructured{completedHook, inProgressHook},
13221333
Target: []*unstructured.Unstructured{nil, nil},
13231334
})
1324-
syncCtx.hooks = []*unstructured.Unstructured{completedHook, inProgressHook}
1335+
syncCtx.hooks = []*unstructured.Unstructured{hook1, hook2}
13251336

13261337
syncCtx.kubectl = &kubetest.MockKubectlCmd{
13271338
Commands: map[string]kubetest.KubectlOutput{},
@@ -1330,19 +1341,24 @@ func TestRunSync_HooksNotDeletedIfPhaseNotCompleted(t *testing.T) {
13301341
syncCtx.Sync()
13311342

13321343
assert.Equal(t, synccommon.OperationRunning, syncCtx.phase)
1344+
assert.Equal(t, 0, updatedCount)
13331345
assert.Equal(t, 0, deletedCount)
13341346
}
13351347

13361348
func TestRunSync_HooksDeletedAfterPhaseCompleted(t *testing.T) {
1337-
completedHook1 := newHook(synccommon.HookTypePreSync)
1338-
completedHook1.SetName("completed-hook1")
1339-
completedHook1.SetNamespace(testingutils.FakeArgoCDNamespace)
1340-
_ = testingutils.Annotate(completedHook1, synccommon.AnnotationKeyHookDeletePolicy, "HookSucceeded")
1341-
1342-
completedHook2 := newHook(synccommon.HookTypePreSync)
1343-
completedHook2.SetNamespace(testingutils.FakeArgoCDNamespace)
1344-
completedHook2.SetName("completed-hook2")
1345-
_ = testingutils.Annotate(completedHook2, synccommon.AnnotationKeyHookDeletePolicy, "HookSucceeded")
1349+
hook1 := newHook(synccommon.HookTypePreSync)
1350+
hook1.SetName("completed-hook1")
1351+
hook1.SetNamespace(testingutils.FakeArgoCDNamespace)
1352+
_ = testingutils.Annotate(hook1, synccommon.AnnotationKeyHookDeletePolicy, string(synccommon.HookDeletePolicyHookSucceeded))
1353+
completedHook1 := hook1.DeepCopy()
1354+
completedHook1.SetFinalizers(append(completedHook1.GetFinalizers(), hook.HookFinalizer))
1355+
1356+
hook2 := newHook(synccommon.HookTypePreSync)
1357+
hook2.SetNamespace(testingutils.FakeArgoCDNamespace)
1358+
hook2.SetName("completed-hook2")
1359+
_ = testingutils.Annotate(hook2, synccommon.AnnotationKeyHookDeletePolicy, string(synccommon.HookDeletePolicyHookSucceeded))
1360+
completedHook2 := hook2.DeepCopy()
1361+
completedHook2.SetFinalizers(append(completedHook1.GetFinalizers(), hook.HookFinalizer))
13461362

13471363
syncCtx := newTestSyncCtx(nil,
13481364
WithInitialState(synccommon.OperationRunning, "", []synccommon.ResourceSyncResult{{
@@ -1358,6 +1374,12 @@ func TestRunSync_HooksDeletedAfterPhaseCompleted(t *testing.T) {
13581374
))
13591375
fakeDynamicClient := fake.NewSimpleDynamicClient(runtime.NewScheme())
13601376
syncCtx.dynamicIf = fakeDynamicClient
1377+
updatedCount := 0
1378+
fakeDynamicClient.PrependReactor("update", "*", func(_ testcore.Action) (handled bool, ret runtime.Object, err error) {
1379+
// Removing the finalizers
1380+
updatedCount++
1381+
return true, nil, nil
1382+
})
13611383
deletedCount := 0
13621384
fakeDynamicClient.PrependReactor("delete", "*", func(_ testcore.Action) (handled bool, ret runtime.Object, err error) {
13631385
deletedCount++
@@ -1367,7 +1389,7 @@ func TestRunSync_HooksDeletedAfterPhaseCompleted(t *testing.T) {
13671389
Live: []*unstructured.Unstructured{completedHook1, completedHook2},
13681390
Target: []*unstructured.Unstructured{nil, nil},
13691391
})
1370-
syncCtx.hooks = []*unstructured.Unstructured{completedHook1, completedHook2}
1392+
syncCtx.hooks = []*unstructured.Unstructured{hook1, hook2}
13711393

13721394
syncCtx.kubectl = &kubetest.MockKubectlCmd{
13731395
Commands: map[string]kubetest.KubectlOutput{},
@@ -1376,19 +1398,24 @@ func TestRunSync_HooksDeletedAfterPhaseCompleted(t *testing.T) {
13761398
syncCtx.Sync()
13771399

13781400
assert.Equal(t, synccommon.OperationSucceeded, syncCtx.phase)
1401+
assert.Equal(t, 2, updatedCount)
13791402
assert.Equal(t, 2, deletedCount)
13801403
}
13811404

13821405
func TestRunSync_HooksDeletedAfterPhaseCompletedFailed(t *testing.T) {
1383-
completedHook1 := newHook(synccommon.HookTypeSync)
1384-
completedHook1.SetName("completed-hook1")
1385-
completedHook1.SetNamespace(testingutils.FakeArgoCDNamespace)
1386-
_ = testingutils.Annotate(completedHook1, synccommon.AnnotationKeyHookDeletePolicy, "HookFailed")
1387-
1388-
completedHook2 := newHook(synccommon.HookTypeSync)
1389-
completedHook2.SetNamespace(testingutils.FakeArgoCDNamespace)
1390-
completedHook2.SetName("completed-hook2")
1391-
_ = testingutils.Annotate(completedHook2, synccommon.AnnotationKeyHookDeletePolicy, "HookFailed")
1406+
hook1 := newHook(synccommon.HookTypeSync)
1407+
hook1.SetName("completed-hook1")
1408+
hook1.SetNamespace(testingutils.FakeArgoCDNamespace)
1409+
_ = testingutils.Annotate(hook1, synccommon.AnnotationKeyHookDeletePolicy, string(synccommon.HookDeletePolicyHookFailed))
1410+
completedHook1 := hook1.DeepCopy()
1411+
completedHook1.SetFinalizers(append(completedHook1.GetFinalizers(), hook.HookFinalizer))
1412+
1413+
hook2 := newHook(synccommon.HookTypeSync)
1414+
hook2.SetNamespace(testingutils.FakeArgoCDNamespace)
1415+
hook2.SetName("completed-hook2")
1416+
_ = testingutils.Annotate(hook2, synccommon.AnnotationKeyHookDeletePolicy, string(synccommon.HookDeletePolicyHookFailed))
1417+
completedHook2 := hook2.DeepCopy()
1418+
completedHook2.SetFinalizers(append(completedHook1.GetFinalizers(), hook.HookFinalizer))
13921419

13931420
syncCtx := newTestSyncCtx(nil,
13941421
WithInitialState(synccommon.OperationRunning, "", []synccommon.ResourceSyncResult{{
@@ -1404,6 +1431,12 @@ func TestRunSync_HooksDeletedAfterPhaseCompletedFailed(t *testing.T) {
14041431
))
14051432
fakeDynamicClient := fake.NewSimpleDynamicClient(runtime.NewScheme())
14061433
syncCtx.dynamicIf = fakeDynamicClient
1434+
updatedCount := 0
1435+
fakeDynamicClient.PrependReactor("update", "*", func(_ testcore.Action) (handled bool, ret runtime.Object, err error) {
1436+
// Removing the finalizers
1437+
updatedCount++
1438+
return true, nil, nil
1439+
})
14071440
deletedCount := 0
14081441
fakeDynamicClient.PrependReactor("delete", "*", func(_ testcore.Action) (handled bool, ret runtime.Object, err error) {
14091442
deletedCount++
@@ -1413,7 +1446,7 @@ func TestRunSync_HooksDeletedAfterPhaseCompletedFailed(t *testing.T) {
14131446
Live: []*unstructured.Unstructured{completedHook1, completedHook2},
14141447
Target: []*unstructured.Unstructured{nil, nil},
14151448
})
1416-
syncCtx.hooks = []*unstructured.Unstructured{completedHook1, completedHook2}
1449+
syncCtx.hooks = []*unstructured.Unstructured{hook1, hook2}
14171450

14181451
syncCtx.kubectl = &kubetest.MockKubectlCmd{
14191452
Commands: map[string]kubetest.KubectlOutput{},
@@ -1422,6 +1455,7 @@ func TestRunSync_HooksDeletedAfterPhaseCompletedFailed(t *testing.T) {
14221455
syncCtx.Sync()
14231456

14241457
assert.Equal(t, synccommon.OperationFailed, syncCtx.phase)
1458+
assert.Equal(t, 2, updatedCount)
14251459
assert.Equal(t, 2, deletedCount)
14261460
}
14271461

0 commit comments

Comments
 (0)