Skip to content

Commit d29085b

Browse files
pooknullhors
andauthored
K8SPG-804: Fix internal.percona.com/delete-backup finalizer (#1182)
* Fix `internal.percona.com/delete-backup` finalizer * i think this is a proper fix * one more fix * final fix * update the commets * delete return since we are getting latest cluster object * fix tests * add dependency * final lint fix --------- Co-authored-by: Viacheslav Sarzhan <slava.sarzhan@percona.com>
1 parent 16dc36e commit d29085b

File tree

2 files changed

+44
-40
lines changed

2 files changed

+44
-40
lines changed

internal/controller/postgrescluster/pgbackrest.go

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2303,7 +2303,28 @@ func (r *Reconciler) reconcileDedicatedRepoHost(ctx context.Context,
23032303
// manually by the end-user
23042304
func (r *Reconciler) reconcileManualBackup(ctx context.Context,
23052305
postgresCluster *v1beta1.PostgresCluster, manualBackupJobs []*batchv1.Job,
2306-
serviceAccount *corev1.ServiceAccount, instances *observedInstances) error {
2306+
serviceAccount *corev1.ServiceAccount, instances *observedInstances,
2307+
) error {
2308+
// K8SPG-804: Get the current state of PostgresCluster.
2309+
// It's necessary to make internal.percona.com/delete-backup finalizer work.
2310+
// Because the reconcileManualBackup can get an outdated postgresCluster,
2311+
// resulting in a duplicated backup jobs per one pg-backup resources.
2312+
// For more information check the K8SPG-804 PR description.
2313+
currentPostgresCluster := new(v1beta1.PostgresCluster)
2314+
err := r.Client.Get(ctx, client.ObjectKeyFromObject(postgresCluster), currentPostgresCluster)
2315+
if client.IgnoreNotFound(err) != nil {
2316+
return err
2317+
}
2318+
2319+
// refPostgresCluster keeps pointer to the postgresCluster which is used in other reconcile functions
2320+
// It should be used to assign values to the postgresCluster inside this function
2321+
refPostgresCluster := postgresCluster
2322+
2323+
// If it's the first run of reconcileManualBackup .Status will be nil.
2324+
// Nothing will happen if we keep the old postgresCluster.
2325+
if !apierrors.IsNotFound(err) && currentPostgresCluster.Status.PGBackRest != nil {
2326+
postgresCluster = currentPostgresCluster
2327+
}
23072328

23082329
manualAnnotation := postgresCluster.GetAnnotations()[naming.PGBackRestBackup]
23092330
manualStatus := postgresCluster.Status.PGBackRest.ManualBackup
@@ -2396,7 +2417,7 @@ func (r *Reconciler) reconcileManualBackup(ctx context.Context,
23962417
meta.RemoveStatusCondition(&postgresCluster.Status.Conditions,
23972418
ConditionManualBackupSuccessful)
23982419

2399-
postgresCluster.Status.PGBackRest.ManualBackup = manualStatus
2420+
refPostgresCluster.Status.PGBackRest.ManualBackup = manualStatus
24002421
}
24012422

24022423
// if the status shows the Job is no longer in progress, then simply exit (which means a Job

percona/controller/pgbackup/controller.go

Lines changed: 21 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"context"
55
"path"
66
"slices"
7-
"strings"
87
"time"
98

109
"github.com/pkg/errors"
@@ -514,10 +513,17 @@ func finishBackup(ctx context.Context, c client.Client, pgBackup *v2.PerconaPGBa
514513
if err != nil {
515514
return nil, errors.Wrap(err, "get backup in progress")
516515
}
516+
517517
if runningBackup != pgBackup.Name {
518+
// This block only runs after all finalizer operations are complete.
519+
// Or, it runs when the user deletes a backup object that never started.
520+
// In both cases, treat this as the function's exit point.
521+
518522
return nil, nil
519523
}
520524

525+
// deleteAnnotation deletes the annotation from the PerconaPGCluster
526+
// and returns true when it's deleted from crunchy PostgresCluster.
521527
deleteAnnotation := func(annotation string) (bool, error) {
522528
pgCluster := new(v1beta1.PostgresCluster)
523529
if err := c.Get(ctx, types.NamespacedName{Name: pgBackup.Spec.PGCluster, Namespace: pgBackup.Namespace}, pgCluster); err != nil {
@@ -555,24 +561,20 @@ func finishBackup(ctx context.Context, c client.Client, pgBackup *v2.PerconaPGBa
555561
return nil, errors.Wrapf(err, "delete %s annotation", naming.PGBackRestBackup)
556562
}
557563
if !deleted {
558-
// We should wait until the crunchy reconciler is finished.
559-
// If we delete the job labels without waiting for the reconcile to finish, the Crunchy reconciler will
560-
// receive the pgcluster with the "naming.PGBackRestBackup" annotation, but will not find the manual backup job.
561-
// It will attempt to create a new job with the same name, failing and resulting in a scary error in the logs.
564+
// We should wait until the annotation is deleted from crunchy cluster.
562565
return &reconcile.Result{RequeueAfter: time.Second * 5}, nil
563566
}
564567

565568
// Remove PGBackRest labels to prevent the job from being
566-
// deleted by the cleanupRepoResources method.
569+
// deleted by the cleanupRepoResources method and included in
570+
// repoResources.manualBackupJobs used in reconcileManualBackup method
567571
if job != nil {
568572
if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
569573
j := new(batchv1.Job)
570574
if err := c.Get(ctx, client.ObjectKeyFromObject(job), j); err != nil {
571-
if k8serrors.IsNotFound(err) {
572-
return nil
573-
}
574575
return errors.Wrap(err, "get job")
575576
}
577+
576578
for k := range naming.PGBackRestLabels(pgBackup.Spec.PGCluster) {
577579
delete(j.Labels, k)
578580
}
@@ -583,6 +585,7 @@ func finishBackup(ctx context.Context, c client.Client, pgBackup *v2.PerconaPGBa
583585
}
584586
}
585587

588+
// We should delete the pgbackrest status to be able to recreate backups with the same name.
586589
if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
587590
pgCluster := new(v1beta1.PostgresCluster)
588591
if err := c.Get(ctx, types.NamespacedName{Name: pgBackup.Spec.PGCluster, Namespace: pgBackup.Namespace}, pgCluster); err != nil {
@@ -595,39 +598,19 @@ func finishBackup(ctx context.Context, c client.Client, pgBackup *v2.PerconaPGBa
595598
return nil, errors.Wrap(err, "update postgrescluster")
596599
}
597600

598-
deleted, err = deleteAnnotation(pNaming.AnnotationBackupInProgress)
601+
_, err = deleteAnnotation(pNaming.AnnotationBackupInProgress)
599602
if err != nil {
600603
return nil, errors.Wrapf(err, "delete %s annotation", pNaming.AnnotationBackupInProgress)
601604
}
602-
if !deleted {
603-
return &reconcile.Result{RequeueAfter: time.Second * 5}, nil
604-
}
605-
606-
if job != nil && checkBackupJob(job) != v2.BackupSucceeded {
607-
// Remove all crunchy labels to prevent the job from being included in
608-
// repoResources.manualBackupJobs used in reconcileManualBackup method.
609-
if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
610-
j := new(batchv1.Job)
611-
if err := c.Get(ctx, client.ObjectKeyFromObject(job), j); err != nil {
612-
if k8serrors.IsNotFound(err) {
613-
return nil
614-
}
615-
return errors.Wrap(err, "get job")
616-
}
617-
618-
for k := range j.Labels {
619-
if strings.HasPrefix(k, pNaming.PrefixCrunchy) {
620-
delete(j.Labels, k)
621-
}
622-
}
623-
624-
return c.Update(ctx, j)
625-
}); err != nil {
626-
return nil, errors.Wrap(err, "delete backup job labels")
627-
}
628-
}
605+
// Do not add any code after this comment.
606+
//
607+
// The code after the comment may or may not execute, depending on whether the
608+
// crunchy cluster removes the AnnotationBackupInProgress annotation in time.
609+
//
610+
// Once AnnotationBackupInProgress is deleted, the successful return of finalizer must happen inside the
611+
// `if runningBackup != pgBackup.Name { ... }` block.
629612

630-
return nil, nil
613+
return &reconcile.Result{RequeueAfter: time.Second * 5}, nil
631614
}
632615

633616
func startBackup(ctx context.Context, c client.Client, pb *v2.PerconaPGBackup) error {

0 commit comments

Comments
 (0)