Skip to content

Commit 93eb5ef

Browse files
joelanfordporridge
andauthored
reconciler: fix deletion wait and updater retry logic (#380)
* reconciler: fix deletion wait and updater retry logic Signed-off-by: Joe Lanford <joe.lanford@gmail.com> * Keep linter happy. --------- Signed-off-by: Joe Lanford <joe.lanford@gmail.com> Co-authored-by: Marcin Owsiany <porridge@redhat.com>
1 parent 05fd16e commit 93eb5ef

File tree

5 files changed

+146
-42
lines changed

5 files changed

+146
-42
lines changed

pkg/reconciler/internal/updater/updater.go

Lines changed: 23 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -84,41 +84,40 @@ func (u *Updater) Apply(ctx context.Context, obj *unstructured.Unstructured) err
8484

8585
backoff := retry.DefaultRetry
8686

87+
st := statusFor(obj)
88+
needsStatusUpdate := false
89+
for _, f := range u.updateStatusFuncs {
90+
needsStatusUpdate = f(st) || needsStatusUpdate
91+
}
92+
8793
// Always update the status first. During uninstall, if
8894
// we remove the finalizer, updating the status will fail
8995
// because the object and its status will be garbage-collected.
90-
if err := retryOnRetryableUpdateError(backoff, func() error {
91-
st := statusFor(obj)
92-
needsStatusUpdate := false
93-
for _, f := range u.updateStatusFuncs {
94-
needsStatusUpdate = f(st) || needsStatusUpdate
96+
if needsStatusUpdate {
97+
uSt, err := runtime.DefaultUnstructuredConverter.ToUnstructured(st)
98+
if err != nil {
99+
return err
95100
}
96-
if needsStatusUpdate {
97-
uSt, err := runtime.DefaultUnstructuredConverter.ToUnstructured(st)
98-
if err != nil {
99-
return err
100-
}
101-
obj.Object["status"] = uSt
101+
obj.Object["status"] = uSt
102+
103+
if err := retryOnRetryableUpdateError(backoff, func() error {
102104
return u.client.Status().Update(ctx, obj)
105+
}); err != nil {
106+
return err
103107
}
104-
return nil
105-
}); err != nil {
106-
return err
107108
}
108109

109-
if err := retryOnRetryableUpdateError(backoff, func() error {
110-
needsUpdate := false
111-
for _, f := range u.updateFuncs {
112-
needsUpdate = f(obj) || needsUpdate
113-
}
114-
if needsUpdate {
110+
needsUpdate := false
111+
for _, f := range u.updateFuncs {
112+
needsUpdate = f(obj) || needsUpdate
113+
}
114+
if needsUpdate {
115+
if err := retryOnRetryableUpdateError(backoff, func() error {
115116
return u.client.Update(ctx, obj)
117+
}); err != nil {
118+
return err
116119
}
117-
return nil
118-
}); err != nil {
119-
return err
120120
}
121-
122121
return nil
123122
}
124123

pkg/reconciler/internal/updater/updater_test.go

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,19 @@ package updater
1818

1919
import (
2020
"context"
21+
"errors"
2122

2223
. "github.com/onsi/ginkgo/v2"
2324
. "github.com/onsi/gomega"
25+
2426
"helm.sh/helm/v3/pkg/release"
2527
corev1 "k8s.io/api/core/v1"
2628
apierrors "k8s.io/apimachinery/pkg/api/errors"
2729
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
2830
"k8s.io/apimachinery/pkg/types"
2931
"sigs.k8s.io/controller-runtime/pkg/client"
3032
"sigs.k8s.io/controller-runtime/pkg/client/fake"
33+
"sigs.k8s.io/controller-runtime/pkg/client/interceptor"
3134

3235
"github.com/operator-framework/helm-operator-plugins/pkg/reconciler/internal/conditions"
3336
)
@@ -36,14 +39,15 @@ const testFinalizer = "testFinalizer"
3639

3740
var _ = Describe("Updater", func() {
3841
var (
39-
client client.Client
40-
u Updater
41-
obj *unstructured.Unstructured
42+
cl client.Client
43+
u Updater
44+
obj *unstructured.Unstructured
45+
interceptorFuncs interceptor.Funcs
4246
)
4347

44-
BeforeEach(func() {
45-
client = fake.NewClientBuilder().Build()
46-
u = New(client)
48+
JustBeforeEach(func() {
49+
cl = fake.NewClientBuilder().WithInterceptorFuncs(interceptorFuncs).Build()
50+
u = New(cl)
4751
obj = &unstructured.Unstructured{Object: map[string]interface{}{
4852
"apiVersion": "apps/v1",
4953
"kind": "Deployment",
@@ -53,12 +57,12 @@ var _ = Describe("Updater", func() {
5357
},
5458
"spec": map[string]interface{}{},
5559
}}
56-
Expect(client.Create(context.TODO(), obj)).To(Succeed())
60+
Expect(cl.Create(context.TODO(), obj)).To(Succeed())
5761
})
5862

5963
When("the object does not exist", func() {
6064
It("should fail", func() {
61-
Expect(client.Delete(context.TODO(), obj)).To(Succeed())
65+
Expect(cl.Delete(context.TODO(), obj)).To(Succeed())
6266
u.Update(func(u *unstructured.Unstructured) bool {
6367
u.SetAnnotations(map[string]string{"foo": "bar"})
6468
return true
@@ -70,6 +74,18 @@ var _ = Describe("Updater", func() {
7074
})
7175

7276
When("an update is a change", func() {
77+
var updateCallCount int
78+
79+
BeforeEach(func() {
80+
// On the first update of (status) subresource, return an error. After that do what is expected.
81+
interceptorFuncs.SubResourceUpdate = func(ctx context.Context, interceptorClient client.Client, subResourceName string, obj client.Object, opts ...client.SubResourceUpdateOption) error {
82+
updateCallCount++
83+
if updateCallCount == 1 {
84+
return errors.New("transient error")
85+
}
86+
return interceptorClient.SubResource(subResourceName).Update(ctx, obj, opts...)
87+
}
88+
})
7389
It("should apply an update function", func() {
7490
u.Update(func(u *unstructured.Unstructured) bool {
7591
u.SetAnnotations(map[string]string{"foo": "bar"})
@@ -78,7 +94,7 @@ var _ = Describe("Updater", func() {
7894
resourceVersion := obj.GetResourceVersion()
7995

8096
Expect(u.Apply(context.TODO(), obj)).To(Succeed())
81-
Expect(client.Get(context.TODO(), types.NamespacedName{Namespace: "testNamespace", Name: "testDeployment"}, obj)).To(Succeed())
97+
Expect(cl.Get(context.TODO(), types.NamespacedName{Namespace: "testNamespace", Name: "testDeployment"}, obj)).To(Succeed())
8298
Expect(obj.GetAnnotations()["foo"]).To(Equal("bar"))
8399
Expect(obj.GetResourceVersion()).NotTo(Equal(resourceVersion))
84100
})
@@ -88,7 +104,7 @@ var _ = Describe("Updater", func() {
88104
resourceVersion := obj.GetResourceVersion()
89105

90106
Expect(u.Apply(context.TODO(), obj)).To(Succeed())
91-
Expect(client.Get(context.TODO(), types.NamespacedName{Namespace: "testNamespace", Name: "testDeployment"}, obj)).To(Succeed())
107+
Expect(cl.Get(context.TODO(), types.NamespacedName{Namespace: "testNamespace", Name: "testDeployment"}, obj)).To(Succeed())
92108
Expect((obj.Object["status"].(map[string]interface{}))["conditions"]).To(HaveLen(1))
93109
Expect(obj.GetResourceVersion()).NotTo(Equal(resourceVersion))
94110
})

pkg/reconciler/internal/values/values.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"fmt"
2222
"os"
23+
"time"
2324

2425
"helm.sh/helm/v3/pkg/chartutil"
2526
"helm.sh/helm/v3/pkg/strvals"
@@ -28,6 +29,8 @@ import (
2829
"github.com/operator-framework/helm-operator-plugins/pkg/values"
2930
)
3031

32+
var DefaultWaitForDeletionTimeout = 30 * time.Second
33+
3134
var DefaultMaxReleaseHistory = 10
3235

3336
var DefaultMapper = values.MapperFunc(func(v chartutil.Values) chartutil.Values { return v })

pkg/reconciler/reconciler.go

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ import (
2525
"time"
2626

2727
"github.com/go-logr/logr"
28-
sdkhandler "github.com/operator-framework/operator-lib/handler"
2928
errs "github.com/pkg/errors"
3029
"helm.sh/helm/v3/pkg/action"
3130
"helm.sh/helm/v3/pkg/chart"
@@ -46,6 +45,8 @@ import (
4645
"sigs.k8s.io/controller-runtime/pkg/predicate"
4746
"sigs.k8s.io/controller-runtime/pkg/source"
4847

48+
sdkhandler "github.com/operator-framework/operator-lib/handler"
49+
4950
"github.com/operator-framework/helm-operator-plugins/internal/sdk/controllerutil"
5051
"github.com/operator-framework/helm-operator-plugins/pkg/annotation"
5152
helmclient "github.com/operator-framework/helm-operator-plugins/pkg/client"
@@ -78,6 +79,7 @@ type Reconciler struct {
7879
skipDependentWatches bool
7980
maxConcurrentReconciles int
8081
reconcilePeriod time.Duration
82+
waitForDeletionTimeout time.Duration
8183
maxReleaseHistory *int
8284
skipPrimaryGVKSchemeRegistration bool
8385
controllerSetupFuncs []ControllerSetupFunc
@@ -332,6 +334,20 @@ func WithMaxConcurrentReconciles(max int) Option {
332334
}
333335
}
334336

337+
// WithWaitForDeletionTimeout is an Option that configures how long to wait for the client to
338+
// report that the primary resource has been deleted. If the primary resource is not deleted
339+
// within this time, the reconciler will return an error and retry reconciliation. By default,
340+
// the reconciler will wait for 30s. This function requires positive values.
341+
func WithWaitForDeletionTimeout(timeout time.Duration) Option {
342+
return func(r *Reconciler) error {
343+
if timeout <= 0 {
344+
return errors.New("wait for deletion timeout must be a positive value")
345+
}
346+
r.waitForDeletionTimeout = timeout
347+
return nil
348+
}
349+
}
350+
335351
// WithReconcilePeriod is an Option that configures the reconcile period of the
336352
// controller. This will cause the controller to reconcile CRs at least once
337353
// every period. By default, the reconcile period is set to 0, which means no
@@ -727,13 +743,15 @@ func (r *Reconciler) handleDeletion(ctx context.Context, actionClient helmclient
727743
}
728744

729745
// Since the client is hitting a cache, waiting for the
730-
// deletion here will guarantee that the next reconciliation
746+
// deletion here will help ensure that the next reconciliation
731747
// will see that the CR has been deleted and that there's
732748
// nothing left to do.
733-
if err := controllerutil.WaitForDeletion(ctx, r.client, obj); err != nil {
734-
return err
735-
}
736-
return nil
749+
//
750+
// If the CR is not deleted within the timeout, the next reconciliation
751+
// will attempt to uninstall the release again.
752+
timeoutCtx, timeoutCancel := context.WithTimeout(ctx, r.waitForDeletionTimeout)
753+
defer timeoutCancel()
754+
return controllerutil.WaitForDeletion(timeoutCtx, r.client, obj)
737755
}
738756

739757
func (r *Reconciler) getReleaseState(client helmclient.ActionInterface, obj metav1.Object, vals map[string]interface{}) (*release.Release, helmReleaseState, error) {
@@ -939,6 +957,10 @@ func (r *Reconciler) addDefaults(mgr ctrl.Manager, controllerName string) error
939957
r.valueMapper = internalvalues.DefaultMapper
940958
}
941959

960+
if r.waitForDeletionTimeout == 0 {
961+
r.waitForDeletionTimeout = internalvalues.DefaultWaitForDeletionTimeout
962+
}
963+
942964
if r.maxReleaseHistory == nil {
943965
r.maxReleaseHistory = &internalvalues.DefaultMaxReleaseHistory
944966
}

pkg/reconciler/reconciler_test.go

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,11 @@ import (
2424
"strconv"
2525
"time"
2626

27-
"github.com/go-logr/logr"
2827
. "github.com/onsi/ginkgo/v2"
2928
. "github.com/onsi/gomega"
3029
. "github.com/onsi/gomega/gstruct"
31-
sdkhandler "github.com/operator-framework/operator-lib/handler"
30+
31+
"github.com/go-logr/logr"
3232
"helm.sh/helm/v3/pkg/action"
3333
"helm.sh/helm/v3/pkg/chart"
3434
"helm.sh/helm/v3/pkg/chartutil"
@@ -46,6 +46,7 @@ import (
4646
"k8s.io/client-go/tools/record"
4747
"sigs.k8s.io/controller-runtime/pkg/client"
4848
"sigs.k8s.io/controller-runtime/pkg/client/fake"
49+
"sigs.k8s.io/controller-runtime/pkg/client/interceptor"
4950
"sigs.k8s.io/controller-runtime/pkg/event"
5051
"sigs.k8s.io/controller-runtime/pkg/log/zap"
5152
"sigs.k8s.io/controller-runtime/pkg/manager"
@@ -54,6 +55,8 @@ import (
5455
"sigs.k8s.io/controller-runtime/pkg/source"
5556
"sigs.k8s.io/yaml"
5657

58+
sdkhandler "github.com/operator-framework/operator-lib/handler"
59+
5760
"github.com/operator-framework/helm-operator-plugins/internal/sdk/controllerutil"
5861
"github.com/operator-framework/helm-operator-plugins/pkg/annotation"
5962
helmclient "github.com/operator-framework/helm-operator-plugins/pkg/client"
@@ -205,6 +208,18 @@ var _ = Describe("Reconciler", func() {
205208
Expect(WithMaxConcurrentReconciles(-1)(r)).NotTo(Succeed())
206209
})
207210
})
211+
_ = Describe("WithWaitForDeletionTimeout", func() {
212+
It("should set the reconciler wait for deletion timeout", func() {
213+
Expect(WithWaitForDeletionTimeout(time.Second)(r)).To(Succeed())
214+
Expect(r.waitForDeletionTimeout).To(Equal(time.Second))
215+
})
216+
It("should fail if value is zero", func() {
217+
Expect(WithWaitForDeletionTimeout(0)(r)).NotTo(Succeed())
218+
})
219+
It("should fail if value is negative", func() {
220+
Expect(WithWaitForDeletionTimeout(-time.Second)(r)).NotTo(Succeed())
221+
})
222+
})
208223
_ = Describe("WithReconcilePeriod", func() {
209224
It("should set the reconciler reconcile period", func() {
210225
Expect(WithReconcilePeriod(0)(r)).To(Succeed())
@@ -686,6 +701,55 @@ var _ = Describe("Reconciler", func() {
686701
})
687702
})
688703
})
704+
When("cache contains stale CR that has actually been deleted", func() {
705+
// This test simulates what we expect to happen when we time out waiting for a CR that we
706+
// deleted to be removed from the cache.
707+
It("ignores not found errors and returns successfully", func() {
708+
By("deleting the CR and then setting a finalizer on the stale CR", func() {
709+
// We _actually_ remove the CR from the API server, but we'll make a fake client
710+
// that returns the stale CR.
711+
Expect(mgr.GetClient().Delete(ctx, obj)).To(Succeed())
712+
Eventually(func() error {
713+
return mgr.GetAPIReader().Get(ctx, objKey, obj)
714+
}).Should(WithTransform(apierrors.IsNotFound, BeTrue()))
715+
716+
// We set the finalizer on the stale CR to simulate the typical state of the CR from a
717+
// prior reconcile run that timed out waiting for the CR to be removed from the cache.
718+
obj.SetFinalizers([]string{uninstallFinalizer})
719+
})
720+
721+
By("configuring a client that returns the stale CR", func() {
722+
// Make a client that returns the stale CR, but sends writes to the real client.
723+
cl := fake.NewClientBuilder().WithObjects(obj).WithInterceptorFuncs(interceptor.Funcs{
724+
Create: func(ctx context.Context, _ client.WithWatch, fakeObj client.Object, opts ...client.CreateOption) error {
725+
return mgr.GetClient().Create(ctx, fakeObj, opts...)
726+
},
727+
Delete: func(ctx context.Context, _ client.WithWatch, fakeObj client.Object, opts ...client.DeleteOption) error {
728+
return mgr.GetClient().Delete(ctx, fakeObj, opts...)
729+
},
730+
DeleteAllOf: func(ctx context.Context, _ client.WithWatch, fakeObj client.Object, opts ...client.DeleteAllOfOption) error {
731+
return mgr.GetClient().DeleteAllOf(ctx, fakeObj, opts...)
732+
},
733+
Update: func(ctx context.Context, _ client.WithWatch, fakeObj client.Object, opts ...client.UpdateOption) error {
734+
return mgr.GetClient().Update(ctx, fakeObj, opts...)
735+
},
736+
Patch: func(ctx context.Context, _ client.WithWatch, fakeObj client.Object, patch client.Patch, opts ...client.PatchOption) error {
737+
return mgr.GetClient().Patch(ctx, fakeObj, patch, opts...)
738+
},
739+
SubResource: func(_ client.WithWatch, subresource string) client.SubResourceClient {
740+
return mgr.GetClient().SubResource(subresource)
741+
},
742+
}).WithStatusSubresource(obj).Build()
743+
r.client = cl
744+
})
745+
746+
By("successfully ignoring not found errors and returning a nil error", func() {
747+
res, err := r.Reconcile(ctx, req)
748+
Expect(res).To(Equal(reconcile.Result{}))
749+
Expect(err).ToNot(HaveOccurred())
750+
})
751+
})
752+
})
689753
When("CR is deleted, release is not present, but uninstall finalizer exists", func() {
690754
It("removes the finalizer", func() {
691755
By("adding the uninstall finalizer and deleting the CR", func() {

0 commit comments

Comments
 (0)