Skip to content

Commit bf34cfe

Browse files
authored
Merge pull request #397 from containeroo/fix/retry
fix: retry failures
2 parents 7cc809b + 1a3a3ee commit bf34cfe

11 files changed

+140
-40
lines changed

cmd/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ func main() {
150150
Client: mgr.GetClient(),
151151
Scheme: mgr.GetScheme(),
152152
HTTPClientTimeout: ipReconcilerHTTPClientTimeout,
153+
RetryInterval: retryInterval,
153154
DefaultReconcileInterval: defaultReconcileInterval,
154155
}).SetupWithManager(mgr); err != nil {
155156
setupLog.Error(err, "unable to create controller", "controller", "IP")

internal/controller/account_controller.go

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ type AccountReconciler struct {
5151
CloudflareAPI *cloudflare.API
5252
}
5353

54+
var errWaitForAccount = errors.New("must wait for account")
55+
5456
// SetupWithManager sets up the controller with the Manager.
5557
func (r *AccountReconciler) SetupWithManager(mgr ctrl.Manager) error {
5658
return ctrl.NewControllerManagedBy(mgr).
@@ -98,35 +100,42 @@ func (r *AccountReconciler) Reconcile(ctx context.Context, req ctrl.Request) (re
98100
return ctrl.Result{Requeue: true}, nil
99101
}
100102

101-
return r.reconcileAccount(ctx, account), nil
103+
return r.reconcileAccount(ctx, account)
102104
}
103105

104106
// reconcileAccount reconciles the account
105-
func (r *AccountReconciler) reconcileAccount(ctx context.Context, account *cloudflareoperatoriov1.Account) ctrl.Result {
107+
func (r *AccountReconciler) reconcileAccount(ctx context.Context, account *cloudflareoperatoriov1.Account) (ctrl.Result, error) {
106108
secret := &corev1.Secret{}
107-
if err := r.Get(ctx, client.ObjectKey{Namespace: account.Spec.ApiToken.SecretRef.Namespace, Name: account.Spec.ApiToken.SecretRef.Name}, secret); err != nil {
109+
if err := r.Get(ctx, client.ObjectKey{
110+
Namespace: account.Spec.ApiToken.SecretRef.Namespace,
111+
Name: account.Spec.ApiToken.SecretRef.Name,
112+
}, secret); err != nil {
108113
intconditions.MarkFalse(account, err)
109-
return ctrl.Result{RequeueAfter: r.RetryInterval}
114+
if apierrors.IsNotFound(err) {
115+
return ctrl.Result{RequeueAfter: r.RetryInterval}, nil
116+
}
117+
return ctrl.Result{}, err
110118
}
111119

112120
cloudflareAPIToken := string(secret.Data["apiToken"])
113121
if cloudflareAPIToken == "" {
114122
intconditions.MarkFalse(account, errors.New("secret has no key named \"apiToken\""))
115-
return ctrl.Result{RequeueAfter: r.RetryInterval}
123+
return ctrl.Result{RequeueAfter: r.RetryInterval}, nil
116124
}
117125

118126
if r.CloudflareAPI.APIToken != cloudflareAPIToken {
119127
cloudflareAPI, err := cloudflare.NewWithAPIToken(cloudflareAPIToken)
120128
if err != nil {
121129
intconditions.MarkFalse(account, err)
122-
return ctrl.Result{RequeueAfter: time.Second * 30}
130+
return ctrl.Result{}, err
123131
}
132+
124133
*r.CloudflareAPI = *cloudflareAPI
125134
}
126135

127136
intconditions.MarkTrue(account, "Account is ready")
128137

129-
return ctrl.Result{RequeueAfter: account.Spec.Interval.Duration}
138+
return ctrl.Result{RequeueAfter: account.Spec.Interval.Duration}, nil
130139
}
131140

132141
// reconcileDelete reconciles the deletion of the account

internal/controller/account_controller_test.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,8 @@ func TestAccountReconciler_reconcileAccount(t *testing.T) {
8282
CloudflareAPI: &cloudflareAPI,
8383
}
8484

85-
_ = r.reconcileAccount(context.TODO(), account)
85+
_, err := r.reconcileAccount(context.TODO(), account)
86+
g.Expect(err).ToNot(HaveOccurred())
8687

8788
g.Expect(account.Status.Conditions).To(conditions.MatchConditions([]metav1.Condition{
8889
*conditions.TrueCondition(cloudflareoperatoriov1.ConditionTypeReady, cloudflareoperatoriov1.ConditionReasonReady, "Account is ready"),
@@ -116,7 +117,8 @@ func TestAccountReconciler_reconcileAccount(t *testing.T) {
116117
CloudflareAPI: &cloudflareAPI,
117118
}
118119

119-
_ = r.reconcileAccount(context.TODO(), account)
120+
_, err := r.reconcileAccount(context.TODO(), account)
121+
g.Expect(err).ToNot(HaveOccurred())
120122

121123
g.Expect(account.Status.Conditions).To(conditions.MatchConditions([]metav1.Condition{
122124
*conditions.FalseCondition(cloudflareoperatoriov1.ConditionTypeReady, cloudflareoperatoriov1.ConditionReasonFailed, "secrets \"secret\" not found"),
@@ -158,7 +160,8 @@ func TestAccountReconciler_reconcileAccount(t *testing.T) {
158160
CloudflareAPI: &cloudflareAPI,
159161
}
160162

161-
_ = r.reconcileAccount(context.TODO(), account)
163+
_, err := r.reconcileAccount(context.TODO(), account)
164+
g.Expect(err).ToNot(HaveOccurred())
162165

163166
g.Expect(account.Status.Conditions).To(conditions.MatchConditions([]metav1.Condition{
164167
*conditions.FalseCondition(cloudflareoperatoriov1.ConditionTypeReady, cloudflareoperatoriov1.ConditionReasonFailed, "secret has no key named \"apiToken\""),

internal/controller/dnsrecord_controller.go

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ import (
4242
"github.com/cloudflare/cloudflare-go"
4343
cloudflareoperatoriov1 "github.com/containeroo/cloudflare-operator/api/v1"
4444
intconditions "github.com/containeroo/cloudflare-operator/internal/conditions"
45+
interrors "github.com/containeroo/cloudflare-operator/internal/errors"
4546
"github.com/containeroo/cloudflare-operator/internal/metrics"
4647
intpredicates "github.com/containeroo/cloudflare-operator/internal/predicates"
4748
)
@@ -112,6 +113,13 @@ func (r *DNSRecordReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
112113
patchOpts = append(patchOpts, patch.WithStatusObservedGeneration{})
113114
}
114115

116+
// We do not want to return these errors, but rather wait for the
117+
// designated RequeueAfter to expire and try again.
118+
// However, not returning an error will cause the patch helper to
119+
// patch the observed generation, which we do not want. So we ignore
120+
// these errors here after patching.
121+
retErr = interrors.Ignore(retErr, errWaitForAccount, errWaitForZone)
122+
115123
if err := patchHelper.Patch(ctx, dnsrecord, patchOpts...); err != nil {
116124
if !dnsrecord.DeletionTimestamp.IsZero() {
117125
err = apierrutil.FilterOut(err, func(e error) bool { return apierrors.IsNotFound(e) })
@@ -130,7 +138,7 @@ func (r *DNSRecordReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
130138

131139
if len(zones.Items) == 0 {
132140
intconditions.MarkFalse(dnsrecord, fmt.Errorf("zone %q not found", zoneName))
133-
return ctrl.Result{}, nil
141+
return ctrl.Result{RequeueAfter: r.RetryInterval}, nil
134142
}
135143

136144
zone := &zones.Items[0]
@@ -148,19 +156,19 @@ func (r *DNSRecordReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
148156
return ctrl.Result{Requeue: true}, nil
149157
}
150158

151-
return r.reconcileDNSRecord(ctx, dnsrecord, zone), nil
159+
return r.reconcileDNSRecord(ctx, dnsrecord, zone)
152160
}
153161

154162
// reconcileDNSRecord reconciles the dnsrecord
155-
func (r *DNSRecordReconciler) reconcileDNSRecord(ctx context.Context, dnsrecord *cloudflareoperatoriov1.DNSRecord, zone *cloudflareoperatoriov1.Zone) ctrl.Result {
163+
func (r *DNSRecordReconciler) reconcileDNSRecord(ctx context.Context, dnsrecord *cloudflareoperatoriov1.DNSRecord, zone *cloudflareoperatoriov1.Zone) (ctrl.Result, error) {
156164
if r.CloudflareAPI.APIToken == "" {
157165
intconditions.MarkUnknown(dnsrecord, "Cloudflare account is not ready")
158-
return ctrl.Result{RequeueAfter: r.RetryInterval}
166+
return ctrl.Result{RequeueAfter: r.RetryInterval}, errWaitForAccount
159167
}
160168

161169
if !conditions.IsTrue(zone, cloudflareoperatoriov1.ConditionTypeReady) {
162170
intconditions.MarkUnknown(dnsrecord, "Zone is not ready")
163-
return ctrl.Result{RequeueAfter: r.RetryInterval}
171+
return ctrl.Result{RequeueAfter: r.RetryInterval}, errWaitForZone
164172
}
165173

166174
var existingRecord cloudflare.DNSRecord
@@ -169,7 +177,7 @@ func (r *DNSRecordReconciler) reconcileDNSRecord(ctx context.Context, dnsrecord
169177
existingRecord, err = r.CloudflareAPI.GetDNSRecord(ctx, cloudflare.ZoneIdentifier(zone.Status.ID), dnsrecord.Status.RecordID)
170178
if err != nil {
171179
intconditions.MarkFalse(dnsrecord, err)
172-
return ctrl.Result{}
180+
return ctrl.Result{RequeueAfter: r.RetryInterval}, nil
173181
}
174182
} else {
175183
cloudflareExistingRecord, _, err := r.CloudflareAPI.ListDNSRecords(ctx, cloudflare.ZoneIdentifier(zone.Status.ID), cloudflare.ListDNSRecordsParams{
@@ -179,7 +187,7 @@ func (r *DNSRecordReconciler) reconcileDNSRecord(ctx context.Context, dnsrecord
179187
})
180188
if err != nil {
181189
intconditions.MarkFalse(dnsrecord, err)
182-
return ctrl.Result{}
190+
return ctrl.Result{RequeueAfter: r.RetryInterval}, nil
183191
}
184192
if len(cloudflareExistingRecord) > 0 {
185193
existingRecord = cloudflareExistingRecord[0]
@@ -191,14 +199,14 @@ func (r *DNSRecordReconciler) reconcileDNSRecord(ctx context.Context, dnsrecord
191199
ip := &cloudflareoperatoriov1.IP{}
192200
if err := r.Get(ctx, client.ObjectKey{Name: dnsrecord.Spec.IPRef.Name}, ip); err != nil {
193201
intconditions.MarkFalse(dnsrecord, err)
194-
return ctrl.Result{RequeueAfter: r.RetryInterval}
202+
return ctrl.Result{RequeueAfter: r.RetryInterval}, nil
195203
}
196204
dnsrecord.Spec.Content = ip.Spec.Address
197205
}
198206

199207
if *dnsrecord.Spec.Proxied && dnsrecord.Spec.TTL != 1 {
200208
intconditions.MarkFalse(dnsrecord, errors.New("TTL must be 1 when proxied"))
201-
return ctrl.Result{}
209+
return ctrl.Result{}, nil
202210
}
203211

204212
if existingRecord.ID == "" {
@@ -213,7 +221,7 @@ func (r *DNSRecordReconciler) reconcileDNSRecord(ctx context.Context, dnsrecord
213221
})
214222
if err != nil {
215223
intconditions.MarkFalse(dnsrecord, err)
216-
return ctrl.Result{RequeueAfter: r.RetryInterval}
224+
return ctrl.Result{RequeueAfter: r.RetryInterval}, nil
217225
}
218226
dnsrecord.Status.RecordID = newDNSRecord.ID
219227
} else if !r.compareDNSRecord(dnsrecord.Spec, existingRecord) {
@@ -228,13 +236,13 @@ func (r *DNSRecordReconciler) reconcileDNSRecord(ctx context.Context, dnsrecord
228236
Data: dnsrecord.Spec.Data,
229237
}); err != nil {
230238
intconditions.MarkFalse(dnsrecord, err)
231-
return ctrl.Result{RequeueAfter: r.RetryInterval}
239+
return ctrl.Result{RequeueAfter: r.RetryInterval}, nil
232240
}
233241
}
234242

235243
intconditions.MarkTrue(dnsrecord, "DNS record synced")
236244

237-
return ctrl.Result{RequeueAfter: dnsrecord.Spec.Interval.Duration}
245+
return ctrl.Result{RequeueAfter: dnsrecord.Spec.Interval.Duration}, nil
238246
}
239247

240248
// compareDNSRecord compares the DNS record to the DNSRecord object

internal/controller/dnsrecord_controller_test.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,8 @@ func TestDNSRecordReconciler_reconcileDNSRecord(t *testing.T) {
8585
Proxied: new(bool),
8686
}
8787

88-
_ = r.reconcileDNSRecord(context.TODO(), dnsRecord, zone)
88+
_, err := r.reconcileDNSRecord(context.TODO(), dnsRecord, zone)
89+
g.Expect(err).ToNot(HaveOccurred())
8990

9091
g.Expect(dnsRecord.Status.Conditions).To(conditions.MatchConditions([]metav1.Condition{
9192
*conditions.TrueCondition(cloudflareoperatoriov1.ConditionTypeReady, cloudflareoperatoriov1.ConditionReasonReady, "DNS record synced"),
@@ -113,7 +114,8 @@ func TestDNSRecordReconciler_reconcileDNSRecord(t *testing.T) {
113114
},
114115
}
115116

116-
_ = r.reconcileDNSRecord(context.TODO(), dnsRecord, zone)
117+
_, err := r.reconcileDNSRecord(context.TODO(), dnsRecord, zone)
118+
g.Expect(err).ToNot(HaveOccurred())
117119

118120
g.Expect(dnsRecord.Status.Conditions).To(conditions.MatchConditions([]metav1.Condition{
119121
*conditions.TrueCondition(cloudflareoperatoriov1.ConditionTypeReady, cloudflareoperatoriov1.ConditionReasonReady, "DNS record synced"),
@@ -148,7 +150,8 @@ func TestDNSRecordReconciler_reconcileDNSRecord(t *testing.T) {
148150
Proxied: cloudflareDNSRecord.Proxied,
149151
}
150152

151-
_ = r.reconcileDNSRecord(context.TODO(), dnsRecord, zone)
153+
_, err = r.reconcileDNSRecord(context.TODO(), dnsRecord, zone)
154+
g.Expect(err).ToNot(HaveOccurred())
152155

153156
g.Expect(dnsRecord.Status.Conditions).To(conditions.MatchConditions([]metav1.Condition{
154157
*conditions.TrueCondition(cloudflareoperatoriov1.ConditionTypeReady, cloudflareoperatoriov1.ConditionReasonReady, "DNS record synced"),

internal/controller/ingress_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ func (r *IngressReconciler) reconcileIngress(ctx context.Context, ingress *netwo
9999

100100
if err := r.reconcileDNSRecords(ctx, ingress, dnsRecordSpec, existingRecords, ingressHosts); err != nil {
101101
log.Error(err, "Failed to reconcile DNS records")
102-
return ctrl.Result{}, err
102+
return ctrl.Result{RequeueAfter: r.RetryInterval}, nil
103103
}
104104

105105
return ctrl.Result{}, nil

internal/controller/ip_controller.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ type IPReconciler struct {
5757

5858
HTTPClientTimeout time.Duration
5959
DefaultReconcileInterval time.Duration
60+
61+
RetryInterval time.Duration
6062
}
6163

6264
// SetupWithManager sets up the controller with the Manager.
@@ -120,7 +122,7 @@ func (r *IPReconciler) reconcileIP(ctx context.Context, ip *cloudflareoperatorio
120122
case "dynamic":
121123
if err := r.handleDynamic(ctx, ip); err != nil {
122124
intconditions.MarkFalse(ip, err)
123-
return ctrl.Result{}
125+
return ctrl.Result{RequeueAfter: r.RetryInterval}
124126
}
125127
}
126128

internal/controller/zone_controller.go

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package controller
1919
import (
2020
"context"
2121
"errors"
22+
"fmt"
2223
"strings"
2324
"time"
2425

@@ -36,6 +37,7 @@ import (
3637

3738
cloudflareoperatoriov1 "github.com/containeroo/cloudflare-operator/api/v1"
3839
intconditions "github.com/containeroo/cloudflare-operator/internal/conditions"
40+
interrors "github.com/containeroo/cloudflare-operator/internal/errors"
3941
"github.com/containeroo/cloudflare-operator/internal/metrics"
4042
"github.com/fluxcd/pkg/runtime/patch"
4143
)
@@ -60,6 +62,8 @@ type ZoneReconciler struct {
6062
CloudflareAPI *cloudflare.API
6163
}
6264

65+
var errWaitForZone = errors.New("must wait for zone")
66+
6367
// SetupWithManager sets up the controller with the Manager.
6468
func (r *ZoneReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error {
6569
if err := mgr.GetFieldIndexer().IndexField(ctx, &cloudflareoperatoriov1.Zone{}, cloudflareoperatoriov1.ZoneNameIndexKey,
@@ -96,6 +100,13 @@ func (r *ZoneReconciler) Reconcile(ctx context.Context, req ctrl.Request) (resul
96100
patchOpts = append(patchOpts, patch.WithStatusObservedGeneration{})
97101
}
98102

103+
// We do not want to return these errors, but rather wait for the
104+
// designated RequeueAfter to expire and try again.
105+
// However, not returning an error will cause the patch helper to
106+
// patch the observed generation, which we do not want. So we ignore
107+
// these errors here after patching.
108+
retErr = interrors.Ignore(retErr, errWaitForAccount, errWaitForZone)
109+
99110
if err := patchHelper.Patch(ctx, zone, patchOpts...); err != nil {
100111
if !zone.DeletionTimestamp.IsZero() {
101112
err = apierrutil.FilterOut(err, func(e error) bool { return apierrors.IsNotFound(e) })
@@ -114,34 +125,34 @@ func (r *ZoneReconciler) Reconcile(ctx context.Context, req ctrl.Request) (resul
114125
return ctrl.Result{Requeue: true}, nil
115126
}
116127

117-
return r.reconcileZone(ctx, zone), nil
128+
return r.reconcileZone(ctx, zone)
118129
}
119130

120131
// reconcileZone reconciles the zone
121-
func (r *ZoneReconciler) reconcileZone(ctx context.Context, zone *cloudflareoperatoriov1.Zone) ctrl.Result {
132+
func (r *ZoneReconciler) reconcileZone(ctx context.Context, zone *cloudflareoperatoriov1.Zone) (ctrl.Result, error) {
122133
if r.CloudflareAPI.APIToken == "" {
123134
intconditions.MarkUnknown(zone, "Cloudflare account is not ready")
124-
return ctrl.Result{RequeueAfter: r.RetryInterval}
135+
return ctrl.Result{RequeueAfter: r.RetryInterval}, errWaitForAccount
125136
}
126137

127138
zoneID, err := r.CloudflareAPI.ZoneIDByName(zone.Spec.Name)
128139
if err != nil {
129140
intconditions.MarkFalse(zone, err)
130-
return ctrl.Result{}
141+
return ctrl.Result{RequeueAfter: r.RetryInterval}, errWaitForZone
131142
}
132143

133144
zone.Status.ID = zoneID
134145

135146
if zone.Spec.Prune {
136147
if err := r.handlePrune(ctx, zone); err != nil {
137-
intconditions.MarkFalse(zone, err)
138-
return ctrl.Result{RequeueAfter: r.RetryInterval}
148+
intconditions.MarkFalse(zone, fmt.Errorf("failed to prune DNS records: %v", err))
149+
return ctrl.Result{RequeueAfter: r.RetryInterval}, nil
139150
}
140151
}
141152

142153
intconditions.MarkTrue(zone, "Zone is ready")
143154

144-
return ctrl.Result{RequeueAfter: zone.Spec.Interval.Duration}
155+
return ctrl.Result{RequeueAfter: zone.Spec.Interval.Duration}, nil
145156
}
146157

147158
// handlePrune deletes DNS records that are not managed by the operator if enabled
@@ -151,7 +162,7 @@ func (r *ZoneReconciler) handlePrune(ctx context.Context, zone *cloudflareoperat
151162
dnsRecords := &cloudflareoperatoriov1.DNSRecordList{}
152163
if err := r.List(ctx, dnsRecords); err != nil {
153164
log.Error(err, "Failed to list DNSRecords")
154-
return err
165+
return client.IgnoreNotFound(err)
155166
}
156167

157168
cloudflareDNSRecords, _, err := r.CloudflareAPI.ListDNSRecords(ctx, cloudflare.ZoneIdentifier(zone.Status.ID), cloudflare.ListDNSRecordsParams{})

0 commit comments

Comments
 (0)