Skip to content

Commit 0fb2fc8

Browse files
Avoid leaving orphaned InfrastructureCluster when create control plane fails
1 parent 55a6f44 commit 0fb2fc8

File tree

2 files changed

+238
-55
lines changed

2 files changed

+238
-55
lines changed

internal/controllers/topology/cluster/reconcile_state.go

Lines changed: 46 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
apierrors "k8s.io/apimachinery/pkg/api/errors"
2828
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2929
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
30+
kerrors "k8s.io/apimachinery/pkg/util/errors"
3031
"k8s.io/apimachinery/pkg/util/sets"
3132
"k8s.io/apimachinery/pkg/util/validation/field"
3233
"k8s.io/apimachinery/pkg/util/wait"
@@ -75,17 +76,30 @@ func (r *Reconciler) reconcileState(ctx context.Context, s *scope.Scope) error {
7576
}
7677

7778
// Reconcile desired state of the InfrastructureCluster object.
78-
if err := r.reconcileInfrastructureCluster(ctx, s); err != nil {
79-
return err
79+
createdInfraCluster, errInfraCluster := r.reconcileInfrastructureCluster(ctx, s)
80+
if errInfraCluster != nil {
81+
return errInfraCluster
8082
}
8183

8284
// Reconcile desired state of the ControlPlane object.
83-
if err := r.reconcileControlPlane(ctx, s); err != nil {
84-
return err
85+
createdControlPlane, errControlPlane := r.reconcileControlPlane(ctx, s)
86+
if errControlPlane != nil {
87+
// NOTE: report control plane error immediately only if we did not just create the infrastructure cluster; otherwise attempt reconcile cluster before returning.
88+
if !createdInfraCluster {
89+
return errControlPlane
90+
}
91+
92+
// In this case (reconcileInfrastructureCluster passed reporting creation of the infrastructure cluster object, reconcileControlPlane - which is expected to create the control plane object - failed),
93+
// if the creation of the control plane actually did not happen, blank out ControlPlaneRef from desired cluster.
94+
if s.Current.Cluster.Spec.ControlPlaneRef == nil && !createdControlPlane {
95+
s.Desired.Cluster.Spec.ControlPlaneRef = nil
96+
}
8597
}
8698

8799
// Reconcile desired state of the Cluster object.
88-
if err := r.reconcileCluster(ctx, s); err != nil {
100+
errCluster := r.reconcileCluster(ctx, s)
101+
err := kerrors.NewAggregate([]error{errControlPlane, errCluster})
102+
if err != nil {
89103
return err
90104
}
91105

@@ -283,12 +297,12 @@ func (r *Reconciler) callAfterClusterUpgrade(ctx context.Context, s *scope.Scope
283297
}
284298

285299
// reconcileInfrastructureCluster reconciles the desired state of the InfrastructureCluster object.
286-
func (r *Reconciler) reconcileInfrastructureCluster(ctx context.Context, s *scope.Scope) error {
300+
func (r *Reconciler) reconcileInfrastructureCluster(ctx context.Context, s *scope.Scope) (bool, error) {
287301
ctx, _ = tlog.LoggerFrom(ctx).WithObject(s.Desired.InfrastructureCluster).Into(ctx)
288302

289303
ignorePaths, err := contract.InfrastructureCluster().IgnorePaths(s.Desired.InfrastructureCluster)
290304
if err != nil {
291-
return errors.Wrap(err, "failed to calculate ignore paths")
305+
return false, errors.Wrap(err, "failed to calculate ignore paths")
292306
}
293307

294308
return r.reconcileReferencedObject(ctx, reconcileReferencedObjectInput{
@@ -301,30 +315,30 @@ func (r *Reconciler) reconcileInfrastructureCluster(ctx context.Context, s *scop
301315

302316
// reconcileControlPlane works to bring the current state of a managed topology in line with the desired state. This involves
303317
// updating the cluster where needed.
304-
func (r *Reconciler) reconcileControlPlane(ctx context.Context, s *scope.Scope) error {
318+
func (r *Reconciler) reconcileControlPlane(ctx context.Context, s *scope.Scope) (bool, error) {
305319
// If the ControlPlane has defined a current or desired MachineHealthCheck attempt to reconcile it.
306320
// MHC changes are not Kubernetes version dependent, therefore proceed with MHC reconciliation
307321
// even if the Control Plane is pending an upgrade.
308322
if s.Desired.ControlPlane.MachineHealthCheck != nil || s.Current.ControlPlane.MachineHealthCheck != nil {
309323
// Reconcile the current and desired state of the MachineHealthCheck.
310324
if err := r.reconcileMachineHealthCheck(ctx, s.Current.ControlPlane.MachineHealthCheck, s.Desired.ControlPlane.MachineHealthCheck); err != nil {
311-
return err
325+
return false, err
312326
}
313327
}
314328

315329
// Return early if the control plane is pending an upgrade.
316330
// Do not reconcile the control plane yet to avoid updating the control plane while it is still pending a
317331
// version upgrade. This will prevent the control plane from performing a double rollout.
318332
if s.UpgradeTracker.ControlPlane.IsPendingUpgrade {
319-
return nil
333+
return false, nil
320334
}
321335
// If the clusterClass mandates the controlPlane has infrastructureMachines, reconcile it.
322336
if s.Blueprint.HasControlPlaneInfrastructureMachine() {
323337
ctx, _ := tlog.LoggerFrom(ctx).WithObject(s.Desired.ControlPlane.InfrastructureMachineTemplate).Into(ctx)
324338

325339
cpInfraRef, err := contract.ControlPlane().MachineTemplate().InfrastructureRef().Get(s.Desired.ControlPlane.Object)
326340
if err != nil {
327-
return errors.Wrapf(err, "failed to reconcile %s", tlog.KObj{Obj: s.Desired.ControlPlane.InfrastructureMachineTemplate})
341+
return false, errors.Wrapf(err, "failed to reconcile %s", tlog.KObj{Obj: s.Desired.ControlPlane.InfrastructureMachineTemplate})
328342
}
329343

330344
// Create or update the MachineInfrastructureTemplate of the control plane.
@@ -337,25 +351,26 @@ func (r *Reconciler) reconcileControlPlane(ctx context.Context, s *scope.Scope)
337351
templateNamePrefix: controlPlaneInfrastructureMachineTemplateNamePrefix(s.Current.Cluster.Name),
338352
},
339353
); err != nil {
340-
return err
354+
return false, err
341355
}
342356

343357
// The controlPlaneObject.Spec.machineTemplate.infrastructureRef has to be updated in the desired object
344358
err = contract.ControlPlane().MachineTemplate().InfrastructureRef().Set(s.Desired.ControlPlane.Object, refToUnstructured(cpInfraRef))
345359
if err != nil {
346-
return errors.Wrapf(err, "failed to reconcile %s", tlog.KObj{Obj: s.Desired.ControlPlane.Object})
360+
return false, errors.Wrapf(err, "failed to reconcile %s", tlog.KObj{Obj: s.Desired.ControlPlane.Object})
347361
}
348362
}
349363

350364
// Create or update the ControlPlaneObject for the ControlPlaneState.
351365
ctx, _ = tlog.LoggerFrom(ctx).WithObject(s.Desired.ControlPlane.Object).Into(ctx)
352-
if err := r.reconcileReferencedObject(ctx, reconcileReferencedObjectInput{
366+
created, err := r.reconcileReferencedObject(ctx, reconcileReferencedObjectInput{
353367
cluster: s.Current.Cluster,
354368
current: s.Current.ControlPlane.Object,
355369
desired: s.Desired.ControlPlane.Object,
356370
versionGetter: contract.ControlPlane().Version().Get,
357-
}); err != nil {
358-
return err
371+
})
372+
if err != nil {
373+
return created, err
359374
}
360375

361376
// If the controlPlane has infrastructureMachines and the InfrastructureMachineTemplate has changed on this reconcile
@@ -364,15 +379,15 @@ func (r *Reconciler) reconcileControlPlane(ctx context.Context, s *scope.Scope)
364379
if s.Blueprint.HasControlPlaneInfrastructureMachine() && s.Current.ControlPlane.InfrastructureMachineTemplate != nil {
365380
if s.Current.ControlPlane.InfrastructureMachineTemplate.GetName() != s.Desired.ControlPlane.InfrastructureMachineTemplate.GetName() {
366381
if err := r.Client.Delete(ctx, s.Current.ControlPlane.InfrastructureMachineTemplate); err != nil {
367-
return errors.Wrapf(err, "failed to delete oldinfrastructure machine template %s of control plane %s",
382+
return created, errors.Wrapf(err, "failed to delete oldinfrastructure machine template %s of control plane %s",
368383
tlog.KObj{Obj: s.Current.ControlPlane.InfrastructureMachineTemplate},
369384
tlog.KObj{Obj: s.Current.ControlPlane.Object},
370385
)
371386
}
372387
}
373388
}
374389

375-
return nil
390+
return created, nil
376391
}
377392

378393
// reconcileMachineHealthCheck creates, updates, deletes or leaves untouched a MachineHealthCheck depending on the difference between the
@@ -824,15 +839,15 @@ func (r *Reconciler) createMachinePool(ctx context.Context, s *scope.Scope, mp *
824839
log := tlog.LoggerFrom(ctx).WithMachinePool(mp.Object)
825840
cluster := s.Current.Cluster
826841
infraCtx, _ := log.WithObject(mp.InfrastructureMachinePoolObject).Into(ctx)
827-
if err := r.reconcileReferencedObject(infraCtx, reconcileReferencedObjectInput{
842+
if _, err := r.reconcileReferencedObject(infraCtx, reconcileReferencedObjectInput{
828843
cluster: cluster,
829844
desired: mp.InfrastructureMachinePoolObject,
830845
}); err != nil {
831846
return errors.Wrapf(err, "failed to create %s", mp.Object.Kind)
832847
}
833848

834849
bootstrapCtx, _ := log.WithObject(mp.BootstrapObject).Into(ctx)
835-
if err := r.reconcileReferencedObject(bootstrapCtx, reconcileReferencedObjectInput{
850+
if _, err := r.reconcileReferencedObject(bootstrapCtx, reconcileReferencedObjectInput{
836851
cluster: cluster,
837852
desired: mp.BootstrapObject,
838853
}); err != nil {
@@ -883,7 +898,7 @@ func (r *Reconciler) updateMachinePool(ctx context.Context, s *scope.Scope, curr
883898

884899
cluster := s.Current.Cluster
885900
infraCtx, _ := log.WithObject(desiredMP.InfrastructureMachinePoolObject).Into(ctx)
886-
if err := r.reconcileReferencedObject(infraCtx, reconcileReferencedObjectInput{
901+
if _, err := r.reconcileReferencedObject(infraCtx, reconcileReferencedObjectInput{
887902
cluster: cluster,
888903
current: currentMP.InfrastructureMachinePoolObject,
889904
desired: desiredMP.InfrastructureMachinePoolObject,
@@ -892,7 +907,7 @@ func (r *Reconciler) updateMachinePool(ctx context.Context, s *scope.Scope, curr
892907
}
893908

894909
bootstrapCtx, _ := log.WithObject(desiredMP.BootstrapObject).Into(ctx)
895-
if err := r.reconcileReferencedObject(bootstrapCtx, reconcileReferencedObjectInput{
910+
if _, err := r.reconcileReferencedObject(bootstrapCtx, reconcileReferencedObjectInput{
896911
cluster: cluster,
897912
current: currentMP.BootstrapObject,
898913
desired: desiredMP.BootstrapObject,
@@ -1022,44 +1037,44 @@ type reconcileReferencedObjectInput struct {
10221037
// reconcileReferencedObject reconciles the desired state of the referenced object.
10231038
// NOTE: After a referenced object is created it is assumed that the reference should
10241039
// never change (only the content of the object can eventually change). Thus, we are checking for strict compatibility.
1025-
func (r *Reconciler) reconcileReferencedObject(ctx context.Context, in reconcileReferencedObjectInput) error {
1040+
func (r *Reconciler) reconcileReferencedObject(ctx context.Context, in reconcileReferencedObjectInput) (bool, error) {
10261041
log := tlog.LoggerFrom(ctx)
10271042

10281043
// If there is no current object, create it.
10291044
if in.current == nil {
10301045
log.Infof("Creating %s", tlog.KObj{Obj: in.desired})
10311046
helper, err := r.patchHelperFactory(ctx, nil, in.desired, structuredmerge.IgnorePaths(in.ignorePaths))
10321047
if err != nil {
1033-
return errors.Wrap(createErrorWithoutObjectName(ctx, err, in.desired), "failed to create patch helper")
1048+
return false, errors.Wrap(createErrorWithoutObjectName(ctx, err, in.desired), "failed to create patch helper")
10341049
}
10351050
if err := helper.Patch(ctx); err != nil {
1036-
return createErrorWithoutObjectName(ctx, err, in.desired)
1051+
return false, createErrorWithoutObjectName(ctx, err, in.desired)
10371052
}
10381053
r.recorder.Eventf(in.cluster, corev1.EventTypeNormal, createEventReason, "Created %q", tlog.KObj{Obj: in.desired})
1039-
return nil
1054+
return true, nil
10401055
}
10411056

10421057
// Check if the current and desired referenced object are compatible.
10431058
if allErrs := check.ObjectsAreStrictlyCompatible(in.current, in.desired); len(allErrs) > 0 {
1044-
return allErrs.ToAggregate()
1059+
return false, allErrs.ToAggregate()
10451060
}
10461061

10471062
// Check differences between current and desired state, and eventually patch the current object.
10481063
patchHelper, err := r.patchHelperFactory(ctx, in.current, in.desired, structuredmerge.IgnorePaths(in.ignorePaths))
10491064
if err != nil {
1050-
return errors.Wrapf(err, "failed to create patch helper for %s", tlog.KObj{Obj: in.current})
1065+
return false, errors.Wrapf(err, "failed to create patch helper for %s", tlog.KObj{Obj: in.current})
10511066
}
10521067
if !patchHelper.HasChanges() {
10531068
log.V(3).Infof("No changes for %s", tlog.KObj{Obj: in.desired})
1054-
return nil
1069+
return false, nil
10551070
}
10561071

10571072
log.Infof("Patching %s", tlog.KObj{Obj: in.desired})
10581073
if err := patchHelper.Patch(ctx); err != nil {
1059-
return errors.Wrapf(err, "failed to patch %s", tlog.KObj{Obj: in.current})
1074+
return false, errors.Wrapf(err, "failed to patch %s", tlog.KObj{Obj: in.current})
10601075
}
10611076
r.recorder.Eventf(in.cluster, corev1.EventTypeNormal, updateEventReason, "Updated %q%s", tlog.KObj{Obj: in.desired}, logUnstructuredVersionChange(in.current, in.desired, in.versionGetter))
1062-
return nil
1077+
return false, nil
10631078
}
10641079

10651080
func logUnstructuredVersionChange(current, desired *unstructured.Unstructured, versionGetter unstructuredVersionGetter) string {

0 commit comments

Comments
 (0)