Skip to content

Commit 7c8ba1d

Browse files
Add machine UpToDate condition to KCP
# Conflicts: # controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go
1 parent bf61b1e commit 7c8ba1d

File tree

11 files changed

+624
-117
lines changed

11 files changed

+624
-117
lines changed

api/v1beta1/machine_types.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,15 @@ const (
123123
const (
124124
// MachineUpToDateV1Beta2Condition is true if the Machine spec matches the spec of the Machine's owner resource, e.g. KubeadmControlPlane or MachineDeployment.
125125
// The Machine's owner (e.g. MachineDeployment) is authoritative to set their owned Machine's UpToDate conditions based on its current spec.
126+
// NOTE: The Machine's owner might also use this condition to surface other cases in which the Machine is considered not up to date, e.g. when MachineDeployment spec.rolloutAfter
127+
// is expired and the Machine needs to be rolled out.
126128
MachineUpToDateV1Beta2Condition = "UpToDate"
129+
130+
// MachineUpToDateV1Beta2Reason surfaces when a Machine spec matches the spec of the Machine's owner resource, e.g. KubeadmControlPlane or MachineDeployment.
131+
MachineUpToDateV1Beta2Reason = "UpToDate"
132+
133+
// MachineNotUpToDateV1Beta2Reason surfaces when a Machine spec does not match the spec of the Machine's owner resource, e.g. KubeadmControlPlane or MachineDeployment.
134+
MachineNotUpToDateV1Beta2Reason = "NotUpToDate"
127135
)
128136

129137
// Machine's BootstrapConfigReady condition and corresponding reasons that will be used in v1Beta2 API version.

controlplane/kubeadm/internal/control_plane.go

Lines changed: 46 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ type ControlPlane struct {
4444
Machines collections.Machines
4545
machinesPatchHelpers map[string]*patch.Helper
4646

47+
machinesNotUptoDate collections.Machines
48+
machinesNotUptoDateLogMessages map[string][]string
49+
machinesNotUptoDateConditionMessages map[string][]string
50+
4751
// reconciliationTime is the time of the current reconciliation, and should be used for all "now" calculations
4852
reconciliationTime metav1.Time
4953

@@ -97,15 +101,35 @@ func NewControlPlane(ctx context.Context, managementCluster ManagementCluster, c
97101
patchHelpers[machine.Name] = patchHelper
98102
}
99103

104+
// Select machines that should be rolled out because of an outdated configuration or because rolloutAfter/Before expired.
105+
reconciliationTime := metav1.Now()
106+
machinesNotUptoDate := make(collections.Machines, len(ownedMachines))
107+
machinesNotUptoDateLogMessages := map[string][]string{}
108+
machinesNotUptoDateConditionMessages := map[string][]string{}
109+
for _, m := range ownedMachines {
110+
upToDate, logMessages, conditionMessages, err := UpToDate(m, kcp, &reconciliationTime, infraObjects, kubeadmConfigs)
111+
if err != nil {
112+
return nil, err
113+
}
114+
if !upToDate {
115+
machinesNotUptoDate.Insert(m)
116+
machinesNotUptoDateLogMessages[m.Name] = logMessages
117+
machinesNotUptoDateConditionMessages[m.Name] = conditionMessages
118+
}
119+
}
120+
100121
return &ControlPlane{
101-
KCP: kcp,
102-
Cluster: cluster,
103-
Machines: ownedMachines,
104-
machinesPatchHelpers: patchHelpers,
105-
KubeadmConfigs: kubeadmConfigs,
106-
InfraResources: infraObjects,
107-
reconciliationTime: metav1.Now(),
108-
managementCluster: managementCluster,
122+
KCP: kcp,
123+
Cluster: cluster,
124+
Machines: ownedMachines,
125+
machinesPatchHelpers: patchHelpers,
126+
machinesNotUptoDate: machinesNotUptoDate,
127+
machinesNotUptoDateLogMessages: machinesNotUptoDateLogMessages,
128+
machinesNotUptoDateConditionMessages: machinesNotUptoDateConditionMessages,
129+
KubeadmConfigs: kubeadmConfigs,
130+
InfraResources: infraObjects,
131+
reconciliationTime: reconciliationTime,
132+
managementCluster: managementCluster,
109133
}, nil
110134
}
111135

@@ -152,16 +176,12 @@ func (c *ControlPlane) FailureDomainWithMostMachines(ctx context.Context, machin
152176
return failuredomains.PickMost(ctx, c.Cluster.Status.FailureDomains.FilterControlPlane(), c.Machines, machines)
153177
}
154178

155-
// NextFailureDomainForScaleUp returns the failure domain with the fewest number of up-to-date machines.
179+
// NextFailureDomainForScaleUp returns the failure domain with the fewest number of up-to-date, not deleted machines.
156180
func (c *ControlPlane) NextFailureDomainForScaleUp(ctx context.Context) (*string, error) {
157181
if len(c.Cluster.Status.FailureDomains.FilterControlPlane()) == 0 {
158182
return nil, nil
159183
}
160-
upToDateMachines, err := c.UpToDateMachines()
161-
if err != nil {
162-
return nil, errors.Wrapf(err, "failed to determine next failure domain for scale up")
163-
}
164-
return failuredomains.PickFewest(ctx, c.FailureDomains().FilterControlPlane(), upToDateMachines), nil
184+
return failuredomains.PickFewest(ctx, c.FailureDomains().FilterControlPlane(), c.UpToDateMachines().Filter(collections.Not(collections.HasDeletionTimestamp))), nil
165185
}
166186

167187
// InitialControlPlaneConfig returns a new KubeadmConfigSpec that is to be used for an initializing control plane.
@@ -198,40 +218,21 @@ func (c *ControlPlane) GetKubeadmConfig(machineName string) (*bootstrapv1.Kubead
198218
}
199219

200220
// MachinesNeedingRollout returns a list of machines that need to be rolled out.
201-
func (c *ControlPlane) MachinesNeedingRollout() (collections.Machines, map[string]string, error) {
202-
// Ignore machines to be deleted.
203-
machines := c.Machines.Filter(collections.Not(collections.HasDeletionTimestamp))
221+
func (c *ControlPlane) MachinesNeedingRollout() (collections.Machines, map[string][]string) {
222+
// Note: Machines that already have a deletionTimestamp are dropped because they will be replaced by new machines after deletion completes.
223+
return c.machinesNotUptoDate.Filter(collections.Not(collections.HasDeletionTimestamp)), c.machinesNotUptoDateLogMessages
224+
}
204225

205-
// Return machines if they are scheduled for rollout or if with an outdated configuration.
206-
machinesNeedingRollout := make(collections.Machines, len(machines))
207-
rolloutReasons := map[string]string{}
208-
for _, m := range machines {
209-
reason, needsRollout, err := NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.InfraResources, c.KubeadmConfigs, c.KCP, m)
210-
if err != nil {
211-
return nil, nil, err
212-
}
213-
if needsRollout {
214-
machinesNeedingRollout.Insert(m)
215-
rolloutReasons[m.Name] = reason
216-
}
217-
}
218-
return machinesNeedingRollout, rolloutReasons, nil
226+
// NotUpToDateMachines returns a list of machines that are not up to date with the control
227+
// plane's configuration.
228+
func (c *ControlPlane) NotUpToDateMachines() (collections.Machines, map[string][]string) {
229+
return c.machinesNotUptoDate, c.machinesNotUptoDateConditionMessages
219230
}
220231

221232
// UpToDateMachines returns the machines that are up to date with the control
222-
// plane's configuration and therefore do not require rollout.
223-
func (c *ControlPlane) UpToDateMachines() (collections.Machines, error) {
224-
upToDateMachines := make(collections.Machines, len(c.Machines))
225-
for _, m := range c.Machines {
226-
_, needsRollout, err := NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.InfraResources, c.KubeadmConfigs, c.KCP, m)
227-
if err != nil {
228-
return nil, err
229-
}
230-
if !needsRollout {
231-
upToDateMachines.Insert(m)
232-
}
233-
}
234-
return upToDateMachines, nil
233+
// plane's configuration.
234+
func (c *ControlPlane) UpToDateMachines() collections.Machines {
235+
return c.Machines.Difference(c.machinesNotUptoDate)
235236
}
236237

237238
// getInfraResources fetches the external infrastructure resource for each machine in the collection and returns a map of machine.Name -> infraResource.
@@ -316,6 +317,7 @@ func (c *ControlPlane) PatchMachines(ctx context.Context) error {
316317
controlplanev1.MachineEtcdPodHealthyCondition,
317318
controlplanev1.MachineEtcdMemberHealthyCondition,
318319
}}, patch.WithOwnedV1Beta2Conditions{Conditions: []string{
320+
clusterv1.MachineUpToDateV1Beta2Condition,
319321
controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition,
320322
controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition,
321323
controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition,

controlplane/kubeadm/internal/control_plane_test.go

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
. "github.com/onsi/gomega"
2323
corev1 "k8s.io/api/core/v1"
2424
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
25+
"k8s.io/utils/ptr"
2526

2627
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
2728
controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
@@ -30,8 +31,6 @@ import (
3031
)
3132

3233
func TestControlPlane(t *testing.T) {
33-
g := NewWithT(t)
34-
3534
t.Run("Failure domains", func(t *testing.T) {
3635
controlPlane := &ControlPlane{
3736
KCP: &controlplanev1.KubeadmControlPlane{},
@@ -53,14 +52,88 @@ func TestControlPlane(t *testing.T) {
5352
}
5453

5554
t.Run("With all machines in known failure domain, should return the FD with most number of machines", func(*testing.T) {
55+
g := NewWithT(t)
5656
g.Expect(*controlPlane.FailureDomainWithMostMachines(ctx, controlPlane.Machines)).To(Equal("two"))
5757
})
5858

5959
t.Run("With some machines in non defined failure domains", func(*testing.T) {
60+
g := NewWithT(t)
6061
controlPlane.Machines.Insert(machine("machine-5", withFailureDomain("unknown")))
6162
g.Expect(*controlPlane.FailureDomainWithMostMachines(ctx, controlPlane.Machines)).To(Equal("unknown"))
6263
})
6364
})
65+
66+
t.Run("MachinesUpToDate", func(t *testing.T) {
67+
g := NewWithT(t)
68+
cluster := &clusterv1.Cluster{
69+
Status: clusterv1.ClusterStatus{
70+
FailureDomains: clusterv1.FailureDomains{
71+
"one": failureDomain(true),
72+
"two": failureDomain(true),
73+
"three": failureDomain(true),
74+
},
75+
},
76+
}
77+
kcp := &controlplanev1.KubeadmControlPlane{
78+
Spec: controlplanev1.KubeadmControlPlaneSpec{
79+
Version: "v1.31.0",
80+
},
81+
}
82+
machines := collections.Machines{
83+
"machine-1": &clusterv1.Machine{
84+
ObjectMeta: metav1.ObjectMeta{Name: "m1"},
85+
Spec: clusterv1.MachineSpec{
86+
Version: ptr.To("v1.31.0"),
87+
FailureDomain: ptr.To("one"),
88+
InfrastructureRef: corev1.ObjectReference{Kind: "GenericInfrastructureMachine", APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", Name: "m1"},
89+
}},
90+
"machine-2": &clusterv1.Machine{
91+
ObjectMeta: metav1.ObjectMeta{Name: "m2"},
92+
Spec: clusterv1.MachineSpec{
93+
Version: ptr.To("v1.29.0"), // not up-to-date
94+
FailureDomain: ptr.To("two"),
95+
InfrastructureRef: corev1.ObjectReference{Kind: "GenericInfrastructureMachine", APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", Name: "m2"},
96+
}},
97+
"machine-3": &clusterv1.Machine{
98+
ObjectMeta: metav1.ObjectMeta{Name: "m3", DeletionTimestamp: ptr.To(metav1.Now())}, // deleted
99+
Spec: clusterv1.MachineSpec{
100+
Version: ptr.To("v1.29.3"), // not up-to-date
101+
FailureDomain: ptr.To("three"),
102+
InfrastructureRef: corev1.ObjectReference{Kind: "GenericInfrastructureMachine", APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", Name: "m3"},
103+
}},
104+
"machine-4": &clusterv1.Machine{
105+
ObjectMeta: metav1.ObjectMeta{Name: "m4", DeletionTimestamp: ptr.To(metav1.Now())}, // deleted
106+
Spec: clusterv1.MachineSpec{
107+
Version: ptr.To("v1.31.0"),
108+
FailureDomain: ptr.To("two"),
109+
InfrastructureRef: corev1.ObjectReference{Kind: "GenericInfrastructureMachine", APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", Name: "m1"},
110+
}},
111+
}
112+
controlPlane, err := NewControlPlane(ctx, nil, env.GetClient(), cluster, kcp, machines)
113+
g.Expect(err).NotTo(HaveOccurred())
114+
115+
g.Expect(controlPlane.Machines).To(HaveLen(4))
116+
117+
machinesNotUptoDate, machinesNotUptoDateConditionMessages := controlPlane.NotUpToDateMachines()
118+
g.Expect(machinesNotUptoDate.Names()).To(ConsistOf("m2", "m3"))
119+
g.Expect(machinesNotUptoDateConditionMessages).To(HaveLen(2))
120+
g.Expect(machinesNotUptoDateConditionMessages).To(HaveKeyWithValue("m2", []string{"Version v1.29.0, v1.31.0 required"}))
121+
g.Expect(machinesNotUptoDateConditionMessages).To(HaveKeyWithValue("m3", []string{"Version v1.29.3, v1.31.0 required"}))
122+
123+
machinesNeedingRollout, machinesNotUptoDateLogMessages := controlPlane.MachinesNeedingRollout()
124+
g.Expect(machinesNeedingRollout.Names()).To(ConsistOf("m2"))
125+
g.Expect(machinesNotUptoDateLogMessages).To(HaveLen(2))
126+
g.Expect(machinesNotUptoDateLogMessages).To(HaveKeyWithValue("m2", []string{"Machine version \"v1.29.0\" is not equal to KCP version \"v1.31.0\""}))
127+
g.Expect(machinesNotUptoDateLogMessages).To(HaveKeyWithValue("m3", []string{"Machine version \"v1.29.3\" is not equal to KCP version \"v1.31.0\""}))
128+
129+
upToDateMachines := controlPlane.UpToDateMachines()
130+
g.Expect(upToDateMachines).To(HaveLen(2))
131+
g.Expect(upToDateMachines.Names()).To(ConsistOf("m1", "m4"))
132+
133+
fd, err := controlPlane.NextFailureDomainForScaleUp(ctx)
134+
g.Expect(err).NotTo(HaveOccurred())
135+
g.Expect(fd).To(Equal(ptr.To("two"))) // deleted up-to-date machines should not be counted when picking the next failure domain for scale up
136+
})
64137
}
65138

66139
func TestHasMachinesToBeRemediated(t *testing.T) {

0 commit comments

Comments
 (0)