Skip to content

Commit 0f319c6

Browse files
authored
Merge pull request #11389 from fabriziopandini/add-machine-upToDate-condition-to-kcp
✨ Add machine UpToDate condition to KCP
2 parents fe0aa63 + 7c8ba1d commit 0f319c6

File tree

11 files changed

+624
-117
lines changed

11 files changed

+624
-117
lines changed

api/v1beta1/machine_types.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,15 @@ const (
123123
const (
124124
// MachineUpToDateV1Beta2Condition is true if the Machine spec matches the spec of the Machine's owner resource, e.g. KubeadmControlPlane or MachineDeployment.
125125
// The Machine's owner (e.g. MachineDeployment) is authoritative to set their owned Machine's UpToDate conditions based on its current spec.
126+
// NOTE: The Machine's owner might use this condition to surface also other use cases when Machine is considered not up to date, e.g. when MachineDeployment spec.rolloutAfter
127+
// is expired and the Machine needs to be rolled out.
126128
MachineUpToDateV1Beta2Condition = "UpToDate"
129+
130+
// MachineUpToDateV1Beta2Reason surface when a Machine spec matches the spec of the Machine's owner resource, e.g. KubeadmControlPlane or MachineDeployment.
131+
MachineUpToDateV1Beta2Reason = "UpToDate"
132+
133+
// MachineNotUpToDateV1Beta2Reason surface when a Machine spec does not match the spec of the Machine's owner resource, e.g. KubeadmControlPlane or MachineDeployment.
134+
MachineNotUpToDateV1Beta2Reason = "NotUpToDate"
127135
)
128136

129137
// Machine's BootstrapConfigReady condition and corresponding reasons that will be used in v1Beta2 API version.

controlplane/kubeadm/internal/control_plane.go

Lines changed: 46 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ type ControlPlane struct {
4545
Machines collections.Machines
4646
machinesPatchHelpers map[string]*patch.Helper
4747

48+
machinesNotUptoDate collections.Machines
49+
machinesNotUptoDateLogMessages map[string][]string
50+
machinesNotUptoDateConditionMessages map[string][]string
51+
4852
// reconciliationTime is the time of the current reconciliation, and should be used for all "now" calculations
4953
reconciliationTime metav1.Time
5054

@@ -108,15 +112,35 @@ func NewControlPlane(ctx context.Context, managementCluster ManagementCluster, c
108112
patchHelpers[machine.Name] = patchHelper
109113
}
110114

115+
// Select machines that should be rolled out because of an outdated configuration or because rolloutAfter/Before expired.
116+
reconciliationTime := metav1.Now()
117+
machinesNotUptoDate := make(collections.Machines, len(ownedMachines))
118+
machinesNotUptoDateLogMessages := map[string][]string{}
119+
machinesNotUptoDateConditionMessages := map[string][]string{}
120+
for _, m := range ownedMachines {
121+
upToDate, logMessages, conditionMessages, err := UpToDate(m, kcp, &reconciliationTime, infraObjects, kubeadmConfigs)
122+
if err != nil {
123+
return nil, err
124+
}
125+
if !upToDate {
126+
machinesNotUptoDate.Insert(m)
127+
machinesNotUptoDateLogMessages[m.Name] = logMessages
128+
machinesNotUptoDateConditionMessages[m.Name] = conditionMessages
129+
}
130+
}
131+
111132
return &ControlPlane{
112-
KCP: kcp,
113-
Cluster: cluster,
114-
Machines: ownedMachines,
115-
machinesPatchHelpers: patchHelpers,
116-
KubeadmConfigs: kubeadmConfigs,
117-
InfraResources: infraObjects,
118-
reconciliationTime: metav1.Now(),
119-
managementCluster: managementCluster,
133+
KCP: kcp,
134+
Cluster: cluster,
135+
Machines: ownedMachines,
136+
machinesPatchHelpers: patchHelpers,
137+
machinesNotUptoDate: machinesNotUptoDate,
138+
machinesNotUptoDateLogMessages: machinesNotUptoDateLogMessages,
139+
machinesNotUptoDateConditionMessages: machinesNotUptoDateConditionMessages,
140+
KubeadmConfigs: kubeadmConfigs,
141+
InfraResources: infraObjects,
142+
reconciliationTime: reconciliationTime,
143+
managementCluster: managementCluster,
120144
}, nil
121145
}
122146

@@ -163,16 +187,12 @@ func (c *ControlPlane) FailureDomainWithMostMachines(ctx context.Context, machin
163187
return failuredomains.PickMost(ctx, c.Cluster.Status.FailureDomains.FilterControlPlane(), c.Machines, machines)
164188
}
165189

166-
// NextFailureDomainForScaleUp returns the failure domain with the fewest number of up-to-date machines.
190+
// NextFailureDomainForScaleUp returns the failure domain with the fewest number of up-to-date, not deleted machines.
167191
func (c *ControlPlane) NextFailureDomainForScaleUp(ctx context.Context) (*string, error) {
168192
if len(c.Cluster.Status.FailureDomains.FilterControlPlane()) == 0 {
169193
return nil, nil
170194
}
171-
upToDateMachines, err := c.UpToDateMachines()
172-
if err != nil {
173-
return nil, errors.Wrapf(err, "failed to determine next failure domain for scale up")
174-
}
175-
return failuredomains.PickFewest(ctx, c.FailureDomains().FilterControlPlane(), upToDateMachines), nil
195+
return failuredomains.PickFewest(ctx, c.FailureDomains().FilterControlPlane(), c.UpToDateMachines().Filter(collections.Not(collections.HasDeletionTimestamp))), nil
176196
}
177197

178198
// InitialControlPlaneConfig returns a new KubeadmConfigSpec that is to be used for an initializing control plane.
@@ -209,40 +229,21 @@ func (c *ControlPlane) GetKubeadmConfig(machineName string) (*bootstrapv1.Kubead
209229
}
210230

211231
// MachinesNeedingRollout return a list of machines that need to be rolled out.
212-
func (c *ControlPlane) MachinesNeedingRollout() (collections.Machines, map[string]string, error) {
213-
// Ignore machines to be deleted.
214-
machines := c.Machines.Filter(collections.Not(collections.HasDeletionTimestamp))
232+
func (c *ControlPlane) MachinesNeedingRollout() (collections.Machines, map[string][]string) {
233+
// Note: Machines already deleted are dropped because they will be replaced by new machines after deletion completes.
234+
return c.machinesNotUptoDate.Filter(collections.Not(collections.HasDeletionTimestamp)), c.machinesNotUptoDateLogMessages
235+
}
215236

216-
// Return machines if they are scheduled for rollout or if with an outdated configuration.
217-
machinesNeedingRollout := make(collections.Machines, len(machines))
218-
rolloutReasons := map[string]string{}
219-
for _, m := range machines {
220-
reason, needsRollout, err := NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.InfraResources, c.KubeadmConfigs, c.KCP, m)
221-
if err != nil {
222-
return nil, nil, err
223-
}
224-
if needsRollout {
225-
machinesNeedingRollout.Insert(m)
226-
rolloutReasons[m.Name] = reason
227-
}
228-
}
229-
return machinesNeedingRollout, rolloutReasons, nil
237+
// NotUpToDateMachines return a list of machines that are not up to date with the control
238+
// plane's configuration.
239+
func (c *ControlPlane) NotUpToDateMachines() (collections.Machines, map[string][]string) {
240+
return c.machinesNotUptoDate, c.machinesNotUptoDateConditionMessages
230241
}
231242

232243
// UpToDateMachines returns the machines that are up to date with the control
233-
// plane's configuration and therefore do not require rollout.
234-
func (c *ControlPlane) UpToDateMachines() (collections.Machines, error) {
235-
upToDateMachines := make(collections.Machines, len(c.Machines))
236-
for _, m := range c.Machines {
237-
_, needsRollout, err := NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.InfraResources, c.KubeadmConfigs, c.KCP, m)
238-
if err != nil {
239-
return nil, err
240-
}
241-
if !needsRollout {
242-
upToDateMachines.Insert(m)
243-
}
244-
}
245-
return upToDateMachines, nil
244+
// plane's configuration.
245+
func (c *ControlPlane) UpToDateMachines() collections.Machines {
246+
return c.Machines.Difference(c.machinesNotUptoDate)
246247
}
247248

248249
// getInfraResources fetches the external infrastructure resource for each machine in the collection and returns a map of machine.Name -> infraResource.
@@ -327,6 +328,7 @@ func (c *ControlPlane) PatchMachines(ctx context.Context) error {
327328
controlplanev1.MachineEtcdPodHealthyCondition,
328329
controlplanev1.MachineEtcdMemberHealthyCondition,
329330
}}, patch.WithOwnedV1Beta2Conditions{Conditions: []string{
331+
clusterv1.MachineUpToDateV1Beta2Condition,
330332
controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition,
331333
controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition,
332334
controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition,

controlplane/kubeadm/internal/control_plane_test.go

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
. "github.com/onsi/gomega"
2323
corev1 "k8s.io/api/core/v1"
2424
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
25+
"k8s.io/utils/ptr"
2526

2627
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
2728
controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
@@ -30,8 +31,6 @@ import (
3031
)
3132

3233
func TestControlPlane(t *testing.T) {
33-
g := NewWithT(t)
34-
3534
t.Run("Failure domains", func(t *testing.T) {
3635
controlPlane := &ControlPlane{
3736
KCP: &controlplanev1.KubeadmControlPlane{},
@@ -53,14 +52,88 @@ func TestControlPlane(t *testing.T) {
5352
}
5453

5554
t.Run("With all machines in known failure domain, should return the FD with most number of machines", func(*testing.T) {
55+
g := NewWithT(t)
5656
g.Expect(*controlPlane.FailureDomainWithMostMachines(ctx, controlPlane.Machines)).To(Equal("two"))
5757
})
5858

5959
t.Run("With some machines in non defined failure domains", func(*testing.T) {
60+
g := NewWithT(t)
6061
controlPlane.Machines.Insert(machine("machine-5", withFailureDomain("unknown")))
6162
g.Expect(*controlPlane.FailureDomainWithMostMachines(ctx, controlPlane.Machines)).To(Equal("unknown"))
6263
})
6364
})
65+
66+
t.Run("MachinesUpToDate", func(t *testing.T) {
67+
g := NewWithT(t)
68+
cluster := &clusterv1.Cluster{
69+
Status: clusterv1.ClusterStatus{
70+
FailureDomains: clusterv1.FailureDomains{
71+
"one": failureDomain(true),
72+
"two": failureDomain(true),
73+
"three": failureDomain(true),
74+
},
75+
},
76+
}
77+
kcp := &controlplanev1.KubeadmControlPlane{
78+
Spec: controlplanev1.KubeadmControlPlaneSpec{
79+
Version: "v1.31.0",
80+
},
81+
}
82+
machines := collections.Machines{
83+
"machine-1": &clusterv1.Machine{
84+
ObjectMeta: metav1.ObjectMeta{Name: "m1"},
85+
Spec: clusterv1.MachineSpec{
86+
Version: ptr.To("v1.31.0"),
87+
FailureDomain: ptr.To("one"),
88+
InfrastructureRef: corev1.ObjectReference{Kind: "GenericInfrastructureMachine", APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", Name: "m1"},
89+
}},
90+
"machine-2": &clusterv1.Machine{
91+
ObjectMeta: metav1.ObjectMeta{Name: "m2"},
92+
Spec: clusterv1.MachineSpec{
93+
Version: ptr.To("v1.29.0"), // not up-to-date
94+
FailureDomain: ptr.To("two"),
95+
InfrastructureRef: corev1.ObjectReference{Kind: "GenericInfrastructureMachine", APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", Name: "m2"},
96+
}},
97+
"machine-3": &clusterv1.Machine{
98+
ObjectMeta: metav1.ObjectMeta{Name: "m3", DeletionTimestamp: ptr.To(metav1.Now())}, // deleted
99+
Spec: clusterv1.MachineSpec{
100+
Version: ptr.To("v1.29.3"), // not up-to-date
101+
FailureDomain: ptr.To("three"),
102+
InfrastructureRef: corev1.ObjectReference{Kind: "GenericInfrastructureMachine", APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", Name: "m3"},
103+
}},
104+
"machine-4": &clusterv1.Machine{
105+
ObjectMeta: metav1.ObjectMeta{Name: "m4", DeletionTimestamp: ptr.To(metav1.Now())}, // deleted
106+
Spec: clusterv1.MachineSpec{
107+
Version: ptr.To("v1.31.0"),
108+
FailureDomain: ptr.To("two"),
109+
InfrastructureRef: corev1.ObjectReference{Kind: "GenericInfrastructureMachine", APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", Name: "m1"},
110+
}},
111+
}
112+
controlPlane, err := NewControlPlane(ctx, nil, env.GetClient(), cluster, kcp, machines)
113+
g.Expect(err).NotTo(HaveOccurred())
114+
115+
g.Expect(controlPlane.Machines).To(HaveLen(4))
116+
117+
machinesNotUptoDate, machinesNotUptoDateConditionMessages := controlPlane.NotUpToDateMachines()
118+
g.Expect(machinesNotUptoDate.Names()).To(ConsistOf("m2", "m3"))
119+
g.Expect(machinesNotUptoDateConditionMessages).To(HaveLen(2))
120+
g.Expect(machinesNotUptoDateConditionMessages).To(HaveKeyWithValue("m2", []string{"Version v1.29.0, v1.31.0 required"}))
121+
g.Expect(machinesNotUptoDateConditionMessages).To(HaveKeyWithValue("m3", []string{"Version v1.29.3, v1.31.0 required"}))
122+
123+
machinesNeedingRollout, machinesNotUptoDateLogMessages := controlPlane.MachinesNeedingRollout()
124+
g.Expect(machinesNeedingRollout.Names()).To(ConsistOf("m2"))
125+
g.Expect(machinesNotUptoDateLogMessages).To(HaveLen(2))
126+
g.Expect(machinesNotUptoDateLogMessages).To(HaveKeyWithValue("m2", []string{"Machine version \"v1.29.0\" is not equal to KCP version \"v1.31.0\""}))
127+
g.Expect(machinesNotUptoDateLogMessages).To(HaveKeyWithValue("m3", []string{"Machine version \"v1.29.3\" is not equal to KCP version \"v1.31.0\""}))
128+
129+
upToDateMachines := controlPlane.UpToDateMachines()
130+
g.Expect(upToDateMachines).To(HaveLen(2))
131+
g.Expect(upToDateMachines.Names()).To(ConsistOf("m1", "m4"))
132+
133+
fd, err := controlPlane.NextFailureDomainForScaleUp(ctx)
134+
g.Expect(err).NotTo(HaveOccurred())
135+
g.Expect(fd).To(Equal(ptr.To("two"))) // deleted up-to-date machines should not be counted when picking the next failure domain for scale up
136+
})
64137
}
65138

66139
func TestHasMachinesToBeRemediated(t *testing.T) {

0 commit comments

Comments
 (0)