Skip to content

[WIP] 🌱 Refine v1beta2 kcp available condition #11442

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 24 additions & 7 deletions controlplane/kubeadm/internal/controllers/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,16 @@ func setAvailableCondition(_ context.Context, kcp *controlplanev1.KubeadmControl
// already surfacing status from etcd member and all control plane pods hosted on every machine.
// Note: we intentionally use the number of voting (non-learner) etcd members, rather than the number of machines,
// to determine the etcd quorum because etcd members might not match with machines, e.g. while provisioning a new machine.
etcdQuorum := (len(etcdMembers) / 2.0) + 1
activeEtcdMembers := 0
learnerEtcdMembers := 0
for _, etcdMember := range etcdMembers {
if etcdMember.IsLearner {
learnerEtcdMembers++
continue
}
activeEtcdMembers++
}
etcdQuorum := (activeEtcdMembers / 2.0) + 1
k8sControlPlaneHealthy := 0
k8sControlPlaneNotHealthy := 0
etcdMembersHealthy := 0
Expand Down Expand Up @@ -584,11 +593,15 @@ func setAvailableCondition(_ context.Context, kcp *controlplanev1.KubeadmControl
messages := []string{}

if etcdIsManaged && etcdMembersNotHealthy > 0 {
switch len(etcdMembers) - etcdMembersNotHealthy {
learnerMsg := ""
if learnerEtcdMembers > 0 {
learnerMsg = fmt.Sprintf(" %d member in learner mode,", learnerEtcdMembers)
}
switch len(etcdMembers) - etcdMembersNotHealthy - learnerEtcdMembers {
case 1:
messages = append(messages, fmt.Sprintf("* 1 of %d etcd members is healthy, at least %d required for etcd quorum", len(etcdMembers), etcdQuorum))
messages = append(messages, fmt.Sprintf("* 1 of %d etcd members is healthy,%s at least %d healthy required for etcd quorum", len(etcdMembers), learnerMsg, etcdQuorum))
default:
messages = append(messages, fmt.Sprintf("* %d of %d etcd members are healthy, at least %d required for etcd quorum", len(etcdMembers)-etcdMembersNotHealthy, len(etcdMembers), etcdQuorum))
messages = append(messages, fmt.Sprintf("* %d of %d etcd members are healthy,%s at least %d healthy required for etcd quorum", len(etcdMembers)-etcdMembersNotHealthy-learnerEtcdMembers, len(etcdMembers), learnerMsg, etcdQuorum))
}
}

Expand Down Expand Up @@ -620,13 +633,17 @@ func setAvailableCondition(_ context.Context, kcp *controlplanev1.KubeadmControl
}

if etcdIsManaged && etcdMembersHealthy < etcdQuorum {
learnerMsg := ""
if learnerEtcdMembers > 0 {
learnerMsg = fmt.Sprintf(" %d member in learner mode,", learnerEtcdMembers)
}
switch etcdMembersHealthy {
case 0:
messages = append(messages, fmt.Sprintf("* There are no healthy etcd member, at least %d required for etcd quorum", etcdQuorum))
messages = append(messages, fmt.Sprintf("* There are no healthy etcd member,%s at least %d healthy required for etcd quorum", learnerMsg, etcdQuorum))
case 1:
messages = append(messages, fmt.Sprintf("* 1 of %d etcd members is healthy, at least %d required for etcd quorum", len(etcdMembers), etcdQuorum))
messages = append(messages, fmt.Sprintf("* 1 of %d etcd members is healthy,%s at least %d healthy required for etcd quorum", len(etcdMembers), learnerMsg, etcdQuorum))
default:
messages = append(messages, fmt.Sprintf("* %d of %d etcd members are healthy, at least %d required for etcd quorum", etcdMembersHealthy, len(etcdMembers), etcdQuorum))
messages = append(messages, fmt.Sprintf("* %d of %d etcd members are healthy,%s at least %d healthy required for etcd quorum", etcdMembersHealthy, len(etcdMembers), learnerMsg, etcdQuorum))
}
}

Expand Down
32 changes: 31 additions & 1 deletion controlplane/kubeadm/internal/controllers/status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,7 @@ func Test_setAvailableCondition(t *testing.T) {

etcdMemberHealthy := metav1.Condition{Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionTrue, LastTransitionTime: metav1.Time{Time: reconcileTime}}
etcdMemberNotHealthy := metav1.Condition{Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionFalse, LastTransitionTime: metav1.Time{Time: reconcileTime}}
etcdMemberNotHealthy11s := metav1.Condition{Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionFalse, LastTransitionTime: metav1.Time{Time: reconcileTime.Add(-11 * time.Second)}}

tests := []struct {
name string
Expand Down Expand Up @@ -1046,6 +1047,35 @@ func Test_setAvailableCondition(t *testing.T) {
Message: "* Control plane certificates are not available",
},
},
{
name: "Learning etcd members should not be considered for quorum",
controlPlane: &internal.ControlPlane{
KCP: &controlplanev1.KubeadmControlPlane{
Status: controlplanev1.KubeadmControlPlaneStatus{
Initialized: true,
V1Beta2: &controlplanev1.KubeadmControlPlaneV1Beta2Status{
Conditions: []metav1.Condition{certificatesReady},
},
},
},
Machines: collections.FromMachines(
&clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}},
&clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m2"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}},
&clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m3"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberNotHealthy11s}}}},
),
EtcdMembers: []*etcd.Member{{}, {}, {IsLearner: true}},
EtcdMembersAgreeOnMemberList: true,
EtcdMembersAgreeOnClusterID: true,
EtcdMembersAndMachinesAreMatching: true,
},
expectCondition: metav1.Condition{
Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Reason,
Message: "* 1 of 3 etcd members is healthy, 1 member in learner mode, at least 2 healthy required for etcd quorum\n" +
"* 2 of 3 Machines have healthy control plane components, at least 1 required",
},
},
{
name: "Not enough healthy etcd members",
controlPlane: &internal.ControlPlane{
Expand All @@ -1071,7 +1101,7 @@ func Test_setAvailableCondition(t *testing.T) {
Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason,
Message: "* 1 of 3 etcd members is healthy, at least 2 required for etcd quorum",
Message: "* 1 of 3 etcd members is healthy, at least 2 healthy required for etcd quorum",
},
},
{
Expand Down