diff --git a/controlplane/kubeadm/internal/controllers/status.go b/controlplane/kubeadm/internal/controllers/status.go index 118d355c0606..30dfa96982ba 100644 --- a/controlplane/kubeadm/internal/controllers/status.go +++ b/controlplane/kubeadm/internal/controllers/status.go @@ -526,7 +526,16 @@ func setAvailableCondition(_ context.Context, kcp *controlplanev1.KubeadmControl // already surfacing status from etcd member and all control plane pods hosted on every machine. // Note: we intentionally use the number of etcd members to determine the etcd quorum because // etcd members might not match with machines, e.g. while provisioning a new machine. - etcdQuorum := (len(etcdMembers) / 2.0) + 1 + activeEtcdMembers := 0 + learnerEtcdMembers := 0 + for _, etcdMember := range etcdMembers { + if etcdMember.IsLearner { + learnerEtcdMembers++ + continue + } + activeEtcdMembers++ + } + etcdQuorum := (activeEtcdMembers / 2.0) + 1 k8sControlPlaneHealthy := 0 k8sControlPlaneNotHealthy := 0 etcdMembersHealthy := 0 @@ -584,11 +593,15 @@ func setAvailableCondition(_ context.Context, kcp *controlplanev1.KubeadmControl messages := []string{} if etcdIsManaged && etcdMembersNotHealthy > 0 { - switch len(etcdMembers) - etcdMembersNotHealthy { + learnerMsg := "" + if learnerEtcdMembers > 0 { + learnerMsg = fmt.Sprintf(" %d member in learner mode,", learnerEtcdMembers) + } + switch len(etcdMembers) - etcdMembersNotHealthy - learnerEtcdMembers { case 1: - messages = append(messages, fmt.Sprintf("* 1 of %d etcd members is healthy, at least %d required for etcd quorum", len(etcdMembers), etcdQuorum)) + messages = append(messages, fmt.Sprintf("* 1 of %d etcd members is healthy,%s at least %d healthy required for etcd quorum", len(etcdMembers), learnerMsg, etcdQuorum)) default: - messages = append(messages, fmt.Sprintf("* %d of %d etcd members are healthy, at least %d required for etcd quorum", len(etcdMembers)-etcdMembersNotHealthy, len(etcdMembers), etcdQuorum)) + messages = append(messages, fmt.Sprintf("* %d of %d etcd members are healthy,%s at least %d healthy required for etcd quorum", len(etcdMembers)-etcdMembersNotHealthy-learnerEtcdMembers, len(etcdMembers), learnerMsg, etcdQuorum)) } } @@ -620,13 +633,17 @@ func setAvailableCondition(_ context.Context, kcp *controlplanev1.KubeadmControl } if etcdIsManaged && etcdMembersHealthy < etcdQuorum { + learnerMsg := "" + if learnerEtcdMembers > 0 { + learnerMsg = fmt.Sprintf(" %d member in learner mode,", learnerEtcdMembers) + } switch etcdMembersHealthy { case 0: - messages = append(messages, fmt.Sprintf("* There are no healthy etcd member, at least %d required for etcd quorum", etcdQuorum)) + messages = append(messages, fmt.Sprintf("* There are no healthy etcd member,%s at least %d healthy required for etcd quorum", learnerMsg, etcdQuorum)) case 1: - messages = append(messages, fmt.Sprintf("* 1 of %d etcd members is healthy, at least %d required for etcd quorum", len(etcdMembers), etcdQuorum)) + messages = append(messages, fmt.Sprintf("* 1 of %d etcd members is healthy,%s at least %d healthy required for etcd quorum", len(etcdMembers), learnerMsg, etcdQuorum)) default: - messages = append(messages, fmt.Sprintf("* %d of %d etcd members are healthy, at least %d required for etcd quorum", etcdMembersHealthy, len(etcdMembers), etcdQuorum)) + messages = append(messages, fmt.Sprintf("* %d of %d etcd members are healthy,%s at least %d healthy required for etcd quorum", etcdMembersHealthy, len(etcdMembers), learnerMsg, etcdQuorum)) } } diff --git a/controlplane/kubeadm/internal/controllers/status_test.go b/controlplane/kubeadm/internal/controllers/status_test.go index 1f3653ea78ed..bb30d75e984c 100644 --- a/controlplane/kubeadm/internal/controllers/status_test.go +++ b/controlplane/kubeadm/internal/controllers/status_test.go @@ -726,6 +726,7 @@ func Test_setAvailableCondition(t *testing.T) { etcdMemberHealthy := metav1.Condition{Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionTrue, LastTransitionTime: metav1.Time{Time: reconcileTime}} etcdMemberNotHealthy := metav1.Condition{Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionFalse, LastTransitionTime: metav1.Time{Time: reconcileTime}} + etcdMemberNotHealthy11s := metav1.Condition{Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionFalse, LastTransitionTime: metav1.Time{Time: reconcileTime.Add(-11 * time.Second)}} tests := []struct { name string @@ -1046,6 +1047,35 @@ func Test_setAvailableCondition(t *testing.T) { Message: "* Control plane certificates are not available", }, }, + { + name: "Learning etcd members should not be considered for quorum", + controlPlane: &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + V1Beta2: &controlplanev1.KubeadmControlPlaneV1Beta2Status{ + Conditions: []metav1.Condition{certificatesReady}, + }, + }, + }, + Machines: collections.FromMachines( + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m2"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m3"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberNotHealthy11s}}}}, + ), + EtcdMembers: []*etcd.Member{{}, {}, {IsLearner: true}}, + EtcdMembersAgreeOnMemberList: true, + EtcdMembersAgreeOnClusterID: true, + EtcdMembersAndMachinesAreMatching: true, + }, + expectCondition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Reason, + Message: "* 1 of 3 etcd members is healthy, 1 member in learner mode, at least 2 healthy required for etcd quorum\n" + + "* 2 of 3 Machines have healthy control plane components, at least 1 required", + }, + }, { name: "Not enough healthy etcd members", controlPlane: &internal.ControlPlane{ @@ -1071,7 +1101,7 @@ func Test_setAvailableCondition(t *testing.T) { Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason, - Message: "* 1 of 3 etcd members is healthy, at least 2 required for etcd quorum", + Message: "* 1 of 3 etcd members is healthy, at least 2 healthy required for etcd quorum", }, }, {