Skip to content

Commit 301de7a

Browse files
Drop unnecessary etcd call from KCP
1 parent 7a7ba01 commit 301de7a

File tree

5 files changed

+79
-45
lines changed

5 files changed

+79
-45
lines changed

controlplane/kubeadm/internal/controllers/controller.go

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1095,7 +1095,25 @@ func (r *KubeadmControlPlaneReconciler) reconcileEtcdMembers(ctx context.Context
10951095
return nil
10961096
}
10971097

1098+
// No op if there are potential issues affecting the list of etcdMembers
1099+
if !controlPlane.EtcdMembersAgreeOnMemberList || !controlPlane.EtcdMembersAgreeOnClusterID {
1100+
return nil
1101+
}
1102+
1103+
// No op if for any reason the etcdMember list is not populated at this stage.
1104+
if controlPlane.EtcdMembers == nil {
1105+
return nil
1106+
}
1107+
1108+
// Potential inconsistencies between the list of members and the list of machines/nodes are
1109+
// surfaced using the EtcdClusterHealthyCondition; if this condition is true, meaning no inconsistencies exist, return early.
1110+
if conditions.IsTrue(controlPlane.KCP, controlplanev1.EtcdClusterHealthyCondition) {
1111+
return nil
1112+
}
1113+
10981114
// Collect all the node names.
1115+
// Note: EtcdClusterHealthyCondition true also implies that there are no machines still provisioning,
1116+
// so we can ignore this case.
10991117
nodeNames := []string{}
11001118
for _, machine := range controlPlane.Machines {
11011119
if machine.Status.NodeRef == nil {
@@ -1105,19 +1123,13 @@ func (r *KubeadmControlPlaneReconciler) reconcileEtcdMembers(ctx context.Context
11051123
nodeNames = append(nodeNames, machine.Status.NodeRef.Name)
11061124
}
11071125

1108-
// Potential inconsistencies between the list of members and the list of machines/nodes are
1109-
// surfaced using the EtcdClusterHealthyCondition; if this condition is true, meaning no inconsistencies exists, return early.
1110-
if conditions.IsTrue(controlPlane.KCP, controlplanev1.EtcdClusterHealthyCondition) {
1111-
return nil
1112-
}
1113-
11141126
workloadCluster, err := controlPlane.GetWorkloadCluster(ctx)
11151127
if err != nil {
11161128
// Failing at connecting to the workload cluster can mean workload cluster is unhealthy for a variety of reasons such as etcd quorum loss.
11171129
return errors.Wrap(err, "cannot get remote client to workload cluster")
11181130
}
11191131

1120-
removedMembers, err := workloadCluster.ReconcileEtcdMembers(ctx, nodeNames)
1132+
removedMembers, err := workloadCluster.ReconcileEtcdMembersAndControlPlaneNodes(ctx, controlPlane.EtcdMembers, nodeNames)
11211133
if err != nil {
11221134
return errors.Wrap(err, "failed attempt to reconcile etcd members")
11231135
}

controlplane/kubeadm/internal/controllers/fakes_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
2828
bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1"
2929
"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal"
30+
"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd"
3031
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
3132
"sigs.k8s.io/cluster-api/util/collections"
3233
)
@@ -85,7 +86,7 @@ func (f *fakeWorkloadCluster) ForwardEtcdLeadership(_ context.Context, _ *cluste
8586
return nil
8687
}
8788

88-
func (f *fakeWorkloadCluster) ReconcileEtcdMembers(_ context.Context, _ []string) ([]string, error) {
89+
func (f *fakeWorkloadCluster) ReconcileEtcdMembersAndControlPlaneNodes(_ context.Context, _ []*etcd.Member, _ []string) ([]string, error) {
8990
return nil, nil
9091
}
9192

controlplane/kubeadm/internal/workload_cluster.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ import (
4646
bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1"
4747
kubeadmtypes "sigs.k8s.io/cluster-api/bootstrap/kubeadm/types"
4848
controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
49+
"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd"
4950
"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/proxy"
5051
"sigs.k8s.io/cluster-api/internal/util/kubeadm"
5152
"sigs.k8s.io/cluster-api/util"
@@ -117,7 +118,7 @@ type WorkloadCluster interface {
117118
UpdateClusterConfiguration(ctx context.Context, version semver.Version, mutators ...func(*bootstrapv1.ClusterConfiguration)) error
118119

119120
// State recovery tasks.
120-
ReconcileEtcdMembers(ctx context.Context, nodeNames []string) ([]string, error)
121+
ReconcileEtcdMembersAndControlPlaneNodes(ctx context.Context, members []*etcd.Member, nodeNames []string) ([]string, error)
121122
}
122123

123124
// Workload defines operations on workload clusters.

controlplane/kubeadm/internal/workload_cluster_etcd.go

Lines changed: 5 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -33,37 +33,14 @@ type etcdClientFor interface {
3333
forLeader(ctx context.Context, nodeNames []string) (*etcd.Client, error)
3434
}
3535

36-
// ReconcileEtcdMembers iterates over all etcd members and finds members that do not have corresponding nodes.
36+
// ReconcileEtcdMembersAndControlPlaneNodes iterates over all etcd members and finds members that do not have corresponding nodes.
3737
// If there are any such members, it deletes them from etcd and removes their nodes from the kubeadm configmap so that kubeadm does not run etcd health checks on them.
38-
func (w *Workload) ReconcileEtcdMembers(ctx context.Context, nodeNames []string) ([]string, error) {
39-
allRemovedMembers := []string{}
40-
allErrs := []error{}
41-
for _, nodeName := range nodeNames {
42-
removedMembers, errs := w.reconcileEtcdMember(ctx, nodeNames, nodeName)
43-
allRemovedMembers = append(allRemovedMembers, removedMembers...)
44-
allErrs = append(allErrs, errs...)
45-
}
46-
47-
return allRemovedMembers, kerrors.NewAggregate(allErrs)
48-
}
49-
50-
func (w *Workload) reconcileEtcdMember(ctx context.Context, nodeNames []string, nodeName string) ([]string, []error) {
51-
// Create the etcd Client for the etcd Pod scheduled on the Node
52-
etcdClient, err := w.etcdClientGenerator.forFirstAvailableNode(ctx, []string{nodeName})
53-
if err != nil {
54-
return nil, nil
55-
}
56-
defer etcdClient.Close()
57-
58-
members, err := etcdClient.Members(ctx)
59-
if err != nil {
60-
return nil, nil
61-
}
62-
38+
func (w *Workload) ReconcileEtcdMembersAndControlPlaneNodes(ctx context.Context, members []*etcd.Member, nodeNames []string) ([]string, error) {
6339
// Check if any member's node is missing from workload cluster
6440
// If any, delete it with best effort
6541
removedMembers := []string{}
6642
errs := []error{}
43+
6744
loopmembers:
6845
for _, member := range members {
6946
// If this member is just added, it has an empty name until the etcd pod starts. Ignore it.
@@ -84,7 +61,8 @@ loopmembers:
8461
errs = append(errs, err)
8562
}
8663
}
87-
return removedMembers, errs
64+
65+
return removedMembers, kerrors.NewAggregate(errs)
8866
}
8967

9068
// UpdateEtcdLocalInKubeadmConfigMap sets etcd local configuration in the kubeadm config map.

controlplane/kubeadm/internal/workload_cluster_etcd_test.go

Lines changed: 51 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -552,7 +552,7 @@ func TestForwardEtcdLeadership(t *testing.T) {
552552
})
553553
}
554554

555-
func TestReconcileEtcdMembers(t *testing.T) {
555+
func TestReconcileEtcdMembersAndControlPlaneNodes(t *testing.T) {
556556
kubeadmConfig := &corev1.ConfigMap{
557557
ObjectMeta: metav1.ObjectMeta{
558558
Name: kubeadmConfigKey,
@@ -590,13 +590,15 @@ func TestReconcileEtcdMembers(t *testing.T) {
590590
}
591591
node2 := node1.DeepCopy()
592592
node2.Name = "ip-10-0-0-2.ec2.internal"
593+
node3 := node1.DeepCopy()
594+
node3.Name = "ip-10-0-0-3.ec2.internal"
593595

594596
fakeEtcdClient := &fake2.FakeEtcdClient{
595597
MemberListResponse: &clientv3.MemberListResponse{
596598
Members: []*pb.Member{
597-
{Name: "ip-10-0-0-1.ec2.internal", ID: uint64(1)},
598-
{Name: "ip-10-0-0-2.ec2.internal", ID: uint64(2)},
599-
{Name: "ip-10-0-0-3.ec2.internal", ID: uint64(3)},
599+
{Name: node1.Name, ID: uint64(1)},
600+
{Name: node2.Name, ID: uint64(2)},
601+
{Name: node3.Name, ID: uint64(3)},
600602
},
601603
},
602604
AlarmResponse: &clientv3.AlarmResponse{
@@ -607,16 +609,51 @@ func TestReconcileEtcdMembers(t *testing.T) {
607609
tests := []struct {
608610
name string
609611
objs []client.Object
612+
members []*etcd.Member
610613
nodes []string
611614
etcdClientGenerator etcdClientFor
612615
expectErr bool
613616
assert func(*WithT, client.Client)
614617
}{
618+
{
619+
// no op if nodes and members match
620+
name: "no op if nodes and members match",
621+
objs: []client.Object{node1.DeepCopy(), node2.DeepCopy(), node3.DeepCopy(), kubeadmConfigWithoutClusterStatus.DeepCopy()},
622+
members: []*etcd.Member{
623+
{Name: node1.Name, ID: uint64(1)},
624+
{Name: node2.Name, ID: uint64(2)},
625+
{Name: node3.Name, ID: uint64(3)},
626+
},
627+
nodes: []string{node1.Name, node2.Name, node3.Name},
628+
etcdClientGenerator: &fakeEtcdClientGenerator{
629+
forNodesClient: &etcd.Client{
630+
EtcdClient: fakeEtcdClient,
631+
},
632+
},
633+
expectErr: false,
634+
assert: func(g *WithT, c client.Client) {
635+
g.Expect(fakeEtcdClient.RemovedMember).To(Equal(uint64(0))) // no member removed
636+
637+
var actualConfig corev1.ConfigMap
638+
g.Expect(c.Get(
639+
ctx,
640+
client.ObjectKey{Name: kubeadmConfigKey, Namespace: metav1.NamespaceSystem},
641+
&actualConfig,
642+
)).To(Succeed())
643+
// Kubernetes version >= 1.22.0 does not have ClusterStatus
644+
g.Expect(actualConfig.Data).ToNot(HaveKey(clusterStatusKey))
645+
},
646+
},
615647
{
616648
// the node to be removed is ip-10-0-0-3.ec2.internal since the
617649
// other two have nodes
618-
name: "successfully removes the etcd member without a node",
619-
objs: []client.Object{node1.DeepCopy(), node2.DeepCopy(), kubeadmConfigWithoutClusterStatus.DeepCopy()},
650+
name: "successfully removes the etcd member without a node",
651+
objs: []client.Object{node1.DeepCopy(), node2.DeepCopy(), kubeadmConfigWithoutClusterStatus.DeepCopy()},
652+
members: []*etcd.Member{
653+
{Name: node1.Name, ID: uint64(1)},
654+
{Name: node2.Name, ID: uint64(2)},
655+
{Name: node3.Name, ID: uint64(3)},
656+
},
620657
nodes: []string{node1.Name, node2.Name},
621658
etcdClientGenerator: &fakeEtcdClientGenerator{
622659
forNodesClient: &etcd.Client{
@@ -638,8 +675,13 @@ func TestReconcileEtcdMembers(t *testing.T) {
638675
},
639676
},
640677
{
641-
name: "return error if there aren't enough control plane nodes",
642-
objs: []client.Object{node1.DeepCopy(), kubeadmConfig.DeepCopy()},
678+
// only one node left, no removal should happen
679+
name: "return error if there aren't enough control plane nodes",
680+
objs: []client.Object{node1.DeepCopy(), kubeadmConfig.DeepCopy()},
681+
members: []*etcd.Member{
682+
{Name: "ip-10-0-0-1.ec2.internal", ID: uint64(1)},
683+
{Name: "ip-10-0-0-2.ec2.internal", ID: uint64(2)},
684+
},
643685
nodes: []string{node1.Name},
644686
etcdClientGenerator: &fakeEtcdClientGenerator{
645687
forNodesClient: &etcd.Client{
@@ -666,7 +708,7 @@ func TestReconcileEtcdMembers(t *testing.T) {
666708
etcdClientGenerator: tt.etcdClientGenerator,
667709
}
668710
ctx := context.TODO()
669-
_, err := w.ReconcileEtcdMembers(ctx, tt.nodes)
711+
_, err := w.ReconcileEtcdMembersAndControlPlaneNodes(ctx, tt.members, tt.nodes)
670712
if tt.expectErr {
671713
g.Expect(err).To(HaveOccurred())
672714
return

0 commit comments

Comments
 (0)