Skip to content

Commit 78c4806

Browse files
🌱 Call etcd member list and alarms once in KCP's updateManagedEtcdConditions (#11815)
* Call etcd member list and alarms once in KCP's updateManagedEtcdConditions * Address comments and fix tests * More comments
1 parent 5f2ce5b commit 78c4806

10 files changed

+273
-555
lines changed

‎controlplane/kubeadm/internal/control_plane.go

-4
Original file line numberDiff line numberDiff line change
@@ -69,12 +69,8 @@ type ControlPlane struct {
6969

7070
// EtcdMembers is the list of members read while computing reconcileControlPlaneConditions; also additional info below
7171
// comes from the same func.
72-
// NOTE: Those info are computed based on the info KCP was able to collect during inspection (e.g. if on a 3 CP
73-
// control plane one etcd member is down, those info are based on the answer collected from two members only).
7472
// NOTE: Those info are specifically designed for computing KCP's Available condition.
7573
EtcdMembers []*etcd.Member
76-
EtcdMembersAgreeOnMemberList bool
77-
EtcdMembersAgreeOnClusterID bool
7874
EtcdMembersAndMachinesAreMatching bool
7975

8076
managementCluster ManagementCluster

‎controlplane/kubeadm/internal/controllers/controller.go

-5
Original file line numberDiff line numberDiff line change
@@ -1090,11 +1090,6 @@ func (r *KubeadmControlPlaneReconciler) reconcileEtcdMembers(ctx context.Context
10901090
return nil
10911091
}
10921092

1093-
// No op if there are potential issues affecting the list of etcdMembers
1094-
if !controlPlane.EtcdMembersAgreeOnMemberList || !controlPlane.EtcdMembersAgreeOnClusterID {
1095-
return nil
1096-
}
1097-
10981093
// No op if for any reason the etcdMember list is not populated at this stage.
10991094
if controlPlane.EtcdMembers == nil {
11001095
return nil

‎controlplane/kubeadm/internal/controllers/status.go

+2-22
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ func (r *KubeadmControlPlaneReconciler) updateV1Beta2Status(ctx context.Context,
168168
setMachinesUpToDateCondition(ctx, controlPlane.KCP, controlPlane.Machines)
169169
setRemediatingCondition(ctx, controlPlane.KCP, controlPlane.MachinesToBeRemediatedByKCP(), controlPlane.UnhealthyMachines())
170170
setDeletingCondition(ctx, controlPlane.KCP, controlPlane.DeletingReason, controlPlane.DeletingMessage)
171-
setAvailableCondition(ctx, controlPlane.KCP, controlPlane.IsEtcdManaged(), controlPlane.EtcdMembers, controlPlane.EtcdMembersAgreeOnMemberList, controlPlane.EtcdMembersAgreeOnClusterID, controlPlane.EtcdMembersAndMachinesAreMatching, controlPlane.Machines)
171+
setAvailableCondition(ctx, controlPlane.KCP, controlPlane.IsEtcdManaged(), controlPlane.EtcdMembers, controlPlane.EtcdMembersAndMachinesAreMatching, controlPlane.Machines)
172172
}
173173

174174
func setReplicas(_ context.Context, kcp *controlplanev1.KubeadmControlPlane, machines collections.Machines) {
@@ -513,7 +513,7 @@ func setDeletingCondition(_ context.Context, kcp *controlplanev1.KubeadmControlP
513513
})
514514
}
515515

516-
func setAvailableCondition(_ context.Context, kcp *controlplanev1.KubeadmControlPlane, etcdIsManaged bool, etcdMembers []*etcd.Member, etcdMembersAgreeOnMemberList, etcdMembersAgreeOnClusterID, etcdMembersAndMachinesAreMatching bool, machines collections.Machines) {
516+
func setAvailableCondition(_ context.Context, kcp *controlplanev1.KubeadmControlPlane, etcdIsManaged bool, etcdMembers []*etcd.Member, etcdMembersAndMachinesAreMatching bool, machines collections.Machines) {
517517
if !kcp.Status.Initialized {
518518
v1beta2conditions.Set(kcp, metav1.Condition{
519519
Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition,
@@ -549,26 +549,6 @@ func setAvailableCondition(_ context.Context, kcp *controlplanev1.KubeadmControl
549549
return
550550
}
551551

552-
if !etcdMembersAgreeOnMemberList {
553-
v1beta2conditions.Set(kcp, metav1.Condition{
554-
Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition,
555-
Status: metav1.ConditionFalse,
556-
Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason,
557-
Message: "At least one etcd member reports a list of etcd members different than the list reported by other members",
558-
})
559-
return
560-
}
561-
562-
if !etcdMembersAgreeOnClusterID {
563-
v1beta2conditions.Set(kcp, metav1.Condition{
564-
Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition,
565-
Status: metav1.ConditionFalse,
566-
Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason,
567-
Message: "At least one etcd member reports a cluster ID different than the cluster ID reported by other members",
568-
})
569-
return
570-
}
571-
572552
if !etcdMembersAndMachinesAreMatching {
573553
v1beta2conditions.Set(kcp, metav1.Condition{
574554
Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition,

‎controlplane/kubeadm/internal/controllers/status_test.go

+2-88
Original file line numberDiff line numberDiff line change
@@ -871,8 +871,7 @@ func Test_setAvailableCondition(t *testing.T) {
871871
},
872872
},
873873
},
874-
EtcdMembers: []*etcd.Member{},
875-
EtcdMembersAgreeOnMemberList: false,
874+
EtcdMembers: []*etcd.Member{},
876875
},
877876
expectCondition: metav1.Condition{
878877
Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition,
@@ -908,8 +907,6 @@ func Test_setAvailableCondition(t *testing.T) {
908907
EtcdMembers: []*etcd.Member{
909908
{Name: "m1", IsLearner: false},
910909
},
911-
EtcdMembersAgreeOnMemberList: true,
912-
EtcdMembersAgreeOnClusterID: true,
913910
EtcdMembersAndMachinesAreMatching: true,
914911
},
915912
expectCondition: metav1.Condition{
@@ -960,8 +957,6 @@ func Test_setAvailableCondition(t *testing.T) {
960957
{Name: "m2", IsLearner: false},
961958
{Name: "m3", IsLearner: false},
962959
},
963-
EtcdMembersAgreeOnMemberList: true,
964-
EtcdMembersAgreeOnClusterID: true,
965960
EtcdMembersAndMachinesAreMatching: true,
966961
},
967962
expectCondition: metav1.Condition{
@@ -1031,53 +1026,6 @@ func Test_setAvailableCondition(t *testing.T) {
10311026
Message: "Failed to get etcd members",
10321027
},
10331028
},
1034-
{
1035-
name: "KCP is not available, etcd members do not agree on member list",
1036-
controlPlane: &internal.ControlPlane{
1037-
KCP: &controlplanev1.KubeadmControlPlane{
1038-
Spec: controlplanev1.KubeadmControlPlaneSpec{
1039-
KubeadmConfigSpec: bootstrapv1.KubeadmConfigSpec{
1040-
ClusterConfiguration: &bootstrapv1.ClusterConfiguration{
1041-
Etcd: bootstrapv1.Etcd{Local: &bootstrapv1.LocalEtcd{}},
1042-
},
1043-
},
1044-
},
1045-
Status: controlplanev1.KubeadmControlPlaneStatus{Initialized: true},
1046-
},
1047-
EtcdMembers: []*etcd.Member{},
1048-
EtcdMembersAgreeOnMemberList: false,
1049-
},
1050-
expectCondition: metav1.Condition{
1051-
Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition,
1052-
Status: metav1.ConditionFalse,
1053-
Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason,
1054-
Message: "At least one etcd member reports a list of etcd members different than the list reported by other members",
1055-
},
1056-
},
1057-
{
1058-
name: "KCP is not available, etcd members do not agree on cluster ID",
1059-
controlPlane: &internal.ControlPlane{
1060-
KCP: &controlplanev1.KubeadmControlPlane{
1061-
Spec: controlplanev1.KubeadmControlPlaneSpec{
1062-
KubeadmConfigSpec: bootstrapv1.KubeadmConfigSpec{
1063-
ClusterConfiguration: &bootstrapv1.ClusterConfiguration{
1064-
Etcd: bootstrapv1.Etcd{Local: &bootstrapv1.LocalEtcd{}},
1065-
},
1066-
},
1067-
},
1068-
Status: controlplanev1.KubeadmControlPlaneStatus{Initialized: true},
1069-
},
1070-
EtcdMembers: []*etcd.Member{},
1071-
EtcdMembersAgreeOnMemberList: true,
1072-
EtcdMembersAgreeOnClusterID: false,
1073-
},
1074-
expectCondition: metav1.Condition{
1075-
Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition,
1076-
Status: metav1.ConditionFalse,
1077-
Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason,
1078-
Message: "At least one etcd member reports a cluster ID different than the cluster ID reported by other members",
1079-
},
1080-
},
10811029
{
10821030
name: "KCP is not available, etcd members and machines list do not match",
10831031
controlPlane: &internal.ControlPlane{
@@ -1092,8 +1040,6 @@ func Test_setAvailableCondition(t *testing.T) {
10921040
Status: controlplanev1.KubeadmControlPlaneStatus{Initialized: true},
10931041
},
10941042
EtcdMembers: []*etcd.Member{},
1095-
EtcdMembersAgreeOnMemberList: true,
1096-
EtcdMembersAgreeOnClusterID: true,
10971043
EtcdMembersAndMachinesAreMatching: false,
10981044
},
10991045
expectCondition: metav1.Condition{
@@ -1146,8 +1092,6 @@ func Test_setAvailableCondition(t *testing.T) {
11461092
{Name: "m2", IsLearner: false},
11471093
{Name: "m3", IsLearner: false},
11481094
},
1149-
EtcdMembersAgreeOnMemberList: true,
1150-
EtcdMembersAgreeOnClusterID: true,
11511095
EtcdMembersAndMachinesAreMatching: true,
11521096
},
11531097
expectCondition: metav1.Condition{
@@ -1198,8 +1142,6 @@ func Test_setAvailableCondition(t *testing.T) {
11981142
{Name: "m2", IsLearner: false},
11991143
{Name: "m3", IsLearner: false},
12001144
},
1201-
EtcdMembersAgreeOnMemberList: true,
1202-
EtcdMembersAgreeOnClusterID: true,
12031145
EtcdMembersAndMachinesAreMatching: true,
12041146
},
12051147
expectCondition: metav1.Condition{
@@ -1252,8 +1194,6 @@ func Test_setAvailableCondition(t *testing.T) {
12521194
{Name: "m2", IsLearner: false},
12531195
{Name: "m3", IsLearner: false},
12541196
},
1255-
EtcdMembersAgreeOnMemberList: true,
1256-
EtcdMembersAgreeOnClusterID: true,
12571197
EtcdMembersAndMachinesAreMatching: true,
12581198
},
12591199
expectCondition: metav1.Condition{
@@ -1294,8 +1234,6 @@ func Test_setAvailableCondition(t *testing.T) {
12941234
EtcdMembers: []*etcd.Member{
12951235
{Name: "m1", IsLearner: false},
12961236
},
1297-
EtcdMembersAgreeOnMemberList: true,
1298-
EtcdMembersAgreeOnClusterID: true,
12991237
EtcdMembersAndMachinesAreMatching: true,
13001238
},
13011239
expectCondition: metav1.Condition{
@@ -1356,8 +1294,6 @@ func Test_setAvailableCondition(t *testing.T) {
13561294
{Name: "m3", IsLearner: false},
13571295
{Name: "", IsLearner: false},
13581296
},
1359-
EtcdMembersAgreeOnMemberList: true,
1360-
EtcdMembersAgreeOnClusterID: true,
13611297
EtcdMembersAndMachinesAreMatching: true,
13621298
},
13631299
expectCondition: metav1.Condition{
@@ -1421,8 +1357,6 @@ func Test_setAvailableCondition(t *testing.T) {
14211357
{Name: "m3", IsLearner: false},
14221358
{Name: "m4", IsLearner: false},
14231359
},
1424-
EtcdMembersAgreeOnMemberList: true,
1425-
EtcdMembersAgreeOnClusterID: true,
14261360
EtcdMembersAndMachinesAreMatching: true,
14271361
},
14281362
expectCondition: metav1.Condition{
@@ -1484,8 +1418,6 @@ func Test_setAvailableCondition(t *testing.T) {
14841418
{Name: "m3", IsLearner: false},
14851419
{Name: "m4", IsLearner: false},
14861420
},
1487-
EtcdMembersAgreeOnMemberList: true,
1488-
EtcdMembersAgreeOnClusterID: true,
14891421
EtcdMembersAndMachinesAreMatching: true,
14901422
},
14911423
expectCondition: metav1.Condition{
@@ -1538,8 +1470,6 @@ func Test_setAvailableCondition(t *testing.T) {
15381470
{Name: "m2", IsLearner: false},
15391471
{Name: "m3", IsLearner: false},
15401472
},
1541-
EtcdMembersAgreeOnMemberList: true,
1542-
EtcdMembersAgreeOnClusterID: true,
15431473
EtcdMembersAndMachinesAreMatching: true,
15441474
},
15451475
expectCondition: metav1.Condition{
@@ -1600,8 +1530,6 @@ func Test_setAvailableCondition(t *testing.T) {
16001530
{Name: "m3", IsLearner: false},
16011531
{Name: "m4", IsLearner: true},
16021532
},
1603-
EtcdMembersAgreeOnMemberList: true,
1604-
EtcdMembersAgreeOnClusterID: true,
16051533
EtcdMembersAndMachinesAreMatching: true,
16061534
},
16071535
expectCondition: metav1.Condition{
@@ -1646,8 +1574,6 @@ func Test_setAvailableCondition(t *testing.T) {
16461574
EtcdMembers: []*etcd.Member{
16471575
{Name: "m1", IsLearner: false},
16481576
},
1649-
EtcdMembersAgreeOnMemberList: true,
1650-
EtcdMembersAgreeOnClusterID: true,
16511577
EtcdMembersAndMachinesAreMatching: true,
16521578
},
16531579
expectCondition: metav1.Condition{
@@ -1699,8 +1625,6 @@ func Test_setAvailableCondition(t *testing.T) {
16991625
{Name: "m2", IsLearner: false},
17001626
{Name: "m3", IsLearner: false},
17011627
},
1702-
EtcdMembersAgreeOnMemberList: true,
1703-
EtcdMembersAgreeOnClusterID: true,
17041628
EtcdMembersAndMachinesAreMatching: true,
17051629
},
17061630
expectCondition: metav1.Condition{
@@ -1739,8 +1663,6 @@ func Test_setAvailableCondition(t *testing.T) {
17391663
},
17401664
),
17411665
EtcdMembers: []*etcd.Member{{}, {}, {}},
1742-
EtcdMembersAgreeOnMemberList: true,
1743-
EtcdMembersAgreeOnClusterID: true,
17441666
EtcdMembersAndMachinesAreMatching: true,
17451667
},
17461668
expectCondition: metav1.Condition{
@@ -1789,8 +1711,6 @@ func Test_setAvailableCondition(t *testing.T) {
17891711
},
17901712
),
17911713
EtcdMembers: nil,
1792-
EtcdMembersAgreeOnMemberList: false,
1793-
EtcdMembersAgreeOnClusterID: false,
17941714
EtcdMembersAndMachinesAreMatching: false,
17951715
},
17961716
expectCondition: metav1.Condition{
@@ -1836,8 +1756,6 @@ func Test_setAvailableCondition(t *testing.T) {
18361756
},
18371757
),
18381758
EtcdMembers: nil,
1839-
EtcdMembersAgreeOnMemberList: false,
1840-
EtcdMembersAgreeOnClusterID: false,
18411759
EtcdMembersAndMachinesAreMatching: false,
18421760
},
18431761
expectCondition: metav1.Condition{
@@ -1874,8 +1792,6 @@ func Test_setAvailableCondition(t *testing.T) {
18741792
EtcdMembers: []*etcd.Member{
18751793
{Name: "m1", IsLearner: false},
18761794
},
1877-
EtcdMembersAgreeOnMemberList: true,
1878-
EtcdMembersAgreeOnClusterID: true,
18791795
EtcdMembersAndMachinesAreMatching: true,
18801796
},
18811797
expectCondition: metav1.Condition{
@@ -1913,8 +1829,6 @@ func Test_setAvailableCondition(t *testing.T) {
19131829
},
19141830
),
19151831
EtcdMembers: []*etcd.Member{{Name: "m1"}},
1916-
EtcdMembersAgreeOnMemberList: true,
1917-
EtcdMembersAgreeOnClusterID: true,
19181832
EtcdMembersAndMachinesAreMatching: true,
19191833
},
19201834
expectCondition: metav1.Condition{
@@ -1929,7 +1843,7 @@ func Test_setAvailableCondition(t *testing.T) {
19291843
t.Run(tt.name, func(t *testing.T) {
19301844
g := NewWithT(t)
19311845

1932-
setAvailableCondition(ctx, tt.controlPlane.KCP, tt.controlPlane.IsEtcdManaged(), tt.controlPlane.EtcdMembers, tt.controlPlane.EtcdMembersAgreeOnMemberList, tt.controlPlane.EtcdMembersAgreeOnClusterID, tt.controlPlane.EtcdMembersAndMachinesAreMatching, tt.controlPlane.Machines)
1846+
setAvailableCondition(ctx, tt.controlPlane.KCP, tt.controlPlane.IsEtcdManaged(), tt.controlPlane.EtcdMembers, tt.controlPlane.EtcdMembersAndMachinesAreMatching, tt.controlPlane.Machines)
19331847

19341848
availableCondition := v1beta2conditions.Get(tt.controlPlane.KCP, controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition)
19351849
g.Expect(availableCondition).ToNot(BeNil())

‎controlplane/kubeadm/internal/etcd/etcd.go

+7-7
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ var (
145145
func NewClient(ctx context.Context, config ClientConfiguration) (*Client, error) {
146146
dialer, err := proxy.NewDialer(config.Proxy)
147147
if err != nil {
148-
return nil, errors.Wrap(err, "unable to create a dialer for etcd client")
148+
return nil, errors.Wrapf(err, "unable to create a dialer for the etcd client connecting to %s", config.Endpoint)
149149
}
150150

151151
etcdClient, err := clientv3.New(clientv3.Config{
@@ -169,15 +169,15 @@ func NewClient(ctx context.Context, config ClientConfiguration) (*Client, error)
169169
client, err := newEtcdClient(ctx, etcdClient, callTimeout)
170170
if err != nil {
171171
closeErr := etcdClient.Close()
172-
return nil, errors.Wrap(kerrors.NewAggregate([]error{err, closeErr}), "unable to create etcd client")
172+
return nil, kerrors.NewAggregate([]error{err, closeErr})
173173
}
174174
return client, nil
175175
}
176176

177177
func newEtcdClient(ctx context.Context, etcdClient etcd, callTimeout time.Duration) (*Client, error) {
178178
endpoints := etcdClient.Endpoints()
179179
if len(endpoints) == 0 {
180-
return nil, errors.New("etcd client was not configured with any endpoints")
180+
return nil, errors.New("invalid argument: newEtcdClient cannot be called without any endpoint")
181181
}
182182

183183
ctx, cancel := context.WithTimeoutCause(ctx, callTimeout, errors.New("call timeout expired"))
@@ -209,7 +209,7 @@ func (c *Client) Members(ctx context.Context) ([]*Member, error) {
209209

210210
response, err := c.EtcdClient.MemberList(ctx)
211211
if err != nil {
212-
return nil, errors.Wrap(err, "failed to get list of members for etcd cluster")
212+
return nil, errors.Wrap(err, "failed to get etcd members")
213213
}
214214

215215
clusterID := response.Header.GetClusterId()
@@ -229,7 +229,7 @@ func (c *Client) MoveLeader(ctx context.Context, newLeaderID uint64) error {
229229
defer cancel()
230230

231231
_, err := c.EtcdClient.MoveLeader(ctx, newLeaderID)
232-
return errors.Wrapf(err, "failed to move etcd leader: %v", newLeaderID)
232+
return errors.Wrapf(err, "failed to move etcd leader to: %v", newLeaderID)
233233
}
234234

235235
// RemoveMember removes a given member.
@@ -238,7 +238,7 @@ func (c *Client) RemoveMember(ctx context.Context, id uint64) error {
238238
defer cancel()
239239

240240
_, err := c.EtcdClient.MemberRemove(ctx, id)
241-
return errors.Wrapf(err, "failed to remove member: %v", id)
241+
return errors.Wrapf(err, "failed to remove etcd member: %v", id)
242242
}
243243

244244
// Alarms retrieves all alarms on a cluster.
@@ -248,7 +248,7 @@ func (c *Client) Alarms(ctx context.Context) ([]MemberAlarm, error) {
248248

249249
alarmResponse, err := c.EtcdClient.AlarmList(ctx)
250250
if err != nil {
251-
return nil, errors.Wrap(err, "failed to get alarms for etcd cluster")
251+
return nil, errors.Wrap(err, "failed to get etcd alarms")
252252
}
253253

254254
memberAlarms := make([]MemberAlarm, 0, len(alarmResponse.Alarms))

‎controlplane/kubeadm/internal/etcd_client_generator.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package internal
1919
import (
2020
"context"
2121
"crypto/tls"
22+
"strings"
2223
"time"
2324

2425
"github.com/pkg/errors"
@@ -83,7 +84,7 @@ func (c *EtcdClientGenerator) forFirstAvailableNode(ctx context.Context, nodeNam
8384
}
8485
return client, nil
8586
}
86-
return nil, errors.Wrap(kerrors.NewAggregate(errs), "could not establish a connection to any etcd node")
87+
return nil, errors.Wrapf(kerrors.NewAggregate(errs), "could not establish a connection to etcd members hosted on %s", strings.Join(nodeNames, ","))
8788
}
8889

8990
// forLeader takes a list of nodes and returns a client to the leader node.

0 commit comments

Comments
 (0)