Skip to content

Commit a2b7dd1

Browse files
authored
Merge pull request #10628 from sbueringer/pr-replace-reflect-equal
🌱 Add compare util using go-cmp, modify webhooks & KCP controller
2 parents ae215d6 + 57dc231 commit a2b7dd1

File tree

14 files changed

+475
-98
lines changed

14 files changed

+475
-98
lines changed

controlplane/kubeadm/internal/control_plane.go

+19-9
Original file line numberDiff line numberDiff line change
@@ -131,11 +131,15 @@ func (c *ControlPlane) FailureDomainWithMostMachines(ctx context.Context, machin
131131
}
132132

133133
// NextFailureDomainForScaleUp returns the failure domain with the fewest number of up-to-date machines.
134-
func (c *ControlPlane) NextFailureDomainForScaleUp(ctx context.Context) *string {
134+
func (c *ControlPlane) NextFailureDomainForScaleUp(ctx context.Context) (*string, error) {
135135
if len(c.Cluster.Status.FailureDomains.FilterControlPlane()) == 0 {
136-
return nil
136+
return nil, nil
137137
}
138-
return failuredomains.PickFewest(ctx, c.FailureDomains().FilterControlPlane(), c.UpToDateMachines())
138+
upToDateMachines, err := c.UpToDateMachines()
139+
if err != nil {
140+
return nil, errors.Wrapf(err, "failed to determine next failure domain for scale up")
141+
}
142+
return failuredomains.PickFewest(ctx, c.FailureDomains().FilterControlPlane(), upToDateMachines), nil
139143
}
140144

141145
// InitialControlPlaneConfig returns a new KubeadmConfigSpec that is to be used for an initializing control plane.
@@ -167,34 +171,40 @@ func (c *ControlPlane) GetKubeadmConfig(machineName string) (*bootstrapv1.Kubead
167171
}
168172

169173
// MachinesNeedingRollout return a list of machines that need to be rolled out.
170-
func (c *ControlPlane) MachinesNeedingRollout() (collections.Machines, map[string]string) {
174+
func (c *ControlPlane) MachinesNeedingRollout() (collections.Machines, map[string]string, error) {
171175
// Ignore machines to be deleted.
172176
machines := c.Machines.Filter(collections.Not(collections.HasDeletionTimestamp))
173177

174178
// Return machines if they are scheduled for rollout or if with an outdated configuration.
175179
machinesNeedingRollout := make(collections.Machines, len(machines))
176180
rolloutReasons := map[string]string{}
177181
for _, m := range machines {
178-
reason, needsRollout := NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.InfraResources, c.KubeadmConfigs, c.KCP, m)
182+
reason, needsRollout, err := NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.InfraResources, c.KubeadmConfigs, c.KCP, m)
183+
if err != nil {
184+
return nil, nil, err
185+
}
179186
if needsRollout {
180187
machinesNeedingRollout.Insert(m)
181188
rolloutReasons[m.Name] = reason
182189
}
183190
}
184-
return machinesNeedingRollout, rolloutReasons
191+
return machinesNeedingRollout, rolloutReasons, nil
185192
}
186193

187194
// UpToDateMachines returns the machines that are up to date with the control
188195
// plane's configuration and therefore do not require rollout.
189-
func (c *ControlPlane) UpToDateMachines() collections.Machines {
196+
func (c *ControlPlane) UpToDateMachines() (collections.Machines, error) {
190197
upToDateMachines := make(collections.Machines, len(c.Machines))
191198
for _, m := range c.Machines {
192-
_, needsRollout := NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.InfraResources, c.KubeadmConfigs, c.KCP, m)
199+
_, needsRollout, err := NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.InfraResources, c.KubeadmConfigs, c.KCP, m)
200+
if err != nil {
201+
return nil, err
202+
}
193203
if !needsRollout {
194204
upToDateMachines.Insert(m)
195205
}
196206
}
197-
return upToDateMachines
207+
return upToDateMachines, nil
198208
}
199209

200210
// getInfraResources fetches the external infrastructure resource for each machine in the collection and returns a map of machine.Name -> infraResource.

controlplane/kubeadm/internal/controllers/controller.go

+5-2
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ func (r *KubeadmControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl.
208208
if errors.As(err, &connFailure) {
209209
log.Error(err, "Could not connect to workload cluster to fetch status")
210210
} else {
211-
log.Error(err, "Failed to update KubeadmControlPlane Status")
211+
log.Error(err, "Failed to update KubeadmControlPlane status")
212212
reterr = kerrors.NewAggregate([]error{reterr, err})
213213
}
214214
}
@@ -399,7 +399,10 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, controlPl
399399
}
400400

401401
// Control plane machines rollout due to configuration changes (e.g. upgrades) takes precedence over other operations.
402-
machinesNeedingRollout, rolloutReasons := controlPlane.MachinesNeedingRollout()
402+
machinesNeedingRollout, rolloutReasons, err := controlPlane.MachinesNeedingRollout()
403+
if err != nil {
404+
return ctrl.Result{}, err
405+
}
403406
switch {
404407
case len(machinesNeedingRollout) > 0:
405408
var reasons []string

controlplane/kubeadm/internal/controllers/scale.go

+10-2
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,11 @@ func (r *KubeadmControlPlaneReconciler) initializeControlPlane(ctx context.Conte
3939
logger := ctrl.LoggerFrom(ctx)
4040

4141
bootstrapSpec := controlPlane.InitialControlPlaneConfig()
42-
fd := controlPlane.NextFailureDomainForScaleUp(ctx)
42+
fd, err := controlPlane.NextFailureDomainForScaleUp(ctx)
43+
if err != nil {
44+
return ctrl.Result{}, err
45+
}
46+
4347
if err := r.cloneConfigsAndGenerateMachine(ctx, controlPlane.Cluster, controlPlane.KCP, bootstrapSpec, fd); err != nil {
4448
logger.Error(err, "Failed to create initial control plane Machine")
4549
r.recorder.Eventf(controlPlane.KCP, corev1.EventTypeWarning, "FailedInitialization", "Failed to create initial control plane Machine for cluster %s control plane: %v", klog.KObj(controlPlane.Cluster), err)
@@ -60,7 +64,11 @@ func (r *KubeadmControlPlaneReconciler) scaleUpControlPlane(ctx context.Context,
6064

6165
// Create the bootstrap configuration
6266
bootstrapSpec := controlPlane.JoinControlPlaneConfig()
63-
fd := controlPlane.NextFailureDomainForScaleUp(ctx)
67+
fd, err := controlPlane.NextFailureDomainForScaleUp(ctx)
68+
if err != nil {
69+
return ctrl.Result{}, err
70+
}
71+
6472
if err := r.cloneConfigsAndGenerateMachine(ctx, controlPlane.Cluster, controlPlane.KCP, bootstrapSpec, fd); err != nil {
6573
logger.Error(err, "Failed to create additional control plane Machine")
6674
r.recorder.Eventf(controlPlane.KCP, corev1.EventTypeWarning, "FailedScaleUp", "Failed to create additional control plane Machine for cluster % control plane: %v", klog.KObj(controlPlane.Cluster), err)

controlplane/kubeadm/internal/controllers/status.go

+5-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,11 @@ func (r *KubeadmControlPlaneReconciler) updateStatus(ctx context.Context, contro
3636
// This is necessary for CRDs including scale subresources.
3737
controlPlane.KCP.Status.Selector = selector.String()
3838

39-
controlPlane.KCP.Status.UpdatedReplicas = int32(len(controlPlane.UpToDateMachines()))
39+
upToDateMachines, err := controlPlane.UpToDateMachines()
40+
if err != nil {
41+
return errors.Wrapf(err, "failed to update status")
42+
}
43+
controlPlane.KCP.Status.UpdatedReplicas = int32(len(upToDateMachines))
4044

4145
replicas := int32(len(controlPlane.Machines))
4246
desiredReplicas := *controlPlane.KCP.Spec.Replicas

controlplane/kubeadm/internal/filters.go

+50-24
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,14 @@ import (
2222
"reflect"
2323
"strings"
2424

25+
"github.com/pkg/errors"
2526
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2627
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
2728

2829
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
2930
bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1"
3031
controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
32+
"sigs.k8s.io/cluster-api/internal/util/compare"
3133
"sigs.k8s.io/cluster-api/util/collections"
3234
)
3335

@@ -39,7 +41,7 @@ import (
3941
// - mutated in-place (ex: NodeDrainTimeout)
4042
// - are not dictated by KCP (ex: ProviderID)
4143
// - are not relevant for the rollout decision (ex: failureDomain).
42-
func matchesMachineSpec(infraConfigs map[string]*unstructured.Unstructured, machineConfigs map[string]*bootstrapv1.KubeadmConfig, kcp *controlplanev1.KubeadmControlPlane, machine *clusterv1.Machine) (string, bool) {
44+
func matchesMachineSpec(infraConfigs map[string]*unstructured.Unstructured, machineConfigs map[string]*bootstrapv1.KubeadmConfig, kcp *controlplanev1.KubeadmControlPlane, machine *clusterv1.Machine) (string, bool, error) {
4345
mismatchReasons := []string{}
4446

4547
if !collections.MatchesKubernetesVersion(kcp.Spec.Version)(machine) {
@@ -50,7 +52,11 @@ func matchesMachineSpec(infraConfigs map[string]*unstructured.Unstructured, mach
5052
mismatchReasons = append(mismatchReasons, fmt.Sprintf("Machine version %q is not equal to KCP version %q", machineVersion, kcp.Spec.Version))
5153
}
5254

53-
if reason, matches := matchesKubeadmBootstrapConfig(machineConfigs, kcp, machine); !matches {
55+
reason, matches, err := matchesKubeadmBootstrapConfig(machineConfigs, kcp, machine)
56+
if err != nil {
57+
return "", false, errors.Wrapf(err, "failed to match Machine spec")
58+
}
59+
if !matches {
5460
mismatchReasons = append(mismatchReasons, reason)
5561
}
5662

@@ -59,14 +65,14 @@ func matchesMachineSpec(infraConfigs map[string]*unstructured.Unstructured, mach
5965
}
6066

6167
if len(mismatchReasons) > 0 {
62-
return strings.Join(mismatchReasons, ","), false
68+
return strings.Join(mismatchReasons, ","), false, nil
6369
}
6470

65-
return "", true
71+
return "", true, nil
6672
}
6773

6874
// NeedsRollout checks if a Machine needs to be rolled out and returns the reason why.
69-
func NeedsRollout(reconciliationTime, rolloutAfter *metav1.Time, rolloutBefore *controlplanev1.RolloutBefore, infraConfigs map[string]*unstructured.Unstructured, machineConfigs map[string]*bootstrapv1.KubeadmConfig, kcp *controlplanev1.KubeadmControlPlane, machine *clusterv1.Machine) (string, bool) {
75+
func NeedsRollout(reconciliationTime, rolloutAfter *metav1.Time, rolloutBefore *controlplanev1.RolloutBefore, infraConfigs map[string]*unstructured.Unstructured, machineConfigs map[string]*bootstrapv1.KubeadmConfig, kcp *controlplanev1.KubeadmControlPlane, machine *clusterv1.Machine) (string, bool, error) {
7076
rolloutReasons := []string{}
7177

7278
// Machines whose certificates are about to expire.
@@ -81,15 +87,19 @@ func NeedsRollout(reconciliationTime, rolloutAfter *metav1.Time, rolloutBefore *
8187
}
8288

8389
// Machines that do not match with KCP config.
84-
if mismatchReason, matches := matchesMachineSpec(infraConfigs, machineConfigs, kcp, machine); !matches {
90+
mismatchReason, matches, err := matchesMachineSpec(infraConfigs, machineConfigs, kcp, machine)
91+
if err != nil {
92+
return "", false, errors.Wrapf(err, "failed to determine if Machine %s needs rollout", machine.Name)
93+
}
94+
if !matches {
8595
rolloutReasons = append(rolloutReasons, mismatchReason)
8696
}
8797

8898
if len(rolloutReasons) > 0 {
89-
return fmt.Sprintf("Machine %s needs rollout: %s", machine.Name, strings.Join(rolloutReasons, ",")), true
99+
return fmt.Sprintf("Machine %s needs rollout: %s", machine.Name, strings.Join(rolloutReasons, ",")), true, nil
90100
}
91101

92-
return "", false
102+
return "", false, nil
93103
}
94104

95105
// matchesTemplateClonedFrom checks if a Machine has a corresponding infrastructure machine that
@@ -130,50 +140,58 @@ func matchesTemplateClonedFrom(infraConfigs map[string]*unstructured.Unstructure
130140
// matchesKubeadmBootstrapConfig checks if machine's KubeadmConfigSpec is equivalent with KCP's KubeadmConfigSpec.
131141
// Note: Differences to the labels and annotations on the KubeadmConfig are not considered for matching
132142
// criteria, because changes to labels and annotations are propagated in-place to KubeadmConfig.
133-
func matchesKubeadmBootstrapConfig(machineConfigs map[string]*bootstrapv1.KubeadmConfig, kcp *controlplanev1.KubeadmControlPlane, machine *clusterv1.Machine) (string, bool) {
143+
func matchesKubeadmBootstrapConfig(machineConfigs map[string]*bootstrapv1.KubeadmConfig, kcp *controlplanev1.KubeadmControlPlane, machine *clusterv1.Machine) (string, bool, error) {
134144
if machine == nil {
135-
return "Machine KubeadmConfig cannot be compared: Machine is nil", false
145+
return "Machine KubeadmConfig cannot be compared: Machine is nil", false, nil
136146
}
137147

138148
// Check if KCP and machine ClusterConfiguration matches, if not return
139-
if !matchClusterConfiguration(kcp, machine) {
140-
return "Machine ClusterConfiguration is outdated", false
149+
match, diff, err := matchClusterConfiguration(kcp, machine)
150+
if err != nil {
151+
return "", false, errors.Wrapf(err, "failed to match KubeadmConfig")
152+
}
153+
if !match {
154+
return fmt.Sprintf("Machine KubeadmConfig ClusterConfiguration is outdated: diff: %s", diff), false, nil
141155
}
142156

143157
bootstrapRef := machine.Spec.Bootstrap.ConfigRef
144158
if bootstrapRef == nil {
145159
// Missing bootstrap reference should not be considered as unmatching.
146160
// This is a safety precaution to avoid selecting machines that are broken, which in the future should be remediated separately.
147-
return "", true
161+
return "", true, nil
148162
}
149163

150164
machineConfig, found := machineConfigs[machine.Name]
151165
if !found {
152166
// Return true here because failing to get KubeadmConfig should not be considered as unmatching.
153167
// This is a safety precaution to avoid rolling out machines if the client or the api-server is misbehaving.
154-
return "", true
168+
return "", true, nil
155169
}
156170

157171
// Check if KCP and machine InitConfiguration or JoinConfiguration matches
158172
// NOTE: only one between init configuration and join configuration is set on a machine, depending
159173
// on the fact that the machine was the initial control plane node or a joining control plane node.
160-
if !matchInitOrJoinConfiguration(machineConfig, kcp) {
161-
return "Machine InitConfiguration or JoinConfiguration are outdated", false
174+
match, diff, err = matchInitOrJoinConfiguration(machineConfig, kcp)
175+
if err != nil {
176+
return "", false, errors.Wrapf(err, "failed to match KubeadmConfig")
177+
}
178+
if !match {
179+
return fmt.Sprintf("Machine KubeadmConfig InitConfiguration or JoinConfiguration are outdated: diff: %s", diff), false, nil
162180
}
163181

164-
return "", true
182+
return "", true, nil
165183
}
166184

167185
// matchClusterConfiguration verifies if KCP and machine ClusterConfiguration matches.
168186
// NOTE: Machines that have KubeadmClusterConfigurationAnnotation will have to match with KCP ClusterConfiguration.
169187
// If the annotation is not present (machine is either old or adopted), we won't roll out on any possible changes
170188
// made in KCP's ClusterConfiguration given that we don't have enough information to make a decision.
171189
// Users should use KCP.Spec.RolloutAfter field to force a rollout in this case.
172-
func matchClusterConfiguration(kcp *controlplanev1.KubeadmControlPlane, machine *clusterv1.Machine) bool {
190+
func matchClusterConfiguration(kcp *controlplanev1.KubeadmControlPlane, machine *clusterv1.Machine) (bool, string, error) {
173191
machineClusterConfigStr, ok := machine.GetAnnotations()[controlplanev1.KubeadmClusterConfigurationAnnotation]
174192
if !ok {
175193
// We don't have enough information to make a decision; don't' trigger a roll out.
176-
return true
194+
return true, "", nil
177195
}
178196

179197
machineClusterConfig := &bootstrapv1.ClusterConfiguration{}
@@ -182,7 +200,7 @@ func matchClusterConfiguration(kcp *controlplanev1.KubeadmControlPlane, machine
182200
// otherwise we won't be able to handle a nil ClusterConfiguration (that is serialized into "null").
183201
// See https://github.com/kubernetes-sigs/cluster-api/issues/3353.
184202
if err := json.Unmarshal([]byte(machineClusterConfigStr), &machineClusterConfig); err != nil {
185-
return false
203+
return false, "", nil //nolint:nilerr // Intentionally not returning the error here
186204
}
187205

188206
// If any of the compared values are nil, treat them the same as an empty ClusterConfiguration.
@@ -199,16 +217,20 @@ func matchClusterConfiguration(kcp *controlplanev1.KubeadmControlPlane, machine
199217
machineClusterConfig.DNS = kcpLocalClusterConfiguration.DNS
200218

201219
// Compare and return.
202-
return reflect.DeepEqual(machineClusterConfig, kcpLocalClusterConfiguration)
220+
match, diff, err := compare.Diff(machineClusterConfig, kcpLocalClusterConfiguration)
221+
if err != nil {
222+
return false, "", errors.Wrapf(err, "failed to match ClusterConfiguration")
223+
}
224+
return match, diff, nil
203225
}
204226

205227
// matchInitOrJoinConfiguration verifies if KCP and machine InitConfiguration or JoinConfiguration matches.
206228
// NOTE: By extension this method takes care of detecting changes in other fields of the KubeadmConfig configuration (e.g. Files, Mounts etc.)
207-
func matchInitOrJoinConfiguration(machineConfig *bootstrapv1.KubeadmConfig, kcp *controlplanev1.KubeadmControlPlane) bool {
229+
func matchInitOrJoinConfiguration(machineConfig *bootstrapv1.KubeadmConfig, kcp *controlplanev1.KubeadmControlPlane) (bool, string, error) {
208230
if machineConfig == nil {
209231
// Return true here because failing to get KubeadmConfig should not be considered as unmatching.
210232
// This is a safety precaution to avoid rolling out machines if the client or the api-server is misbehaving.
211-
return true
233+
return true, "", nil
212234
}
213235

214236
// takes the KubeadmConfigSpec from KCP and applies the transformations required
@@ -225,7 +247,11 @@ func matchInitOrJoinConfiguration(machineConfig *bootstrapv1.KubeadmConfig, kcp
225247
// cleanups all the fields that are not relevant for the comparison.
226248
cleanupConfigFields(kcpConfig, machineConfig)
227249

228-
return reflect.DeepEqual(&machineConfig.Spec, kcpConfig)
250+
match, diff, err := compare.Diff(&machineConfig.Spec, kcpConfig)
251+
if err != nil {
252+
return false, "", errors.Wrapf(err, "failed to match InitConfiguration or JoinConfiguration")
253+
}
254+
return match, diff, nil
229255
}
230256

231257
// getAdjustedKcpConfig takes the KubeadmConfigSpec from KCP and applies the transformations required

0 commit comments

Comments
 (0)