
Commit 0320e57

Add blocking end-to-end tests for lifecycle hooks
Signed-off-by: killianmuldoon <[email protected]>
1 parent 23cd3fe commit 0320e57

4 files changed: 268 additions & 59 deletions


test/e2e/cluster_upgrade_runtimesdk.go

Lines changed: 206 additions & 22 deletions
@@ -29,15 +29,22 @@ import (
 	"github.com/pkg/errors"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/utils/pointer"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
 	runtimev1 "sigs.k8s.io/cluster-api/exp/runtime/api/v1alpha1"
+	"sigs.k8s.io/cluster-api/test/e2e/internal/log"
 	"sigs.k8s.io/cluster-api/test/framework"
 	"sigs.k8s.io/cluster-api/test/framework/clusterctl"
 	"sigs.k8s.io/cluster-api/util"
+	"sigs.k8s.io/cluster-api/util/conditions"
 )
 
+var hookFailedMessage = "hook failed"
+
 // clusterUpgradeWithRuntimeSDKSpecInput is the input for clusterUpgradeWithRuntimeSDKSpec.
 type clusterUpgradeWithRuntimeSDKSpecInput struct {
 	E2EConfig *clusterctl.E2EConfig
@@ -113,7 +120,7 @@ func clusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() cl
 			workerMachineCount = *input.WorkerMachineCount
 		}
 
-		// Setup a Namespace where to host objects for this spec and create a watcher for the Namespace events.
+		// Set up a Namespace to host objects for this spec and create a watcher for the Namespace events.
 		namespace, cancelWatches = setupSpecNamespace(ctx, specName, input.BootstrapClusterProxy, input.ArtifactFolder)
 		clusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult)
 	})
@@ -156,6 +163,12 @@
 				ControlPlaneMachineCount: pointer.Int64Ptr(controlPlaneMachineCount),
 				WorkerMachineCount:       pointer.Int64Ptr(workerMachineCount),
 			},
+			PreWaitForCluster: func() {
+				beforeClusterCreateTestHandler(ctx,
+					input.BootstrapClusterProxy.GetClient(),
+					namespace.Name, clusterName,
+					input.E2EConfig.GetIntervals(specName, "wait-cluster"))
+			},
 			WaitForClusterIntervals:      input.E2EConfig.GetIntervals(specName, "wait-cluster"),
 			WaitForControlPlaneIntervals: input.E2EConfig.GetIntervals(specName, "wait-control-plane"),
 			WaitForMachineDeployments:    input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
@@ -176,6 +189,21 @@ func clusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() cl
 			WaitForKubeProxyUpgrade:   input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"),
 			WaitForDNSUpgrade:         input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"),
 			WaitForEtcdUpgrade:        input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"),
+			PreWaitForControlPlaneToBeUpgraded: func() {
+				beforeClusterUpgradeTestHandler(ctx,
+					input.BootstrapClusterProxy.GetClient(),
+					namespace.Name,
+					clusterName,
+					input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"))
+			},
+			PreWaitForMachineDeploymentToBeUpgraded: func() {
+				afterControlPlaneUpgradeTestHandler(ctx,
+					input.BootstrapClusterProxy.GetClient(),
+					namespace.Name,
+					clusterName,
+					input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo),
+					input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"))
+			},
 		})
 
 		// Only attempt to upgrade MachinePools if they were provided in the template.
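The Pre* fields wired in above are plain func() callbacks that the test framework invokes just before it starts waiting for the corresponding phase to complete; this is what lets the handlers added further down observe the Cluster while the hook is still blocking. A minimal sketch of that extension-point pattern, under hypothetical names (this is not the actual cluster-api test framework API):

```go
// Illustrative sketch of a "pre-wait callback" extension point; the names
// upgradeInput and upgradeAndWait are hypothetical, not the framework API.
package main

import "fmt"

type upgradeInput struct {
	// Invoked after the upgrade is applied, before waiting for it to finish.
	PreWaitForControlPlaneToBeUpgraded func()
}

func upgradeAndWait(input upgradeInput) {
	fmt.Println("upgrade applied to the Cluster topology")
	if input.PreWaitForControlPlaneToBeUpgraded != nil {
		// The e2e test blocks here until the lifecycle hook stops blocking.
		input.PreWaitForControlPlaneToBeUpgraded()
	}
	fmt.Println("waiting for the control plane to reach the new version")
}

func main() {
	upgradeAndWait(upgradeInput{
		PreWaitForControlPlaneToBeUpgraded: func() {
			fmt.Println("assert the hook blocks, then unblock it")
		},
	})
}
```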
@@ -201,13 +229,13 @@ func clusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() cl
 		})
 
 		By("Checking all lifecycle hooks have been called")
-		// Assert that each hook passed to this function is marked as "true" in the response configmap
-		err = checkLifecycleHooks(ctx, input.BootstrapClusterProxy.GetClient(), namespace.Name, clusterName, map[string]string{
-			"BeforeClusterCreate":          "",
-			"BeforeClusterUpgrade":         "",
-			"AfterControlPlaneInitialized": "",
-			"AfterControlPlaneUpgrade":     "",
-			"AfterClusterUpgrade":          "",
+		// Assert that each hook has been called and returned "Success" during the test.
+		err = checkLifecycleHookResponses(ctx, input.BootstrapClusterProxy.GetClient(), namespace.Name, clusterName, map[string]string{
+			"BeforeClusterCreate":          "Success",
+			"BeforeClusterUpgrade":         "Success",
+			"AfterControlPlaneInitialized": "Success",
+			"AfterControlPlaneUpgrade":     "Success",
+			"AfterClusterUpgrade":          "Success",
 		})
 		Expect(err).ToNot(HaveOccurred(), "Lifecycle hook calls were not as expected")
 
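The assertion above relies on the test extension writing a `<hookName>-actualResponseStatus` key into the `<clusterName>-hookresponses` ConfigMap each time it serves a hook; the key naming follows checkLifecycleHookResponses in the hunk below. As a sketch, the recorded data after a fully successful run would look roughly like this:

```go
package main

import "fmt"

func main() {
	// Illustrative only: the ConfigMap data shape the assertion above expects
	// once every hook has been called and unblocked.
	recorded := map[string]string{
		"BeforeClusterCreate-actualResponseStatus":          "Success",
		"BeforeClusterUpgrade-actualResponseStatus":         "Success",
		"AfterControlPlaneInitialized-actualResponseStatus": "Success",
		"AfterControlPlaneUpgrade-actualResponseStatus":     "Success",
		"AfterClusterUpgrade-actualResponseStatus":          "Success",
	}
	fmt.Println(len(recorded), "hook responses recorded")
}
```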
@@ -266,26 +294,182 @@ func responsesConfigMap(name string, namespace *corev1.Namespace) *corev1.ConfigMap
 			Name:      fmt.Sprintf("%s-hookresponses", name),
 			Namespace: namespace.Name,
 		},
-		// Every response contain only Status:Success. The test checks whether each handler has been called at least once.
+		// Set the initial preloadedResponses for each of the tested hooks.
 		Data: map[string]string{
-			"BeforeClusterCreate-response":          `{"Status": "Success"}`,
-			"BeforeClusterUpgrade-response":         `{"Status": "Success"}`,
-			"AfterControlPlaneInitialized-response": `{"Status": "Success"}`,
-			"AfterControlPlaneUpgrade-response":     `{"Status": "Success"}`,
-			"AfterClusterUpgrade-response":          `{"Status": "Success"}`,
+			// Blocking hooks are set to Status:Failure initially. These will be changed during the test.
+			"BeforeClusterCreate-preloadedResponse":      fmt.Sprintf(`{"Status": "Failure", "Message": %q}`, hookFailedMessage),
+			"BeforeClusterUpgrade-preloadedResponse":     fmt.Sprintf(`{"Status": "Failure", "Message": %q}`, hookFailedMessage),
+			"AfterControlPlaneUpgrade-preloadedResponse": fmt.Sprintf(`{"Status": "Failure", "Message": %q}`, hookFailedMessage),
+
+			// Non-blocking hooks are set to Status:Success.
+			"AfterControlPlaneInitialized-preloadedResponse": `{"Status": "Success"}`,
+			"AfterClusterUpgrade-preloadedResponse":          `{"Status": "Success"}`,
 		},
 	}
 }
 
-func checkLifecycleHooks(ctx context.Context, c client.Client, namespace string, clusterName string, hooks map[string]string) error {
-	configMap := &corev1.ConfigMap{}
-	configMapName := clusterName + "-hookresponses"
-	err := c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: configMapName}, configMap)
-	Expect(err).ToNot(HaveOccurred(), "Failed to get the hook response configmap")
-	for hook := range hooks {
-		if _, ok := configMap.Data[hook+"-called"]; !ok {
-			return errors.Errorf("hook %s call not recorded in configMap %s/%s", hook, namespace, configMapName)
+// Check that each hook in expectedHookResponses has been called at least once by checking if its actualResponseStatus is in the hook response configmap.
+// If a hook has a non-empty expected value, check that the value matches the one recorded in the hook response configmap.
+func checkLifecycleHookResponses(ctx context.Context, c client.Client, namespace string, clusterName string, expectedHookResponses map[string]string) error {
+	responseData := getLifecycleHookResponsesFromConfigMap(ctx, c, namespace, clusterName)
+	for hookName, expectedResponse := range expectedHookResponses {
+		actualResponse, ok := responseData[hookName+"-actualResponseStatus"]
+		if !ok {
+			return errors.Errorf("hook %s call not recorded in configMap %s/%s", hookName, namespace, clusterName+"-hookresponses")
+		}
+		if expectedResponse != "" && expectedResponse != actualResponse {
+			return errors.Errorf("hook %s was expected to return %s in configMap, got %s", hookName, expectedResponse, actualResponse)
+		}
+	}
+	return nil
+}
+
+// Check that each hook in expectedHooks has been called at least once by checking if its actualResponseStatus is in the hook response configmap.
+func checkLifecycleHooksCalledAtLeastOnce(ctx context.Context, c client.Client, namespace string, clusterName string, expectedHooks []string) error {
+	responseData := getLifecycleHookResponsesFromConfigMap(ctx, c, namespace, clusterName)
+	for _, hookName := range expectedHooks {
+		if _, ok := responseData[hookName+"-actualResponseStatus"]; !ok {
+			return errors.Errorf("hook %s call not recorded in configMap %s/%s", hookName, namespace, clusterName+"-hookresponses")
 		}
 	}
 	return nil
 }
+
+func getLifecycleHookResponsesFromConfigMap(ctx context.Context, c client.Client, namespace string, clusterName string) map[string]string {
+	configMap := &corev1.ConfigMap{}
+	configMapName := clusterName + "-hookresponses"
+	Eventually(func() error {
+		return c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: configMapName}, configMap)
+	}).Should(Succeed(), "Failed to get the hook response configmap")
+	return configMap.Data
+}
+
+// beforeClusterCreateTestHandler provides a blockingCondition function which returns false once the Cluster has entered ClusterPhaseProvisioned.
+func beforeClusterCreateTestHandler(ctx context.Context, c client.Client, namespace, clusterName string, intervals []interface{}) {
+	log.Logf("Blocking with BeforeClusterCreate hook")
+	hookName := "BeforeClusterCreate"
+	runtimeHookTestHandler(ctx, c, namespace, clusterName, hookName, func() bool {
+		blocked := true
+		// This hook should block the Cluster from entering the "Provisioned" state.
+		cluster := &clusterv1.Cluster{}
+		Eventually(func() error {
+			return c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, cluster)
+		}).Should(Succeed())
+
+		// Check if the TopologyReconciled condition message contains both the hook name and hookFailedMessage.
+		if !clusterConditionShowsHookFailed(cluster, hookName) {
+			blocked = false
+		}
+		if cluster.Status.Phase == string(clusterv1.ClusterPhaseProvisioned) {
+			blocked = false
+		}
+		return blocked
+	}, intervals)
+}
+
+// beforeClusterUpgradeTestHandler provides a blockingCondition function which returns false once the Cluster has
+// controlplanev1.RollingUpdateInProgressReason in its ReadyCondition, i.e. once the upgrade has started rolling out.
+func beforeClusterUpgradeTestHandler(ctx context.Context, c client.Client, namespace, clusterName string, intervals []interface{}) {
+	log.Logf("Blocking with BeforeClusterUpgrade hook")
+	hookName := "BeforeClusterUpgrade"
+	runtimeHookTestHandler(ctx, c, namespace, clusterName, hookName, func() bool {
+		var blocked = true
+
+		cluster := &clusterv1.Cluster{}
+		Eventually(func() error {
+			return c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, cluster)
+		}).Should(Succeed())
+
+		// Check if the TopologyReconciled condition message contains both the hook name and hookFailedMessage.
+		if !clusterConditionShowsHookFailed(cluster, hookName) {
+			blocked = false
+		}
+		// Check if the Cluster is showing the RollingUpdateInProgress condition reason. If it is, the upgrade process is unblocked.
+		if conditions.IsFalse(cluster, clusterv1.ReadyCondition) &&
+			conditions.GetReason(cluster, clusterv1.ReadyCondition) == controlplanev1.RollingUpdateInProgressReason {
+			blocked = false
+		}
+		return blocked
+	}, intervals)
+}
+
+// afterControlPlaneUpgradeTestHandler provides a blockingCondition function which returns false once any MachineDeployment in the Cluster
+// has upgraded to the target Kubernetes version.
+func afterControlPlaneUpgradeTestHandler(ctx context.Context, c client.Client, namespace, clusterName, version string, intervals []interface{}) {
+	log.Logf("Blocking with AfterControlPlaneUpgrade hook")
+	hookName := "AfterControlPlaneUpgrade"
+	runtimeHookTestHandler(ctx, c, namespace, clusterName, hookName, func() bool {
+		var blocked = true
+		cluster := &clusterv1.Cluster{}
+		Eventually(func() error {
+			return c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, cluster)
+		}).Should(Succeed())
+
+		// Check if the TopologyReconciled condition message contains both the hook name and hookFailedMessage.
+		if !clusterConditionShowsHookFailed(cluster, hookName) {
+			blocked = false
+		}
+
+		mds := &clusterv1.MachineDeploymentList{}
+		Eventually(func() error {
+			return c.List(ctx, mds, client.MatchingLabels{
+				clusterv1.ClusterLabelName:          clusterName,
+				clusterv1.ClusterTopologyOwnedLabel: "",
+			})
+		}).Should(Succeed())
+
+		// If any of the MachineDeployments have the target Kubernetes version, the hook is unblocked.
+		for _, md := range mds.Items {
+			if *md.Spec.Template.Spec.Version == version {
+				blocked = false
+			}
+		}
+		return blocked
+	}, intervals)
+}
+
+// runtimeHookTestHandler runs a series of checks in sequence to verify that the runtime hook passed to it succeeds:
+// 1) Check that the hook has been called at least once.
+// 2) Check that the hook's blockingCondition is consistently true.
+//    - After this check the function sets the hook's response to be non-blocking.
+// 3) Check that the hook's blockingCondition becomes false.
+// Note: runtimeHookTestHandler assumes that the hook passed to it is currently returning a blocking response.
+// Updating the response to be non-blocking happens inline in the function.
+func runtimeHookTestHandler(ctx context.Context, c client.Client, namespace, clusterName, hookName string, blockingCondition func() bool, intervals []interface{}) {
+	// First check that the LifecycleHook has been called at least once.
+	Eventually(func() error {
+		return checkLifecycleHooksCalledAtLeastOnce(ctx, c, namespace, clusterName, []string{hookName})
+	}, 60*time.Second).Should(Succeed(), "%s has not been called", hookName)
+
+	// blockingCondition should consistently be true as the Runtime hook is returning "Failure".
+	Consistently(func() bool {
+		return blockingCondition()
+	}, 30*time.Second).Should(BeTrue(),
+		fmt.Sprintf("Cluster Topology reconciliation continued unexpectedly: hook %s not blocking", hookName))
+
+	// Patch the ConfigMap to set the hook response to "Success".
+	Byf("Setting %s response to Status:Success to unblock the reconciliation", hookName)
+
+	configMap := &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: clusterName + "-hookresponses", Namespace: namespace}}
+	Eventually(func() error {
+		return c.Get(ctx, util.ObjectKey(configMap), configMap)
+	}).Should(Succeed())
+	patch := client.RawPatch(types.MergePatchType,
+		[]byte(fmt.Sprintf(`{"data":{"%s-preloadedResponse":%s}}`, hookName, "\"{\\\"Status\\\": \\\"Success\\\"}\"")))
+	Eventually(func() error {
+		return c.Patch(ctx, configMap, patch)
+	}).Should(Succeed())
+
+	// Expect the hook to pass, i.e. the blockingCondition to return false before the timeout ends.
+	Eventually(func() bool {
+		return blockingCondition()
+	}, intervals...).Should(BeFalse(),
+		fmt.Sprintf("ClusterTopology reconcile did not unblock after updating hook response: hook %s still blocking", hookName))
+}
+
+// clusterConditionShowsHookFailed checks if the TopologyReconciled condition message contains both the hook name and hookFailedMessage.
+func clusterConditionShowsHookFailed(cluster *clusterv1.Cluster, hookName string) bool {
+	return conditions.GetReason(cluster, clusterv1.TopologyReconciledCondition) == clusterv1.TopologyReconcileFailedReason &&
+		strings.Contains(conditions.GetMessage(cluster, clusterv1.TopologyReconciledCondition), hookFailedMessage) &&
+		strings.Contains(conditions.GetMessage(cluster, clusterv1.TopologyReconciledCondition), hookName)
+}
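runtimeHookTestHandler encodes a reusable pattern for testing any blocking lifecycle hook: prove the hook was called, prove it blocks for a sustained window, flip the response, then prove reconciliation resumes. A stripped-down Gomega skeleton of that sequence follows; the called/blocked/unblock callbacks are hypothetical stand-ins for the checks in the diff above:

```go
package e2e

import (
	"time"

	. "github.com/onsi/gomega"
)

// testBlockingHook is an illustrative skeleton of the Eventually/Consistently/
// Eventually sequence used by runtimeHookTestHandler above.
func testBlockingHook(called func() error, blocked func() bool, unblock func() error) {
	// 1) The hook server must record at least one call.
	Eventually(called, 60*time.Second).Should(Succeed())
	// 2) While the preloaded response is "Failure", reconciliation must stay blocked.
	Consistently(blocked, 30*time.Second).Should(BeTrue())
	// 3) Flip the preloaded response to "Success"...
	Expect(unblock()).To(Succeed())
	// 4) ...and reconciliation must unblock before the timeout.
	Eventually(blocked, 5*time.Minute).Should(BeFalse())
}
```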

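One readability note on the RawPatch in runtimeHookTestHandler: the hand-escaped literal `"\"{\\\"Status\\\": \\\"Success\\\"}\""` encodes a JSON string whose value is itself a JSON document. An equivalent way to build the same merge-patch payload, sketched here, is to let json.Marshal generate the escaping:

```go
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Marshal the inner response document as a JSON string value so the
	// escaping is generated rather than hand-written.
	inner, err := json.Marshal(`{"Status": "Success"}`)
	if err != nil {
		panic(err)
	}
	patch := fmt.Sprintf(`{"data":{"%s-preloadedResponse":%s}}`, "BeforeClusterUpgrade", inner)
	fmt.Println(patch)
	// Prints: {"data":{"BeforeClusterUpgrade-preloadedResponse":"{\"Status\": \"Success\"}"}}
}
```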
Comments (0)