From 5a3fe81ae06c1ece779c3cc5023af6e12e05f3f8 Mon Sep 17 00:00:00 2001 From: Christian Schlotter Date: Thu, 22 Aug 2024 08:16:43 +0200 Subject: [PATCH 01/10] test: allow deploying autoscaler to management cluster --- test/e2e/autoscaler.go | 9 +++++++++ test/framework/autoscaler_helpers.go | 27 ++++++++++++++++++--------- test/framework/deployment_helpers.go | 2 +- 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/test/e2e/autoscaler.go b/test/e2e/autoscaler.go index 99f9a83c24da..32ffd9ec7910 100644 --- a/test/e2e/autoscaler.go +++ b/test/e2e/autoscaler.go @@ -64,9 +64,17 @@ type AutoscalerSpecInput struct { InfrastructureMachinePoolKind string AutoscalerVersion string + // InstallOnManagementCluster steers if the autoscaler should get installed to the management or workload cluster. + // Depending on the CI environments, there may be no connectivity from the workload to the management cluster. + InstallOnManagementCluster bool + // Allows to inject a function to be run after test namespace is created. // If not specified, this is a no-op. PostNamespaceCreated func(managementClusterProxy framework.ClusterProxy, workloadClusterNamespace string) + + // Allows to inject a function to be run after autoscaling test finished. + // If not specified this is a no-op. + PostAutoscalingTest func(managementClusterProxy framework.ClusterProxy, namespace, clusterName string) } // AutoscalerSpec implements a test for the autoscaler, and more specifically for the autoscaler @@ -173,6 +181,7 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput) WorkloadClusterProxy: workloadClusterProxy, Cluster: clusterResources.Cluster, AutoscalerVersion: input.AutoscalerVersion, + AutoscalerOnManagementCluster: input.InstallOnManagementCluster, }, input.E2EConfig.GetIntervals(specName, "wait-controllers")...) 
By("Creating workload that forces the system to scale up") diff --git a/test/framework/autoscaler_helpers.go b/test/framework/autoscaler_helpers.go index 4259b875281f..b69a90744eca 100644 --- a/test/framework/autoscaler_helpers.go +++ b/test/framework/autoscaler_helpers.go @@ -74,6 +74,8 @@ type ApplyAutoscalerToWorkloadClusterInput struct { ManagementClusterProxy ClusterProxy Cluster *clusterv1.Cluster WorkloadClusterProxy ClusterProxy + + AutoscalerOnManagementCluster bool } // ApplyAutoscalerToWorkloadCluster installs autoscaler on the workload cluster. @@ -107,9 +109,8 @@ func ApplyAutoscalerToWorkloadCluster(ctx context.Context, input ApplyAutoscaler }, }) Expect(err).ToNot(HaveOccurred(), "failed to parse %s", workloadYamlTemplate) - Expect(input.WorkloadClusterProxy.CreateOrUpdate(ctx, workloadYaml)).To(Succeed(), "failed to apply %s", workloadYamlTemplate) - By("Wait for the autoscaler deployment and collect logs") + autoscalerProxy := input.WorkloadClusterProxy deployment := &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: "cluster-autoscaler", @@ -117,19 +118,27 @@ func ApplyAutoscalerToWorkloadCluster(ctx context.Context, input ApplyAutoscaler }, } - Expect(input.WorkloadClusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(deployment), deployment)).To(Succeed(), fmt.Sprintf("failed to get Deployment %s", klog.KObj(deployment))) + if input.AutoscalerOnManagementCluster { + autoscalerProxy = input.ManagementClusterProxy + deployment.Namespace = input.Cluster.Namespace + } + + Expect(autoscalerProxy.CreateOrUpdate(ctx, workloadYaml)).To(Succeed(), "failed to apply %s", workloadYamlTemplate) + + By("Wait for the autoscaler deployment and collect logs") + Expect(autoscalerProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(deployment), deployment)).To(Succeed(), fmt.Sprintf("failed to get Deployment %s", klog.KObj(deployment))) WaitForDeploymentsAvailable(ctx, WaitForDeploymentsAvailableInput{ - Getter: 
input.WorkloadClusterProxy.GetClient(), + Getter: autoscalerProxy.GetClient(), Deployment: deployment, }, intervals...) // Start streaming logs from the autoscaler deployment. WatchDeploymentLogsByName(ctx, WatchDeploymentLogsByNameInput{ - GetLister: input.WorkloadClusterProxy.GetClient(), - Cache: input.WorkloadClusterProxy.GetCache(ctx), - ClientSet: input.WorkloadClusterProxy.GetClientSet(), + GetLister: autoscalerProxy.GetClient(), + Cache: autoscalerProxy.GetCache(ctx), + ClientSet: autoscalerProxy.GetClientSet(), Deployment: deployment, - LogPath: filepath.Join(input.ArtifactFolder, "clusters", input.WorkloadClusterProxy.GetName(), "logs", deployment.GetNamespace()), + LogPath: filepath.Join(input.ArtifactFolder, "clusters", autoscalerProxy.GetName(), "logs", deployment.GetNamespace()), }) } @@ -143,7 +152,7 @@ type AddScaleUpDeploymentAndWaitInput struct { func AddScaleUpDeploymentAndWait(ctx context.Context, input AddScaleUpDeploymentAndWaitInput, intervals ...interface{}) { By("Create a scale up deployment with resource requests to force scale up") if input.ContainerImage == "" { - input.ContainerImage = "registry.k8s.io/pause" + input.ContainerImage = "registry.k8s.io/pause:3.10" } // gets the node size diff --git a/test/framework/deployment_helpers.go b/test/framework/deployment_helpers.go index 5e4a0f946c63..a2b0c8c141a8 100644 --- a/test/framework/deployment_helpers.go +++ b/test/framework/deployment_helpers.go @@ -528,7 +528,7 @@ func DeployUnevictablePod(ctx context.Context, input DeployUnevictablePodInput) Containers: []corev1.Container{ { Name: "web", - Image: "registry.k8s.io/pause:latest", + Image: "registry.k8s.io/pause:3.10", }, }, }, From 2ef8498aaf8ae36e3924c5e0b1c417539ac8df30 Mon Sep 17 00:00:00 2001 From: Christian Schlotter Date: Thu, 22 Aug 2024 09:49:52 +0200 Subject: [PATCH 02/10] test: make machine pools optional in autoscaler test --- test/e2e/autoscaler.go | 151 ++++++++++++++++++++++------------------- 1 file changed, 81 
insertions(+), 70 deletions(-) diff --git a/test/e2e/autoscaler.go b/test/e2e/autoscaler.go index 32ffd9ec7910..045707ffddc7 100644 --- a/test/e2e/autoscaler.go +++ b/test/e2e/autoscaler.go @@ -123,6 +123,8 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput) infrastructureProvider = *input.InfrastructureProvider } + hasMachinePool := input.InfrastructureMachinePoolTemplateKind != "" + clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{ ClusterProxy: input.BootstrapClusterProxy, ConfigCluster: clusterctl.ConfigClusterInput{ @@ -145,6 +147,7 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput) }, clusterResources) Expect(clusterResources.Cluster.Spec.Topology).NotTo(BeNil(), "Autoscaler test expected a Classy Cluster") + // Ensure the MachineDeploymentTopology has the autoscaler annotations. mdTopology := clusterResources.Cluster.Spec.Topology.Workers.MachineDeployments[0] Expect(mdTopology.Metadata.Annotations).NotTo(BeNil(), "MachineDeployment is expected to have autoscaler annotations") @@ -153,21 +156,27 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput) mdNodeGroupMaxSize, ok := mdTopology.Metadata.Annotations[clusterv1.AutoscalerMaxSizeAnnotation] Expect(ok).To(BeTrue(), "MachineDeploymentTopology %s does not have the %q autoscaler annotation", mdTopology.Name, clusterv1.AutoscalerMaxSizeAnnotation) - // Ensure the MachinePoolTopology does NOT have the autoscaler annotations so we can test MachineDeployments first. 
- mpTopology := clusterResources.Cluster.Spec.Topology.Workers.MachinePools[0] - if mpTopology.Metadata.Annotations != nil { - _, ok = mpTopology.Metadata.Annotations[clusterv1.AutoscalerMinSizeAnnotation] - Expect(ok).To(BeFalse(), "MachinePoolTopology %s does have the %q autoscaler annotation", mpTopology.Name, clusterv1.AutoscalerMinSizeAnnotation) - _, ok = mpTopology.Metadata.Annotations[clusterv1.AutoscalerMaxSizeAnnotation] - Expect(ok).To(BeFalse(), "MachinePoolTopology %s does have the %q autoscaler annotation", mpTopology.Name, clusterv1.AutoscalerMaxSizeAnnotation) + if hasMachinePool { + // Ensure the MachinePoolTopology does NOT have the autoscaler annotations so we can test MachineDeployments first. + mpTopology := clusterResources.Cluster.Spec.Topology.Workers.MachinePools[0] + if mpTopology.Metadata.Annotations != nil { + _, ok = mpTopology.Metadata.Annotations[clusterv1.AutoscalerMinSizeAnnotation] + Expect(ok).To(BeFalse(), "MachinePoolTopology %s does have the %q autoscaler annotation", mpTopology.Name, clusterv1.AutoscalerMinSizeAnnotation) + _, ok = mpTopology.Metadata.Annotations[clusterv1.AutoscalerMaxSizeAnnotation] + Expect(ok).To(BeFalse(), "MachinePoolTopology %s does have the %q autoscaler annotation", mpTopology.Name, clusterv1.AutoscalerMaxSizeAnnotation) + } } // Get a ClusterProxy so we can interact with the workload cluster workloadClusterProxy := input.BootstrapClusterProxy.GetWorkloadCluster(ctx, clusterResources.Cluster.Namespace, clusterResources.Cluster.Name) mdOriginalReplicas := *clusterResources.MachineDeployments[0].Spec.Replicas Expect(strconv.Itoa(int(mdOriginalReplicas))).To(Equal(mdNodeGroupMinSize), "MachineDeployment should have replicas as defined in %s", clusterv1.AutoscalerMinSizeAnnotation) - mpOriginalReplicas := *clusterResources.MachinePools[0].Spec.Replicas - Expect(int(mpOriginalReplicas)).To(Equal(1), "MachinePool should default to 1 replica via the MachinePool webhook") + + var mpOriginalReplicas int32 + if 
hasMachinePool { + mpOriginalReplicas = *clusterResources.MachinePools[0].Spec.Replicas + Expect(int(mpOriginalReplicas)).To(Equal(1), "MachinePool should default to 1 replica via the MachinePool webhook") + } By("Installing the autoscaler on the workload cluster") autoscalerWorkloadYAMLPath := input.E2EConfig.GetVariable(AutoscalerWorkloadYAMLPath) @@ -248,67 +257,69 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput) WaitForDelete: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"), }) - By("Enabling autoscaler for the MachinePool") - // Enable autoscaler on the MachinePool. - framework.EnableAutoscalerForMachinePoolTopologyAndWait(ctx, framework.EnableAutoscalerForMachinePoolTopologyAndWaitInput{ - ClusterProxy: input.BootstrapClusterProxy, - Cluster: clusterResources.Cluster, - NodeGroupMinSize: mpNodeGroupMinSize, - NodeGroupMaxSize: mpNodeGroupMaxSize, - WaitForAnnotationsToBeAdded: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"), - }) - - By("Creating workload that forces the system to scale up") - framework.AddScaleUpDeploymentAndWait(ctx, framework.AddScaleUpDeploymentAndWaitInput{ - ClusterProxy: workloadClusterProxy, - }, input.E2EConfig.GetIntervals(specName, "wait-autoscaler")...) 
- - By("Checking the MachinePool is scaled up") - mpScaledUpReplicas := mpOriginalReplicas + 1 - framework.AssertMachinePoolReplicas(ctx, framework.AssertMachinePoolReplicasInput{ - Getter: input.BootstrapClusterProxy.GetClient(), - MachinePool: clusterResources.MachinePools[0], - Replicas: mpScaledUpReplicas, - WaitForMachinePool: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"), - }) - - By("Disabling the autoscaler") - framework.DisableAutoscalerForMachinePoolTopologyAndWait(ctx, framework.DisableAutoscalerForMachinePoolTopologyAndWaitInput{ - ClusterProxy: input.BootstrapClusterProxy, - Cluster: clusterResources.Cluster, - WaitForAnnotationsToBeDropped: input.E2EConfig.GetIntervals(specName, "wait-controllers"), - }) - - By("Checking we can manually scale up the MachinePool") - // Scale up the MachinePool. Since autoscaler is disabled we should be able to do this. - mpExcessReplicas := mpScaledUpReplicas + 1 - framework.ScaleMachinePoolTopologyAndWait(ctx, framework.ScaleMachinePoolTopologyAndWaitInput{ - ClusterProxy: input.BootstrapClusterProxy, - Cluster: clusterResources.Cluster, - Replicas: mpExcessReplicas, - WaitForMachinePools: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), - Getter: input.BootstrapClusterProxy.GetClient(), - }) - - By("Checking enabling autoscaler will scale down the MachinePool to correct size") - // Enable autoscaler on the MachinePool. 
- framework.EnableAutoscalerForMachinePoolTopologyAndWait(ctx, framework.EnableAutoscalerForMachinePoolTopologyAndWaitInput{ - ClusterProxy: input.BootstrapClusterProxy, - Cluster: clusterResources.Cluster, - NodeGroupMinSize: mpNodeGroupMinSize, - NodeGroupMaxSize: mpNodeGroupMaxSize, - WaitForAnnotationsToBeAdded: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"), - }) - - By("Checking the MachinePool is scaled down") - // Since we scaled up the MachinePool manually and the workload has not changed auto scaler - // should detect that there are unneeded nodes and scale down the MachinePool. - framework.AssertMachinePoolReplicas(ctx, framework.AssertMachinePoolReplicasInput{ - Getter: input.BootstrapClusterProxy.GetClient(), - MachinePool: clusterResources.MachinePools[0], - Replicas: mpScaledUpReplicas, - WaitForMachinePool: input.E2EConfig.GetIntervals(specName, "wait-controllers"), - }) + if hasMachinePool { + By("Enabling autoscaler for the MachinePool") + // Enable autoscaler on the MachinePool. + framework.EnableAutoscalerForMachinePoolTopologyAndWait(ctx, framework.EnableAutoscalerForMachinePoolTopologyAndWaitInput{ + ClusterProxy: input.BootstrapClusterProxy, + Cluster: clusterResources.Cluster, + NodeGroupMinSize: mpNodeGroupMinSize, + NodeGroupMaxSize: mpNodeGroupMaxSize, + WaitForAnnotationsToBeAdded: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"), + }) + + By("Creating workload that forces the system to scale up") + framework.AddScaleUpDeploymentAndWait(ctx, framework.AddScaleUpDeploymentAndWaitInput{ + ClusterProxy: workloadClusterProxy, + }, input.E2EConfig.GetIntervals(specName, "wait-autoscaler")...) 
+ + By("Checking the MachinePool is scaled up") + mpScaledUpReplicas := mpOriginalReplicas + 1 + framework.AssertMachinePoolReplicas(ctx, framework.AssertMachinePoolReplicasInput{ + Getter: input.BootstrapClusterProxy.GetClient(), + MachinePool: clusterResources.MachinePools[0], + Replicas: mpScaledUpReplicas, + WaitForMachinePool: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"), + }) + + By("Disabling the autoscaler") + framework.DisableAutoscalerForMachinePoolTopologyAndWait(ctx, framework.DisableAutoscalerForMachinePoolTopologyAndWaitInput{ + ClusterProxy: input.BootstrapClusterProxy, + Cluster: clusterResources.Cluster, + WaitForAnnotationsToBeDropped: input.E2EConfig.GetIntervals(specName, "wait-controllers"), + }) + + By("Checking we can manually scale up the MachinePool") + // Scale up the MachinePool. Since autoscaler is disabled we should be able to do this. + mpExcessReplicas := mpScaledUpReplicas + 1 + framework.ScaleMachinePoolTopologyAndWait(ctx, framework.ScaleMachinePoolTopologyAndWaitInput{ + ClusterProxy: input.BootstrapClusterProxy, + Cluster: clusterResources.Cluster, + Replicas: mpExcessReplicas, + WaitForMachinePools: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), + Getter: input.BootstrapClusterProxy.GetClient(), + }) + + By("Checking enabling autoscaler will scale down the MachinePool to correct size") + // Enable autoscaler on the MachinePool. 
+ framework.EnableAutoscalerForMachinePoolTopologyAndWait(ctx, framework.EnableAutoscalerForMachinePoolTopologyAndWaitInput{ + ClusterProxy: input.BootstrapClusterProxy, + Cluster: clusterResources.Cluster, + NodeGroupMinSize: mpNodeGroupMinSize, + NodeGroupMaxSize: mpNodeGroupMaxSize, + WaitForAnnotationsToBeAdded: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"), + }) + + By("Checking the MachinePool is scaled down") + // Since we scaled up the MachinePool manually and the workload has not changed auto scaler + // should detect that there are unneeded nodes and scale down the MachinePool. + framework.AssertMachinePoolReplicas(ctx, framework.AssertMachinePoolReplicasInput{ + Getter: input.BootstrapClusterProxy.GetClient(), + MachinePool: clusterResources.MachinePools[0], + Replicas: mpScaledUpReplicas, + WaitForMachinePool: input.E2EConfig.GetIntervals(specName, "wait-controllers"), + }) + } By("PASSED!") }) From eb722027f7285439263338466c6df4150d8bffc8 Mon Sep 17 00:00:00 2001 From: Christian Schlotter Date: Thu, 22 Aug 2024 13:37:53 +0200 Subject: [PATCH 03/10] test: implement optional scale from/to zero tests for autoscale --- test/e2e/autoscaler.go | 105 ++++++++++++++++++++++++++- test/framework/autoscaler_helpers.go | 28 +++++++ 2 files changed, 129 insertions(+), 4 deletions(-) diff --git a/test/e2e/autoscaler.go b/test/e2e/autoscaler.go index 045707ffddc7..a3278eebf4c5 100644 --- a/test/e2e/autoscaler.go +++ b/test/e2e/autoscaler.go @@ -68,13 +68,12 @@ type AutoscalerSpecInput struct { // Depending on the CI environments, there may be no connectivity from the workload to the management cluster. InstallOnManagementCluster bool + // ScaleToAndFromZero enables tests to scale to and from zero. + ScaleToAndFromZero bool + // Allows to inject a function to be run after test namespace is created. // If not specified, this is a no-op. 
PostNamespaceCreated func(managementClusterProxy framework.ClusterProxy, workloadClusterNamespace string) - - // Allows to inject a function to be run after autoscaling test finished. - // If not specified this is a no-op. - PostAutoscalingTest func(managementClusterProxy framework.ClusterProxy, namespace, clusterName string) } // AutoscalerSpec implements a test for the autoscaler, and more specifically for the autoscaler @@ -244,6 +243,46 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput) WaitForMachineDeployment: input.E2EConfig.GetIntervals(specName, "wait-controllers"), }) + if input.ScaleToAndFromZero { + By("Enabling autoscaler for the MachineDeployment to zero") + // Enable autoscaler on the MachineDeployment. + framework.EnableAutoscalerForMachineDeploymentTopologyAndWait(ctx, framework.EnableAutoscalerForMachineDeploymentTopologyAndWaitInput{ + ClusterProxy: input.BootstrapClusterProxy, + Cluster: clusterResources.Cluster, + NodeGroupMinSize: "0", + NodeGroupMaxSize: mdNodeGroupMaxSize, + WaitForAnnotationsToBeAdded: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"), + }) + + By("Scaling the MachineDeployment scale up deployment to zero") + framework.ScaleScaleUpDeploymentAndWait(ctx, framework.ScaleScaleUpDeploymentAndWaitInput{ + ClusterProxy: workloadClusterProxy, + Replicas: mpOriginalReplicas + 0, + }, input.E2EConfig.GetIntervals(specName, "wait-autoscaler")...) 
+ + By("Checking the MachineDeployment finished scaling down to zero") + framework.AssertMachineDeploymentReplicas(ctx, framework.AssertMachineDeploymentReplicasInput{ + Getter: input.BootstrapClusterProxy.GetClient(), + MachineDeployment: clusterResources.MachineDeployments[0], + Replicas: 0, + WaitForMachineDeployment: input.E2EConfig.GetIntervals(specName, "wait-controllers"), + }) + + By("Scaling the MachineDeployment scale up deployment to 1") + framework.ScaleScaleUpDeploymentAndWait(ctx, framework.ScaleScaleUpDeploymentAndWaitInput{ + ClusterProxy: workloadClusterProxy, + Replicas: mpOriginalReplicas + 1, + }, input.E2EConfig.GetIntervals(specName, "wait-autoscaler")...) + + By("Checking the MachineDeployment finished scaling up") + framework.AssertMachineDeploymentReplicas(ctx, framework.AssertMachineDeploymentReplicasInput{ + Getter: input.BootstrapClusterProxy.GetClient(), + MachineDeployment: clusterResources.MachineDeployments[0], + Replicas: 1, + WaitForMachineDeployment: input.E2EConfig.GetIntervals(specName, "wait-controllers"), + }) + } + By("Disabling the autoscaler for MachineDeployments to test MachinePools") framework.DisableAutoscalerForMachineDeploymentTopologyAndWait(ctx, framework.DisableAutoscalerForMachineDeploymentTopologyAndWaitInput{ ClusterProxy: input.BootstrapClusterProxy, @@ -319,6 +358,64 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput) Replicas: mpScaledUpReplicas, WaitForMachinePool: input.E2EConfig.GetIntervals(specName, "wait-controllers"), }) + + if input.ScaleToAndFromZero { + By("Enabling autoscaler for the MachinePool to zero") + // Enable autoscaler on the MachinePool. 
+ framework.EnableAutoscalerForMachinePoolTopologyAndWait(ctx, framework.EnableAutoscalerForMachinePoolTopologyAndWaitInput{ + ClusterProxy: input.BootstrapClusterProxy, + Cluster: clusterResources.Cluster, + NodeGroupMinSize: "0", + NodeGroupMaxSize: mpNodeGroupMaxSize, + WaitForAnnotationsToBeAdded: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"), + }) + + // We can savely assume that mdReplicas is 1. + var mdReplicas int32 = 1 + + By("Scaling the MachinePool scale up deployment to 1") + framework.ScaleScaleUpDeploymentAndWait(ctx, framework.ScaleScaleUpDeploymentAndWaitInput{ + ClusterProxy: workloadClusterProxy, + // Set replicas to 1, because we still have 1 Machine from MachineDeployments. + Replicas: 1, + }, input.E2EConfig.GetIntervals(specName, "wait-autoscaler")...) + + By("Checking the MachinePool finished scaling down to zero") + framework.AssertMachinePoolReplicas(ctx, framework.AssertMachinePoolReplicasInput{ + Getter: input.BootstrapClusterProxy.GetClient(), + MachinePool: clusterResources.MachinePools[0], + Replicas: mdReplicas, + WaitForMachinePool: input.E2EConfig.GetIntervals(specName, "wait-controllers"), + }) + + By("Scaling the MachinePool scale up deployment to 2") + framework.ScaleScaleUpDeploymentAndWait(ctx, framework.ScaleScaleUpDeploymentAndWaitInput{ + ClusterProxy: workloadClusterProxy, + // Set replicas to 2, because we still have 1 Machine from MachineDeployments. + Replicas: mdReplicas + 1, + }, input.E2EConfig.GetIntervals(specName, "wait-autoscaler")...) 
+ + By("Checking the MachineDeployment finished scaling up") + framework.AssertMachinePoolReplicas(ctx, framework.AssertMachinePoolReplicasInput{ + Getter: input.BootstrapClusterProxy.GetClient(), + MachinePool: clusterResources.MachinePools[0], + Replicas: 1, + WaitForMachinePool: input.E2EConfig.GetIntervals(specName, "wait-controllers"), + }) + } + + By("Disabling the autoscaler for MachineDeployments to test MachinePools") + framework.DisableAutoscalerForMachinePoolTopologyAndWait(ctx, framework.DisableAutoscalerForMachinePoolTopologyAndWaitInput{ + ClusterProxy: input.BootstrapClusterProxy, + Cluster: clusterResources.Cluster, + WaitForAnnotationsToBeDropped: input.E2EConfig.GetIntervals(specName, "wait-controllers"), + }) + + By("Deleting the MachineDeployment scale up deployment") + framework.DeleteScaleUpDeploymentAndWait(ctx, framework.DeleteScaleUpDeploymentAndWaitInput{ + ClusterProxy: workloadClusterProxy, + WaitForDelete: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"), + }) } By("PASSED!") diff --git a/test/framework/autoscaler_helpers.go b/test/framework/autoscaler_helpers.go index b69a90744eca..0ad5e8397f2d 100644 --- a/test/framework/autoscaler_helpers.go +++ b/test/framework/autoscaler_helpers.go @@ -256,6 +256,34 @@ func DeleteScaleUpDeploymentAndWait(ctx context.Context, input DeleteScaleUpDepl }, input.WaitForDelete...).Should(Succeed()) } +// ScaleScaleUpDeploymentAndWaitInput is the input for ScaleScaleUpDeploymentAndWait. +type ScaleScaleUpDeploymentAndWaitInput struct { + ClusterProxy ClusterProxy + Name string + Replicas int32 +} + +// ScaleScaleUpDeploymentAndWait deletes the scale up deployment and waits for it to be deleted. 
+func ScaleScaleUpDeploymentAndWait(ctx context.Context, input ScaleScaleUpDeploymentAndWaitInput, intervals ...interface{}) { + By("Retrieving the scale up deployment") + deployment := &appsv1.Deployment{} + deploymentName := "scale-up" + if input.Name != "" { + deploymentName = input.Name + } + Expect(input.ClusterProxy.GetClient().Get(ctx, client.ObjectKey{Name: deploymentName, Namespace: metav1.NamespaceDefault}, deployment)).To(Succeed(), "failed to get the scale up deployment") + + By("Scaling the scale up deployment") + deployment.Spec.Replicas = &input.Replicas + Expect(input.ClusterProxy.GetClient().Update(ctx, deployment)).To(Succeed(), "failed to update the scale up deployment") + + By("Wait for the scale up deployment to become ready (this implies machines to be created)") + WaitForDeploymentsAvailable(ctx, WaitForDeploymentsAvailableInput{ + Getter: input.ClusterProxy.GetClient(), + Deployment: deployment, + }, intervals...) +} + type ProcessYAMLInput struct { Template []byte ClusterctlConfigPath string From ea41984eec1ae8ae779a8a3ab3fd8b7d3ab21893 Mon Sep 17 00:00:00 2001 From: Christian Schlotter Date: Thu, 22 Aug 2024 14:59:45 +0200 Subject: [PATCH 04/10] test: allow modification of apigroup for infrastructure --- test/e2e/autoscaler.go | 2 ++ test/framework/autoscaler_helpers.go | 11 ++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/test/e2e/autoscaler.go b/test/e2e/autoscaler.go index a3278eebf4c5..f4a79282afdd 100644 --- a/test/e2e/autoscaler.go +++ b/test/e2e/autoscaler.go @@ -62,6 +62,7 @@ type AutoscalerSpecInput struct { InfrastructureMachineTemplateKind string InfrastructureMachinePoolTemplateKind string InfrastructureMachinePoolKind string + InfrastructureAPIGroup string AutoscalerVersion string // InstallOnManagementCluster steers if the autoscaler should get installed to the management or workload cluster. 
@@ -184,6 +185,7 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput) InfrastructureMachineTemplateKind: input.InfrastructureMachineTemplateKind, InfrastructureMachinePoolTemplateKind: input.InfrastructureMachinePoolTemplateKind, InfrastructureMachinePoolKind: input.InfrastructureMachinePoolKind, + InfrastructureAPIGroup: input.InfrastructureAPIGroup, WorkloadYamlPath: autoscalerWorkloadYAMLPath, ManagementClusterProxy: input.BootstrapClusterProxy, WorkloadClusterProxy: workloadClusterProxy, diff --git a/test/framework/autoscaler_helpers.go b/test/framework/autoscaler_helpers.go index 0ad5e8397f2d..44bf5f51da3a 100644 --- a/test/framework/autoscaler_helpers.go +++ b/test/framework/autoscaler_helpers.go @@ -52,6 +52,7 @@ type ApplyAutoscalerToWorkloadClusterInput struct { InfrastructureMachineTemplateKind string InfrastructureMachinePoolTemplateKind string InfrastructureMachinePoolKind string + InfrastructureAPIGroup string // WorkloadYamlPath should point the yaml that will be applied on the workload cluster. // The YAML file should: // - Be creating the autoscaler deployment in the workload cluster @@ -90,11 +91,15 @@ func ApplyAutoscalerToWorkloadCluster(ctx context.Context, input ApplyAutoscaler workloadYamlTemplate, err := os.ReadFile(input.WorkloadYamlPath) Expect(err).ToNot(HaveOccurred(), "failed to load %s", workloadYamlTemplate) + if input.InfrastructureAPIGroup == "" { + input.InfrastructureAPIGroup = "infrastructure.cluster.x-k8s.io" + } + // Get a server address for the Management Cluster. // This address should be accessible from the workload cluster. serverAddr, mgtClusterCA := getServerAddrAndCA(ctx, input.ManagementClusterProxy) // Generate a token with the required permission that can be used by the autoscaler. 
- token := getAuthenticationTokenForAutoscaler(ctx, input.ManagementClusterProxy, input.Cluster.Namespace, input.Cluster.Name, input.InfrastructureMachineTemplateKind, input.InfrastructureMachinePoolTemplateKind, input.InfrastructureMachinePoolKind) + token := getAuthenticationTokenForAutoscaler(ctx, input.ManagementClusterProxy, input.Cluster.Namespace, input.Cluster.Name, input.InfrastructureAPIGroup, input.InfrastructureMachineTemplateKind, input.InfrastructureMachinePoolTemplateKind, input.InfrastructureMachinePoolKind) workloadYaml, err := ProcessYAML(&ProcessYAMLInput{ Template: workloadYamlTemplate, @@ -530,7 +535,7 @@ func EnableAutoscalerForMachinePoolTopologyAndWait(ctx context.Context, input En // getAuthenticationTokenForAutoscaler returns a bearer authenticationToken with minimal RBAC permissions that will be used // by the autoscaler running on the workload cluster to access the management cluster. -func getAuthenticationTokenForAutoscaler(ctx context.Context, managementClusterProxy ClusterProxy, namespace string, cluster string, infraMachineTemplateKind, infraMachinePoolTemplateKind, infraMachinePoolKind string) string { +func getAuthenticationTokenForAutoscaler(ctx context.Context, managementClusterProxy ClusterProxy, namespace string, cluster string, infraAPIGroup, infraMachineTemplateKind, infraMachinePoolTemplateKind, infraMachinePoolKind string) string { name := fmt.Sprintf("cluster-%s", cluster) sa := &corev1.ServiceAccount{ ObjectMeta: metav1.ObjectMeta{ @@ -553,7 +558,7 @@ func getAuthenticationTokenForAutoscaler(ctx context.Context, managementClusterP }, { Verbs: []string{"get", "list"}, - APIGroups: []string{"infrastructure.cluster.x-k8s.io"}, + APIGroups: []string{infraAPIGroup}, Resources: []string{infraMachineTemplateKind, infraMachinePoolTemplateKind, infraMachinePoolKind}, }, }, From 0eed1a2dc92784112ef573131e0527d54b1f5453 Mon Sep 17 00:00:00 2001 From: Christian Schlotter Date: Thu, 22 Aug 2024 16:15:29 +0200 Subject: [PATCH 05/10] 
test: wait for rollouts to finish in autoscaler tests --- test/framework/machinedeployment_helpers.go | 1 + test/framework/machinepool_helpers.go | 1 + 2 files changed, 2 insertions(+) diff --git a/test/framework/machinedeployment_helpers.go b/test/framework/machinedeployment_helpers.go index 3dc15faa8889..8a3f1ab422e2 100644 --- a/test/framework/machinedeployment_helpers.go +++ b/test/framework/machinedeployment_helpers.go @@ -648,5 +648,6 @@ func AssertMachineDeploymentReplicas(ctx context.Context, input AssertMachineDep g.Expect(input.Getter.Get(ctx, key, md)).To(Succeed(), fmt.Sprintf("failed to get MachineDeployment %s", klog.KObj(input.MachineDeployment))) g.Expect(md.Spec.Replicas).Should(Not(BeNil()), fmt.Sprintf("MachineDeployment %s replicas should not be nil", klog.KObj(md))) g.Expect(*md.Spec.Replicas).Should(Equal(input.Replicas), fmt.Sprintf("MachineDeployment %s replicas should match expected replicas", klog.KObj(md))) + g.Expect(md.Status.Replicas).Should(Equal(input.Replicas), fmt.Sprintf("MachineDeployment %s status.replicas should match expected replicas", klog.KObj(md))) }, input.WaitForMachineDeployment...).Should(Succeed()) } diff --git a/test/framework/machinepool_helpers.go b/test/framework/machinepool_helpers.go index 760b942343d7..af143feae605 100644 --- a/test/framework/machinepool_helpers.go +++ b/test/framework/machinepool_helpers.go @@ -365,5 +365,6 @@ func AssertMachinePoolReplicas(ctx context.Context, input AssertMachinePoolRepli g.Expect(input.Getter.Get(ctx, key, mp)).To(Succeed(), fmt.Sprintf("failed to get MachinePool %s", klog.KObj(input.MachinePool))) g.Expect(mp.Spec.Replicas).Should(Not(BeNil()), fmt.Sprintf("MachinePool %s replicas should not be nil", klog.KObj(mp))) g.Expect(*mp.Spec.Replicas).Should(Equal(input.Replicas), fmt.Sprintf("MachinePool %s replicas should match expected replicas", klog.KObj(mp))) + g.Expect(mp.Status.Replicas).Should(Equal(input.Replicas), fmt.Sprintf("MachinePool %s status.replicas should match 
expected replicas", klog.KObj(mp))) }, input.WaitForMachinePool...).Should(Succeed()) } From 3cb56320f4784655ba3383fea6bf513ac9698422 Mon Sep 17 00:00:00 2001 From: Christian Schlotter Date: Fri, 23 Aug 2024 15:36:28 +0200 Subject: [PATCH 06/10] test: drop cleaning up autoscaler for machine pools --- test/e2e/autoscaler.go | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/test/e2e/autoscaler.go b/test/e2e/autoscaler.go index f4a79282afdd..6ded2e9d635f 100644 --- a/test/e2e/autoscaler.go +++ b/test/e2e/autoscaler.go @@ -405,19 +405,6 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput) WaitForMachinePool: input.E2EConfig.GetIntervals(specName, "wait-controllers"), }) } - - By("Disabling the autoscaler for MachineDeployments to test MachinePools") - framework.DisableAutoscalerForMachinePoolTopologyAndWait(ctx, framework.DisableAutoscalerForMachinePoolTopologyAndWaitInput{ - ClusterProxy: input.BootstrapClusterProxy, - Cluster: clusterResources.Cluster, - WaitForAnnotationsToBeDropped: input.E2EConfig.GetIntervals(specName, "wait-controllers"), - }) - - By("Deleting the MachineDeployment scale up deployment") - framework.DeleteScaleUpDeploymentAndWait(ctx, framework.DeleteScaleUpDeploymentAndWaitInput{ - ClusterProxy: workloadClusterProxy, - WaitForDelete: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"), - }) } By("PASSED!") From f48d9ff1428dc1c17c7596442a809bd250542622 Mon Sep 17 00:00:00 2001 From: Christian Schlotter Date: Fri, 23 Aug 2024 15:49:18 +0200 Subject: [PATCH 07/10] review fix --- test/framework/autoscaler_helpers.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/framework/autoscaler_helpers.go b/test/framework/autoscaler_helpers.go index 44bf5f51da3a..78d991350c69 100644 --- a/test/framework/autoscaler_helpers.go +++ b/test/framework/autoscaler_helpers.go @@ -268,7 +268,7 @@ type ScaleScaleUpDeploymentAndWaitInput struct { Replicas int32 } -// ScaleScaleUpDeploymentAndWait 
deletes the scale up deployment and waits for it to be deleted. +// ScaleScaleUpDeploymentAndWait scales the scale up deployment to a given value and waits for it to be deleted. func ScaleScaleUpDeploymentAndWait(ctx context.Context, input ScaleScaleUpDeploymentAndWaitInput, intervals ...interface{}) { By("Retrieving the scale up deployment") deployment := &appsv1.Deployment{} From 2ab2175a2dc213ab3044c627ea853f188db35ad6 Mon Sep 17 00:00:00 2001 From: Christian Schlotter Date: Fri, 23 Aug 2024 15:51:50 +0200 Subject: [PATCH 08/10] add comment about ScaleToAndFromZero --- test/e2e/autoscaler.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/e2e/autoscaler.go b/test/e2e/autoscaler.go index 6ded2e9d635f..30e256083e24 100644 --- a/test/e2e/autoscaler.go +++ b/test/e2e/autoscaler.go @@ -70,6 +70,10 @@ type AutoscalerSpecInput struct { InstallOnManagementCluster bool // ScaleToAndFromZero enables tests to scale to and from zero. + // To enable `ScaleToAndFromZero` the following needs to be implemented: + // * either provide the relevant annotations on the MachineDeployment or MachinePool + // * for MachineDeployments: implement .status.capacity on the InfraMachineTemplate + // * for MachinePools: implement .status.capacity on the InfraMachinePool ScaleToAndFromZero bool // Allows to inject a function to be run after test namespace is created. 
From ce7b49a885cc4251e2b91377e0479596d5d81735 Mon Sep 17 00:00:00 2001 From: Christian Schlotter Date: Fri, 23 Aug 2024 15:55:27 +0200 Subject: [PATCH 09/10] remove autoscale from zero test for unsupported MachinePools --- test/e2e/autoscaler.go | 50 +----------------------------------------- 1 file changed, 1 insertion(+), 49 deletions(-) diff --git a/test/e2e/autoscaler.go b/test/e2e/autoscaler.go index 30e256083e24..9edf8c089949 100644 --- a/test/e2e/autoscaler.go +++ b/test/e2e/autoscaler.go @@ -70,10 +70,7 @@ type AutoscalerSpecInput struct { InstallOnManagementCluster bool // ScaleToAndFromZero enables tests to scale to and from zero. - // To enable `ScaleToAndFromZero` the following needs to be implemented: - // * either provide the relevant annotations on the MachineDeployment or MachinePool - // * for MachineDeployments: implement .status.capacity on the InfraMachineTemplate - // * for MachinePools: implement .status.capacity on the InfraMachinePool + // Note: This is only implemented for MachineDeployments. ScaleToAndFromZero bool // Allows to inject a function to be run after test namespace is created. @@ -364,51 +361,6 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput) Replicas: mpScaledUpReplicas, WaitForMachinePool: input.E2EConfig.GetIntervals(specName, "wait-controllers"), }) - - if input.ScaleToAndFromZero { - By("Enabling autoscaler for the MachinePool to zero") - // Enable autoscaler on the MachinePool. - framework.EnableAutoscalerForMachinePoolTopologyAndWait(ctx, framework.EnableAutoscalerForMachinePoolTopologyAndWaitInput{ - ClusterProxy: input.BootstrapClusterProxy, - Cluster: clusterResources.Cluster, - NodeGroupMinSize: "0", - NodeGroupMaxSize: mpNodeGroupMaxSize, - WaitForAnnotationsToBeAdded: input.E2EConfig.GetIntervals(specName, "wait-autoscaler"), - }) - - // We can savely assume that mdReplicas is 1. 
- var mdReplicas int32 = 1 - - By("Scaling the MachinePool scale up deployment to 1") - framework.ScaleScaleUpDeploymentAndWait(ctx, framework.ScaleScaleUpDeploymentAndWaitInput{ - ClusterProxy: workloadClusterProxy, - // Set replicas to 1, because we still have 1 Machine from MachineDeployments. - Replicas: 1, - }, input.E2EConfig.GetIntervals(specName, "wait-autoscaler")...) - - By("Checking the MachinePool finished scaling down to zero") - framework.AssertMachinePoolReplicas(ctx, framework.AssertMachinePoolReplicasInput{ - Getter: input.BootstrapClusterProxy.GetClient(), - MachinePool: clusterResources.MachinePools[0], - Replicas: mdReplicas, - WaitForMachinePool: input.E2EConfig.GetIntervals(specName, "wait-controllers"), - }) - - By("Scaling the MachinePool scale up deployment to 2") - framework.ScaleScaleUpDeploymentAndWait(ctx, framework.ScaleScaleUpDeploymentAndWaitInput{ - ClusterProxy: workloadClusterProxy, - // Set replicas to 2, because we still have 1 Machine from MachineDeployments. - Replicas: mdReplicas + 1, - }, input.E2EConfig.GetIntervals(specName, "wait-autoscaler")...) 
- - By("Checking the MachineDeployment finished scaling up") - framework.AssertMachinePoolReplicas(ctx, framework.AssertMachinePoolReplicasInput{ - Getter: input.BootstrapClusterProxy.GetClient(), - MachinePool: clusterResources.MachinePools[0], - Replicas: 1, - WaitForMachinePool: input.E2EConfig.GetIntervals(specName, "wait-controllers"), - }) - } } By("PASSED!") From c5a75afdd96a7dbdc9b281f8c3c10f7d4a8aa043 Mon Sep 17 00:00:00 2001 From: Christian Schlotter Date: Fri, 23 Aug 2024 16:51:16 +0200 Subject: [PATCH 10/10] review fixes --- test/e2e/autoscaler.go | 8 ++++++-- test/framework/autoscaler_helpers.go | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/test/e2e/autoscaler.go b/test/e2e/autoscaler.go index 9edf8c089949..5092dba69ce5 100644 --- a/test/e2e/autoscaler.go +++ b/test/e2e/autoscaler.go @@ -260,7 +260,9 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput) By("Scaling the MachineDeployment scale up deployment to zero") framework.ScaleScaleUpDeploymentAndWait(ctx, framework.ScaleScaleUpDeploymentAndWaitInput{ ClusterProxy: workloadClusterProxy, - Replicas: mpOriginalReplicas + 0, + // We need to sum up the expected number of MachineDeployment replicas and the current + // number of MachinePool replicas because otherwise the pods get scheduled on the MachinePool nodes. + Replicas: mpOriginalReplicas + 0, }, input.E2EConfig.GetIntervals(specName, "wait-autoscaler")...) 
By("Checking the MachineDeployment finished scaling down to zero") @@ -274,7 +276,9 @@ func AutoscalerSpec(ctx context.Context, inputGetter func() AutoscalerSpecInput) By("Scaling the MachineDeployment scale up deployment to 1") framework.ScaleScaleUpDeploymentAndWait(ctx, framework.ScaleScaleUpDeploymentAndWaitInput{ ClusterProxy: workloadClusterProxy, - Replicas: mpOriginalReplicas + 1, + // We need to sum up the expected number of MachineDeployment replicas and the current + // number of MachinePool replicas because otherwise the pods get scheduled on the MachinePool nodes. + Replicas: mpOriginalReplicas + 1, }, input.E2EConfig.GetIntervals(specName, "wait-autoscaler")...) By("Checking the MachineDeployment finished scaling up") diff --git a/test/framework/autoscaler_helpers.go b/test/framework/autoscaler_helpers.go index 78d991350c69..28b0f29a01df 100644 --- a/test/framework/autoscaler_helpers.go +++ b/test/framework/autoscaler_helpers.go @@ -268,7 +268,7 @@ type ScaleScaleUpDeploymentAndWaitInput struct { Replicas int32 } -// ScaleScaleUpDeploymentAndWait scales the scale up deployment to a given value and waits for it to be deleted. +// ScaleScaleUpDeploymentAndWait scales the scale up deployment to a given value and waits for it to become ready. func ScaleScaleUpDeploymentAndWait(ctx context.Context, input ScaleScaleUpDeploymentAndWaitInput, intervals ...interface{}) { By("Retrieving the scale up deployment") deployment := &appsv1.Deployment{}