Skip to content

Commit 0d61f5b

Browse files
authored
Merge pull request #5215 from damdo/fix-eventually
🌱 test: e2e: make managed suite more robust to errors with Eventually()
2 parents cc12c52 + 6cc4c6c commit 0d61f5b

File tree

5 files changed

+89 additions, -45 deletions

test/e2e/data/e2e_eks_conf.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -137,6 +137,7 @@ variables:
137137
GC_WORKLOAD: "../../data/gcworkload.yaml"
138138

139139
intervals:
140+
default/wait-client-request: ["5m", "5s"]
140141
default/wait-cluster: ["40m", "10s"]
141142
default/wait-control-plane: ["35m", "10s"]
142143
default/wait-worker-nodes: ["30m", "10s"]

test/e2e/suites/managed/addon.go

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -62,8 +62,9 @@ func CheckAddonExistsSpec(ctx context.Context, inputGetter func() CheckAddonExis
6262

6363
By(fmt.Sprintf("Getting control plane: %s", controlPlaneName))
6464
controlPlane := &ekscontrolplanev1.AWSManagedControlPlane{}
65-
err := mgmtClient.Get(ctx, crclient.ObjectKey{Namespace: input.Namespace.Name, Name: controlPlaneName}, controlPlane)
66-
Expect(err).ToNot(HaveOccurred())
65+
Eventually(func() error {
66+
return mgmtClient.Get(ctx, crclient.ObjectKey{Namespace: input.Namespace.Name, Name: controlPlaneName}, controlPlane)
67+
}, input.E2EConfig.GetIntervals("", "wait-client-request")...).Should(Succeed(), "eventually failed trying to get the AWSManagedControlPlane")
6768

6869
By(fmt.Sprintf("Checking EKS addon %s is installed on cluster %s and is active", input.AddonName, input.ClusterName))
6970
waitForEKSAddonToHaveStatus(waitForEKSAddonToHaveStatusInput{

test/e2e/suites/managed/aws_node_env.go

Lines changed: 20 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ package managed
2121

2222
import (
2323
"context"
24+
"errors"
2425
"fmt"
2526

2627
"github.com/aws/aws-sdk-go/aws/client"
@@ -57,27 +58,34 @@ func CheckAwsNodeEnvVarsSet(ctx context.Context, inputGetter func() UpdateAwsNod
5758

5859
By(fmt.Sprintf("Getting control plane: %s", controlPlaneName))
5960
controlPlane := &ekscontrolplanev1.AWSManagedControlPlane{}
60-
err := mgmtClient.Get(ctx, crclient.ObjectKey{Namespace: input.Namespace.Name, Name: controlPlaneName}, controlPlane)
61-
Expect(err).ToNot(HaveOccurred())
61+
Eventually(func() error {
62+
return mgmtClient.Get(ctx, crclient.ObjectKey{Namespace: input.Namespace.Name, Name: controlPlaneName}, controlPlane)
63+
}, input.E2EConfig.GetIntervals("", "wait-client-request")...).Should(Succeed(), "eventually failed trying to get the AWSManagedControlPlane")
6264

6365
By(fmt.Sprintf("Checking environment variables are set on AWSManagedControlPlane: %s", controlPlaneName))
6466
Expect(controlPlane.Spec.VpcCni.Env).NotTo(BeNil())
6567
Expect(len(controlPlane.Spec.VpcCni.Env)).Should(BeNumerically(">", 1))
6668

6769
By("Checking if aws-node has been updated with the defined environment variables on the workload cluster")
6870
daemonSet := &appsv1.DaemonSet{}
69-
7071
clusterClient := input.BootstrapClusterProxy.GetWorkloadCluster(ctx, input.Namespace.Name, input.ClusterName).GetClient()
71-
err = clusterClient.Get(ctx, crclient.ObjectKey{Namespace: "kube-system", Name: "aws-node"}, daemonSet)
72-
Expect(err).ToNot(HaveOccurred())
73-
74-
for _, container := range daemonSet.Spec.Template.Spec.Containers {
75-
if container.Name == "aws-node" {
76-
Expect(matchEnvVar(container.Env, corev1.EnvVar{Name: "FOO", Value: "BAR"})).Should(BeTrue())
77-
Expect(matchEnvVar(container.Env, corev1.EnvVar{Name: "ENABLE_PREFIX_DELEGATION", Value: "true"})).Should(BeTrue())
78-
break
72+
73+
Eventually(func() error {
74+
if err := clusterClient.Get(ctx, crclient.ObjectKey{Namespace: "kube-system", Name: "aws-node"}, daemonSet); err != nil {
75+
return fmt.Errorf("unable to get aws-node: %w", err)
7976
}
80-
}
77+
78+
for _, container := range daemonSet.Spec.Template.Spec.Containers {
79+
if container.Name == "aws-node" {
80+
if matchEnvVar(container.Env, corev1.EnvVar{Name: "FOO", Value: "BAR"}) &&
81+
matchEnvVar(container.Env, corev1.EnvVar{Name: "ENABLE_PREFIX_DELEGATION", Value: "true"}) {
82+
return nil
83+
}
84+
}
85+
}
86+
87+
return errors.New("unable to find the expected environment variables on the aws-node DaemonSet's container")
88+
}, input.E2EConfig.GetIntervals("", "wait-client-request")...).Should(Succeed(), "should have been able to find the expected aws-node DaemonSet")
8189
}
8290

8391
func matchEnvVar(s []corev1.EnvVar, ev corev1.EnvVar) bool {

test/e2e/suites/managed/eks_test.go

Lines changed: 9 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,6 @@ package managed
2222
import (
2323
"context"
2424
"fmt"
25-
"time"
2625

2726
"github.com/onsi/ginkgo/v2"
2827
. "github.com/onsi/gomega"
@@ -75,22 +74,15 @@ var _ = ginkgo.Describe("[managed] [general] EKS cluster tests", func() {
7574
})
7675

7776
ginkgo.By("should set environment variables on the aws-node daemonset")
78-
Eventually(func() error {
79-
defer ginkgo.GinkgoRecover()
80-
CheckAwsNodeEnvVarsSet(ctx, func() UpdateAwsNodeVersionSpecInput {
81-
return UpdateAwsNodeVersionSpecInput{
82-
E2EConfig: e2eCtx.E2EConfig,
83-
BootstrapClusterProxy: e2eCtx.Environment.BootstrapClusterProxy,
84-
AWSSession: e2eCtx.BootstrapUserAWSSession,
85-
Namespace: namespace,
86-
ClusterName: clusterName,
87-
}
88-
})
89-
return nil
90-
}).WithTimeout(5*time.Minute).WithPolling(10*time.Second).WithContext(ctx).Should(
91-
Succeed(),
92-
"Failed to verify AWS Node environment variables after 5 minutes of retries",
93-
)
77+
CheckAwsNodeEnvVarsSet(ctx, func() UpdateAwsNodeVersionSpecInput {
78+
return UpdateAwsNodeVersionSpecInput{
79+
E2EConfig: e2eCtx.E2EConfig,
80+
BootstrapClusterProxy: e2eCtx.Environment.BootstrapClusterProxy,
81+
AWSSession: e2eCtx.BootstrapUserAWSSession,
82+
Namespace: namespace,
83+
ClusterName: clusterName,
84+
}
85+
})
9486

9587
ginkgo.By("should have the VPC CNI installed")
9688
CheckAddonExistsSpec(ctx, func() CheckAddonExistsSpecInput {

test/e2e/suites/managed/helpers.go

Lines changed: 56 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@ import (
3232
. "github.com/onsi/gomega"
3333
corev1 "k8s.io/api/core/v1"
3434
apimachinerytypes "k8s.io/apimachinery/pkg/types"
35+
"k8s.io/utils/ptr"
3536
crclient "sigs.k8s.io/controller-runtime/pkg/client"
3637

3738
infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2"
@@ -51,6 +52,11 @@ const (
5152
EKSControlPlaneOnlyLegacyFlavor = "eks-control-plane-only-legacy"
5253
)
5354

55+
const (
56+
clientRequestTimeout = 2 * time.Minute
57+
clientRequestCheckInterval = 5 * time.Second
58+
)
59+
5460
type DefaultConfigClusterFn func(clusterName, namespace string) clusterctl.ConfigClusterInput
5561

5662
func getEKSClusterName(namespace, clusterName string) string {
@@ -74,14 +80,19 @@ func getASGName(clusterName string) string {
7480
}
7581

7682
func verifyClusterActiveAndOwned(eksClusterName string, sess client.ConfigProvider) {
77-
cluster, err := getEKSCluster(eksClusterName, sess)
78-
Expect(err).NotTo(HaveOccurred())
83+
var (
84+
cluster *eks.Cluster
85+
err error
86+
)
87+
Eventually(func() error {
88+
cluster, err = getEKSCluster(eksClusterName, sess)
89+
return err
90+
}, clientRequestTimeout, clientRequestCheckInterval).Should(Succeed(), fmt.Sprintf("eventually failed trying to get EKS Cluster %q", eksClusterName))
7991

8092
tagName := infrav1.ClusterTagKey(eksClusterName)
8193
tagValue, ok := cluster.Tags[tagName]
8294
Expect(ok).To(BeTrue(), "expecting the cluster owned tag to exist")
8395
Expect(*tagValue).To(BeEquivalentTo(string(infrav1.ResourceLifecycleOwned)))
84-
8596
Expect(*cluster.Status).To(BeEquivalentTo(eks.ClusterStatusActive))
8697
}
8798

@@ -102,6 +113,7 @@ func getEKSClusterAddon(eksClusterName, addonName string, sess client.ConfigProv
102113
AddonName: &addonName,
103114
ClusterName: &eksClusterName,
104115
}
116+
105117
describeOutput, err := eksClient.DescribeAddon(describeInput)
106118
if err != nil {
107119
return nil, fmt.Errorf("describing eks addon %s: %w", addonName, err)
@@ -112,16 +124,16 @@ func getEKSClusterAddon(eksClusterName, addonName string, sess client.ConfigProv
112124

113125
func verifySecretExists(ctx context.Context, secretName, namespace string, k8sclient crclient.Client) {
114126
secret := &corev1.Secret{}
115-
err := k8sclient.Get(ctx, apimachinerytypes.NamespacedName{Name: secretName, Namespace: namespace}, secret)
116-
117-
Expect(err).ShouldNot(HaveOccurred())
127+
Eventually(func() error {
128+
return k8sclient.Get(ctx, apimachinerytypes.NamespacedName{Name: secretName, Namespace: namespace}, secret)
129+
}, clientRequestTimeout, clientRequestCheckInterval).Should(Succeed(), fmt.Sprintf("eventually failed trying to verify Secret %q exists", secretName))
118130
}
119131

120132
func verifyConfigMapExists(ctx context.Context, name, namespace string, k8sclient crclient.Client) {
121133
cm := &corev1.ConfigMap{}
122134
Eventually(func() error {
123135
return k8sclient.Get(ctx, apimachinerytypes.NamespacedName{Name: name, Namespace: namespace}, cm)
124-
}, 2*time.Minute, 5*time.Second).Should(Succeed())
136+
}, clientRequestTimeout, clientRequestCheckInterval).Should(Succeed(), fmt.Sprintf("eventually failed trying to verify ConfigMap %q exists", name))
125137
}
126138

127139
func VerifyRoleExistsAndOwned(roleName string, eksClusterName string, checkOwned bool, sess client.ConfigProvider) {
@@ -130,8 +142,15 @@ func VerifyRoleExistsAndOwned(roleName string, eksClusterName string, checkOwned
130142
RoleName: aws.String(roleName),
131143
}
132144

133-
output, err := iamClient.GetRole(input)
134-
Expect(err).ShouldNot(HaveOccurred())
145+
var (
146+
output *iam.GetRoleOutput
147+
err error
148+
)
149+
150+
Eventually(func() error {
151+
output, err = iamClient.GetRole(input)
152+
return err
153+
}, clientRequestTimeout, clientRequestCheckInterval).Should(Succeed(), fmt.Sprintf("eventually failed trying to get IAM Role %q", roleName))
135154

136155
if checkOwned {
137156
found := false
@@ -152,9 +171,24 @@ func verifyManagedNodeGroup(eksClusterName, nodeGroupName string, checkOwned boo
152171
ClusterName: aws.String(eksClusterName),
153172
NodegroupName: aws.String(nodeGroupName),
154173
}
155-
result, err := eksClient.DescribeNodegroup(input)
156-
Expect(err).NotTo(HaveOccurred())
157-
Expect(*result.Nodegroup.Status).To(BeEquivalentTo(eks.NodegroupStatusActive))
174+
var (
175+
result *eks.DescribeNodegroupOutput
176+
err error
177+
)
178+
179+
Eventually(func() error {
180+
result, err = eksClient.DescribeNodegroup(input)
181+
if err != nil {
182+
return fmt.Errorf("error describing nodegroup: %w", err)
183+
}
184+
185+
nodeGroupStatus := ptr.Deref(result.Nodegroup.Status, "")
186+
if nodeGroupStatus != eks.NodegroupStatusActive {
187+
return fmt.Errorf("expected nodegroup.Status to be %q, was %q instead", eks.NodegroupStatusActive, nodeGroupStatus)
188+
}
189+
190+
return nil
191+
}, clientRequestTimeout, clientRequestCheckInterval).Should(Succeed(), "eventually failed trying to describe EKS Node group")
158192

159193
if checkOwned {
160194
tagName := infrav1.ClusterAWSCloudProviderTagKey(eksClusterName)
@@ -172,8 +206,16 @@ func verifyASG(eksClusterName, asgName string, checkOwned bool, sess client.Conf
172206
},
173207
}
174208

175-
result, err := asgClient.DescribeAutoScalingGroups(input)
176-
Expect(err).NotTo(HaveOccurred())
209+
var (
210+
result *autoscaling.DescribeAutoScalingGroupsOutput
211+
err error
212+
)
213+
214+
Eventually(func() error {
215+
result, err = asgClient.DescribeAutoScalingGroups(input)
216+
return err
217+
}, clientRequestTimeout, clientRequestCheckInterval).Should(Succeed())
218+
177219
for _, instance := range result.AutoScalingGroups[0].Instances {
178220
Expect(*instance.LifecycleState).To(Equal("InService"), "expecting the instance in service")
179221
}

0 commit comments

Comments
 (0)