@@ -30,10 +30,10 @@ import (
 	"k8s.io/utils/ptr"

 	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
-	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
 	"sigs.k8s.io/cluster-api/test/framework"
 	"sigs.k8s.io/cluster-api/test/framework/clusterctl"
 	"sigs.k8s.io/cluster-api/util"
+	"sigs.k8s.io/cluster-api/util/conditions"
 )

 // NodeDrainTimeoutSpecInput is the input for NodeDrainTimeoutSpec.
@@ -52,10 +52,8 @@ type NodeDrainTimeoutSpecInput struct {
 	// able to identify the default.
 	InfrastructureProvider *string

-	// Flavor, if specified, must refer to a template that contains
-	// a KubeadmControlPlane resource with spec.machineTemplate.nodeDrainTimeout
-	// configured and a MachineDeployment resource that has
-	// spec.template.spec.nodeDrainTimeout configured.
+	// Flavor, if specified, must refer to a template that uses a Cluster with ClusterClass.
+	// The cluster must use a KubeadmControlPlane and a MachineDeployment.
 	// If not specified, "node-drain" is used.
 	Flavor *string

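The new Flavor contract means the template must render a ClusterClass-based Cluster; the `modifyControlPlaneViaClusterAndWait` / `modifyMachineDeploymentViaClusterAndWait` helpers used further down patch that Cluster's topology fields. A minimal sketch of the expected shape (class, version, and names are placeholders, not taken from this change):

```go
import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/utils/ptr"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

// Hypothetical shape of the Cluster the flavor must render. The test's topology
// callbacks mutate exactly these ControlPlaneTopology/MachineDeploymentTopology structs.
var cluster = &clusterv1.Cluster{
	ObjectMeta: metav1.ObjectMeta{Name: "node-drain-test", Namespace: "default"},
	Spec: clusterv1.ClusterSpec{
		Topology: &clusterv1.Topology{
			Class:   "quick-start", // placeholder ClusterClass; its control plane must be a KubeadmControlPlane
			Version: "v1.28.0",     // placeholder Kubernetes version
			ControlPlane: clusterv1.ControlPlaneTopology{
				Replicas: ptr.To[int32](3),
			},
			Workers: &clusterv1.WorkersTopology{
				MachineDeployments: []clusterv1.MachineDeploymentTopology{{
					Class:    "default-worker", // placeholder worker class
					Name:     "md-0",
					Replicas: ptr.To[int32](1), // the test expects exactly 1 replica per MachineDeployment
				}},
			},
		},
	},
}
```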
@@ -66,13 +64,11 @@ type NodeDrainTimeoutSpecInput struct {

 func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeoutSpecInput) {
 	var (
-		specName           = "node-drain"
-		input              NodeDrainTimeoutSpecInput
-		namespace          *corev1.Namespace
-		cancelWatches      context.CancelFunc
-		clusterResources   *clusterctl.ApplyClusterTemplateAndWaitResult
-		machineDeployments []*clusterv1.MachineDeployment
-		controlplane       *controlplanev1.KubeadmControlPlane
+		specName         = "node-drain"
+		input            NodeDrainTimeoutSpecInput
+		namespace        *corev1.Namespace
+		cancelWatches    context.CancelFunc
+		clusterResources *clusterctl.ApplyClusterTemplateAndWaitResult
 	)

 	BeforeEach(func() {
@@ -97,6 +93,7 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo
 		if input.InfrastructureProvider != nil {
 			infrastructureProvider = *input.InfrastructureProvider
 		}
+
 		controlPlaneReplicas := 3
 		clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{
 			ClusterProxy: input.BootstrapClusterProxy,
@@ -118,52 +115,167 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo
 			WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
 		}, clusterResources)
 		cluster := clusterResources.Cluster
-		controlplane = clusterResources.ControlPlane
-		machineDeployments = clusterResources.MachineDeployments
+		controlplane := clusterResources.ControlPlane
+		machineDeployments := clusterResources.MachineDeployments

 		Expect(machineDeployments[0].Spec.Replicas).To(Equal(ptr.To[int32](1)))

-		By("Add a deployment with unevictable pods and podDisruptionBudget to the workload cluster. The deployed pods cannot be evicted in the node draining process.")
+		// This label will be added to all Machines so we can later create the unevictable Pods on the right Nodes.
+		nodeOwnerLabelKey := "owner.node.cluster.x-k8s.io"
+
+		By("Ensure Node label is set & NodeDrainTimeout is set to 0 (wait forever) on ControlPlane and MachineDeployment topologies")
+		modifyControlPlaneViaClusterAndWait(ctx, modifyControlPlaneViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyControlPlaneTopology: func(topology *clusterv1.ControlPlaneTopology) {
+				topology.NodeDrainTimeout = &metav1.Duration{Duration: time.Duration(0)}
+				if topology.Metadata.Labels == nil {
+					topology.Metadata.Labels = map[string]string{}
+				}
+				topology.Metadata.Labels[nodeOwnerLabelKey] = "KubeadmControlPlane-" + controlplane.Name
+			},
+			WaitForControlPlane: input.E2EConfig.GetIntervals(specName, "wait-control-plane"),
+		})
+		modifyMachineDeploymentViaClusterAndWait(ctx, modifyMachineDeploymentViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyMachineDeploymentTopology: func(topology *clusterv1.MachineDeploymentTopology) {
+				topology.NodeDrainTimeout = &metav1.Duration{Duration: time.Duration(0)}
+				if topology.Metadata.Labels == nil {
+					topology.Metadata.Labels = map[string]string{}
+				}
+				for _, md := range machineDeployments {
+					if md.Labels[clusterv1.ClusterTopologyMachineDeploymentNameLabel] == topology.Name {
+						topology.Metadata.Labels[nodeOwnerLabelKey] = "MachineDeployment-" + md.Name
+					}
+				}
+			},
+			WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
+		})
+
 		workloadClusterProxy := input.BootstrapClusterProxy.GetWorkloadCluster(ctx, cluster.Namespace, cluster.Name)
+		By("Deploy Deployment with unevictable Pods on control plane Nodes.")
+
+		cpDeploymentAndPDBName := fmt.Sprintf("%s-%s", "unevictable-pod-cp", util.RandomString(3))
 		framework.DeployUnevictablePod(ctx, framework.DeployUnevictablePodInput{
 			WorkloadClusterProxy:               workloadClusterProxy,
-			DeploymentName:                     fmt.Sprintf("%s-%s", "unevictable-pod", util.RandomString(3)),
+			ControlPlane:                       controlplane,
+			DeploymentName:                     cpDeploymentAndPDBName,
 			Namespace:                          namespace.Name + "-unevictable-workload",
+			NodeSelector:                       map[string]string{nodeOwnerLabelKey: "KubeadmControlPlane-" + controlplane.Name},
 			WaitForDeploymentAvailableInterval: input.E2EConfig.GetIntervals(specName, "wait-deployment-available"),
 		})
-
-		By("Scale the machinedeployment down to zero. If we didn't have the NodeDrainTimeout duration, the node drain process would block this operator.")
-		// Because all the machines of a machinedeployment can be deleted at the same time, so we only prepare the interval for 1 replica.
-		nodeDrainTimeoutMachineDeploymentInterval := getDrainAndDeleteInterval(input.E2EConfig.GetIntervals(specName, "wait-machine-deleted"), machineDeployments[0].Spec.Template.Spec.NodeDrainTimeout, 1)
+		By("Deploy Deployment with unevictable Pods on MachineDeployment Nodes.")
+		mdDeploymentAndPDBNames := map[string]string{}
 		for _, md := range machineDeployments {
-			framework.ScaleAndWaitMachineDeployment(ctx, framework.ScaleAndWaitMachineDeploymentInput{
-				ClusterProxy:              input.BootstrapClusterProxy,
-				Cluster:                   cluster,
-				MachineDeployment:         md,
-				WaitForMachineDeployments: nodeDrainTimeoutMachineDeploymentInterval,
-				Replicas:                  0,
+			mdDeploymentAndPDBNames[md.Name] = fmt.Sprintf("%s-%s", "unevictable-pod-md", util.RandomString(3))
+			framework.DeployUnevictablePod(ctx, framework.DeployUnevictablePodInput{
+				WorkloadClusterProxy:               workloadClusterProxy,
+				MachineDeployment:                  md,
+				DeploymentName:                     mdDeploymentAndPDBNames[md.Name],
+				Namespace:                          namespace.Name + "-unevictable-workload",
+				NodeSelector:                       map[string]string{nodeOwnerLabelKey: "MachineDeployment-" + md.Name},
+				WaitForDeploymentAvailableInterval: input.E2EConfig.GetIntervals(specName, "wait-deployment-available"),
 			})
 		}

-		By("Deploy deployment with unevictable pods on control plane nodes.")
-		framework.DeployUnevictablePod(ctx, framework.DeployUnevictablePodInput{
-			WorkloadClusterProxy:               workloadClusterProxy,
-			ControlPlane:                       controlplane,
-			DeploymentName:                     fmt.Sprintf("%s-%s", "unevictable-pod", util.RandomString(3)),
-			Namespace:                          namespace.Name + "-unevictable-workload",
-			WaitForDeploymentAvailableInterval: input.E2EConfig.GetIntervals(specName, "wait-deployment-available"),
+		By("Scale down the control plane to 1 and MachineDeployments to 0.")
+		modifyControlPlaneViaClusterAndWait(ctx, modifyControlPlaneViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyControlPlaneTopology: func(topology *clusterv1.ControlPlaneTopology) {
+				topology.Replicas = ptr.To[int32](1)
+			},
+			WaitForControlPlane: input.E2EConfig.GetIntervals(specName, "wait-control-plane"),
+		})
+		modifyMachineDeploymentViaClusterAndWait(ctx, modifyMachineDeploymentViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyMachineDeploymentTopology: func(topology *clusterv1.MachineDeploymentTopology) {
+				topology.Replicas = ptr.To[int32](0)
+			},
+			WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
 		})

-		By("Scale down the controlplane of the workload cluster and make sure that nodes running workload can be deleted even the draining process is blocked.")
-		// When we scale down the KCP, controlplane machines are by default deleted one by one, so it requires more time.
-		nodeDrainTimeoutKCPInterval := getDrainAndDeleteInterval(input.E2EConfig.GetIntervals(specName, "wait-machine-deleted"), controlplane.Spec.MachineTemplate.NodeDrainTimeout, controlPlaneReplicas)
-		framework.ScaleAndWaitControlPlane(ctx, framework.ScaleAndWaitControlPlaneInput{
-			ClusterProxy:        input.BootstrapClusterProxy,
-			Cluster:             cluster,
-			ControlPlane:        controlplane,
-			Replicas:            1,
-			WaitForControlPlane: nodeDrainTimeoutKCPInterval,
+		By("Verify Node drains for control plane and MachineDeployment Machines are blocked")
+		Eventually(func(g Gomega) {
+			controlPlaneMachines := framework.GetControlPlaneMachinesByCluster(ctx, framework.GetControlPlaneMachinesByClusterInput{
+				Lister:      input.BootstrapClusterProxy.GetClient(),
+				ClusterName: cluster.Name,
+				Namespace:   cluster.Namespace,
+			})
+			var condition *clusterv1.Condition
+			for _, machine := range controlPlaneMachines {
+				condition = conditions.Get(&machine, clusterv1.DrainingSucceededCondition)
+				if condition != nil {
+					// We only expect to find the condition on one Machine (as KCP will only try to drain one Machine at a time)
+					break
+				}
+			}
+			g.Expect(condition).ToNot(BeNil())
+			g.Expect(condition.Status).To(Equal(corev1.ConditionFalse))
+			g.Expect(condition.Message).To(ContainSubstring(fmt.Sprintf("Cannot evict pod as it would violate the pod's disruption budget. The disruption budget %s needs", cpDeploymentAndPDBName)))
+		}, input.E2EConfig.GetIntervals(specName, "wait-machine-deleted")...).Should(Succeed())
+		for _, md := range machineDeployments {
+			Eventually(func(g Gomega) {
+				machines := framework.GetMachinesByMachineDeployments(ctx, framework.GetMachinesByMachineDeploymentsInput{
+					Lister:            input.BootstrapClusterProxy.GetClient(),
+					ClusterName:       cluster.Name,
+					Namespace:         cluster.Namespace,
+					MachineDeployment: *md,
+				})
+				g.Expect(machines).To(HaveLen(1))
+				condition := conditions.Get(&machines[0], clusterv1.DrainingSucceededCondition)
+				g.Expect(condition).ToNot(BeNil())
+				g.Expect(condition.Status).To(Equal(corev1.ConditionFalse))
+				g.Expect(condition.Message).To(ContainSubstring(fmt.Sprintf("Cannot evict pod as it would violate the pod's disruption budget. The disruption budget %s needs", mdDeploymentAndPDBNames[md.Name])))
+			}, input.E2EConfig.GetIntervals(specName, "wait-machine-deleted")...).Should(Succeed())
+		}
+
+		By("Set NodeDrainTimeout to 1s to unblock Node drain")
+		// Note: This also verifies that KCP & MachineDeployments are still propagating changes to NodeDrainTimeout down to
+		// Machines that already have a deletionTimestamp.
+		drainTimeout := &metav1.Duration{Duration: time.Duration(1) * time.Second}
+		modifyControlPlaneViaClusterAndWait(ctx, modifyControlPlaneViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyControlPlaneTopology: func(topology *clusterv1.ControlPlaneTopology) {
+				topology.NodeDrainTimeout = drainTimeout
+			},
+			WaitForControlPlane: input.E2EConfig.GetIntervals(specName, "wait-control-plane"),
+		})
+		modifyMachineDeploymentViaClusterAndWait(ctx, modifyMachineDeploymentViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyMachineDeploymentTopology: func(topology *clusterv1.MachineDeploymentTopology) {
+				topology.NodeDrainTimeout = drainTimeout
+			},
+			WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
 		})

+		By("Verify Node drains were unblocked")
+		// When we scale down the KCP, controlplane machines are deleted one by one, so it requires more time.
+		// MD Machine deletion is done in parallel and will be faster.
+		nodeDrainTimeoutKCPInterval := getDrainAndDeleteInterval(input.E2EConfig.GetIntervals(specName, "wait-machine-deleted"), drainTimeout, controlPlaneReplicas)
+		Eventually(func(g Gomega) {
+			// When all drains complete we only have 1 control plane & 0 MD replicas left.
+			controlPlaneMachines := framework.GetControlPlaneMachinesByCluster(ctx, framework.GetControlPlaneMachinesByClusterInput{
+				Lister:      input.BootstrapClusterProxy.GetClient(),
+				ClusterName: cluster.Name,
+				Namespace:   cluster.Namespace,
+			})
+			g.Expect(controlPlaneMachines).To(HaveLen(1))
+
+			for _, md := range machineDeployments {
+				machines := framework.GetMachinesByMachineDeployments(ctx, framework.GetMachinesByMachineDeploymentsInput{
+					Lister:            input.BootstrapClusterProxy.GetClient(),
+					ClusterName:       cluster.Name,
+					Namespace:         cluster.Namespace,
+					MachineDeployment: *md,
+				})
+				g.Expect(machines).To(BeEmpty())
+			}
+		}, nodeDrainTimeoutKCPInterval...).Should(Succeed())
+
 		By("PASSED!")
 	})
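For context, the drain blockage verified above relies on `framework.DeployUnevictablePod` pairing each Deployment with a PodDisruptionBudget that an eviction can never satisfy. A minimal sketch of that idea, assuming a budget whose minAvailable equals the replica count (the helper below is hypothetical, not the framework's literal implementation):

```go
import (
	policyv1 "k8s.io/api/policy/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/intstr"
	"k8s.io/utils/ptr"
)

// unevictablePDB (hypothetical helper) builds a PodDisruptionBudget whose
// minAvailable equals the Deployment's replica count: evicting any matching Pod
// would violate the budget, so the Node drain blocks until NodeDrainTimeout expires.
func unevictablePDB(name, namespace string, replicas int32, podLabels map[string]string) *policyv1.PodDisruptionBudget {
	return &policyv1.PodDisruptionBudget{
		ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace},
		Spec: policyv1.PodDisruptionBudgetSpec{
			MinAvailable: ptr.To(intstr.FromInt(int(replicas))),
			Selector:     &metav1.LabelSelector{MatchLabels: podLabels},
		},
	}
}
```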
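`getDrainAndDeleteInterval`, used in the last step, is defined elsewhere in this file and is not part of the diff. A sketch of the idea, under the assumption that it pads the configured delete timeout with the drain timeout once per serially deleted replica:

```go
import (
	"time"

	. "github.com/onsi/gomega"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Sketch only (assumption, not the file's verbatim helper): stretch the
// "wait-machine-deleted" interval by the drain timeout, once per replica,
// because KCP drains and deletes control plane Machines one at a time.
func getDrainAndDeleteInterval(deleteInterval []interface{}, drainTimeout *metav1.Duration, replicas int) []interface{} {
	deleteTimeout, err := time.ParseDuration(deleteInterval[0].(string))
	Expect(err).ToNot(HaveOccurred())
	intervalDuration := (drainTimeout.Duration + deleteTimeout) * time.Duration(replicas)
	return []interface{}{intervalDuration.String(), deleteInterval[1]}
}
```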