@@ -30,10 +30,10 @@ import (
 	"k8s.io/utils/ptr"
 
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
-	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
 	"sigs.k8s.io/cluster-api/test/framework"
 	"sigs.k8s.io/cluster-api/test/framework/clusterctl"
 	"sigs.k8s.io/cluster-api/util"
+	"sigs.k8s.io/cluster-api/util/conditions"
 )
 
 // NodeDrainTimeoutSpecInput is the input for NodeDrainTimeoutSpec.
@@ -66,13 +66,11 @@ type NodeDrainTimeoutSpecInput struct {
 
 func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeoutSpecInput) {
 	var (
-		specName           = "node-drain"
-		input              NodeDrainTimeoutSpecInput
-		namespace          *corev1.Namespace
-		cancelWatches      context.CancelFunc
-		clusterResources   *clusterctl.ApplyClusterTemplateAndWaitResult
-		machineDeployments []*clusterv1.MachineDeployment
-		controlplane       *controlplanev1.KubeadmControlPlane
+		specName         = "node-drain"
+		input            NodeDrainTimeoutSpecInput
+		namespace        *corev1.Namespace
+		cancelWatches    context.CancelFunc
+		clusterResources *clusterctl.ApplyClusterTemplateAndWaitResult
 	)
 
 	BeforeEach(func() {
@@ -97,6 +95,7 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo
 		if input.InfrastructureProvider != nil {
 			infrastructureProvider = *input.InfrastructureProvider
 		}
+
 		controlPlaneReplicas := 3
 		clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{
 			ClusterProxy: input.BootstrapClusterProxy,
@@ -118,52 +117,162 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo
 			WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
 		}, clusterResources)
 		cluster := clusterResources.Cluster
-		controlplane = clusterResources.ControlPlane
-		machineDeployments = clusterResources.MachineDeployments
+		controlplane := clusterResources.ControlPlane
+		machineDeployments := clusterResources.MachineDeployments
 		Expect(machineDeployments[0].Spec.Replicas).To(Equal(ptr.To[int32](1)))
 
-		By("Add a deployment with unevictable pods and podDisruptionBudget to the workload cluster. The deployed pods cannot be evicted in the node draining process.")
+		// This label will be added to all Machines so we can later create the unevictable Pods on the right Nodes.
+		nodeLabelKey := "owner.node.cluster.x-k8s.io"
+
+		By("Ensure Node label is set & NodeDrainTimeout is set to 0 (wait forever) on ControlPlane and MachineDeployment topologies")
+		modifyControlPlaneViaClusterAndWait(ctx, modifyControlPlaneViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyControlPlaneTopology: func(topology *clusterv1.ControlPlaneTopology) {
+				topology.NodeDrainTimeout = &metav1.Duration{Duration: time.Duration(0)}
+				if topology.Metadata.Labels == nil {
+					topology.Metadata.Labels = map[string]string{}
+				}
+				topology.Metadata.Labels[nodeLabelKey] = "KubeadmControlPlane-" + controlplane.Name
+			},
+			WaitForControlPlane: input.E2EConfig.GetIntervals(specName, "wait-control-plane"),
+		})
+		modifyMachineDeploymentViaClusterAndWait(ctx, modifyMachineDeploymentViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyMachineDeploymentTopology: func(topology *clusterv1.MachineDeploymentTopology) {
+				topology.NodeDrainTimeout = &metav1.Duration{Duration: time.Duration(0)}
+				if topology.Metadata.Labels == nil {
+					topology.Metadata.Labels = map[string]string{}
+				}
+				for _, md := range machineDeployments {
+					if md.Labels[clusterv1.ClusterTopologyMachineDeploymentNameLabel] == topology.Name {
+						topology.Metadata.Labels[nodeLabelKey] = "MachineDeployment-" + md.Name
+					}
+				}
+			},
+			WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
+		})
+
 		workloadClusterProxy := input.BootstrapClusterProxy.GetWorkloadCluster(ctx, cluster.Namespace, cluster.Name)
+		By("Deploy Deployment with unevictable pods on control plane Nodes.")
 		framework.DeployUnevictablePod(ctx, framework.DeployUnevictablePodInput{
 			WorkloadClusterProxy:               workloadClusterProxy,
-			DeploymentName:                     fmt.Sprintf("%s-%s", "unevictable-pod", util.RandomString(3)),
+			ControlPlane:                       controlplane,
+			DeploymentName:                     fmt.Sprintf("%s-%s", "unevictable-pod-cp", util.RandomString(3)),
 			Namespace:                          namespace.Name + "-unevictable-workload",
+			NodeSelector:                       map[string]string{nodeLabelKey: "KubeadmControlPlane-" + controlplane.Name},
 			WaitForDeploymentAvailableInterval: input.E2EConfig.GetIntervals(specName, "wait-deployment-available"),
 		})
-
-		By("Scale the machinedeployment down to zero. If we didn't have the NodeDrainTimeout duration, the node drain process would block this operator.")
-		// Because all the machines of a machinedeployment can be deleted at the same time, so we only prepare the interval for 1 replica.
-		nodeDrainTimeoutMachineDeploymentInterval := getDrainAndDeleteInterval(input.E2EConfig.GetIntervals(specName, "wait-machine-deleted"), machineDeployments[0].Spec.Template.Spec.NodeDrainTimeout, 1)
+		By("Deploy Deployment with unevictable pods on MachineDeployment Nodes.")
 		for _, md := range machineDeployments {
-			framework.ScaleAndWaitMachineDeployment(ctx, framework.ScaleAndWaitMachineDeploymentInput{
-				ClusterProxy:              input.BootstrapClusterProxy,
-				Cluster:                   cluster,
-				MachineDeployment:         md,
-				WaitForMachineDeployments: nodeDrainTimeoutMachineDeploymentInterval,
-				Replicas:                  0,
+			framework.DeployUnevictablePod(ctx, framework.DeployUnevictablePodInput{
+				WorkloadClusterProxy:               workloadClusterProxy,
+				MachineDeployment:                  md,
+				DeploymentName:                     fmt.Sprintf("%s-%s", "unevictable-pod-md", util.RandomString(3)),
+				Namespace:                          namespace.Name + "-unevictable-workload",
+				NodeSelector:                       map[string]string{nodeLabelKey: "MachineDeployment-" + md.Name},
+				WaitForDeploymentAvailableInterval: input.E2EConfig.GetIntervals(specName, "wait-deployment-available"),
 			})
 		}
 
-		By("Deploy deployment with unevictable pods on control plane nodes.")
-		framework.DeployUnevictablePod(ctx, framework.DeployUnevictablePodInput{
-			WorkloadClusterProxy:               workloadClusterProxy,
-			ControlPlane:                       controlplane,
-			DeploymentName:                     fmt.Sprintf("%s-%s", "unevictable-pod", util.RandomString(3)),
-			Namespace:                          namespace.Name + "-unevictable-workload",
-			WaitForDeploymentAvailableInterval: input.E2EConfig.GetIntervals(specName, "wait-deployment-available"),
+		By("Scale down the control plane to 1 and MachineDeployments to 0.")
+		modifyControlPlaneViaClusterAndWait(ctx, modifyControlPlaneViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyControlPlaneTopology: func(topology *clusterv1.ControlPlaneTopology) {
+				topology.Replicas = ptr.To[int32](1)
+			},
+			WaitForControlPlane: input.E2EConfig.GetIntervals(specName, "wait-control-plane"),
 		})
+		modifyMachineDeploymentViaClusterAndWait(ctx, modifyMachineDeploymentViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyMachineDeploymentTopology: func(topology *clusterv1.MachineDeploymentTopology) {
+				topology.Replicas = ptr.To[int32](0)
+			},
+			WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
+		})
+
+		By("Verify Node drains for control plane and MachineDeployment Machines are blocked")
+		Eventually(func(g Gomega) {
+			controlPlaneMachines := framework.GetControlPlaneMachinesByCluster(ctx, framework.GetControlPlaneMachinesByClusterInput{
+				Lister:      input.BootstrapClusterProxy.GetClient(),
+				ClusterName: cluster.Name,
+				Namespace:   cluster.Namespace,
+			})
+			var condition *clusterv1.Condition
+			for _, machine := range controlPlaneMachines {
+				condition = conditions.Get(&machine, clusterv1.DrainingSucceededCondition)
+				if condition != nil {
+					// We only expect to find the condition on one Machine (as KCP will only try to drain one Machine)
+					break
+				}
+			}
+			g.Expect(condition).ToNot(BeNil())
+			g.Expect(condition.Message).To(ContainSubstring("Cannot evict pod as it would violate the pod's disruption budget"))
+
+		}, input.E2EConfig.GetIntervals(specName, "wait-machine-deleted")...).Should(Succeed())
+		for _, md := range machineDeployments {
+			Eventually(func(g Gomega) {
+				machines := framework.GetMachinesByMachineDeployments(ctx, framework.GetMachinesByMachineDeploymentsInput{
+					Lister:            input.BootstrapClusterProxy.GetClient(),
+					ClusterName:       cluster.Name,
+					Namespace:         cluster.Namespace,
+					MachineDeployment: *md,
+				})
+				g.Expect(machines).To(HaveLen(1))
+				condition := conditions.Get(&machines[0], clusterv1.DrainingSucceededCondition)
+				g.Expect(condition).ToNot(BeNil())
+				g.Expect(condition.Message).To(ContainSubstring("Cannot evict pod as it would violate the pod's disruption budget"))
+			}, input.E2EConfig.GetIntervals(specName, "wait-machine-deleted")...).Should(Succeed())
+		}
 
-		By("Scale down the controlplane of the workload cluster and make sure that nodes running workload can be deleted even the draining process is blocked.")
-		// When we scale down the KCP, controlplane machines are by default deleted one by one, so it requires more time.
-		nodeDrainTimeoutKCPInterval := getDrainAndDeleteInterval(input.E2EConfig.GetIntervals(specName, "wait-machine-deleted"), controlplane.Spec.MachineTemplate.NodeDrainTimeout, controlPlaneReplicas)
-		framework.ScaleAndWaitControlPlane(ctx, framework.ScaleAndWaitControlPlaneInput{
-			ClusterProxy:        input.BootstrapClusterProxy,
-			Cluster:             cluster,
-			ControlPlane:        controlplane,
-			Replicas:            1,
-			WaitForControlPlane: nodeDrainTimeoutKCPInterval,
+		By("Set NodeDrainTimeout to 1s to unblock drain")
+		// Note: This also verifies that KCP & MachineDeployments are still propagating changes to NodeDrainTimeout down to
+		// Machines that already have a deletionTimestamp.
+		drainTimeout := &metav1.Duration{Duration: time.Duration(1) * time.Second}
+		modifyControlPlaneViaClusterAndWait(ctx, modifyControlPlaneViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyControlPlaneTopology: func(topology *clusterv1.ControlPlaneTopology) {
+				topology.NodeDrainTimeout = drainTimeout
+			},
+			WaitForControlPlane: input.E2EConfig.GetIntervals(specName, "wait-control-plane"),
+		})
+		modifyMachineDeploymentViaClusterAndWait(ctx, modifyMachineDeploymentViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyMachineDeploymentTopology: func(topology *clusterv1.MachineDeploymentTopology) {
+				topology.NodeDrainTimeout = drainTimeout
+			},
+			WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
 		})
 
+		By("Verify Node drains were unblocked")
+		// When we scale down the KCP, controlplane machines are deleted one by one, so it requires more time.
+		// MD Machine deletion is done in parallel and will be faster.
+		nodeDrainTimeoutKCPInterval := getDrainAndDeleteInterval(input.E2EConfig.GetIntervals(specName, "wait-machine-deleted"), drainTimeout, controlPlaneReplicas)
+		Eventually(func(g Gomega) {
+			// When all drains complete we only have 1 control plane & 0 MD replicas left.
+			controlPlaneMachines := framework.GetControlPlaneMachinesByCluster(ctx, framework.GetControlPlaneMachinesByClusterInput{
+				Lister:      input.BootstrapClusterProxy.GetClient(),
+				ClusterName: cluster.Name,
+				Namespace:   cluster.Namespace,
+			})
+			g.Expect(controlPlaneMachines).To(HaveLen(1))
+
+			for _, md := range machineDeployments {
+				machines := framework.GetMachinesByMachineDeployments(ctx, framework.GetMachinesByMachineDeploymentsInput{
+					Lister:            input.BootstrapClusterProxy.GetClient(),
+					ClusterName:       cluster.Name,
+					Namespace:         cluster.Namespace,
+					MachineDeployment: *md,
+				})
+				g.Expect(machines).To(HaveLen(0))
+			}
+		}, nodeDrainTimeoutKCPInterval...).Should(Succeed())
+
 		By("PASSED!")
 	})
 
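Note on the interval math: getDrainAndDeleteInterval is an existing helper in this file; this commit only changes its call site (it is now fed the 1s drainTimeout instead of a NodeDrainTimeout read from the spec). A minimal sketch of what such a helper has to do — assuming the e2e convention that an interval is a {timeout, pollingInterval} pair as returned by E2EConfig.GetIntervals, and relying on this file's existing Gomega dot-import — not the file's verbatim code:

	// getDrainAndDeleteInterval widens the base "wait-machine-deleted" interval so it
	// covers one drain timeout plus one delete timeout per replica.
	func getDrainAndDeleteInterval(deleteInterval []interface{}, drainTimeout *metav1.Duration, replicas int) []interface{} {
		deleteTimeout, err := time.ParseDuration(deleteInterval[0].(string))
		Expect(err).ToNot(HaveOccurred())
		// Each Machine first waits out its drain timeout and is then deleted. KCP
		// rolls control plane Machines one at a time, so the budget scales linearly
		// with the replica count; the polling interval is passed through unchanged.
		intervalDuration := (drainTimeout.Duration + deleteTimeout) * time.Duration(replicas)
		return []interface{}{intervalDuration.String(), deleteInterval[1]}
	}

With drainTimeout at 1s and controlPlaneReplicas at 3, the resulting Eventually budget stays close to the base wait-machine-deleted timeout while still accounting for KCP deleting control plane Machines sequentially.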
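Likewise, framework.DeployUnevictablePod is only called here, not shown. The Pods are "unevictable" because the helper pairs the Deployment with a PodDisruptionBudget that tolerates zero voluntary disruptions, which is what produces the "Cannot evict pod as it would violate the pod's disruption budget" message the new assertions key off. A hypothetical sketch of that mechanic (the helper name, image, and toleration below are illustrative assumptions, not the framework's API):

	package main

	import (
		appsv1 "k8s.io/api/apps/v1"
		corev1 "k8s.io/api/core/v1"
		policyv1 "k8s.io/api/policy/v1"
		metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
		"k8s.io/apimachinery/pkg/util/intstr"
		"k8s.io/utils/ptr"
	)

	// unevictableWorkload returns a Deployment pinned to the labeled Nodes plus a
	// PodDisruptionBudget covering its Pods. With minAvailable equal to the replica
	// count, any eviction would violate the PDB, so drain's eviction calls fail
	// until the Machine's NodeDrainTimeout expires.
	func unevictableWorkload(name, namespace string, nodeSelector map[string]string) (*appsv1.Deployment, *policyv1.PodDisruptionBudget) {
		labels := map[string]string{"app": name}
		deployment := &appsv1.Deployment{
			ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace},
			Spec: appsv1.DeploymentSpec{
				Replicas: ptr.To[int32](1),
				Selector: &metav1.LabelSelector{MatchLabels: labels},
				Template: corev1.PodTemplateSpec{
					ObjectMeta: metav1.ObjectMeta{Labels: labels},
					Spec: corev1.PodSpec{
						// Schedule onto the Nodes labeled via the Machine metadata above.
						NodeSelector: nodeSelector,
						// Assumption: the control plane variant also needs to tolerate
						// the control plane NoSchedule taint, roughly like this.
						Tolerations: []corev1.Toleration{{
							Key:    "node-role.kubernetes.io/control-plane",
							Effect: corev1.TaintEffectNoSchedule,
						}},
						Containers: []corev1.Container{{Name: "main", Image: "registry.k8s.io/pause:3.9"}},
					},
				},
			},
		}
		minAvailable := intstr.FromInt32(1)
		pdb := &policyv1.PodDisruptionBudget{
			ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace},
			Spec: policyv1.PodDisruptionBudgetSpec{
				// minAvailable == replicas: zero Pods may be disrupted, so every
				// eviction during drain is rejected by the eviction API.
				MinAvailable: &minAvailable,
				Selector:     &metav1.LabelSelector{MatchLabels: labels},
			},
		}
		return deployment, pdb
	}

This is also why the test first sets NodeDrainTimeout to 0 (drain blocks forever on the PDB) and then to 1s (drain gives up almost immediately and deletion proceeds).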