@@ -20,7 +20,6 @@ import (
 	"context"
 	"fmt"
 	"path/filepath"
-	"regexp"
 	"strconv"
 	"strings"
 	"time"
@@ -69,80 +68,27 @@ const (
 	noStarvedResource = v1.ResourceName("none")
 )

-type EvictionTestConfig struct {
-	Signal                  string
-	PressureTimeout         time.Duration
-	ExpectedNodeCondition   v1.NodeConditionType
-	ExpectedStarvedResource v1.ResourceName
-	IsHardEviction          bool                                               // true for hard eviction, false for soft eviction
-	ResourceGetter          func(summary *kubeletstatsv1alpha1.Summary) uint64 // Gets available resources (bytes, inodes, etc.)
-	ResourceThreshold       uint64                                             // Consumed resources that trigger eviction
-	ThresholdPercentage     string                                             // either uint64 or percentage
-	EvictionGracePeriod     string                                             // Used for soft eviction
-	MetricsLogger           func(ctx context.Context)
-}
-
-func testRunner(f *framework.Framework, config EvictionTestConfig, specs []podEvictSpec) {
-
+// InodeEviction tests that the node responds to node disk pressure by evicting only responsible pods.
+// Node disk pressure is induced by consuming all inodes on the node.
+var _ = SIGDescribe("InodeEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.Eviction, func() {
+	f := framework.NewDefaultFramework("inode-eviction-test")
 	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
-
-	ginkgo.Context(fmt.Sprintf(testContextFmt, config.ExpectedNodeCondition), func() {
+	expectedNodeCondition := v1.NodeDiskPressure
+	expectedStarvedResource := resourceInodes
+	pressureTimeout := 15 * time.Minute
+	inodesConsumed := uint64(200000)
+	ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
 		tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
+			// Set the eviction threshold to inodesFree - inodesConsumed, so that using inodesConsumed causes an eviction.
 			summary := eventuallyGetSummary(ctx)
-			available := config.ResourceGetter(summary)
-
-			if config.ThresholdPercentage == "" && available <= config.ResourceThreshold {
-				e2eskipper.Skipf("Too few resources free on the host for the eviction test to run")
-			}
-
-			var thresholdValue string
-			if config.ThresholdPercentage != "" {
-				thresholdValue = config.ThresholdPercentage
-			} else {
-				thresholdValue = fmt.Sprintf("%d", available-config.ResourceThreshold)
-			}
-
-			if config.IsHardEviction {
-				initialConfig.EvictionHard = map[string]string{config.Signal: thresholdValue}
-			} else {
-				initialConfig.EvictionSoft = map[string]string{config.Signal: thresholdValue}
-				initialConfig.EvictionSoftGracePeriod = map[string]string{config.Signal: config.EvictionGracePeriod}
-				initialConfig.EvictionMaxPodGracePeriod = 30
+			inodesFree := *summary.Node.Fs.InodesFree
+			if inodesFree <= inodesConsumed {
+				e2eskipper.Skipf("Too few inodes free on the host for the InodeEviction test to run")
 			}
-
-			// Add any special overrides for specific tests
+			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsInodesFree): fmt.Sprintf("%d", inodesFree-inodesConsumed)}
 			initialConfig.EvictionMinimumReclaim = map[string]string{}
-
-			// Ensure that pods are not evicted because of the eviction-hard threshold
-			// setting a threshold to 0% disables; non-empty map overrides default value (necessary due to omitempty)
-			if !config.IsHardEviction {
-				initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): "0%"}
-			}
 		})
-
-		runEvictionTest(f, config.PressureTimeout, config.ExpectedNodeCondition,
-			config.ExpectedStarvedResource, config.MetricsLogger, specs)
-	})
-}
-
-// InodeEviction tests that the node responds to node disk pressure by evicting only responsible pods.
-// Node disk pressure is induced by consuming all inodes on the node.
-var _ = SIGDescribe("InodeEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.Eviction, func() {
-	testRunner(
-		framework.NewDefaultFramework("inode-eviction-test"),
-		EvictionTestConfig{
-			Signal:                  string(evictionapi.SignalNodeFsInodesFree),
-			PressureTimeout:         15 * time.Minute,
-			ExpectedNodeCondition:   v1.NodeDiskPressure,
-			ExpectedStarvedResource: resourceInodes,
-			IsHardEviction:          true,
-			ResourceThreshold:       uint64(200000), // Inodes consumed
-			MetricsLogger:           logInodeMetrics,
-			ResourceGetter: func(summary *kubeletstatsv1alpha1.Summary) uint64 {
-				return *summary.Node.Fs.InodesFree
-			},
-		},
-		[]podEvictSpec{
+		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logInodeMetrics, []podEvictSpec{
 			{
 				evictionPriority: 1,
 				// TODO(#127864): Container runtime may not immediate free up the resources after the pod eviction,
@@ -154,6 +100,7 @@ var _ = SIGDescribe("InodeEviction", framework.WithSlow(), framework.WithSerial(
 				pod: innocentPod(),
 			},
 		})
+	})
 })

 // ImageGCNoEviction tests that the eviction manager is able to prevent eviction
@@ -280,32 +227,41 @@ var _ = SIGDescribe("LocalStorageEviction", framework.WithSlow(), framework.With
 // Disk pressure is induced by running pods which consume disk space, which exceed the soft eviction threshold.
 // Note: This test's purpose is to test Soft Evictions. Local storage was chosen since it is the least costly to run.
 var _ = SIGDescribe("LocalStorageSoftEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.Eviction, func() {
-	diskConsumed := resource.MustParse("4Gi")
-	testRunner(
-		framework.NewDefaultFramework("localstorage-eviction-test"),
-		EvictionTestConfig{
-			Signal:                  string(evictionapi.SignalNodeFsAvailable),
-			PressureTimeout:         10 * time.Minute,
-			ExpectedNodeCondition:   v1.NodeDiskPressure,
-			ExpectedStarvedResource: v1.ResourceEphemeralStorage,
-			ResourceThreshold:       uint64(diskConsumed.Value()), // local storage
-			IsHardEviction:          false,
-			EvictionGracePeriod:     "1m",
-			MetricsLogger:           logDiskMetrics,
-			ResourceGetter: func(summary *kubeletstatsv1alpha1.Summary) uint64 {
-				return *summary.Node.Fs.AvailableBytes
-			},
-		},
-		[]podEvictSpec{
+	f := framework.NewDefaultFramework("localstorage-eviction-test")
+	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
+	pressureTimeout := 10 * time.Minute
+	expectedNodeCondition := v1.NodeDiskPressure
+	expectedStarvedResource := v1.ResourceEphemeralStorage
+	ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
+		tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
+			diskConsumed := resource.MustParse("4Gi")
+			summary := eventuallyGetSummary(ctx)
+			availableBytes := *(summary.Node.Fs.AvailableBytes)
+			if availableBytes <= uint64(diskConsumed.Value()) {
+				e2eskipper.Skipf("Too little disk free on the host for the LocalStorageSoftEviction test to run")
+			}
+			initialConfig.EvictionSoft = map[string]string{string(evictionapi.SignalNodeFsAvailable): fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
+			initialConfig.EvictionSoftGracePeriod = map[string]string{string(evictionapi.SignalNodeFsAvailable): "1m"}
+			// Defer to the pod default grace period
+			initialConfig.EvictionMaxPodGracePeriod = 30
+			initialConfig.EvictionMinimumReclaim = map[string]string{}
+			// Ensure that pods are not evicted because of the eviction-hard threshold
+			// setting a threshold to 0% disables; non-empty map overrides default value (necessary due to omitempty)
+			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): "0%"}
+		})
+		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
 			{
 				evictionPriority: 1,
-				pod:              diskConsumingPod("container-disk-hog", lotsOfDisk, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}, v1.ResourceRequirements{}),
+				// TODO(#127864): Container runtime may not immediate free up the resources after the pod eviction,
+				// causing the test to fail. We provision an emptyDir volume to avoid relying on the runtime behavior.
+				pod: diskConsumingPod("container-disk-hog", lotsOfDisk, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}, v1.ResourceRequirements{}),
 			},
 			{
 				evictionPriority: 0,
 				pod:              innocentPod(),
 			},
 		})
+	})
 })

 var _ = SIGDescribe("LocalStorageSoftEvictionNotOverwriteTerminationGracePeriodSeconds", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.Eviction, func() {
@@ -348,28 +304,20 @@ var _ = SIGDescribe("LocalStorageSoftEvictionNotOverwriteTerminationGracePeriodS

 // LocalStorageCapacityIsolationEviction tests that container and volume local storage limits are enforced through evictions
 var _ = SIGDescribe("LocalStorageCapacityIsolationEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.LocalStorageCapacityIsolationQuota, feature.Eviction, func() {
-	sizeLimit := resource.MustParse("40Mi")
-	useOverLimit := 41 /* Mb */
-	useUnderLimit := 39 /* Mb */
-	containerLimit := v1.ResourceList{v1.ResourceEphemeralStorage: sizeLimit}
-
-	testRunner(
-		framework.NewDefaultFramework("localstorage-eviction-test"),
-		EvictionTestConfig{
-			Signal:                  string(evictionapi.SignalMemoryAvailable),
-			PressureTimeout:         10 * time.Minute,
-			ExpectedNodeCondition:   noPressure,
-			ExpectedStarvedResource: noStarvedResource,
-			IsHardEviction:          true,
-			ThresholdPercentage:     "0%", // Disabling this threshold to focus on pod-level limits
-			MetricsLogger:           logDiskMetrics,
-			ResourceGetter: func(summary *kubeletstatsv1alpha1.Summary) uint64 {
-				// We're not using node-level resource checks for this test
-				// Just need a non-zero value to pass the resource check
-				return 1024 * 1024 * 1024 // 1 GB (arbitrary non-zero value)
-			},
-		},
-		[]podEvictSpec{
+	f := framework.NewDefaultFramework("localstorage-eviction-test")
+	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
+	evictionTestTimeout := 10 * time.Minute
+	ginkgo.Context(fmt.Sprintf(testContextFmt, "evictions due to pod local storage violations"), func() {
+		tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
+			// setting a threshold to 0% disables; non-empty map overrides default value (necessary due to omitempty)
+			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): "0%"}
+		})
+		sizeLimit := resource.MustParse("100Mi")
+		useOverLimit := 101 /* Mb */
+		useUnderLimit := 99 /* Mb */
+		containerLimit := v1.ResourceList{v1.ResourceEphemeralStorage: sizeLimit}
+
+		runEvictionTest(f, evictionTestTimeout, noPressure, noStarvedResource, logDiskMetrics, []podEvictSpec{
 			{
 				evictionPriority: 1, // This pod should be evicted because emptyDir (default storage type) usage violation
 				pod: diskConsumingPod("emptydir-disk-sizelimit", useOverLimit, &v1.VolumeSource{
@@ -402,6 +350,7 @@ var _ = SIGDescribe("LocalStorageCapacityIsolationEviction", framework.WithSlow(
 				pod: diskConsumingPod("container-disk-below-sizelimit", useUnderLimit, nil, v1.ResourceRequirements{Limits: containerLimit}),
 			},
 		})
+	})
 })

 // PriorityMemoryEvictionOrdering tests that the node responds to node memory pressure by evicting pods.
@@ -640,19 +589,6 @@ func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expe
 			// Nodes do not immediately report local storage capacity
 			// Sleep so that pods requesting local storage do not fail to schedule
 			time.Sleep(30 * time.Second)
-			// Check for Pressure
-			ginkgo.By("make sure node has no pressure before starting")
-			gomega.Eventually(ctx, func(ctx context.Context) error {
-				if expectedNodeCondition == noPressure || !hasNodeCondition(ctx, f, expectedNodeCondition) {
-					return nil
-				}
-				return fmt.Errorf("NodeCondition: %s encountered", expectedNodeCondition)
-			}, pressureDisappearTimeout, evictionPollInterval).Should(gomega.Succeed())
-
-			// prepull images only if its image-gc-eviction-test
-			if regexp.MustCompile(`(?i)image-gc.*`).MatchString(f.BaseName) {
-				gomega.Expect(PrePullAllImages(ctx)).Should(gomega.Succeed())
-			}
 			ginkgo.By("setting up pods to be used by tests")
 			pods := []*v1.Pod{}
 			for _, spec := range testSpecs {
@@ -720,23 +656,10 @@ func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expe
 			}, postTestConditionMonitoringPeriod, evictionPollInterval).Should(gomega.Succeed())

 			ginkgo.By("checking for correctly formatted eviction events")
-			gomega.Eventually(ctx, func(ctx context.Context) error {
-				return verifyEvictionEvents(ctx, f, testSpecs, expectedStarvedResource)
-			}, postTestConditionMonitoringPeriod, evictionPollInterval).Should(gomega.Succeed())
+			verifyEvictionEvents(ctx, f, testSpecs, expectedStarvedResource)
 		})

 		ginkgo.AfterEach(func(ctx context.Context) {
-			prePullImagesIfNeccecary := func() {
-				if framework.TestContext.PrepullImages {
-					// The disk eviction test may cause the prepulled images to be evicted,
-					// prepull those images again to ensure this test not affect following tests.
-					err := PrePullAllImages(ctx)
-					gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
-				}
-			}
-			// Run prePull using a defer to make sure it is executed even when the assertions below fails
-			defer prePullImagesIfNeccecary()
-
 			ginkgo.By("deleting pods")
 			for _, spec := range testSpecs {
 				ginkgo.By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
@@ -887,7 +810,7 @@ func verifyPodConditions(ctx context.Context, f *framework.Framework, testSpecs
 	}
 }

-func verifyEvictionEvents(ctx context.Context, f *framework.Framework, testSpecs []podEvictSpec, expectedStarvedResource v1.ResourceName) error {
+func verifyEvictionEvents(ctx context.Context, f *framework.Framework, testSpecs []podEvictSpec, expectedStarvedResource v1.ResourceName) {
 	for _, spec := range testSpecs {
 		pod := spec.pod
 		if spec.evictionPriority != 0 {
@@ -901,22 +824,24 @@ func verifyEvictionEvents(ctx context.Context, f *framework.Framework, testSpecs
 			framework.ExpectNoError(err, "getting events")
 			gomega.Expect(podEvictEvents.Items).To(gomega.HaveLen(1), "Expected to find 1 eviction event for pod %s, got %d", pod.Name, len(podEvictEvents.Items))
 			event := podEvictEvents.Items[0]
+
 			if expectedStarvedResource != noStarvedResource {
 				// Check the eviction.StarvedResourceKey
 				starved, found := event.Annotations[eviction.StarvedResourceKey]
 				if !found {
-					return fmt.Errorf("Expected to find an annotation on the eviction event for pod %s containing the starved resource %s, but it was not found",
+					framework.Failf("Expected to find an annotation on the eviction event for pod %s containing the starved resource %s, but it was not found",
 						pod.Name, expectedStarvedResource)
 				}
 				starvedResource := v1.ResourceName(starved)
 				gomega.Expect(starvedResource).To(gomega.Equal(expectedStarvedResource), "Expected to the starved_resource annotation on pod %s to contain %s, but got %s instead",
 					pod.Name, expectedStarvedResource, starvedResource)
+
 				// We only check these keys for memory, because ephemeral storage evictions may be due to volume usage, in which case these values are not present
 				if expectedStarvedResource == v1.ResourceMemory {
 					// Check the eviction.OffendingContainersKey
 					offendersString, found := event.Annotations[eviction.OffendingContainersKey]
 					if !found {
-						return fmt.Errorf("Expected to find an annotation on the eviction event for pod %s containing the offending containers, but it was not found",
+						framework.Failf("Expected to find an annotation on the eviction event for pod %s containing the offending containers, but it was not found",
 							pod.Name)
 					}
 					offendingContainers := strings.Split(offendersString, ",")
@@ -928,7 +853,7 @@ func verifyEvictionEvents(ctx context.Context, f *framework.Framework, testSpecs
 					// Check the eviction.OffendingContainersUsageKey
 					offendingUsageString, found := event.Annotations[eviction.OffendingContainersUsageKey]
 					if !found {
-						return fmt.Errorf("Expected to find an annotation on the eviction event for pod %s containing the offending containers' usage, but it was not found",
+						framework.Failf("Expected to find an annotation on the eviction event for pod %s containing the offending containers' usage, but it was not found",
 							pod.Name)
 					}
 					offendingContainersUsage := strings.Split(offendingUsageString, ",")
@@ -943,7 +868,6 @@ func verifyEvictionEvents(ctx context.Context, f *framework.Framework, testSpecs
 			}
 		}
 	}
-	return nil
 }

 // Returns TRUE if the node has the node condition, FALSE otherwise
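
Every test restored above follows the same threshold derivation: read the currently available amount of the signal's resource from the kubelet summary, skip the test if there is not enough headroom, and otherwise set the hard-eviction threshold to available minus the amount the test pod will consume, so that consuming that amount is guaranteed to cross the threshold. The following is a minimal standalone sketch of that arithmetic only; the helper name hardEvictionThreshold is illustrative and does not appear in the diff.

// Sketch, not part of the diff: the available - consumed threshold math used
// by the inline eviction tests (e.g. nodefs.inodesFree for InodeEviction).
package main

import "fmt"

// hardEvictionThreshold returns the value to place in the kubelet's
// EvictionHard map: the currently available amount minus the amount the test
// intends to consume. ok is false when the host has too little of the
// resource free, which is the case where the tests call e2eskipper.Skipf.
func hardEvictionThreshold(available, toConsume uint64) (threshold string, ok bool) {
	if available <= toConsume {
		return "", false
	}
	return fmt.Sprintf("%d", available-toConsume), true
}

func main() {
	// Example with the InodeEviction figure from the diff: 200000 inodes consumed.
	if threshold, ok := hardEvictionThreshold(1500000, 200000); ok {
		fmt.Println(threshold) // prints 1300000
	}
}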