Commit fd50d92

End to end tests for pod level hugepage resources

1 parent b9e0d4a commit fd50d92

File tree

4 files changed: +409 -168 lines
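For orientation, the pods these new tests exercise set resource limits, including hugepages, directly on the pod spec rather than only per container. Below is a minimal, illustrative sketch of that shape, assuming the PodLevelResources feature gate is enabled; the names and values are not taken from the test code.

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// examplePod builds a pod whose limits, including a 2Mi hugepage limit, are
// declared at the pod level and shared by its containers. Purely illustrative.
func examplePod() *v1.Pod {
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "pod-level-hugepages"},
		Spec: v1.PodSpec{
			Resources: &v1.ResourceRequirements{
				Limits: v1.ResourceList{
					v1.ResourceCPU:                      resource.MustParse("100m"),
					v1.ResourceMemory:                   resource.MustParse("100Mi"),
					v1.ResourceHugePagesPrefix + "2Mi": resource.MustParse("10Mi"),
				},
			},
			Containers: []v1.Container{
				{Name: "c1", Image: "busybox", Command: []string{"sleep", "3600"}},
			},
		},
	}
}

func main() {
	fmt.Println(examplePod().Spec.Resources.Limits)
}
```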

test/e2e/common/node/pod_level_resources.go (+212 -37)
@@ -29,12 +29,17 @@ import (
     "k8s.io/apimachinery/pkg/api/resource"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     utilerrors "k8s.io/apimachinery/pkg/util/errors"
+    v1resource "k8s.io/kubernetes/pkg/api/v1/resource"
+    v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
     kubecm "k8s.io/kubernetes/pkg/kubelet/cm"
     "k8s.io/kubernetes/test/e2e/feature"
     "k8s.io/kubernetes/test/e2e/framework"
     e2enode "k8s.io/kubernetes/test/e2e/framework/node"
     e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
     e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
+
+    utils "k8s.io/kubernetes/test/utils"
+
     imageutils "k8s.io/kubernetes/test/utils/image"
     admissionapi "k8s.io/pod-security-admission/api"
 )
@@ -43,9 +48,14 @@ const (
     cgroupv2CPUWeight string = "cpu.weight"
     cgroupv2CPULimit  string = "cpu.max"
     cgroupv2MemLimit  string = "memory.max"
-    cgroupFsPath      string = "/sys/fs/cgroup"
-    CPUPeriod         string = "100000"
-    mountPath         string = "/sysfscgroup"
+
+    cgroupv2HugeTLBPrefix string = "hugetlb"
+    cgroupv2HugeTLBRsvd   string = "rsvd"
+
+    cgroupFsPath string = "/sys/fs/cgroup"
+    mountPath    string = "/sysfscgroup"
+
+    CPUPeriod string = "100000"
 )
 
 var (
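The regrouped constants spell out the cgroup v2 files the tests read: cpu.max holds "<quota> <period>", and HugeTLB limits live in hugetlb.<pagesize>.max plus an .rsvd variant. The following is a small standalone sketch of the arithmetic behind the expected values, assuming the usual kubelet conversion (quota = milliCPU * period / 1000; kubecm.MilliCPUToQuota additionally enforces a minimum quota, ignored here).

```go
package main

import "fmt"

func main() {
	const quotaPeriod = 100000 // microseconds; matches CPUPeriod above

	// cgroup v2 cpu.max holds "<quota> <period>". The quota is derived from
	// the CPU limit as milliCPU * period / 1000.
	cpuLimitMilli := int64(100) // a 100m CPU limit
	quota := cpuLimitMilli * quotaPeriod / 1000
	fmt.Printf("cpu.max        -> %d %d\n", quota, quotaPeriod) // 10000 100000

	// HugeTLB limits live in files named hugetlb.<pagesize>.max plus a
	// reserved-pages variant, e.g. for 2Mi pages:
	fmt.Println("hugetlb limits -> hugetlb.2MB.max, hugetlb.2MB.rsvd.max")
}
```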
@@ -69,6 +79,7 @@ var _ = SIGDescribe("Pod Level Resources", framework.WithSerial(), feature.PodLe
             e2eskipper.Skipf("not supported on cgroupv1 -- skipping")
         }
     })
+
     podLevelResourcesTests(f)
 })
 
@@ -104,7 +115,7 @@ func isCgroupv2Node(f *framework.Framework, ctx context.Context) bool {
 
 func makeObjectMetadata(name, namespace string) metav1.ObjectMeta {
     return metav1.ObjectMeta{
-        Name: "testpod", Namespace: namespace,
+        Name: name, Namespace: namespace,
         Labels: map[string]string{"time": strconv.Itoa(time.Now().Nanosecond())},
     }
 }
@@ -113,11 +124,16 @@ type containerInfo struct {
     Name      string
     Resources *resourceInfo
 }
+
 type resourceInfo struct {
-    CPUReq string
-    CPULim string
-    MemReq string
-    MemLim string
+    CPUReq          string
+    CPULim          string
+    MemReq          string
+    MemLim          string
+    HugePagesReq2Mi string
+    HugePagesLim2Mi string
+    HugePagesReq1Gi string
+    HugePagesLim1Gi string
 }
 
 func makeContainer(info containerInfo) v1.Container {
@@ -140,7 +156,7 @@ func makeContainer(info containerInfo) v1.Container {
 func getResourceRequirements(info *resourceInfo) v1.ResourceRequirements {
     var res v1.ResourceRequirements
     if info != nil {
-        if info.CPUReq != "" || info.MemReq != "" {
+        if info.CPUReq != "" || info.MemReq != "" || info.HugePagesReq2Mi != "" || info.HugePagesReq1Gi != "" {
             res.Requests = make(v1.ResourceList)
         }
         if info.CPUReq != "" {
@@ -149,8 +165,14 @@ func getResourceRequirements(info *resourceInfo) v1.ResourceRequirements {
         if info.MemReq != "" {
             res.Requests[v1.ResourceMemory] = resource.MustParse(info.MemReq)
         }
+        if info.HugePagesReq2Mi != "" {
+            res.Requests[v1.ResourceHugePagesPrefix+"2Mi"] = resource.MustParse(info.HugePagesReq2Mi)
+        }
+        if info.HugePagesReq1Gi != "" {
+            res.Requests[v1.ResourceHugePagesPrefix+"1Gi"] = resource.MustParse(info.HugePagesReq1Gi)
+        }
 
-        if info.CPULim != "" || info.MemLim != "" {
+        if info.CPULim != "" || info.MemLim != "" || info.HugePagesLim2Mi != "" || info.HugePagesLim1Gi != "" {
             res.Limits = make(v1.ResourceList)
         }
         if info.CPULim != "" {
@@ -159,6 +181,12 @@ func getResourceRequirements(info *resourceInfo) v1.ResourceRequirements {
         if info.MemLim != "" {
             res.Limits[v1.ResourceMemory] = resource.MustParse(info.MemLim)
         }
+        if info.HugePagesLim2Mi != "" {
+            res.Limits[v1.ResourceHugePagesPrefix+"2Mi"] = resource.MustParse(info.HugePagesLim2Mi)
+        }
+        if info.HugePagesLim1Gi != "" {
+            res.Limits[v1.ResourceHugePagesPrefix+"1Gi"] = resource.MustParse(info.HugePagesLim1Gi)
+        }
     }
     return res
 }
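The new branches differ from the CPU and memory ones only in how the resource name is spelled: v1.ResourceHugePagesPrefix ("hugepages-") plus the page size. A tiny standalone check of the composed name and of the byte value that later gets compared against the cgroup file (values mirror the test cases below):

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// v1.ResourceHugePagesPrefix is "hugepages-", so the key added above for
	// 2Mi pages is "hugepages-2Mi".
	fmt.Println("hugepages-" + "2Mi")

	// resource.MustParse("10Mi") is 10 * 1024 * 1024 bytes; this raw value is
	// what the pod cgroup check later compares against hugetlb.2MB.max.
	q := resource.MustParse("10Mi")
	fmt.Println(q.Value()) // 10485760
}
```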
@@ -211,7 +239,7 @@ func verifyQoS(gotPod v1.Pod, expectedQoS v1.PodQOSClass) {
 }
 
 // TODO(ndixita): dedup the conversion logic in pod resize test and move to helpers/utils.
-func verifyPodCgroups(ctx context.Context, f *framework.Framework, pod *v1.Pod, info *resourceInfo) error {
+func verifyPodCgroups(f *framework.Framework, pod *v1.Pod, info *resourceInfo) error {
     ginkgo.GinkgoHelper()
     cmd := fmt.Sprintf("find %s -name '*%s*'", mountPath, strings.ReplaceAll(string(pod.UID), "-", "_"))
     framework.Logf("Namespace %s Pod %s - looking for Pod cgroup directory path: %q", f.Namespace, pod.Name, cmd)
@@ -247,6 +275,70 @@ func verifyPodCgroups(ctx context.Context, f *framework.Framework, pod *v1.Pod,
     if err != nil {
         errs = append(errs, fmt.Errorf("failed to verify memory limit cgroup value: %w", err))
     }
+
+    // Verify cgroup limits for all the hugepage sizes in the pod
+    for resourceName, resourceAmount := range expectedResources.Limits {
+        if !v1resource.IsHugePageResourceName(resourceName) {
+            continue
+        }
+
+        pageSize, err := v1helper.HugePageSizeFromResourceName(resourceName)
+        if err != nil {
+            errs = append(errs, fmt.Errorf("encountered error while obtaining hugepage size: %w", err))
+        }
+
+        sizeString, err := v1helper.HugePageUnitSizeFromByteSize(pageSize.Value())
+        if err != nil {
+            errs = append(errs, fmt.Errorf("encountered error while obtaining hugepage unit size: %w", err))
+        }
+
+        hugepageCgroupv2Limits := []string{
+            fmt.Sprintf("%s.%s.max", cgroupv2HugeTLBPrefix, sizeString),
+            fmt.Sprintf("%s.%s.%s.max", cgroupv2HugeTLBPrefix, sizeString, cgroupv2HugeTLBRsvd),
+        }
+        expectedHugepageLim := strconv.FormatInt(resourceAmount.Value(), 10)
+
+        for _, hugepageCgroupv2Limit := range hugepageCgroupv2Limits {
+            hugepageLimCgPath := fmt.Sprintf("%s/%s", podCgPath, hugepageCgroupv2Limit)
+            err = e2epod.VerifyCgroupValue(f, pod, pod.Spec.Containers[0].Name, hugepageLimCgPath, expectedHugepageLim)
+            if err != nil {
+                errs = append(errs, fmt.Errorf("failed to verify hugepage limit cgroup value: %w, path: %s", err, hugepageLimCgPath))
+            }
+        }
+    }
+
+    return utilerrors.NewAggregate(errs)
+}
+
+func verifyContainersCgroupLimits(f *framework.Framework, pod *v1.Pod) error {
+    var errs []error
+    for _, container := range pod.Spec.Containers {
+        if pod.Spec.Resources == nil {
+            continue
+        }
+
+        if pod.Spec.Resources.Limits.Memory() != nil && container.Resources.Limits.Memory() == nil {
+            expectedCgroupMemLimit := strconv.FormatInt(pod.Spec.Resources.Limits.Memory().Value(), 10)
+            err := e2epod.VerifyCgroupValue(f, pod, container.Name, fmt.Sprintf("%s/%s", cgroupFsPath, cgroupv2MemLimit), expectedCgroupMemLimit)
+            if err != nil {
+                errs = append(errs, fmt.Errorf("failed to verify memory limit cgroup value: %w", err))
+            }
+        }
+
+        if pod.Spec.Resources.Limits.Cpu() != nil && container.Resources.Limits.Cpu() == nil {
+            cpuQuota := kubecm.MilliCPUToQuota(pod.Spec.Resources.Limits.Cpu().MilliValue(), kubecm.QuotaPeriod)
+            expectedCPULimit := strconv.FormatInt(cpuQuota, 10)
+            expectedCPULimit = fmt.Sprintf("%s %s", expectedCPULimit, CPUPeriod)
+            err := e2epod.VerifyCgroupValue(f, pod, container.Name, fmt.Sprintf("%s/%s", cgroupFsPath, cgroupv2CPULimit), expectedCPULimit)
+            if err != nil {
+                errs = append(errs, fmt.Errorf("failed to verify cpu limit cgroup value: %w", err))
+            }
+        }
+
+        // TODO(KevinTMtz) - Check all hugepage sizes for the pod; this requires
+        // the containerd cgroup value to be enabled, otherwise the HugeTLB
+        // cgroup values will just be set to max
+    }
     return utilerrors.NewAggregate(errs)
 }
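For a concrete sense of what verifyPodCgroups ends up checking for a hugepage limit: the resource name is mapped to a page-size unit string and then to the HugeTLB limit files inside the pod's cgroup directory. A standalone sketch with assumed values follows; the pod cgroup path is hypothetical, and the real code derives it via the find command above and the v1helper functions.

```go
package main

import "fmt"

func main() {
	// Assumed values for illustration only.
	podCgPath := "/sysfscgroup/kubepods.slice/kubepods-pod_12345.slice" // hypothetical pod cgroup dir
	sizeString := "2MB"                                                 // page-size unit string for 2Mi pages
	expected := int64(10 * 1024 * 1024)                                 // a 10Mi hugepages-2Mi limit, in bytes

	for _, file := range []string{
		fmt.Sprintf("hugetlb.%s.max", sizeString),
		fmt.Sprintf("hugetlb.%s.rsvd.max", sizeString),
	} {
		// Each limit file inside the pod cgroup is expected to hold the
		// byte value of the pod-level hugepage limit.
		fmt.Printf("%s/%s should contain %d\n", podCgPath, file, expected)
	}
}
```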

@@ -257,7 +349,7 @@ func podLevelResourcesTests(f *framework.Framework) {
         // and limits for the pod. If pod-level resource specifications
         // are specified, totalPodResources is equal to pod-level resources.
         // Otherwise, it is calculated by aggregating resource requests and
-        // limits from all containers within the pod..
+        // limits from all containers within the pod.
         totalPodResources *resourceInfo
     }
 
@@ -266,6 +358,7 @@ func podLevelResourcesTests(f *framework.Framework) {
         podResources *resourceInfo
         containers   []containerInfo
         expected     expectedPodConfig
+        hugepages    map[string]int
     }
 
     tests := []testCase{
@@ -349,10 +442,108 @@ func podLevelResourcesTests(f *framework.Framework) {
                 totalPodResources: &resourceInfo{CPUReq: "50m", CPULim: "100m", MemReq: "50Mi", MemLim: "100Mi"},
             },
         },
+        {
+            name:         "Guaranteed QoS pod hugepages, no container resources, single page size",
+            podResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "100Mi", MemLim: "100Mi", HugePagesLim2Mi: "10Mi"},
+            containers:   []containerInfo{{Name: "c1"}, {Name: "c2"}},
+            expected: expectedPodConfig{
+                qos:               v1.PodQOSGuaranteed,
+                totalPodResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "100Mi", MemLim: "100Mi", HugePagesReq2Mi: "10Mi", HugePagesLim2Mi: "10Mi"},
+            },
+            hugepages: map[string]int{
+                v1.ResourceHugePagesPrefix + "2Mi": 5,
+            },
+        },
+        {
+            name:         "Burstable QoS pod hugepages, container resources, single page size",
+            podResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "50Mi", MemLim: "100Mi", HugePagesLim2Mi: "10Mi"},
+            containers:   []containerInfo{{Name: "c1", Resources: &resourceInfo{CPUReq: "20m", CPULim: "50m", HugePagesLim2Mi: "4Mi"}}, {Name: "c2"}},
+            expected: expectedPodConfig{
+                qos:               v1.PodQOSBurstable,
+                totalPodResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "50Mi", MemLim: "100Mi", HugePagesReq2Mi: "10Mi", HugePagesLim2Mi: "10Mi"},
+            },
+            hugepages: map[string]int{
+                v1.ResourceHugePagesPrefix + "2Mi": 5,
+            },
+        },
+        {
+            name:         "Burstable QoS pod hugepages, container resources, single page size, pod level does not specify hugepages",
+            podResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "50Mi", MemLim: "100Mi"},
+            containers:   []containerInfo{{Name: "c1", Resources: &resourceInfo{CPUReq: "20m", CPULim: "50m", HugePagesLim2Mi: "4Mi"}}, {Name: "c2"}},
+            expected: expectedPodConfig{
+                qos:               v1.PodQOSBurstable,
+                totalPodResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "50Mi", MemLim: "100Mi", HugePagesReq2Mi: "4Mi", HugePagesLim2Mi: "4Mi"},
+            },
+            hugepages: map[string]int{
+                v1.ResourceHugePagesPrefix + "2Mi": 2,
+            },
+        },
+        {
+            name:         "Guaranteed QoS pod hugepages, no container resources, multiple page size",
+            podResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "100Mi", MemLim: "100Mi", HugePagesLim2Mi: "10Mi", HugePagesLim1Gi: "1Gi"},
+            containers:   []containerInfo{{Name: "c1"}, {Name: "c2"}},
+            expected: expectedPodConfig{
+                qos:               v1.PodQOSGuaranteed,
+                totalPodResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "100Mi", MemLim: "100Mi", HugePagesReq2Mi: "10Mi", HugePagesLim2Mi: "10Mi", HugePagesReq1Gi: "1Gi", HugePagesLim1Gi: "1Gi"},
+            },
+            hugepages: map[string]int{
+                v1.ResourceHugePagesPrefix + "2Mi": 5,
+                v1.ResourceHugePagesPrefix + "1Gi": 1,
+            },
+        },
+        {
+            name:         "Burstable QoS pod hugepages, container resources, multiple page size",
+            podResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "50Mi", MemLim: "100Mi", HugePagesLim2Mi: "10Mi", HugePagesLim1Gi: "1Gi"},
+            containers:   []containerInfo{{Name: "c1", Resources: &resourceInfo{CPUReq: "20m", CPULim: "50m", HugePagesLim2Mi: "4Mi", HugePagesLim1Gi: "1Gi"}}, {Name: "c2"}},
+            expected: expectedPodConfig{
+                qos:               v1.PodQOSBurstable,
+                totalPodResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "50Mi", MemLim: "100Mi", HugePagesReq2Mi: "10Mi", HugePagesLim2Mi: "10Mi", HugePagesReq1Gi: "1Gi", HugePagesLim1Gi: "1Gi"},
+            },
+            hugepages: map[string]int{
+                v1.ResourceHugePagesPrefix + "2Mi": 5,
+                v1.ResourceHugePagesPrefix + "1Gi": 1,
+            },
+        },
+        {
+            name:         "Burstable QoS pod hugepages, container resources, multiple page size, pod level does not specify hugepages",
+            podResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "50Mi", MemLim: "100Mi"},
+            containers:   []containerInfo{{Name: "c1", Resources: &resourceInfo{CPUReq: "20m", CPULim: "50m", HugePagesLim2Mi: "4Mi", HugePagesLim1Gi: "1Gi"}}, {Name: "c2"}},
+            expected: expectedPodConfig{
+                qos:               v1.PodQOSBurstable,
+                totalPodResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "50Mi", MemLim: "100Mi", HugePagesReq2Mi: "4Mi", HugePagesLim2Mi: "4Mi", HugePagesReq1Gi: "1Gi", HugePagesLim1Gi: "1Gi"},
+            },
+            hugepages: map[string]int{
+                v1.ResourceHugePagesPrefix + "2Mi": 2,
+                v1.ResourceHugePagesPrefix + "1Gi": 1,
+            },
+        },
+        {
+            name:         "Burstable QoS pod hugepages, container resources, different page size between pod and container level",
+            podResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "50Mi", MemLim: "100Mi", HugePagesLim2Mi: "10Mi"},
+            containers:   []containerInfo{{Name: "c1", Resources: &resourceInfo{CPUReq: "20m", CPULim: "50m", HugePagesLim1Gi: "1Gi"}}, {Name: "c2"}},
+            expected: expectedPodConfig{
+                qos:               v1.PodQOSBurstable,
+                totalPodResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "50Mi", MemLim: "100Mi", HugePagesReq2Mi: "10Mi", HugePagesLim2Mi: "10Mi", HugePagesReq1Gi: "1Gi", HugePagesLim1Gi: "1Gi"},
+            },
+            hugepages: map[string]int{
+                v1.ResourceHugePagesPrefix + "2Mi": 5,
+                v1.ResourceHugePagesPrefix + "1Gi": 1,
+            },
+        },
     }
 
     for _, tc := range tests {
         ginkgo.It(tc.name, func(ctx context.Context) {
+            // Pre-allocate hugepages in the node
+            if tc.hugepages != nil {
+                utils.SetHugepages(ctx, tc.hugepages)
+
+                ginkgo.By("restarting kubelet to pick up pre-allocated hugepages")
+                utils.RestartKubelet(ctx, false)
+
+                utils.WaitForHugepages(ctx, f, tc.hugepages)
+            }
+
             podMetadata := makeObjectMetadata("testpod", f.Namespace.Name)
             testPod := makePod(&podMetadata, tc.podResources, tc.containers)
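The pre-allocation step is what makes these cases runnable on a stock node: hugepages must exist on the host before the kubelet can report them as allocatable, hence the SetHugepages / RestartKubelet / WaitForHugepages sequence (mirrored by a release at the end of each spec). A rough sketch of what the allocation amounts to if done by hand, with an assumed sysfs path; the tests themselves rely on the utils helpers:

```go
package main

import (
	"fmt"
	"os"
)

func main() {
	// Reserve 5 x 2Mi hugepages by writing the kernel's sysfs knob. The
	// kubelet only advertises the new capacity after it rediscovers node
	// resources, which is why the test restarts it and then waits.
	const sysfsPath = "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages"
	if err := os.WriteFile(sysfsPath, []byte("5\n"), 0o644); err != nil {
		fmt.Fprintln(os.Stderr, "failed to set hugepages:", err)
		os.Exit(1)
	}
	fmt.Println("requested 5 hugepages of 2Mi; releasing them is the same write with 0")
}
```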

@@ -367,7 +558,7 @@ func podLevelResourcesTests(f *framework.Framework) {
             verifyQoS(*pod, tc.expected.qos)
 
             ginkgo.By("verifying pod cgroup values")
-            err := verifyPodCgroups(ctx, f, pod, tc.expected.totalPodResources)
+            err := verifyPodCgroups(f, pod, tc.expected.totalPodResources)
             framework.ExpectNoError(err, "failed to verify pod's cgroup values: %v", err)
 
             ginkgo.By("verifying containers cgroup limits are same as pod container's cgroup limits")
@@ -377,32 +568,16 @@
             ginkgo.By("deleting pods")
             delErr := e2epod.DeletePodWithWait(ctx, f.ClientSet, pod)
             framework.ExpectNoError(delErr, "failed to delete pod %s", delErr)
-        })
-    }
-}
 
-func verifyContainersCgroupLimits(f *framework.Framework, pod *v1.Pod) error {
-    var errs []error
-    for _, container := range pod.Spec.Containers {
-        if pod.Spec.Resources != nil && pod.Spec.Resources.Limits.Memory() != nil &&
-            container.Resources.Limits.Memory() == nil {
-            expectedCgroupMemLimit := strconv.FormatInt(pod.Spec.Resources.Limits.Memory().Value(), 10)
-            err := e2epod.VerifyCgroupValue(f, pod, container.Name, fmt.Sprintf("%s/%s", cgroupFsPath, cgroupv2MemLimit), expectedCgroupMemLimit)
-            if err != nil {
-                errs = append(errs, fmt.Errorf("failed to verify memory limit cgroup value: %w", err))
-            }
-        }
+            // Release pre-allocated hugepages
+            if tc.hugepages != nil {
+                utils.ReleaseHugepages(ctx, tc.hugepages)
 
-        if pod.Spec.Resources != nil && pod.Spec.Resources.Limits.Cpu() != nil &&
-            container.Resources.Limits.Cpu() == nil {
-            cpuQuota := kubecm.MilliCPUToQuota(pod.Spec.Resources.Limits.Cpu().MilliValue(), kubecm.QuotaPeriod)
-            expectedCPULimit := strconv.FormatInt(cpuQuota, 10)
-            expectedCPULimit = fmt.Sprintf("%s %s", expectedCPULimit, CPUPeriod)
-            err := e2epod.VerifyCgroupValue(f, pod, container.Name, fmt.Sprintf("%s/%s", cgroupFsPath, cgroupv2CPULimit), expectedCPULimit)
-            if err != nil {
-                errs = append(errs, fmt.Errorf("failed to verify cpu limit cgroup value: %w", err))
+                ginkgo.By("restarting kubelet to pick up pre-allocated hugepages")
+                utils.RestartKubelet(ctx, true)
+
+                utils.WaitForHugepages(ctx, f, tc.hugepages)
             }
-        }
+        })
     }
-    return utilerrors.NewAggregate(errs)
 }
