Skip to content

Commit dd26220

Browse files
rphillips authored and bertinatto committed
UPSTREAM: 126213: add test about container metrics from cadvisor
1 parent 0d79325 commit dd26220

File tree

2 files changed

+185
-30
lines changed

2 files changed

+185
-30
lines changed
+147
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
/*
2+
Copyright 2024 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package e2enode
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"time"
23+
24+
"github.com/onsi/ginkgo/v2"
25+
"github.com/onsi/gomega"
26+
"github.com/onsi/gomega/gstruct"
27+
"github.com/onsi/gomega/types"
28+
29+
"k8s.io/kubernetes/test/e2e/framework"
30+
e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
31+
e2evolume "k8s.io/kubernetes/test/e2e/framework/volume"
32+
admissionapi "k8s.io/pod-security-admission/api"
33+
)
34+
35+
var _ = SIGDescribe("ContainerMetrics", "[LinuxOnly]", framework.WithNodeConformance(), func() {
36+
f := framework.NewDefaultFramework("container-metrics")
37+
f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged
38+
ginkgo.Context("when querying /metrics/cadvisor", func() {
39+
ginkgo.BeforeEach(func(ctx context.Context) {
40+
createMetricsPods(ctx, f)
41+
})
42+
ginkgo.It("should report container metrics", func(ctx context.Context) {
43+
keys := gstruct.Keys{}
44+
ctrMatches := map[string]types.GomegaMatcher{
45+
"container_blkio_device_usage_total": boundedSample(0, 10000000),
46+
"container_cpu_load_average_10s": boundedSample(0, 100),
47+
"container_cpu_system_seconds_total": boundedSample(0, 100),
48+
"container_cpu_usage_seconds_total": boundedSample(0, 100),
49+
"container_cpu_user_seconds_total": boundedSample(0, 100),
50+
"container_file_descriptors": boundedSample(0, 100),
51+
"container_fs_reads_bytes_total": boundedSample(0, 10000000),
52+
"container_fs_reads_total": boundedSample(0, 100),
53+
"container_fs_usage_bytes": boundedSample(0, 1000000),
54+
"container_fs_writes_bytes_total": boundedSample(0, 1000000),
55+
"container_fs_writes_total": boundedSample(0, 100),
56+
"container_last_seen": boundedSample(time.Now().Add(-maxStatsAge).Unix(), time.Now().Add(2*time.Minute).Unix()),
57+
"container_memory_cache": boundedSample(1*e2evolume.Kb, 10*e2evolume.Mb),
58+
"container_memory_failcnt": preciseSample(0),
59+
"container_memory_failures_total": boundedSample(0, 1000000),
60+
"container_memory_mapped_file": boundedSample(0, 10000000),
61+
"container_memory_max_usage_bytes": boundedSample(0, 80*e2evolume.Mb),
62+
"container_memory_rss": boundedSample(10*e2evolume.Kb, 80*e2evolume.Mb),
63+
"container_memory_swap": preciseSample(0),
64+
"container_memory_usage_bytes": boundedSample(10*e2evolume.Kb, 80*e2evolume.Mb),
65+
"container_memory_working_set_bytes": boundedSample(10*e2evolume.Kb, 80*e2evolume.Mb),
66+
"container_oom_events_total": preciseSample(0),
67+
"container_processes": boundedSample(0, 10),
68+
"container_sockets": boundedSample(0, 10),
69+
"container_spec_cpu_period": preciseSample(100000),
70+
"container_spec_cpu_shares": preciseSample(2),
71+
"container_spec_memory_limit_bytes": preciseSample(79998976),
72+
"container_spec_memory_reservation_limit_bytes": preciseSample(0),
73+
"container_spec_memory_swap_limit_bytes": preciseSample(0),
74+
"container_start_time_seconds": boundedSample(time.Now().Add(-maxStatsAge).Unix(), time.Now().Add(2*time.Minute).Unix()),
75+
"container_tasks_state": preciseSample(0),
76+
"container_threads": boundedSample(0, 10),
77+
"container_threads_max": boundedSample(0, 100000),
78+
"container_ulimits_soft": boundedSample(0, 10000000),
79+
}
80+
appendMatchesForContainer(f.Namespace.Name, pod0, pod1, "busybox-container", ctrMatches, keys, gstruct.AllowDuplicates|gstruct.IgnoreExtras)
81+
82+
ctrOptionalMatches := map[string]types.GomegaMatcher{
83+
"container_fs_io_current": boundedSample(0, 100),
84+
"container_fs_io_time_seconds_total": boundedSample(0, 100),
85+
"container_fs_io_time_weighted_seconds_total": boundedSample(0, 100),
86+
"container_fs_inodes_free": boundedSample(0, 10*e2evolume.Kb),
87+
"container_fs_inodes_total": boundedSample(0, 100),
88+
"container_fs_limit_bytes": boundedSample(100*e2evolume.Mb, 10*e2evolume.Tb),
89+
"container_fs_usage_bytes": boundedSample(0, 1000000),
90+
"container_fs_read_seconds_total": preciseSample(0),
91+
"container_fs_reads_merged_total": preciseSample(0),
92+
"container_fs_sector_reads_total": preciseSample(0),
93+
"container_fs_sector_writes_total": preciseSample(0),
94+
"container_fs_write_seconds_total": preciseSample(0),
95+
"container_fs_writes_merged_total": preciseSample(0),
96+
}
97+
// Missing from containerd, so set gstruct.IgnoreMissing
98+
// See https://github.com/google/cadvisor/issues/2785
99+
appendMatchesForContainer(f.Namespace.Name, pod0, pod1, "busybox-container", ctrOptionalMatches, keys, gstruct.AllowDuplicates|gstruct.IgnoreMissing|gstruct.IgnoreExtras)
100+
101+
podMatches := map[string]types.GomegaMatcher{
102+
"container_network_receive_bytes_total": boundedSample(10, 10*e2evolume.Mb),
103+
"container_network_receive_errors_total": boundedSample(0, 1000),
104+
"container_network_receive_packets_dropped_total": boundedSample(0, 1000),
105+
"container_network_receive_packets_total": boundedSample(0, 1000),
106+
"container_network_transmit_bytes_total": boundedSample(10, 10*e2evolume.Mb),
107+
"container_network_transmit_errors_total": boundedSample(0, 1000),
108+
"container_network_transmit_packets_dropped_total": boundedSample(0, 1000),
109+
"container_network_transmit_packets_total": boundedSample(0, 1000),
110+
}
111+
// TODO: determine why these are missing from containerd but not CRI-O
112+
appendMatchesForContainer(f.Namespace.Name, pod0, pod1, "POD", podMatches, keys, gstruct.AllowDuplicates|gstruct.IgnoreMissing|gstruct.IgnoreExtras)
113+
114+
matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, keys)
115+
ginkgo.By("Giving pods a minute to start up and produce metrics")
116+
gomega.Eventually(ctx, getContainerMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
117+
ginkgo.By("Ensuring the metrics match the expectations a few more times")
118+
gomega.Consistently(ctx, getContainerMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
119+
})
120+
ginkgo.AfterEach(func(ctx context.Context) {
121+
removeMetricsPods(ctx, f)
122+
})
123+
})
124+
})
125+
126+
func getContainerMetrics(ctx context.Context) (e2emetrics.KubeletMetrics, error) {
127+
ginkgo.By("getting container metrics from cadvisor")
128+
return e2emetrics.GrabKubeletMetricsWithoutProxy(ctx, framework.TestContext.NodeName+":10255", "/metrics/cadvisor")
129+
}
130+
131+
func preciseSample(value interface{}) types.GomegaMatcher {
132+
return gstruct.PointTo(gstruct.MatchAllFields(gstruct.Fields{
133+
"Metric": gstruct.Ignore(),
134+
"Value": gomega.BeEquivalentTo(value),
135+
"Timestamp": gstruct.Ignore(),
136+
"Histogram": gstruct.Ignore(),
137+
}))
138+
}
139+
140+
func appendMatchesForContainer(ns, pod1, pod2, ctr string, matches map[string]types.GomegaMatcher, keys gstruct.Keys, options gstruct.Options) {
141+
for k, v := range matches {
142+
keys[k] = gstruct.MatchElements(containerID, options, gstruct.Elements{
143+
fmt.Sprintf("%s::%s::%s", ns, pod1, ctr): v,
144+
fmt.Sprintf("%s::%s::%s", ns, pod2, ctr): v,
145+
})
146+
}
147+
}

test/e2e_node/resource_metrics_test.go

+38-30
Original file line numberDiff line numberDiff line change
@@ -51,24 +51,7 @@ var _ = SIGDescribe("ResourceMetricsAPI", nodefeature.ResourceMetrics, func() {
5151
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
5252
ginkgo.Context("when querying /resource/metrics", func() {
5353
ginkgo.BeforeEach(func(ctx context.Context) {
54-
ginkgo.By("Creating test pods to measure their resource usage")
55-
numRestarts := int32(1)
56-
pods := getSummaryTestPods(f, numRestarts, pod0, pod1)
57-
e2epod.NewPodClient(f).CreateBatch(ctx, pods)
58-
59-
ginkgo.By("restarting the containers to ensure container metrics are still being gathered after a container is restarted")
60-
gomega.Eventually(ctx, func(ctx context.Context) error {
61-
for _, pod := range pods {
62-
err := verifyPodRestartCount(ctx, f, pod.Name, len(pod.Spec.Containers), numRestarts)
63-
if err != nil {
64-
return err
65-
}
66-
}
67-
return nil
68-
}, time.Minute, 5*time.Second).Should(gomega.Succeed())
69-
70-
ginkgo.By("Waiting 15 seconds for cAdvisor to collect 2 stats points")
71-
time.Sleep(15 * time.Second)
54+
createMetricsPods(ctx, f)
7255
})
7356
ginkgo.It("should report resource usage through the resource metrics api", func(ctx context.Context) {
7457
ginkgo.By("Fetching node so we can match against an appropriate memory limit")
@@ -134,22 +117,32 @@ var _ = SIGDescribe("ResourceMetricsAPI", nodefeature.ResourceMetrics, func() {
134117
gomega.Consistently(ctx, getResourceMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
135118
})
136119
ginkgo.AfterEach(func(ctx context.Context) {
137-
ginkgo.By("Deleting test pods")
138-
var zero int64 = 0
139-
e2epod.NewPodClient(f).DeleteSync(ctx, pod0, metav1.DeleteOptions{GracePeriodSeconds: &zero}, 10*time.Minute)
140-
e2epod.NewPodClient(f).DeleteSync(ctx, pod1, metav1.DeleteOptions{GracePeriodSeconds: &zero}, 10*time.Minute)
141-
if !ginkgo.CurrentSpecReport().Failed() {
142-
return
143-
}
144-
if framework.TestContext.DumpLogsOnFailure {
145-
e2ekubectl.LogFailedContainers(ctx, f.ClientSet, f.Namespace.Name, framework.Logf)
146-
}
147-
ginkgo.By("Recording processes in system cgroups")
148-
recordSystemCgroupProcesses(ctx)
120+
removeMetricsPods(ctx, f)
149121
})
150122
})
151123
})
152124

125+
func createMetricsPods(ctx context.Context, f *framework.Framework) {
126+
ginkgo.By("Creating test pods to measure their resource usage")
127+
numRestarts := int32(1)
128+
pods := getSummaryTestPods(f, numRestarts, pod0, pod1)
129+
e2epod.NewPodClient(f).CreateBatch(ctx, pods)
130+
131+
ginkgo.By("Restarting the containers to ensure container metrics are still being gathered after a container is restarted")
132+
gomega.Eventually(ctx, func(ctx context.Context) error {
133+
for _, pod := range pods {
134+
err := verifyPodRestartCount(ctx, f, pod.Name, len(pod.Spec.Containers), numRestarts)
135+
if err != nil {
136+
return err
137+
}
138+
}
139+
return nil
140+
}, time.Minute, 5*time.Second).Should(gomega.Succeed())
141+
142+
ginkgo.By("Waiting 15 seconds for cAdvisor to collect 2 stats points")
143+
time.Sleep(15 * time.Second)
144+
}
145+
153146
func getResourceMetrics(ctx context.Context) (e2emetrics.KubeletMetrics, error) {
154147
ginkgo.By("getting stable resource metrics API")
155148
return e2emetrics.GrabKubeletMetricsWithoutProxy(ctx, nodeNameOrIP()+":10255", "/metrics/resource")
@@ -212,3 +205,18 @@ func haveKeys(keys ...string) types.GomegaMatcher {
212205

213206
return matcher
214207
}
208+
209+
func removeMetricsPods(ctx context.Context, f *framework.Framework) {
210+
ginkgo.By("Deleting test pods")
211+
var zero int64 = 0
212+
e2epod.NewPodClient(f).DeleteSync(ctx, pod0, metav1.DeleteOptions{GracePeriodSeconds: &zero}, 10*time.Minute)
213+
e2epod.NewPodClient(f).DeleteSync(ctx, pod1, metav1.DeleteOptions{GracePeriodSeconds: &zero}, 10*time.Minute)
214+
if !ginkgo.CurrentSpecReport().Failed() {
215+
return
216+
}
217+
if framework.TestContext.DumpLogsOnFailure {
218+
e2ekubectl.LogFailedContainers(ctx, f.ClientSet, f.Namespace.Name, framework.Logf)
219+
}
220+
ginkgo.By("Recording processes in system cgroups")
221+
recordSystemCgroupProcesses(ctx)
222+
}

0 commit comments

Comments
 (0)