Skip to content

Commit c46a044

Browse files
authored
Merge pull request #140 from ehashman/cgroupv2
cgroupv2 support
2 parents 82f4392 + 015619f commit c46a044

File tree

28 files changed

+1025
-92
lines changed

28 files changed

+1025
-92
lines changed

cmd/container-monitor/cmd/cmd.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ func (collector *Collector) Collect(ch chan<- prometheus.Metric) {
127127
wg.Add(1)
128128

129129
go func(c ctrstats.Container, results chan<- prometheus.Metric) {
130-
stats, err := ctrstats.GetContainerStats(context.Background(), c)
130+
stats, err := ctrstats.GetContainerStatsV1(context.Background(), c)
131131
if err != nil {
132132
monitorLog.WithFields(log.Fields{
133133
"container": c.ContainerID,

cmd/metrics-node-sampler/cmd/cmd_linux_amd64.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ func (collector *Collector) Collect(ch chan<- prometheus.Metric) {
124124
wg.Add(1)
125125

126126
go func(c ctrstats.Container, results chan<- prometheus.Metric) {
127-
stats, err := ctrstats.GetContainerStats(context.Background(), c)
127+
// TODO: should we add cgroupv2 support here too?
128+
stats, err := ctrstats.GetContainerStatsV1(context.Background(), c)
128129
if err != nil {
129130
monitorLog.WithFields(logrus.Fields{
130131
"container": c.ContainerID,

cmd/metrics-node-sampler/integration/integration_test.go

+157-31
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,13 @@ func setupTests(t *testing.T, f func([]int) string) {
9999
func(tc *testutil.TestCase) error {
100100
t := tc.T
101101
// create a new container client
102-
var samples samples
102+
var v1samples samplesV1
103+
var v2samples samplesV2
103104
var cfg testConfig
104105
tc.UnmarshalInputsStrict(map[string]interface{}{
105-
"input_samples.yaml": &samples,
106-
"input_test.yaml": &cfg,
106+
"input_samples.yaml": &v1samples,
107+
"input_samples_v2.yaml": &v2samples,
108+
"input_test.yaml": &cfg,
107109
})
108110

109111
// setup the testdata by copying it to a tmp directory
@@ -129,18 +131,36 @@ func setupTests(t *testing.T, f func([]int) string) {
129131
require.NoError(t, os.WriteFile(filepath.Join(testdataCopy, rel), b, 0600))
130132
return nil
131133
})
134+
132135
fs := &fakeFS{
133-
FS: os.DirFS(testdataCopy),
134-
root: testdataCopy,
135-
samples: samples,
136-
index: make(map[string]int),
137-
time: make(map[string]time.Time),
136+
FS: os.DirFS(testdataCopy),
137+
root: testdataCopy,
138+
samplesV1: v1samples,
139+
samplesV2: v2samples,
140+
index: make(map[string]int),
141+
time: make(map[string]time.Time),
138142
}
139143

140144
// get 2 free ports
141145
ports, err := testutil.GetFreePorts(2)
142146
require.NoError(t, err)
143147

148+
var cpuPaths []samplerserverv1alpha1.MetricsFilepath
149+
var memoryPaths []samplerserverv1alpha1.MetricsFilepath
150+
if cfg.Config.Reader.CGroupVersion == samplerserverv1alpha1.CGroupV2 {
151+
cpuPaths = []samplerserverv1alpha1.MetricsFilepath{
152+
samplerserverv1alpha1.MetricsFilepath(filepath.Join("sys", "fs", "cgroup"))}
153+
memoryPaths = []samplerserverv1alpha1.MetricsFilepath{
154+
samplerserverv1alpha1.MetricsFilepath(filepath.Join("sys", "fs", "cgroup"))}
155+
} else {
156+
cpuPaths = []samplerserverv1alpha1.MetricsFilepath{
157+
samplerserverv1alpha1.MetricsFilepath(filepath.Join("sys", "fs", "cgroup", "cpu")),
158+
samplerserverv1alpha1.MetricsFilepath(filepath.Join("sys", "fs", "cgroup", "cpuacct")),
159+
}
160+
memoryPaths = []samplerserverv1alpha1.MetricsFilepath{
161+
samplerserverv1alpha1.MetricsFilepath(filepath.Join("sys", "fs", "cgroup", "memory"))}
162+
}
163+
144164
server := sampler.Server{
145165
SortResults: true, // so the test results are consistent
146166
MetricsNodeSampler: samplerserverv1alpha1.MetricsNodeSampler{
@@ -149,12 +169,9 @@ func setupTests(t *testing.T, f func([]int) string) {
149169
Size: cfg.SampleSize,
150170
},
151171
Reader: samplerserverv1alpha1.Reader{
152-
CPUPaths: []samplerserverv1alpha1.MetricsFilepath{
153-
samplerserverv1alpha1.MetricsFilepath(filepath.Join("sys", "fs", "cgroup", "cpu")),
154-
samplerserverv1alpha1.MetricsFilepath(filepath.Join("sys", "fs", "cgroup", "cpuacct")),
155-
},
156-
MemoryPaths: []samplerserverv1alpha1.MetricsFilepath{
157-
samplerserverv1alpha1.MetricsFilepath(filepath.Join("sys", "fs", "cgroup", "memory"))},
172+
CGroupVersion: cfg.Config.Reader.CGroupVersion,
173+
CPUPaths: cpuPaths,
174+
MemoryPaths: memoryPaths,
158175
NodeAggregationLevelGlobs: append(samplerserverv1alpha1.DefaultNodeAggregationLevels,
159176
samplerserverv1alpha1.NodeAggregationLevel("system.slice/*"),
160177
samplerserverv1alpha1.NodeAggregationLevel("*"),
@@ -221,43 +238,66 @@ func setupTests(t *testing.T, f func([]int) string) {
221238
})
222239
}
223240

224-
type samples struct {
225-
MemorySamples map[string][]MemorySample `yaml:"memorySamples" json:"memorySamples"`
226-
MemoryOOMKillSamples map[string][]MemoryOOMKillSample `yaml:"oomSamples" json:"oomSamples"`
227-
MemoryOOMSamples map[string][]MemoryOOMSample `yaml:"oomKillSamples" json:"oomKillSamples"`
228-
CPUUsageSamples map[string][]CPUUsageSample `yaml:"cpuUsageSamples" json:"cpuUsageSamples"`
229-
CPUThrottlingSamples map[string][]CPUThrottlingSample `yaml:"cpuThrottlingSamples" json:"cpuThrottlingSamples"`
241+
type samplesV1 struct {
242+
MemorySamplesV1 map[string][]MemorySampleV1 `yaml:"memorySamples" json:"memorySamples"`
243+
MemoryOOMKillSamplesV1 map[string][]MemoryOOMKillSampleV1 `yaml:"oomSamples" json:"oomSamples"`
244+
MemoryOOMSamplesV1 map[string][]MemoryOOMSampleV1 `yaml:"oomKillSamples" json:"oomKillSamples"`
245+
CPUUsageSamplesV1 map[string][]CPUUsageSampleV1 `yaml:"cpuUsageSamples" json:"cpuUsageSamples"`
246+
CPUThrottlingSamplesV1 map[string][]CPUThrottlingSampleV1 `yaml:"cpuThrottlingSamples" json:"cpuThrottlingSamples"`
247+
}
248+
249+
type samplesV2 struct {
250+
MemorySamplesV2 map[string][]MemorySampleV2 `yaml:"memorySamples" json:"memorySamplesV2"`
251+
MemoryOOMSamplesV2 map[string][]MemoryOOMSampleV2 `yaml:"memorySamples" json:"oomSamplesV2"`
252+
CPUSamplesV2 map[string][]CPUSampleV2 `yaml:"memorySamples" json:"cpuSamplesV2"`
230253
}
231254

232255
type fakeFS struct {
233-
samples
256+
samplesV1
257+
samplesV2
234258
index map[string]int
235259
time map[string]time.Time
236260
root string
237261
fs.FS
238262
}
239263

240-
type MemorySample struct {
264+
type MemorySampleV1 struct {
241265
RSS int `yaml:"total_rss" json:"total_rss"`
242266
Cache int `yaml:"total_cache" json:"total_cache"`
243267
}
244268

245-
type MemoryOOMKillSample struct {
269+
type MemoryOOMKillSampleV1 struct {
246270
OOMKill int `yaml:"oom_kill" json:"oom_kill"`
247271
}
248272

249-
type MemoryOOMSample int
273+
type MemoryOOMSampleV1 int
250274

251-
type CPUUsageSample struct {
275+
type CPUUsageSampleV1 struct {
252276
Usage int `yaml:"usage" json:"usage"`
253277
}
254278

255-
type CPUThrottlingSample struct {
279+
type CPUThrottlingSampleV1 struct {
256280
ThrottledTime int `yaml:"throttled_time" json:"throttled_time"`
257281
Periods int `yaml:"nr_periods" json:"nr_periods"`
258282
ThrottledPeriods int `yaml:"nr_throttled" json:"nr_throttled"`
259283
}
260284

285+
type MemorySampleV2 struct {
286+
Current int `yaml:"total_rss" json:"usage"`
287+
}
288+
289+
type MemoryOOMSampleV2 struct {
290+
OOMKill int `yaml:"oom_kill" json:"oom_kill"`
291+
OOM int `yaml:"oom_kill" json:"oom"`
292+
}
293+
294+
type CPUSampleV2 struct {
295+
Usage int `yaml:"usage" json:"usage_usec"`
296+
ThrottledTime int `yaml:"throttled_time" json:"throttled_usec"`
297+
Periods int `yaml:"nr_periods" json:"nr_periods"`
298+
ThrottledPeriods int `yaml:"nr_throttled" json:"nr_throttled"`
299+
}
300+
261301
func (fakeFS *fakeFS) Time(name string) time.Time {
262302
t, ok := fakeFS.time[name]
263303
if !ok {
@@ -270,7 +310,7 @@ func (fakeFS *fakeFS) Time(name string) time.Time {
270310
}
271311

272312
func (fakeFS *fakeFS) Open(name string) (fs.File, error) {
273-
if val, ok := fakeFS.MemorySamples[name]; ok {
313+
if val, ok := fakeFS.MemorySamplesV1[name]; ok {
274314
// update the file value by setting its value
275315
index := fakeFS.index[name] % len(val)
276316
fakeFS.index[name] = (index + 1)
@@ -280,7 +320,7 @@ func (fakeFS *fakeFS) Open(name string) (fs.File, error) {
280320
if err != nil {
281321
return nil, err
282322
}
283-
} else if val, ok := fakeFS.CPUUsageSamples[name]; ok {
323+
} else if val, ok := fakeFS.CPUUsageSamplesV1[name]; ok {
284324
var i int
285325
// update the file value by incrementing it
286326
index := fakeFS.index[name] % len(val)
@@ -300,7 +340,7 @@ func (fakeFS *fakeFS) Open(name string) (fs.File, error) {
300340
if err != nil {
301341
return nil, err
302342
}
303-
} else if val, ok := fakeFS.CPUThrottlingSamples[name]; ok {
343+
} else if val, ok := fakeFS.CPUThrottlingSamplesV1[name]; ok {
304344
var throttledTime, periods, periodsThrottled int
305345
// update the file value by incrementing it
306346
index := fakeFS.index[name] % len(val)
@@ -339,7 +379,7 @@ func (fakeFS *fakeFS) Open(name string) (fs.File, error) {
339379
if err != nil {
340380
return nil, err
341381
}
342-
} else if val, ok := fakeFS.MemoryOOMKillSamples[name]; ok {
382+
} else if val, ok := fakeFS.MemoryOOMKillSamplesV1[name]; ok {
343383
// update the file value by setting its value
344384
index := fakeFS.index[name] % len(val)
345385
fakeFS.index[name] = (index + 1)
@@ -359,7 +399,7 @@ func (fakeFS *fakeFS) Open(name string) (fs.File, error) {
359399
if err != nil {
360400
return nil, err
361401
}
362-
} else if val, ok := fakeFS.MemoryOOMSamples[name]; ok {
402+
} else if val, ok := fakeFS.MemoryOOMSamplesV1[name]; ok {
363403
// update the file value by setting its value
364404
index := fakeFS.index[name] % len(val)
365405
fakeFS.index[name] = (index + 1)
@@ -377,6 +417,92 @@ func (fakeFS *fakeFS) Open(name string) (fs.File, error) {
377417
if err != nil {
378418
return nil, err
379419
}
420+
} else if val, ok := fakeFS.MemorySamplesV2[name]; ok {
421+
// update the file value by setting its value
422+
index := fakeFS.index[name] % len(val)
423+
fakeFS.index[name] = (index + 1)
424+
newVal := val[index]
425+
b := fmt.Sprintf("%d\n", newVal.Current)
426+
err := os.WriteFile(filepath.Join(fakeFS.root, name), []byte(b), 0600)
427+
if err != nil {
428+
return nil, err
429+
}
430+
} else if val, ok := fakeFS.CPUSamplesV2[name]; ok {
431+
var usage, throttledTime, periods, periodsThrottled int
432+
// update the file value by incrementing it
433+
index := fakeFS.index[name] % len(val)
434+
fakeFS.index[name] = (index + 1)
435+
inc := val[index]
436+
437+
b, err := os.ReadFile(filepath.Join(fakeFS.root, name))
438+
if err != nil {
439+
return nil, err
440+
}
441+
// parse the value out
442+
for _, line := range strings.Split(string(b), "\n") {
443+
fields := strings.Fields(line)
444+
value, err := strconv.ParseUint(fields[1], 10, 64)
445+
if err != nil {
446+
return nil, err
447+
}
448+
449+
switch fields[0] {
450+
case "usage_usec":
451+
usage = int(value)
452+
case "throttled_usec":
453+
throttledTime = int(value)
454+
case "nr_periods":
455+
periods = int(value)
456+
case "nr_throttled":
457+
periodsThrottled = int(value)
458+
}
459+
}
460+
461+
usage += inc.Usage
462+
throttledTime += inc.ThrottledTime
463+
periods += inc.Periods
464+
periodsThrottled += inc.ThrottledPeriods
465+
466+
err = os.WriteFile(filepath.Join(fakeFS.root, name), []byte(fmt.Sprintf(
467+
"usage_usec %d\nthrottled_usec %d\nnr_periods %d\nnr_throttled %d", usage, throttledTime, periods, periodsThrottled)), 0600)
468+
if err != nil {
469+
return nil, err
470+
}
471+
} else if val, ok := fakeFS.MemoryOOMSamplesV2[name]; ok {
472+
var oom, oomKill int
473+
// update the file value by setting its value
474+
index := fakeFS.index[name] % len(val)
475+
fakeFS.index[name] = (index + 1)
476+
newVal := val[index]
477+
478+
b, err := os.ReadFile(filepath.Join(fakeFS.root, name))
479+
if err != nil {
480+
return nil, err
481+
}
482+
// parse the value out
483+
for _, line := range strings.Split(string(b), "\n") {
484+
fields := strings.Fields(line)
485+
value, err := strconv.ParseUint(fields[1], 10, 64)
486+
if err != nil {
487+
return nil, err
488+
}
489+
490+
switch fields[0] {
491+
case "oom":
492+
oom = int(value)
493+
case "oom_kill":
494+
oomKill = int(value)
495+
}
496+
}
497+
498+
oom += newVal.OOM
499+
oomKill += newVal.OOMKill
500+
501+
err = os.WriteFile(filepath.Join(fakeFS.root, name), []byte(fmt.Sprintf("oom %d\noom_kill %d", oom, oomKill)), 0600)
502+
if err != nil {
503+
return nil, err
504+
}
380505
}
506+
381507
return fakeFS.FS.Open(name)
382508
}

0 commit comments

Comments
 (0)