Skip to content

Commit d4f3ab8

Browse files
committed
Add TRACE log level for the metric refresh loop
1 parent d66b732 commit d4f3ab8

File tree

4 files changed

+9
-7
lines changed

4 files changed

+9
-7
lines changed

docs/dev.md

+3-2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ const(
2727
DEFAULT=2
2828
VERBOSE=3
2929
DEBUG=4
30+
TRACE=5
3031
)
3132
```
3233

@@ -54,12 +55,12 @@ The guidelines are written in the context of a k8s controller. Our [ext-proc](..
5455
* `V(DEBUG)` and above: Use your best judgement.
5556

5657
4. Metric scraping loops. These loops run at a very high frequency, and logs can be very spammy if not handled properly.
57-
* `klog.V(DEBUG).InfoS`
58+
* `klog.V(TRACE).InfoS`
5859
* Transient errors/warnings, such as failure to get response from a pod.
5960
* Important state changes, such as updating a metric.
6061

6162
5. Misc
6263
1. Periodic (every 5s) debug loop which prints the current pods and metrics.
6364
* `klog.WarningS` if the metrics are not fresh enough, which indicates that an error occurred during the metric scraping loop.
64-
* `klog.V(VERBOSE).InfoS`
65+
* `klog.V(DEBUG).InfoS`
6566
* This is very important for debugging the request scheduling algorithm, yet it is not spammy compared to the metric scraping loop logs.

pkg/ext-proc/backend/provider.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ func (p *Provider) Init(refreshPodsInterval, refreshMetricsInterval time.Duratio
8080
for {
8181
time.Sleep(refreshMetricsInterval)
8282
if err := p.refreshMetricsOnce(); err != nil {
83-
klog.V(logutil.DEBUG).Infof("Failed to refresh metrics: %v", err)
83+
klog.V(logutil.TRACE).Infof("Failed to refresh metrics: %v", err)
8484
}
8585
}
8686
}()
@@ -135,12 +135,12 @@ func (p *Provider) refreshMetricsOnce() error {
135135
defer func() {
136136
d := time.Since(start)
137137
// TODO: add a metric instead of logging
138-
klog.V(logutil.DEBUG).Infof("Refreshed metrics in %v", d)
138+
klog.V(logutil.TRACE).Infof("Refreshed metrics in %v", d)
139139
}()
140140
var wg sync.WaitGroup
141141
errCh := make(chan error)
142142
processOnePod := func(key, value any) bool {
143-
klog.V(logutil.DEBUG).Infof("Processing pod %v and metric %v", key, value)
143+
klog.V(logutil.TRACE).Infof("Processing pod %v and metric %v", key, value)
144144
pod := key.(Pod)
145145
existing := value.(*PodMetrics)
146146
wg.Add(1)
@@ -152,7 +152,7 @@ func (p *Provider) refreshMetricsOnce() error {
152152
return
153153
}
154154
p.UpdatePodMetrics(pod, updated)
155-
klog.V(logutil.DEBUG).Infof("Updated metrics for pod %s: %v", pod, updated.Metrics)
155+
klog.V(logutil.TRACE).Infof("Updated metrics for pod %s: %v", pod, updated.Metrics)
156156
}()
157157
return true
158158
}

pkg/ext-proc/backend/vllm/metrics.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,6 @@ func getLatestMetric(metricFamilies map[string]*dto.MetricFamily, metricName str
171171
latest = m
172172
}
173173
}
174-
klog.V(logutil.DEBUG).Infof("Got metric value %+v for metric %v", latest, metricName)
174+
klog.V(logutil.TRACE).Infof("Got metric value %+v for metric %v", latest, metricName)
175175
return latest, nil
176176
}

pkg/ext-proc/util/logging/logging_const.go

+1
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@ const (
44
DEFAULT = 2
55
VERBOSE = 3
66
DEBUG = 4
7+
TRACE = 5
78
)

0 commit comments

Comments
 (0)