Skip to content

Commit a931809

Browse files
authored
fixing some lint errors (#126)
1 parent 447801c commit a931809

File tree

4 files changed

+22
-21
lines changed

4 files changed

+22
-21
lines changed

pkg/ext-proc/backend/provider.go

+4-8
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,8 @@ func (p *Provider) GetPodMetrics(pod Pod) (*PodMetrics, bool) {
5858
}
5959

6060
func (p *Provider) Init(refreshPodsInterval, refreshMetricsInterval time.Duration) error {
61-
if err := p.refreshPodsOnce(); err != nil {
62-
klog.Errorf("Failed to init pods: %v", err)
63-
}
61+
p.refreshPodsOnce()
62+
6463
if err := p.refreshMetricsOnce(); err != nil {
6564
klog.Errorf("Failed to init metrics: %v", err)
6665
}
@@ -71,9 +70,7 @@ func (p *Provider) Init(refreshPodsInterval, refreshMetricsInterval time.Duratio
7170
go func() {
7271
for {
7372
time.Sleep(refreshPodsInterval)
74-
if err := p.refreshPodsOnce(); err != nil {
75-
klog.V(4).Infof("Failed to refresh podslist pods: %v", err)
76-
}
73+
p.refreshPodsOnce()
7774
}
7875
}()
7976

@@ -102,7 +99,7 @@ func (p *Provider) Init(refreshPodsInterval, refreshMetricsInterval time.Duratio
10299

103100
// refreshPodsOnce lists pods and updates keys in the podMetrics map.
104101
// Note this function doesn't update the PodMetrics value, it's done separately.
105-
func (p *Provider) refreshPodsOnce() error {
102+
func (p *Provider) refreshPodsOnce() {
106103
// merge new pods with cached ones.
107104
// add new pod to the map
108105
addNewPods := func(k, v any) bool {
@@ -128,7 +125,6 @@ func (p *Provider) refreshPodsOnce() error {
128125
}
129126
p.podMetrics.Range(mergeFn)
130127
p.datastore.pods.Range(addNewPods)
131-
return nil
132128
}
133129

134130
func (p *Provider) refreshMetricsOnce() error {

pkg/ext-proc/backend/vllm/metrics.go

+10-8
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,9 @@ func (p *PodMetricsClientImpl) FetchMetrics(
5252
klog.Errorf("failed to fetch metrics from %s: %v", pod, err)
5353
return nil, fmt.Errorf("failed to fetch metrics from %s: %w", pod, err)
5454
}
55-
defer resp.Body.Close()
55+
defer func() {
56+
_ = resp.Body.Close()
57+
}()
5658

5759
if resp.StatusCode != http.StatusOK {
5860
klog.Errorf("unexpected status code from %s: %v", pod, resp.StatusCode)
@@ -76,17 +78,17 @@ func promToPodMetrics(
7678
) (*backend.PodMetrics, error) {
7779
var errs error
7880
updated := existing.Clone()
79-
runningQueueSize, _, err := getLatestMetric(metricFamilies, RunningQueueSizeMetricName)
81+
runningQueueSize, err := getLatestMetric(metricFamilies, RunningQueueSizeMetricName)
8082
errs = multierr.Append(errs, err)
8183
if err == nil {
8284
updated.RunningQueueSize = int(runningQueueSize.GetGauge().GetValue())
8385
}
84-
waitingQueueSize, _, err := getLatestMetric(metricFamilies, WaitingQueueSizeMetricName)
86+
waitingQueueSize, err := getLatestMetric(metricFamilies, WaitingQueueSizeMetricName)
8587
errs = multierr.Append(errs, err)
8688
if err == nil {
8789
updated.WaitingQueueSize = int(waitingQueueSize.GetGauge().GetValue())
8890
}
89-
cachePercent, _, err := getLatestMetric(metricFamilies, KVCacheUsagePercentMetricName)
91+
cachePercent, err := getLatestMetric(metricFamilies, KVCacheUsagePercentMetricName)
9092
errs = multierr.Append(errs, err)
9193
if err == nil {
9294
updated.KVCacheUsagePercent = cachePercent.GetGauge().GetValue()
@@ -151,14 +153,14 @@ func getLatestLoraMetric(metricFamilies map[string]*dto.MetricFamily) (*dto.Metr
151153

152154
// getLatestMetric gets the latest metric of a family. This should be used to get the latest Gauge metric.
153155
// Since vllm doesn't set the timestamp in metric, this metric essentially gets the first metric.
154-
func getLatestMetric(metricFamilies map[string]*dto.MetricFamily, metricName string) (*dto.Metric, time.Time, error) {
156+
func getLatestMetric(metricFamilies map[string]*dto.MetricFamily, metricName string) (*dto.Metric, error) {
155157
mf, ok := metricFamilies[metricName]
156158
if !ok {
157159
klog.Warningf("metric family %q not found", metricName)
158-
return nil, time.Time{}, fmt.Errorf("metric family %q not found", metricName)
160+
return nil, fmt.Errorf("metric family %q not found", metricName)
159161
}
160162
if len(mf.GetMetric()) == 0 {
161-
return nil, time.Time{}, fmt.Errorf("no metrics available for %q", metricName)
163+
return nil, fmt.Errorf("no metrics available for %q", metricName)
162164
}
163165
var latestTs int64
164166
var latest *dto.Metric
@@ -169,5 +171,5 @@ func getLatestMetric(metricFamilies map[string]*dto.MetricFamily, metricName str
169171
}
170172
}
171173
klog.V(4).Infof("Got metric value %+v for metric %v", latest, metricName)
172-
return latest, time.Unix(0, latestTs*1000), nil
174+
return latest, nil
173175
}

pkg/ext-proc/scheduling/filter.go

+4-3
Original file line numberDiff line numberDiff line change
@@ -157,9 +157,10 @@ func leastKVCacheFilterFunc(req *LLMRequest, pods []*backend.PodMetrics) ([]*bac
157157
type podPredicate func(req *LLMRequest, pod *backend.PodMetrics) bool
158158

159159
// We consider serving an adapter low cost if the adapter is active in the model server, or the
160-
// model server has room to load the adapter. The lowLoRACostPredicate ensures weak affinity by spreading the
161-
// load of a LoRA adapter across multiple pods, avoiding "pinning" all requests to a single pod.
162-
// This gave good performance in our initial benchmarking results in the scenario where # of lora slots > # of lora adapters.
160+
// model server has room to load the adapter. The lowLoRACostPredicate ensures weak affinity by
161+
// spreading the load of a LoRA adapter across multiple pods, avoiding "pinning" all requests to
162+
// a single pod. This gave good performance in our initial benchmarking results in the scenario
163+
// where # of lora slots > # of lora adapters.
163164
func lowLoRACostPredicate(req *LLMRequest, pod *backend.PodMetrics) bool {
164165
_, ok := pod.ActiveModels[req.ResolvedTargetModel]
165166
return ok || len(pod.ActiveModels) < pod.MaxActiveModels

pkg/ext-proc/scheduling/scheduler.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,8 @@ var (
8484
name: "drop request",
8585
filter: func(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) {
8686
klog.Infof("Dropping request %v", req)
87-
return []*backend.PodMetrics{}, status.Errorf(codes.ResourceExhausted, "dropping request due to limited backend resources")
87+
return []*backend.PodMetrics{}, status.Errorf(
88+
codes.ResourceExhausted, "dropping request due to limited backend resources")
8889
},
8990
},
9091
}
@@ -114,7 +115,8 @@ func (s *Scheduler) Schedule(req *LLMRequest) (targetPod backend.Pod, err error)
114115
klog.V(3).Infof("request: %v; metrics: %+v", req, s.podMetricsProvider.AllPodMetrics())
115116
pods, err := s.filter.Filter(req, s.podMetricsProvider.AllPodMetrics())
116117
if err != nil || len(pods) == 0 {
117-
return backend.Pod{}, fmt.Errorf("failed to apply filter, resulted %v pods, this should never happen: %w", len(pods), err)
118+
return backend.Pod{}, fmt.Errorf(
119+
"failed to apply filter, resulted %v pods, this should never happen: %w", len(pods), err)
118120
}
119121
klog.V(3).Infof("Going to randomly select a pod from the candidates: %+v", pods)
120122
i := rand.Intn(len(pods))

0 commit comments

Comments
 (0)