Skip to content

Commit a1ff3cb

Browse files
committed
[Metrics] Add number of ready pods metric for inference pool
1 parent 4d392ce commit a1ff3cb

File tree

4 files changed

+29
-1
lines changed

4 files changed

+29
-1
lines changed

Diff for: pkg/epp/backend/metrics/logger.go

+1
Original file line numberDiff line numberDiff line change
@@ -110,4 +110,5 @@ func flushPrometheusMetricsOnce(logger logr.Logger, datastore Datastore) {
110110
podTotalCount := len(podMetrics)
111111
metrics.RecordInferencePoolAvgKVCache(pool.Name, kvCacheTotal/float64(podTotalCount))
112112
metrics.RecordInferencePoolAvgQueueSize(pool.Name, float64(queueTotal/podTotalCount))
113+
metrics.RecordInferencePoolNumReadyPods(pool.Name, float64(podTotalCount))
113114
}

Diff for: pkg/epp/metrics/metrics.go

+15
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,16 @@ var (
151151
},
152152
[]string{"name"},
153153
)
154+
155+
inferencePoolNumReadyPods = compbasemetrics.NewGaugeVec(
156+
&compbasemetrics.GaugeOpts{
157+
Subsystem: InferencePoolComponent,
158+
Name: "num_ready_pods",
159+
Help: "The number of ready pods in the inference server pool.",
160+
StabilityLevel: compbasemetrics.ALPHA,
161+
},
162+
[]string{"name"},
163+
)
154164
)
155165

156166
var registerMetrics sync.Once
@@ -169,6 +179,7 @@ func Register() {
169179

170180
legacyregistry.MustRegister(inferencePoolAvgKVCache)
171181
legacyregistry.MustRegister(inferencePoolAvgQueueSize)
182+
legacyregistry.MustRegister(inferencePoolNumReadyPods)
172183
})
173184
}
174185

@@ -241,3 +252,7 @@ func RecordInferencePoolAvgKVCache(name string, utilization float64) {
241252
func RecordInferencePoolAvgQueueSize(name string, queueSize float64) {
242253
inferencePoolAvgQueueSize.WithLabelValues(name).Set(queueSize)
243254
}
255+
256+
func RecordInferencePoolNumReadyPods(name string, runningPods float64) {
257+
inferencePoolNumReadyPods.WithLabelValues(name).Set(runningPods)
258+
}

Diff for: site-src/guides/metrics.md

+1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ curl -i ${IP}:${PORT}/v1/completions -H 'Content-Type: application/json' -d '{
3333
| inference_model_running_requests | Gauge | Number of running requests for each model. | `model_name`=<model-name> | ALPHA |
3434
| inference_pool_average_kv_cache_utilization | Gauge | The average kv cache utilization for an inference server pool. | `name`=<inference-pool-name> | ALPHA |
3535
| inference_pool_average_queue_size | Gauge | The average number of requests pending in the model server queue. | `name`=<inference-pool-name> | ALPHA |
36+
| inference_pool_num_ready_pods | Gauge | The number of ready pods in the inference server pool. | `name`=<inference-pool-name> | ALPHA |
3637

3738
## Scrape Metrics
3839

Diff for: test/integration/epp/hermetic_test.go

+12-1
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,13 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
430430
# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
431431
# TYPE inference_model_request_total counter
432432
inference_model_request_total{model_name="my-model",target_model_name="my-model-12345"} 1
433-
`},
433+
`,
434+
`inference_pool_num_ready_pods`: `
435+
# HELP inference_pool_num_ready_pods [ALPHA] The number of ready pods in the inference server pool.
436+
# TYPE inference_pool_num_ready_pods gauge
437+
inference_pool_num_ready_pods{name="vllm-llama3-8b-instruct-pool"} 3
438+
`,
439+
},
434440
wantErr: false,
435441
wantResponses: []*extProcPb.ProcessingResponse{
436442
{
@@ -1465,6 +1471,11 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
14651471
},
14661472
},
14671473
},
1474+
wantMetrics: map[string]string{`inference_pool_num_ready_pods`: `
1475+
# HELP inference_pool_num_ready_pods [ALPHA] The number of ready pods in the inference server pool.
1476+
# TYPE inference_pool_num_ready_pods gauge
1477+
inference_pool_num_ready_pods{name="vllm-llama3-8b-instruct-pool"} 1
1478+
`},
14681479
},
14691480
}
14701481

0 commit comments

Comments
 (0)