
[Metrics] Add number of ready pods metric for inference pool #622

Merged · 1 commit · Mar 31, 2025
1 change: 1 addition & 0 deletions pkg/epp/backend/metrics/logger.go
@@ -110,4 +110,5 @@ func flushPrometheusMetricsOnce(logger logr.Logger, datastore Datastore) {
podTotalCount := len(podMetrics)
metrics.RecordInferencePoolAvgKVCache(pool.Name, kvCacheTotal/float64(podTotalCount))
metrics.RecordInferencePoolAvgQueueSize(pool.Name, float64(queueTotal/podTotalCount))
metrics.RecordInferencePoolReadyPods(pool.Name, float64(podTotalCount))
}
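
The flush loop above now publishes one ready-pods sample per pool on each pass, reusing the podTotalCount it already computes for the averages. Below is a minimal sketch of how that recording path could be checked in isolation; the module import path, pool name, and the use of component-base's testutil are illustrative assumptions, and the PR's actual end-to-end verification lives in hermetic_test.go further down.

package metrics_test

import (
	"strings"
	"testing"

	"k8s.io/component-base/metrics/legacyregistry"
	"k8s.io/component-base/metrics/testutil"

	// Assumed module path for the package touched in this PR.
	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
)

func TestRecordInferencePoolReadyPods(t *testing.T) {
	metrics.Register()
	// Simulate what flushPrometheusMetricsOnce does after counting a pool's pods.
	metrics.RecordInferencePoolReadyPods("test-pool", 3)

	want := `
# HELP inference_pool_ready_pods [ALPHA] The number of ready pods in the inference server pool.
# TYPE inference_pool_ready_pods gauge
inference_pool_ready_pods{name="test-pool"} 3
`
	// Compare only the new series so other registered EPP metrics are ignored.
	if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(want), "inference_pool_ready_pods"); err != nil {
		t.Fatal(err)
	}
}
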
15 changes: 15 additions & 0 deletions pkg/epp/metrics/metrics.go
@@ -151,6 +151,16 @@ var (
},
[]string{"name"},
)

inferencePoolReadyPods = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
Subsystem: InferencePoolComponent,
Name: "ready_pods",
Help: "The number of ready pods in the inference server pool.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"name"},
)
)

var registerMetrics sync.Once
@@ -169,6 +179,7 @@ func Register() {

legacyregistry.MustRegister(inferencePoolAvgKVCache)
legacyregistry.MustRegister(inferencePoolAvgQueueSize)
legacyregistry.MustRegister(inferencePoolReadyPods)
})
}

@@ -241,3 +252,7 @@ func RecordInferencePoolAvgKVCache(name string, utilization float64) {
func RecordInferencePoolAvgQueueSize(name string, queueSize float64) {
inferencePoolAvgQueueSize.WithLabelValues(name).Set(queueSize)
}

func RecordInferencePoolReadyPods(name string, readyPods float64) {
inferencePoolReadyPods.WithLabelValues(name).Set(readyPods)
}
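
The new gauge follows the same pattern as the existing pool-level metrics: declare the GaugeVec, register it once, and set it by pool name. A rough usage sketch follows; the module import path, pool name, and standalone HTTP server are assumptions for illustration only, not the EPP's actual wiring.

package main

import (
	"log"
	"net/http"

	"k8s.io/component-base/metrics/legacyregistry"

	// Assumed module path for the package touched in this PR.
	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
)

func main() {
	// Registers all EPP metrics, including inference_pool_ready_pods (guarded by sync.Once).
	metrics.Register()

	// Repeated calls overwrite the gauge value for the given pool label.
	metrics.RecordInferencePoolReadyPods("vllm-llama3-8b-instruct-pool", 3)

	// Scraping /metrics would then include a line of the form:
	//   inference_pool_ready_pods{name="vllm-llama3-8b-instruct-pool"} 3
	http.Handle("/metrics", legacyregistry.Handler())
	log.Fatal(http.ListenAndServe(":9090", nil))
}
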
1 change: 1 addition & 0 deletions site-src/guides/metrics.md
@@ -33,6 +33,7 @@ curl -i ${IP}:${PORT}/v1/completions -H 'Content-Type: application/json' -d '{
| inference_model_running_requests | Gauge | Number of running requests for each model. | `model_name`=<model-name> | ALPHA |
| inference_pool_average_kv_cache_utilization | Gauge | The average kv cache utilization for an inference server pool. | `name`=<inference-pool-name> | ALPHA |
| inference_pool_average_queue_size | Gauge | The average number of requests pending in the model server queue. | `name`=<inference-pool-name> | ALPHA |
| inference_pool_ready_pods | Gauge | The number of ready pods for an inference server pool. | `name`=<inference-pool-name> | ALPHA |
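
Once Prometheus scrapes this metric, the new series can be queried like any other pool-level gauge. A minimal sketch using the Prometheus Go client; the Prometheus address and pool name are illustrative assumptions.

package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"github.com/prometheus/client_golang/api"
	promv1 "github.com/prometheus/client_golang/api/prometheus/v1"
)

func main() {
	client, err := api.NewClient(api.Config{Address: "http://prometheus:9090"})
	if err != nil {
		log.Fatal(err)
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// Current number of ready pods for one inference pool.
	result, _, err := promv1.NewAPI(client).Query(ctx,
		`inference_pool_ready_pods{name="vllm-llama3-8b-instruct-pool"}`, time.Now())
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(result)
}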

## Scrape Metrics

13 changes: 12 additions & 1 deletion test/integration/epp/hermetic_test.go
@@ -430,7 +430,13 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
# TYPE inference_model_request_total counter
inference_model_request_total{model_name="my-model",target_model_name="my-model-12345"} 1
`},
`,
`inference_pool_ready_pods`: `
# HELP inference_pool_ready_pods [ALPHA] The number of ready pods in the inference server pool.
# TYPE inference_pool_ready_pods gauge
inference_pool_ready_pods{name="vllm-llama3-8b-instruct-pool"} 3
`,
},
wantErr: false,
wantResponses: []*extProcPb.ProcessingResponse{
{
@@ -1465,6 +1471,11 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
},
},
},
wantMetrics: map[string]string{`inference_pool_ready_pods`: `
# HELP inference_pool_ready_pods [ALPHA] The number of ready pods in the inference server pool.
# TYPE inference_pool_ready_pods gauge
inference_pool_ready_pods{name="vllm-llama3-8b-instruct-pool"} 1
`},
},
}
