Skip to content

Commit 36b46bc

Browse files
committed
fix: refactor custom inferencepool metric
Signed-off-by: nayihz <[email protected]>
1 parent f704350 commit 36b46bc

File tree

10 files changed

+120
-79
lines changed

10 files changed

+120
-79
lines changed

cmd/bbr/main.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package main
1818

1919
import (
2020
"flag"
21+
"fmt"
2122
"os"
2223

2324
"github.com/go-logr/logr"
@@ -46,8 +47,9 @@ var (
4647
"grpcHealthPort",
4748
9005,
4849
"The port used for gRPC liveness and readiness probes")
49-
metricsAddr = flag.String("metrics-bind-address", ":9090", "The address the metric endpoint binds to.")
50-
streaming = flag.Bool(
50+
metricsPort = flag.Int(
51+
"metricsPort", 9090, "The metrics port")
52+
streaming = flag.Bool(
5153
"streaming", false, "Enables streaming support for Envoy full-duplex streaming mode")
5254
logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity")
5355

@@ -88,7 +90,7 @@ func run() error {
8890
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/metrics/server
8991
// - https://book.kubebuilder.io/reference/metrics.html
9092
metricsServerOptions := metricsserver.Options{
91-
BindAddress: *metricsAddr,
93+
BindAddress: fmt.Sprintf(":%d", *metricsPort),
9294
FilterProvider: filters.WithAuthenticationAndAuthorization,
9395
}
9496
mgr, err := ctrl.NewManager(cfg, ctrl.Options{Metrics: metricsServerOptions})

cmd/epp/main.go

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"os"
2323

2424
"github.com/go-logr/logr"
25+
"github.com/prometheus/client_golang/prometheus"
2526
uberzap "go.uber.org/zap"
2627
"go.uber.org/zap/zapcore"
2728
"google.golang.org/grpc"
@@ -38,6 +39,7 @@ import (
3839
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
3940
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
4041
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
42+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics/collectors"
4143
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling"
4244
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
4345
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/filter"
@@ -58,7 +60,8 @@ var (
5860
"grpcHealthPort",
5961
9003,
6062
"The port used for gRPC liveness and readiness probes")
61-
metricsAddr = flag.String("metrics-bind-address", ":9090", "The address the metric endpoint binds to.")
63+
metricsPort = flag.Int(
64+
"metricsPort", 9090, "The metrics port")
6265
destinationEndpointHintKey = flag.String(
6366
"destinationEndpointHintKey",
6467
runserver.DefaultDestinationEndpointHintKey,
@@ -154,14 +157,34 @@ func run() error {
154157
return err
155158
}
156159

157-
metrics.Register()
160+
// Set up mapper for metric scraping.
161+
mapping, err := backendmetrics.NewMetricMapping(
162+
*totalQueuedRequestsMetric,
163+
*kvCacheUsagePercentageMetric,
164+
*loraInfoMetric,
165+
)
166+
if err != nil {
167+
setupLog.Error(err, "Failed to create metric mapping from flags.")
168+
return err
169+
}
170+
verifyMetricMapping(*mapping, setupLog)
171+
172+
pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.PodMetricsClientImpl{MetricMapping: mapping}, *refreshMetricsInterval)
173+
// Setup runner.
174+
ctx := ctrl.SetupSignalHandler()
175+
176+
datastore := datastore.NewDatastore(ctx, pmf)
177+
178+
customCollectors := []prometheus.Collector{collectors.NewInferencePoolMetricsCollector(datastore)}
179+
metrics.Register(customCollectors...)
180+
metrics.RecordInferenceExtensionInfo()
158181
// Register metrics handler.
159182
// Metrics endpoint is enabled in 'config/default/kustomization.yaml'. The Metrics options configure the server.
160183
// More info:
161184
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/metrics/server
162185
// - https://book.kubebuilder.io/reference/metrics.html
163186
metricsServerOptions := metricsserver.Options{
164-
BindAddress: *metricsAddr,
187+
BindAddress: fmt.Sprintf(":%d", *metricsPort),
165188
FilterProvider: filters.WithAuthenticationAndAuthorization,
166189
}
167190

@@ -175,24 +198,6 @@ func run() error {
175198
return err
176199
}
177200

178-
// Set up mapper for metric scraping.
179-
mapping, err := backendmetrics.NewMetricMapping(
180-
*totalQueuedRequestsMetric,
181-
*kvCacheUsagePercentageMetric,
182-
*loraInfoMetric,
183-
)
184-
if err != nil {
185-
setupLog.Error(err, "Failed to create metric mapping from flags.")
186-
return err
187-
}
188-
verifyMetricMapping(*mapping, setupLog)
189-
190-
pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.PodMetricsClientImpl{MetricMapping: mapping}, *refreshMetricsInterval)
191-
// Setup runner.
192-
ctx := ctrl.SetupSignalHandler()
193-
194-
datastore := datastore.NewDatastore(ctx, pmf)
195-
196201
scheduler := scheduling.NewScheduler(datastore)
197202
if schedulerV2 == "true" {
198203
queueScorerWeight := envutil.GetEnvInt("QUEUE_SCORE_WEIGHT", scorer.DefaultQueueScorerWeight, setupLog)

config/manifests/inferencepool-resources.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,10 @@ rules:
107107
- subjectaccessreviews
108108
verbs:
109109
- create
110+
- nonResourceURLs:
111+
- "/metrics"
112+
verbs:
113+
- get
110114
---
111115
kind: ClusterRoleBinding
112116
apiVersion: rbac.authorization.k8s.io/v1

pkg/bbr/metrics/metrics.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,13 @@ limitations under the License.
1717
package metrics
1818

1919
import (
20-
"fmt"
2120
"sync"
2221

2322
"github.com/prometheus/client_golang/prometheus"
2423
compbasemetrics "k8s.io/component-base/metrics"
2524
"sigs.k8s.io/controller-runtime/pkg/metrics"
25+
26+
metricsutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/metrics"
2627
)
2728

2829
const component = "bbr"
@@ -32,23 +33,23 @@ var (
3233
prometheus.CounterOpts{
3334
Subsystem: component,
3435
Name: "success_total",
35-
Help: fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "Count of successes pulling model name from body and injecting it in the request headers."),
36+
Help: metricsutil.HelpMsgWithStability("Count of successes pulling model name from body and injecting it in the request headers.", compbasemetrics.ALPHA),
3637
},
3738
[]string{},
3839
)
3940
modelNotInBodyCounter = prometheus.NewCounterVec(
4041
prometheus.CounterOpts{
4142
Subsystem: component,
4243
Name: "model_not_in_body_total",
43-
Help: fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "Count of times the model was not present in the request body."),
44+
Help: metricsutil.HelpMsgWithStability("Count of times the model was not present in the request body.", compbasemetrics.ALPHA),
4445
},
4546
[]string{},
4647
)
4748
modelNotParsedCounter = prometheus.NewCounterVec(
4849
prometheus.CounterOpts{
4950
Subsystem: component,
5051
Name: "model_not_parsed_total",
51-
Help: fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "Count of times the model was in the request body but we could not parse it."),
52+
Help: metricsutil.HelpMsgWithStability("Count of times the model was in the request body but we could not parse it.", compbasemetrics.ALPHA),
5253
},
5354
[]string{},
5455
)

pkg/epp/metrics/collectors/inference_pool.go

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17,47 +17,46 @@ limitations under the License.
1717
package collectors
1818

1919
import (
20-
"k8s.io/component-base/metrics"
20+
"github.com/prometheus/client_golang/prometheus"
21+
compbasemetrics "k8s.io/component-base/metrics"
22+
2123
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
24+
metricsutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/metrics"
2225
)
2326

2427
var (
25-
descInferencePoolPerPodQueueSize = metrics.NewDesc(
28+
descInferencePoolPerPodQueueSize = prometheus.NewDesc(
2629
"inference_pool_per_pod_queue_size",
27-
"The total number of requests pending in the model server queue for each underlying pod.",
30+
metricsutil.HelpMsgWithStability("The total number of requests pending in the model server queue for each underlying pod.", compbasemetrics.ALPHA),
2831
[]string{
2932
"name",
3033
"model_server_pod",
3134
}, nil,
32-
metrics.ALPHA,
33-
"",
3435
)
3536
)
3637

3738
type inferencePoolMetricsCollector struct {
38-
metrics.BaseStableCollector
39-
4039
ds datastore.Datastore
4140
}
4241

4342
// Check if inferencePoolMetricsCollector implements necessary interface
44-
var _ metrics.StableCollector = &inferencePoolMetricsCollector{}
43+
var _ prometheus.Collector = &inferencePoolMetricsCollector{}
4544

46-
// NewInferencePoolMetricsCollector implements the metrics.StableCollector interface and
45+
// NewInferencePoolMetricsCollector implements the prometheus.Collector interface and
4746
// exposes metrics about inference pool.
48-
func NewInferencePoolMetricsCollector(ds datastore.Datastore) metrics.StableCollector {
47+
func NewInferencePoolMetricsCollector(ds datastore.Datastore) prometheus.Collector {
4948
return &inferencePoolMetricsCollector{
5049
ds: ds,
5150
}
5251
}
5352

54-
// DescribeWithStability implements the metrics.StableCollector interface.
55-
func (c *inferencePoolMetricsCollector) DescribeWithStability(ch chan<- *metrics.Desc) {
53+
// DescribeWithStability implements the prometheus.Collector interface.
54+
func (c *inferencePoolMetricsCollector) Describe(ch chan<- *prometheus.Desc) {
5655
ch <- descInferencePoolPerPodQueueSize
5756
}
5857

59-
// CollectWithStability implements the metrics.StableCollector interface.
60-
func (c *inferencePoolMetricsCollector) CollectWithStability(ch chan<- metrics.Metric) {
58+
// CollectWithStability implements the prometheus.Collector interface.
59+
func (c *inferencePoolMetricsCollector) Collect(ch chan<- prometheus.Metric) {
6160
pool, err := c.ds.PoolGet()
6261
if err != nil {
6362
return
@@ -69,9 +68,9 @@ func (c *inferencePoolMetricsCollector) CollectWithStability(ch chan<- metrics.M
6968
}
7069

7170
for _, pod := range podMetrics {
72-
ch <- metrics.NewLazyConstMetric(
71+
ch <- prometheus.MustNewConstMetric(
7372
descInferencePoolPerPodQueueSize,
74-
metrics.GaugeValue,
73+
prometheus.GaugeValue,
7574
float64(pod.GetMetrics().WaitingQueueSize),
7675
pool.Name,
7776
pod.GetPod().NamespacedName.Name,

pkg/epp/metrics/collectors/inference_pool_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ func TestNoMetricsCollected(t *testing.T) {
5555
ds: datastore,
5656
}
5757

58-
if err := testutil.CustomCollectAndCompare(collector, strings.NewReader(""), ""); err != nil {
58+
if err := testutil.CollectAndCompare(collector, strings.NewReader(""), ""); err != nil {
5959
t.Fatal(err)
6060
}
6161
}
@@ -90,7 +90,7 @@ func TestMetricsCollected(t *testing.T) {
9090
collector := &inferencePoolMetricsCollector{
9191
ds: ds,
9292
}
93-
err := testutil.CustomCollectAndCompare(collector, strings.NewReader(`
93+
err := testutil.CollectAndCompare(collector, strings.NewReader(`
9494
# HELP inference_pool_per_pod_queue_size [ALPHA] The total number of requests pending in the model server queue for each underlying pod.
9595
# TYPE inference_pool_per_pod_queue_size gauge
9696
inference_pool_per_pod_queue_size{model_server_pod="pod1",name="test-pool"} 100

0 commit comments

Comments
 (0)