From 4c3ab5b5fbc3d31e7a1418ca8a29d15fb80f644f Mon Sep 17 00:00:00 2001 From: Rohit Ramkumar Date: Thu, 6 Mar 2025 22:03:43 +0000 Subject: [PATCH] Add initial set of metrics for BBR --- cmd/body-based-routing/main.go | 70 ++++++++++++ pkg/body-based-routing/handlers/request.go | 4 + .../handlers/request_test.go | 21 ++++ pkg/body-based-routing/metrics/metrics.go | 103 ++++++++++++++++++ 4 files changed, 198 insertions(+) create mode 100644 pkg/body-based-routing/metrics/metrics.go diff --git a/cmd/body-based-routing/main.go b/cmd/body-based-routing/main.go index 3f586788..fdb84223 100644 --- a/cmd/body-based-routing/main.go +++ b/cmd/body-based-routing/main.go @@ -18,18 +18,26 @@ package main import ( "flag" + "net" + "net/http" "os" + "strconv" "github.com/go-logr/logr" + "github.com/prometheus/client_golang/prometheus/promhttp" uberzap "go.uber.org/zap" "go.uber.org/zap/zapcore" "google.golang.org/grpc" healthPb "google.golang.org/grpc/health/grpc_health_v1" + "k8s.io/client-go/rest" + "k8s.io/component-base/metrics/legacyregistry" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/log/zap" "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/metrics/filters" "sigs.k8s.io/gateway-api-inference-extension/internal/runnable" runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/body-based-routing/server" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) @@ -42,6 +50,8 @@ var ( "grpcHealthPort", 9003, "The port used for gRPC liveness and readiness probes") + metricsPort = flag.Int( + "metricsPort", 9090, "The metrics port") logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity") setupLog = ctrl.Log.WithName("setup") @@ -95,6 +105,11 @@ func run() error { return err } + // Register metrics handler. + if err := registerMetricsHandler(mgr, *metricsPort, cfg); err != nil { + return err + } + // Start the manager. This blocks until a signal is received. setupLog.Info("Manager starting") if err := mgr.Start(ctx); err != nil { @@ -135,3 +150,58 @@ func initLogging(opts *zap.Options) { logger := zap.New(zap.UseFlagOptions(opts), zap.RawZapOpts(uberzap.AddCaller())) ctrl.SetLogger(logger) } + +const metricsEndpoint = "/metrics" + +// registerMetricsHandler adds the metrics HTTP handler as a Runnable to the given manager. +func registerMetricsHandler(mgr manager.Manager, port int, cfg *rest.Config) error { + metrics.Register() + + // Init HTTP server. + h, err := metricsHandlerWithAuthenticationAndAuthorization(cfg) + if err != nil { + return err + } + + mux := http.NewServeMux() + mux.Handle(metricsEndpoint, h) + + srv := &http.Server{ + Addr: net.JoinHostPort("", strconv.Itoa(port)), + Handler: mux, + } + + if err := mgr.Add(&manager.Server{ + Name: "metrics", + Server: srv, + }); err != nil { + setupLog.Error(err, "Failed to register metrics HTTP handler") + return err + } + return nil +} + +func metricsHandlerWithAuthenticationAndAuthorization(cfg *rest.Config) (http.Handler, error) { + h := promhttp.HandlerFor( + legacyregistry.DefaultGatherer, + promhttp.HandlerOpts{}, + ) + httpClient, err := rest.HTTPClientFor(cfg) + if err != nil { + setupLog.Error(err, "Failed to create http client for metrics auth") + return nil, err + } + + filter, err := filters.WithAuthenticationAndAuthorization(cfg, httpClient) + if err != nil { + setupLog.Error(err, "Failed to create metrics filter for auth") + return nil, err + } + metricsLogger := ctrl.Log.WithName("metrics").WithValues("path", metricsEndpoint) + metricsAuthHandler, err := filter(metricsLogger, h) + if err != nil { + setupLog.Error(err, "Failed to create metrics auth handler") + return nil, err + } + return metricsAuthHandler, nil +} diff --git a/pkg/body-based-routing/handlers/request.go b/pkg/body-based-routing/handlers/request.go index 3c5037a9..6596e191 100644 --- a/pkg/body-based-routing/handlers/request.go +++ b/pkg/body-based-routing/handlers/request.go @@ -24,6 +24,7 @@ import ( basepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" eppb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/gateway-api-inference-extension/pkg/body-based-routing/metrics" logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) @@ -38,6 +39,7 @@ func (s *Server) HandleRequestBody(ctx context.Context, body *eppb.HttpBody) (*e modelVal, ok := data["model"] if !ok { + metrics.RecordModelNotInBodyCounter() logger.V(logutil.DEFAULT).Info("Request body does not contain model parameter") return &eppb.ProcessingResponse{ Response: &eppb.ProcessingResponse_RequestBody{ @@ -48,6 +50,7 @@ func (s *Server) HandleRequestBody(ctx context.Context, body *eppb.HttpBody) (*e modelStr, ok := modelVal.(string) if !ok { + metrics.RecordModelNotParsedCounter() logger.V(logutil.DEFAULT).Info("Model parameter value is not a string") return &eppb.ProcessingResponse{ Response: &eppb.ProcessingResponse_RequestBody{ @@ -56,6 +59,7 @@ func (s *Server) HandleRequestBody(ctx context.Context, body *eppb.HttpBody) (*e }, fmt.Errorf("the model parameter value %v is not a string", modelVal) } + metrics.RecordSuccessCounter() return &eppb.ProcessingResponse{ Response: &eppb.ProcessingResponse_RequestBody{ RequestBody: &eppb.BodyResponse{ diff --git a/pkg/body-based-routing/handlers/request_test.go b/pkg/body-based-routing/handlers/request_test.go index 9bdac521..76f64e0c 100644 --- a/pkg/body-based-routing/handlers/request_test.go +++ b/pkg/body-based-routing/handlers/request_test.go @@ -18,12 +18,16 @@ package handlers import ( "context" + "strings" "testing" basepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" "github.com/google/go-cmp/cmp" "google.golang.org/protobuf/testing/protocmp" + "k8s.io/component-base/metrics/legacyregistry" + metricsutils "k8s.io/component-base/metrics/testutil" + "sigs.k8s.io/gateway-api-inference-extension/pkg/body-based-routing/metrics" logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) @@ -48,6 +52,7 @@ const ( ) func TestHandleRequestBody(t *testing.T) { + metrics.Register() ctx := logutil.NewTestLoggerIntoContext(context.Background()) tests := []struct { @@ -125,4 +130,20 @@ func TestHandleRequestBody(t *testing.T) { } }) } + + wantMetrics := ` + # HELP bbr_model_not_in_body_total [ALPHA] Count of times the model was not present in the request body. + # TYPE bbr_model_not_in_body_total counter + bbr_model_not_in_body_total{} 1 + # HELP bbr_model_not_parsed_total [ALPHA] Count of times the model was in the request body but we could not parse it. + # TYPE bbr_model_not_parsed_total counter + bbr_model_not_parsed_total{} 1 + # HELP bbr_success_total [ALPHA] Count of successes pulling model name from body and injecting it in the request headers. + # TYPE bbr_success_total counter + bbr_success_total{} 1 + ` + + if err := metricsutils.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(wantMetrics), "inference_model_request_total"); err != nil { + t.Error(err) + } } diff --git a/pkg/body-based-routing/metrics/metrics.go b/pkg/body-based-routing/metrics/metrics.go new file mode 100644 index 00000000..fc3538fb --- /dev/null +++ b/pkg/body-based-routing/metrics/metrics.go @@ -0,0 +1,103 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "sync" + + compbasemetrics "k8s.io/component-base/metrics" + "k8s.io/component-base/metrics/legacyregistry" +) + +const component = "bbr" + +var ( + successCounter = compbasemetrics.NewCounterVec( + &compbasemetrics.CounterOpts{ + Subsystem: component, + Name: "success_total", + Help: "Count of successes pulling model name from body and injecting it in the request headers.", + StabilityLevel: compbasemetrics.ALPHA, + }, + []string{}, + ) + modelNotInBodyCounter = compbasemetrics.NewCounterVec( + &compbasemetrics.CounterOpts{ + Subsystem: component, + Name: "model_not_in_body_total", + Help: "Count of times the model was not present in the request body.", + StabilityLevel: compbasemetrics.ALPHA, + }, + []string{}, + ) + modelNotParsedCounter = compbasemetrics.NewCounterVec( + &compbasemetrics.CounterOpts{ + Subsystem: component, + Name: "model_not_parsed_total", + Help: "Count of times the model was in the request body but we could not parse it.", + StabilityLevel: compbasemetrics.ALPHA, + }, + []string{}, + ) + + // TODO: Uncomment and use this metrics once the core server implementation has handling to skip body parsing if header exists. + /* + modelAlreadyPresentInHeaderCounter = compbasemetrics.NewCounterVec( + &compbasemetrics.CounterOpts{ + Subsystem: component, + Name: "model_already_present_in_header_total", + Help: "Count of times the model was already present in request headers.", + StabilityLevel: compbasemetrics.ALPHA, + }, + []string{}, + ) + */ +) + +var registerMetrics sync.Once + +// Register all metrics. +func Register() { + registerMetrics.Do(func() { + legacyregistry.MustRegister(successCounter) + legacyregistry.MustRegister(modelNotInBodyCounter) + legacyregistry.MustRegister(modelNotParsedCounter) + // legacyregistry.MustRegister(modelAlreadyPresentInHeaderCounter) + }) +} + +// RecordSuccessCounter records the number of successful requests to inject the model name into request headers. +func RecordSuccessCounter() { + successCounter.WithLabelValues().Inc() +} + +// RecordModelNotInBodyCounter records the number of times the model was not found in the request body. +func RecordModelNotInBodyCounter() { + modelNotInBodyCounter.WithLabelValues().Inc() +} + +// RecordModelNotParsedCounter records the number of times the model was found in the body but it could not be parsed. +func RecordModelNotParsedCounter() { + modelNotParsedCounter.WithLabelValues().Inc() +} + +/* +// RecordModelAlreadyInHeaderCounter records the number of times the model was already found in the request headers. +func RecordModelAlreadyInHeaderCounter() { + modelAlreadyPresentInHeaderCounter.WithLabelValues().Inc() +} +*/