Skip to content

Commit c04fc29

Browse files
committed
Adding metrics handler
1 parent 59272c1 commit c04fc29

File tree

3 files changed

+38
-0
lines changed

3 files changed

+38
-0
lines changed

pkg/ext-proc/handlers/request.go

+4
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@ import (
55
"errors"
66
"fmt"
77
"strconv"
8+
"time"
89

910
configPb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
1011
extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
1112
"inference.networking.x-k8s.io/llm-instance-gateway/pkg/ext-proc/backend"
13+
"inference.networking.x-k8s.io/llm-instance-gateway/pkg/ext-proc/metrics"
1214
"inference.networking.x-k8s.io/llm-instance-gateway/pkg/ext-proc/scheduling"
1315
klog "k8s.io/klog/v2"
1416
)
@@ -18,6 +20,7 @@ import (
1820
// Envoy sends the request body to ext proc before sending the request to the backend server.
1921
func (s *Server) HandleRequestBody(reqCtx *RequestContext, req *extProcPb.ProcessingRequest) (*extProcPb.ProcessingResponse, error) {
2022
klog.V(3).Infof("Handling request body")
23+
requestReceivedTimestamp := time.Now()
2124

2225
// Unmarshal request body (must be JSON).
2326
v := req.Request.(*extProcPb.ProcessingRequest_RequestBody)
@@ -116,6 +119,7 @@ func (s *Server) HandleRequestBody(reqCtx *RequestContext, req *extProcPb.Proces
116119
},
117120
},
118121
}
122+
metrics.MonitorRequest(llmReq.Model, llmReq.ResolvedTargetModel, len(v.RequestBody.Body), time.Since(requestReceivedTimestamp))
119123
return resp, nil
120124
}
121125

pkg/ext-proc/main.go

+5
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"inference.networking.x-k8s.io/llm-instance-gateway/pkg/ext-proc/backend"
2020
"inference.networking.x-k8s.io/llm-instance-gateway/pkg/ext-proc/backend/vllm"
2121
"inference.networking.x-k8s.io/llm-instance-gateway/pkg/ext-proc/handlers"
22+
"inference.networking.x-k8s.io/llm-instance-gateway/pkg/ext-proc/metrics"
2223
"inference.networking.x-k8s.io/llm-instance-gateway/pkg/ext-proc/scheduling"
2324
"k8s.io/apimachinery/pkg/runtime"
2425
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
@@ -32,6 +33,8 @@ var (
3233
"port",
3334
9002,
3435
"gRPC port")
36+
metricsPort = flag.Int(
37+
"metricsPort", 9090, "metrics port")
3538
targetPodHeader = flag.String(
3639
"targetPodHeader",
3740
"target-pod",
@@ -103,6 +106,8 @@ func main() {
103106
klog.Fatalf("failed to listen: %v", err)
104107
}
105108

109+
metrics.Register()
110+
go metrics.StartMetricsHandler(*metricsPort)
106111
datastore := backend.NewK8sDataStore()
107112

108113
mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
package metrics
2+
3+
import (
	"errors"
	"net"
	"net/http"
	"strconv"
	"time"

	"github.com/prometheus/client_golang/prometheus/promhttp"
	"k8s.io/component-base/metrics/legacyregistry"
	"k8s.io/klog/v2"
)
12+
13+
func StartMetricsHandler(port int) {
14+
klog.Info("Starting metrics HTTP handler ...")
15+
16+
mux := http.NewServeMux()
17+
mux.Handle("/metrics", promhttp.HandlerFor(
18+
legacyregistry.DefaultGatherer,
19+
promhttp.HandlerOpts{},
20+
))
21+
22+
server := &http.Server{
23+
Addr: net.JoinHostPort("", strconv.Itoa(port)),
24+
Handler: mux,
25+
}
26+
if err := server.ListenAndServe(); err != http.ErrServerClosed {
27+
klog.Fatalf("failed to start metrics HTTP handler: %v", err)
28+
}
29+
}

0 commit comments

Comments
 (0)