Skip to content

Commit 513ae62

Browse files
committed
[Metrics] Add request error metrics
This change defines some general errors; the list might grow in the future if finer-grained error types are needed.
1 parent 8233946 commit 513ae62

File tree

10 files changed

+209
-16
lines changed

10 files changed

+209
-16
lines changed

pkg/ext-proc/handlers/request.go

+7-7
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package handlers
22

33
import (
44
"encoding/json"
5-
"errors"
65
"fmt"
76
"strconv"
87

@@ -12,6 +11,7 @@ import (
1211
klog "k8s.io/klog/v2"
1312
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend"
1413
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/scheduling"
14+
errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error"
1515
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
1616
)
1717

@@ -27,14 +27,14 @@ func (s *Server) HandleRequestBody(reqCtx *RequestContext, req *extProcPb.Proces
2727
var rb map[string]interface{}
2828
if err := json.Unmarshal(v.RequestBody.Body, &rb); err != nil {
2929
klog.V(logutil.DEFAULT).ErrorS(err, "Error unmarshaling request body")
30-
return nil, fmt.Errorf("error unmarshaling request body: %v", err)
30+
return nil, errutil.Error{Code: errutil.InvalidRequest, Msg: fmt.Sprintf("error unmarshaling request body: %v", err)}
3131
}
3232
klogV.InfoS("Request body unmarshalled", "body", rb)
3333

3434
// Resolve target models.
3535
model, ok := rb["model"].(string)
3636
if !ok {
37-
return nil, errors.New("model not found in request")
37+
return nil, errutil.Error{Code: errutil.InvalidRequest, Msg: "model not found in request"}
3838
}
3939
klogV.InfoS("Model requested", "model", model)
4040
modelName := model
@@ -44,12 +44,12 @@ func (s *Server) HandleRequestBody(reqCtx *RequestContext, req *extProcPb.Proces
4444
// are able to be requested by using their distinct name.
4545
modelObj := s.datastore.FetchModelData(model)
4646
if modelObj == nil {
47-
return nil, fmt.Errorf("error finding a model object in InferenceModel for input %v", model)
47+
return nil, errutil.Error{Code: errutil.Internal, Msg: fmt.Sprintf("error finding a model object in InferenceModel for input %v", model)}
4848
}
4949
if len(modelObj.Spec.TargetModels) > 0 {
5050
modelName = backend.RandomWeightedDraw(modelObj, 0)
5151
if modelName == "" {
52-
return nil, fmt.Errorf("error getting target model name for model %v", modelObj.Name)
52+
return nil, errutil.Error{Code: errutil.Internal, Msg: fmt.Sprintf("error getting target model name for model %v", modelObj.Name)}
5353
}
5454
}
5555
llmReq := &scheduling.LLMRequest{
@@ -67,14 +67,14 @@ func (s *Server) HandleRequestBody(reqCtx *RequestContext, req *extProcPb.Proces
6767
requestBody, err = json.Marshal(rb)
6868
if err != nil {
6969
klog.V(logutil.DEFAULT).ErrorS(err, "Error marshaling request body")
70-
return nil, fmt.Errorf("error marshaling request body: %v", err)
70+
return nil, errutil.Error{Code: errutil.Internal, Msg: fmt.Sprintf("error marshaling request body: %v", err)}
7171
}
7272
klogV.InfoS("Updated request body marshalled", "body", string(requestBody))
7373
}
7474

7575
targetPod, err := s.scheduler.Schedule(llmReq)
7676
if err != nil {
77-
return nil, fmt.Errorf("failed to find target pod: %w", err)
77+
return nil, errutil.Error{Code: errutil.Internal, Msg: fmt.Errorf("failed to find target pod: %w", err).Error()}
7878
}
7979
klogV.InfoS("Target model and pod selected", "model", llmReq.ResolvedTargetModel, "pod", targetPod)
8080

pkg/ext-proc/handlers/response.go

+40-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
configPb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
88
extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
99
klog "k8s.io/klog/v2"
10+
errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error"
1011
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
1112
)
1213

@@ -16,6 +17,44 @@ func (s *Server) HandleResponseHeaders(reqCtx *RequestContext, req *extProcPb.Pr
1617
h := req.Request.(*extProcPb.ProcessingRequest_ResponseHeaders)
1718
klog.V(logutil.VERBOSE).InfoS("Headers before", "headers", h)
1819

20+
// Example header
21+
// {
22+
// "ResponseHeaders": {
23+
// "headers": [
24+
// {
25+
// "key": ":status",
26+
// "raw_value": "200"
27+
// },
28+
// {
29+
// "key": "date",
30+
// "raw_value": "Thu, 30 Jan 2025 18:50:48 GMT"
31+
// },
32+
// {
33+
// "key": "server",
34+
// "raw_value": "uvicorn"
35+
// },
36+
// {
37+
// "key": "content-type",
38+
// "raw_value": "text/event-stream; charset=utf-8"
39+
// },
40+
// {
41+
// "key": "transfer-encoding",
42+
// "raw_value": "chunked"
43+
// }
44+
// ]
45+
// }
46+
// }
47+
reqCtx.ResponseStatusCode = errutil.OK
48+
for _, header := range h.ResponseHeaders.Headers.GetHeaders() {
49+
if header.Key == "status" {
50+
code := header.RawValue[0]
51+
if string(code) != "200" {
52+
reqCtx.ResponseStatusCode = errutil.ModelServerError
53+
}
54+
break
55+
}
56+
}
57+
1958
resp := &extProcPb.ProcessingResponse{
2059
Response: &extProcPb.ProcessingResponse_ResponseHeaders{
2160
ResponseHeaders: &extProcPb.HeadersResponse{
@@ -71,7 +110,7 @@ func (s *Server) HandleResponseBody(reqCtx *RequestContext, req *extProcPb.Proce
71110

72111
res := Response{}
73112
if err := json.Unmarshal(body.ResponseBody.Body, &res); err != nil {
74-
return nil, fmt.Errorf("unmarshaling response body: %v", err)
113+
return nil, errutil.Error{Code: errutil.Internal, Msg: fmt.Sprintf("unmarshaling response body: %v", err)}
75114
}
76115
reqCtx.Response = res
77116
reqCtx.ResponseSize = len(body.ResponseBody.Body)

pkg/ext-proc/handlers/server.go

+33-2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend"
1414
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/metrics"
1515
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/scheduling"
16+
errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error"
1617
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
1718
)
1819

@@ -105,12 +106,19 @@ func (s *Server) Process(srv extProcPb.ExternalProcessor_ProcessServer) error {
105106
klog.V(logutil.DEFAULT).ErrorS(nil, "Unknown Request type", "request", v)
106107
return status.Error(codes.Unknown, "unknown request type")
107108
}
109+
110+
// This indicates error from the underlying model server.
111+
if reqCtx.ResponseStatusCode != errutil.OK {
112+
metrics.RecordRequestErrCounter(reqCtx.Model, reqCtx.ResolvedTargetModel, reqCtx.ResponseStatusCode)
113+
}
114+
108115
if err != nil {
109116
klog.V(logutil.DEFAULT).ErrorS(err, "Failed to process request", "request", req)
110-
switch status.Code(err) {
117+
metrics.RecordRequestErrCounter(reqCtx.Model, reqCtx.ResolvedTargetModel, errutil.CanonicalCode(err))
118+
switch errutil.CanonicalCode(err) {
111119
// This code can be returned by scheduler when there is no capacity for sheddable
112120
// requests.
113-
case codes.ResourceExhausted:
121+
case errutil.InferencePoolResourceExhausted:
114122
resp = &extProcPb.ProcessingResponse{
115123
Response: &extProcPb.ProcessingResponse_ImmediateResponse{
116124
ImmediateResponse: &extProcPb.ImmediateResponse{
@@ -120,6 +128,28 @@ func (s *Server) Process(srv extProcPb.ExternalProcessor_ProcessServer) error {
120128
},
121129
},
122130
}
131+
// This code can be returned when the EPP processes the request and runs into server-side errors.
132+
case errutil.Internal:
133+
resp = &extProcPb.ProcessingResponse{
134+
Response: &extProcPb.ProcessingResponse_ImmediateResponse{
135+
ImmediateResponse: &extProcPb.ImmediateResponse{
136+
Status: &envoyTypePb.HttpStatus{
137+
Code: envoyTypePb.StatusCode_InternalServerError,
138+
},
139+
},
140+
},
141+
}
142+
// This code can be returned when users provide invalid json request.
143+
case errutil.InvalidRequest:
144+
resp = &extProcPb.ProcessingResponse{
145+
Response: &extProcPb.ProcessingResponse_ImmediateResponse{
146+
ImmediateResponse: &extProcPb.ImmediateResponse{
147+
Status: &envoyTypePb.HttpStatus{
148+
Code: envoyTypePb.StatusCode_BadRequest,
149+
},
150+
},
151+
},
152+
}
123153
default:
124154
return status.Errorf(status.Code(err), "failed to handle request: %v", err)
125155
}
@@ -144,4 +174,5 @@ type RequestContext struct {
144174
Response Response
145175
ResponseSize int
146176
ResponseComplete bool
177+
ResponseStatusCode string
147178
}

pkg/ext-proc/metrics/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ spec:
4141
| Metric name | Metric Type | Description | Labels | Status |
4242
| ------------|--------------| ----------- | ------ | ------ |
4343
| inference_model_request_total | Counter | The counter of requests broken out for each model. | `model_name`=&lt;model-name&gt; <br> `target_model_name`=&lt;target-model-name&gt; | ALPHA |
44+
| inference_model_request_error_total | Counter | The counter of request errors broken out for each model. | `model_name`=&lt;model-name&gt; <br> `target_model_name`=&lt;target-model-name&gt; | ALPHA |
4445
| inference_model_request_duration_seconds | Distribution | Distribution of response latency. | `model_name`=&lt;model-name&gt; <br> `target_model_name`=&lt;target-model-name&gt; | ALPHA |
4546
| inference_model_request_sizes | Distribution | Distribution of request size in bytes. | `model_name`=&lt;model-name&gt; <br> `target_model_name`=&lt;target-model-name&gt; | ALPHA |
4647
| inference_model_response_sizes | Distribution | Distribution of response size in bytes. | `model_name`=&lt;model-name&gt; <br> `target_model_name`=&lt;target-model-name&gt; | ALPHA |

pkg/ext-proc/metrics/metrics.go

+19-1
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,22 @@ var (
2121
&compbasemetrics.CounterOpts{
2222
Subsystem: InferenceModelComponent,
2323
Name: "request_total",
24-
Help: "Counter of inference model requests broken out for each model and target model.",
24+
Help: "Counter of inference requests broken out for each model and target model.",
2525
StabilityLevel: compbasemetrics.ALPHA,
2626
},
2727
[]string{"model_name", "target_model_name"},
2828
)
2929

30+
requestErrCounter = compbasemetrics.NewCounterVec(
31+
&compbasemetrics.CounterOpts{
32+
Subsystem: InferenceModelComponent,
33+
Name: "request_error_total",
34+
Help: "Counter of inference requests errors broken out for each model and target model.",
35+
StabilityLevel: compbasemetrics.ALPHA,
36+
},
37+
[]string{"model_name", "target_model_name", "error_code"},
38+
)
39+
3040
requestLatencies = compbasemetrics.NewHistogramVec(
3141
&compbasemetrics.HistogramOpts{
3242
Subsystem: InferenceModelComponent,
@@ -122,6 +132,7 @@ var registerMetrics sync.Once
122132
func Register() {
123133
registerMetrics.Do(func() {
124134
legacyregistry.MustRegister(requestCounter)
135+
legacyregistry.MustRegister(requestErrCounter)
125136
legacyregistry.MustRegister(requestLatencies)
126137
legacyregistry.MustRegister(requestSizes)
127138
legacyregistry.MustRegister(responseSizes)
@@ -138,6 +149,13 @@ func RecordRequestCounter(modelName, targetModelName string) {
138149
requestCounter.WithLabelValues(modelName, targetModelName).Inc()
139150
}
140151

152+
// RecordRequestErrCounter records the number of error requests.
153+
func RecordRequestErrCounter(modelName, targetModelName string, code string) {
154+
if code != "" {
155+
requestErrCounter.WithLabelValues(modelName, targetModelName, code).Inc()
156+
}
157+
}
158+
141159
// RecordRequestSizes records the request sizes.
142160
func RecordRequestSizes(modelName, targetModelName string, reqSize int) {
143161
requestSizes.WithLabelValues(modelName, targetModelName).Observe(float64(reqSize))

pkg/ext-proc/metrics/metrics_test.go

+61
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@ import (
77

88
"k8s.io/component-base/metrics/legacyregistry"
99
"k8s.io/component-base/metrics/testutil"
10+
errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error"
1011
)
1112

1213
const RequestTotalMetric = InferenceModelComponent + "_request_total"
14+
const RequestErrorTotalMetric = InferenceModelComponent + "_request_error_total"
1315
const RequestLatenciesMetric = InferenceModelComponent + "_request_duration_seconds"
1416
const RequestSizesMetric = InferenceModelComponent + "_request_sizes"
1517
const ResponseSizesMetric = InferenceModelComponent + "_response_sizes"
@@ -88,6 +90,65 @@ func TestRecordRequestCounterandSizes(t *testing.T) {
8890
}
8991
}
9092

93+
func TestRecordRequestErrorCounter(t *testing.T) {
94+
type requests struct {
95+
modelName string
96+
targetModelName string
97+
error string
98+
}
99+
scenarios := []struct {
100+
name string
101+
reqs []requests
102+
invalid bool
103+
}{{
104+
name: "multiple requests",
105+
reqs: []requests{
106+
{
107+
modelName: "m10",
108+
targetModelName: "t10",
109+
error: errutil.Internal,
110+
},
111+
{
112+
modelName: "m10",
113+
targetModelName: "t10",
114+
error: errutil.Internal,
115+
},
116+
{
117+
modelName: "m10",
118+
targetModelName: "t11",
119+
error: errutil.ModelServerError,
120+
},
121+
{
122+
modelName: "m20",
123+
targetModelName: "t20",
124+
error: errutil.InferencePoolResourceExhausted,
125+
},
126+
},
127+
},
128+
}
129+
Register()
130+
for _, scenario := range scenarios {
131+
t.Run(scenario.name, func(t *testing.T) {
132+
for _, req := range scenario.reqs {
133+
RecordRequestErrCounter(req.modelName, req.targetModelName, req.error)
134+
}
135+
136+
wantRequestErrorCounter, err := os.Open("testdata/request_error_total_metric")
137+
defer func() {
138+
if err := wantRequestErrorCounter.Close(); err != nil {
139+
t.Error(err)
140+
}
141+
}()
142+
if err != nil {
143+
t.Fatal(err)
144+
}
145+
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, wantRequestErrorCounter, RequestErrorTotalMetric); err != nil {
146+
t.Error(err)
147+
}
148+
})
149+
}
150+
}
151+
91152
func TestRecordRequestLatencies(t *testing.T) {
92153
timeBaseline := time.Now()
93154
type requests struct {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# HELP inference_model_request_error_total [ALPHA] Counter of inference requests errors broken out for each model and target model.
2+
# TYPE inference_model_request_error_total counter
3+
inference_model_request_error_total{error_code="Internal", model_name="m10",target_model_name="t10"} 2
4+
inference_model_request_error_total{error_code="ModelServerError", model_name="m10",target_model_name="t11"} 1
5+
inference_model_request_error_total{error_code="InferencePoolResourceExhausted", model_name="m20",target_model_name="t20"} 1

pkg/ext-proc/metrics/testdata/request_total_metric

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
1+
# HELP inference_model_request_total [ALPHA] Counter of inference requests broken out for each model and target model.
22
# TYPE inference_model_request_total counter
33
inference_model_request_total{model_name="m10", target_model_name="t10"} 2
44
inference_model_request_total{model_name="m10", target_model_name="t11"} 1

pkg/ext-proc/scheduling/scheduler.go

+3-4
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,9 @@ import (
55
"fmt"
66
"math/rand"
77

8-
"google.golang.org/grpc/codes"
9-
"google.golang.org/grpc/status"
108
klog "k8s.io/klog/v2"
119
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend"
10+
errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error"
1211
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
1312
)
1413

@@ -84,8 +83,8 @@ var (
8483
name: "drop request",
8584
filter: func(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) {
8685
klog.V(logutil.DEFAULT).InfoS("Request dropped", "request", req)
87-
return []*backend.PodMetrics{}, status.Errorf(
88-
codes.ResourceExhausted, "dropping request due to limited backend resources")
86+
return []*backend.PodMetrics{}, errutil.Error{
87+
Code: errutil.InferencePoolResourceExhausted, Msg: "dropping request due to limited backend resources"}
8988
},
9089
},
9190
}

pkg/ext-proc/util/error/error.go

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
// Package error defines a structured error type and the canonical error
// codes used by the epp (ext-proc) server. The codes double as the
// "error_code" label on the inference_model_request_error_total metric.
package error

import (
	"errors"
	"fmt"

	"k8s.io/klog/v2"

	logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
)

// Error is an error struct for errors returned by the epp server.
type Error struct {
	// Code is one of the canonical error codes declared below.
	Code string
	// Msg is a human-readable description of the failure.
	Msg string
}

// Canonical error codes. These are stable strings: they are emitted as
// metric label values, so renaming one is a breaking change for dashboards.
const (
	OK                             = "OK"
	Unknown                        = "Unknown"
	InvalidRequest                 = "InvalidRequest"
	Internal                       = "Internal"
	ModelServerError               = "ModelServerError"
	InferencePoolResourceExhausted = "InferencePoolResourceExhausted"
)

// Error returns a string version of the error.
func (e Error) Error() string {
	return fmt.Sprintf("inference gateway: %s - %s", e.Code, e.Msg)
}

// CanonicalCode returns the error's canonical code, or Unknown when err
// (or any error it wraps) is not an Error.
func CanonicalCode(err error) string {
	var e Error
	// Use errors.As rather than a plain type assertion so that an Error
	// wrapped with fmt.Errorf("...: %w", err) is still classified correctly.
	if errors.As(err, &e) {
		return e.Code
	}
	klog.V(logutil.VERBOSE).InfoS("Failed to convert error to an inference gateway Error", "err", err)
	return Unknown
}

0 commit comments

Comments
 (0)