Skip to content

Commit 6130ee0

Browse files
authored
[Metrics] Add request error metrics (#269)
This change defines some general error types; the list might grow in the future if finer-grained error types are needed.
1 parent 9f34673 commit 6130ee0

File tree

9 files changed

+229
-25
lines changed

9 files changed

+229
-25
lines changed

pkg/ext-proc/handlers/request.go

+7-7
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package handlers
1919
import (
2020
"context"
2121
"encoding/json"
22-
"errors"
2322
"fmt"
2423
"strconv"
2524

@@ -29,6 +28,7 @@ import (
2928
"sigs.k8s.io/controller-runtime/pkg/log"
3029
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore"
3130
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/scheduling"
31+
errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error"
3232
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
3333
)
3434

@@ -49,14 +49,14 @@ func (s *Server) HandleRequestBody(
4949
var rb map[string]interface{}
5050
if err := json.Unmarshal(v.RequestBody.Body, &rb); err != nil {
5151
logger.V(logutil.DEFAULT).Error(err, "Error unmarshaling request body")
52-
return nil, fmt.Errorf("error unmarshaling request body: %v", err)
52+
return nil, errutil.Error{Code: errutil.BadRequest, Msg: fmt.Sprintf("error unmarshaling request body: %v", err)}
5353
}
5454
loggerVerbose.Info("Request body unmarshalled", "body", rb)
5555

5656
// Resolve target models.
5757
model, ok := rb["model"].(string)
5858
if !ok {
59-
return nil, errors.New("model not found in request")
59+
return nil, errutil.Error{Code: errutil.BadRequest, Msg: "model not found in request"}
6060
}
6161
loggerVerbose.Info("Model requested", "model", model)
6262
modelName := model
@@ -66,12 +66,12 @@ func (s *Server) HandleRequestBody(
6666
// are able to be requested by using their distinct name.
6767
modelObj, exist := s.datastore.ModelGet(model)
6868
if !exist {
69-
return nil, fmt.Errorf("error finding a model object in InferenceModel for input %v", model)
69+
return nil, errutil.Error{Code: errutil.BadConfiguration, Msg: fmt.Sprintf("error finding a model object in InferenceModel for input %v", model)}
7070
}
7171
if len(modelObj.Spec.TargetModels) > 0 {
7272
modelName = datastore.RandomWeightedDraw(logger, modelObj, 0)
7373
if modelName == "" {
74-
return nil, fmt.Errorf("error getting target model name for model %v", modelObj.Name)
74+
return nil, errutil.Error{Code: errutil.BadConfiguration, Msg: fmt.Sprintf("error getting target model name for model %v", modelObj.Name)}
7575
}
7676
}
7777
llmReq := &scheduling.LLMRequest{
@@ -89,14 +89,14 @@ func (s *Server) HandleRequestBody(
8989
requestBody, err = json.Marshal(rb)
9090
if err != nil {
9191
logger.V(logutil.DEFAULT).Error(err, "Error marshaling request body")
92-
return nil, fmt.Errorf("error marshaling request body: %v", err)
92+
return nil, errutil.Error{Code: errutil.Internal, Msg: fmt.Sprintf("error marshaling request body: %v", err)}
9393
}
9494
loggerVerbose.Info("Updated request body marshalled", "body", string(requestBody))
9595
}
9696

9797
targetPod, err := s.scheduler.Schedule(ctx, llmReq)
9898
if err != nil {
99-
return nil, fmt.Errorf("failed to find target pod: %w", err)
99+
return nil, errutil.Error{Code: errutil.InferencePoolResourceExhausted, Msg: fmt.Errorf("failed to find target pod: %w", err).Error()}
100100
}
101101

102102
logger.V(logutil.DEFAULT).Info("Request handled",

pkg/ext-proc/handlers/response.go

+39-1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
configPb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
2525
extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
2626
"sigs.k8s.io/controller-runtime/pkg/log"
27+
errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error"
2728
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
2829
)
2930

@@ -38,6 +39,43 @@ func (s *Server) HandleResponseHeaders(
3839
h := req.Request.(*extProcPb.ProcessingRequest_ResponseHeaders)
3940
loggerVerbose.Info("Headers before", "headers", h)
4041

42+
// Example header
43+
// {
44+
// "ResponseHeaders": {
45+
// "headers": [
46+
// {
47+
// "key": ":status",
48+
// "raw_value": "200"
49+
// },
50+
// {
51+
// "key": "date",
52+
// "raw_value": "Thu, 30 Jan 2025 18:50:48 GMT"
53+
// },
54+
// {
55+
// "key": "server",
56+
// "raw_value": "uvicorn"
57+
// },
58+
// {
59+
// "key": "content-type",
60+
// "raw_value": "text/event-stream; charset=utf-8"
61+
// },
62+
// {
63+
// "key": "transfer-encoding",
64+
// "raw_value": "chunked"
65+
// }
66+
// ]
67+
// }
68+
// }
69+
for _, header := range h.ResponseHeaders.Headers.GetHeaders() {
70+
if header.Key == "status" {
71+
code := header.RawValue[0]
72+
if string(code) != "200" {
73+
reqCtx.ResponseStatusCode = errutil.ModelServerError
74+
}
75+
break
76+
}
77+
}
78+
4179
resp := &extProcPb.ProcessingResponse{
4280
Response: &extProcPb.ProcessingResponse_ResponseHeaders{
4381
ResponseHeaders: &extProcPb.HeadersResponse{
@@ -99,7 +137,7 @@ func (s *Server) HandleResponseBody(
99137

100138
res := Response{}
101139
if err := json.Unmarshal(body.ResponseBody.Body, &res); err != nil {
102-
return nil, fmt.Errorf("unmarshaling response body: %v", err)
140+
return nil, errutil.Error{Code: errutil.Internal, Msg: fmt.Sprintf("unmarshaling response body: %v", err)}
103141
}
104142
reqCtx.Response = res
105143
reqCtx.ResponseSize = len(body.ResponseBody.Body)

pkg/ext-proc/handlers/server.go

+53-5
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore"
3131
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/metrics"
3232
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/scheduling"
33+
errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error"
3334
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
3435
)
3536

@@ -65,18 +66,30 @@ func (s *Server) Process(srv extProcPb.ExternalProcessor_ProcessServer) error {
6566
// See https://github.com/envoyproxy/envoy/issues/17540.
6667
reqCtx := &RequestContext{}
6768

69+
// Create variable for error handling as each request should only report once for
70+
// error metric. This doesn't cover the "Cannot receive stream request" error because
71+
// such an error might happen even after the response is processed.
72+
var err error
73+
defer func(error) {
74+
if reqCtx.ResponseStatusCode != "" {
75+
metrics.RecordRequestErrCounter(reqCtx.Model, reqCtx.ResolvedTargetModel, reqCtx.ResponseStatusCode)
76+
} else if err != nil {
77+
metrics.RecordRequestErrCounter(reqCtx.Model, reqCtx.ResolvedTargetModel, errutil.CanonicalCode(err))
78+
}
79+
}(err)
80+
6881
for {
6982
select {
7083
case <-ctx.Done():
7184
return ctx.Err()
7285
default:
7386
}
7487

75-
req, err := srv.Recv()
76-
if err == io.EOF || errors.Is(err, context.Canceled) {
88+
req, recvErr := srv.Recv()
89+
if recvErr == io.EOF || errors.Is(recvErr, context.Canceled) {
7790
return nil
7891
}
79-
if err != nil {
92+
if recvErr != nil {
8093
// This error occurs very frequently, though it doesn't seem to have any impact.
8194
// TODO Figure out if we can remove this noise.
8295
loggerVerbose.Error(err, "Cannot receive stream request")
@@ -113,12 +126,13 @@ func (s *Server) Process(srv extProcPb.ExternalProcessor_ProcessServer) error {
113126
logger.V(logutil.DEFAULT).Error(nil, "Unknown Request type", "request", v)
114127
return status.Error(codes.Unknown, "unknown request type")
115128
}
129+
116130
if err != nil {
117131
logger.V(logutil.DEFAULT).Error(err, "Failed to process request", "request", req)
118-
switch status.Code(err) {
132+
switch errutil.CanonicalCode(err) {
119133
// This code can be returned by scheduler when there is no capacity for sheddable
120134
// requests.
121-
case codes.ResourceExhausted:
135+
case errutil.InferencePoolResourceExhausted:
122136
resp = &extProcPb.ProcessingResponse{
123137
Response: &extProcPb.ProcessingResponse_ImmediateResponse{
124138
ImmediateResponse: &extProcPb.ImmediateResponse{
@@ -128,6 +142,38 @@ func (s *Server) Process(srv extProcPb.ExternalProcessor_ProcessServer) error {
128142
},
129143
},
130144
}
145+
// This code can be returned when EPP processes the request and runs into server-side errors.
146+
case errutil.Internal:
147+
resp = &extProcPb.ProcessingResponse{
148+
Response: &extProcPb.ProcessingResponse_ImmediateResponse{
149+
ImmediateResponse: &extProcPb.ImmediateResponse{
150+
Status: &envoyTypePb.HttpStatus{
151+
Code: envoyTypePb.StatusCode_InternalServerError,
152+
},
153+
},
154+
},
155+
}
156+
// This code can be returned when users provide invalid json request.
157+
case errutil.BadRequest:
158+
resp = &extProcPb.ProcessingResponse{
159+
Response: &extProcPb.ProcessingResponse_ImmediateResponse{
160+
ImmediateResponse: &extProcPb.ImmediateResponse{
161+
Status: &envoyTypePb.HttpStatus{
162+
Code: envoyTypePb.StatusCode_BadRequest,
163+
},
164+
},
165+
},
166+
}
167+
case errutil.BadConfiguration:
168+
resp = &extProcPb.ProcessingResponse{
169+
Response: &extProcPb.ProcessingResponse_ImmediateResponse{
170+
ImmediateResponse: &extProcPb.ImmediateResponse{
171+
Status: &envoyTypePb.HttpStatus{
172+
Code: envoyTypePb.StatusCode_NotFound,
173+
},
174+
},
175+
},
176+
}
131177
default:
132178
return status.Errorf(status.Code(err), "failed to handle request: %v", err)
133179
}
@@ -139,6 +185,7 @@ func (s *Server) Process(srv extProcPb.ExternalProcessor_ProcessServer) error {
139185
return status.Errorf(codes.Unknown, "failed to send response back to Envoy: %v", err)
140186
}
141187
}
188+
142189
}
143190

144191
// RequestContext stores context information during the life time of an HTTP request.
@@ -153,4 +200,5 @@ type RequestContext struct {
153200
Response Response
154201
ResponseSize int
155202
ResponseComplete bool
203+
ResponseStatusCode string
156204
}

pkg/ext-proc/metrics/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ spec:
4141
| Metric name | Metric Type | Description | Labels | Status |
4242
| ------------|--------------| ----------- | ------ | ------ |
4343
| inference_model_request_total | Counter | The counter of requests broken out for each model. | `model_name`=&lt;model-name&gt; <br> `target_model_name`=&lt;target-model-name&gt; | ALPHA |
44+
| inference_model_request_error_total | Counter | The counter of request errors broken out for each model. | `model_name`=&lt;model-name&gt; <br> `target_model_name`=&lt;target-model-name&gt; <br> `error_code`=&lt;error-code&gt; | ALPHA |
4445
| inference_model_request_duration_seconds | Distribution | Distribution of response latency. | `model_name`=&lt;model-name&gt; <br> `target_model_name`=&lt;target-model-name&gt; | ALPHA |
4546
| inference_model_request_sizes | Distribution | Distribution of request size in bytes. | `model_name`=&lt;model-name&gt; <br> `target_model_name`=&lt;target-model-name&gt; | ALPHA |
4647
| inference_model_response_sizes | Distribution | Distribution of response size in bytes. | `model_name`=&lt;model-name&gt; <br> `target_model_name`=&lt;target-model-name&gt; | ALPHA |

pkg/ext-proc/metrics/metrics.go

+18
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,16 @@ var (
4444
[]string{"model_name", "target_model_name"},
4545
)
4646

47+
requestErrCounter = compbasemetrics.NewCounterVec(
48+
&compbasemetrics.CounterOpts{
49+
Subsystem: InferenceModelComponent,
50+
Name: "request_error_total",
51+
Help: "Counter of inference model requests errors broken out for each model and target model.",
52+
StabilityLevel: compbasemetrics.ALPHA,
53+
},
54+
[]string{"model_name", "target_model_name", "error_code"},
55+
)
56+
4757
requestLatencies = compbasemetrics.NewHistogramVec(
4858
&compbasemetrics.HistogramOpts{
4959
Subsystem: InferenceModelComponent,
@@ -139,6 +149,7 @@ var registerMetrics sync.Once
139149
func Register() {
140150
registerMetrics.Do(func() {
141151
legacyregistry.MustRegister(requestCounter)
152+
legacyregistry.MustRegister(requestErrCounter)
142153
legacyregistry.MustRegister(requestLatencies)
143154
legacyregistry.MustRegister(requestSizes)
144155
legacyregistry.MustRegister(responseSizes)
@@ -155,6 +166,13 @@ func RecordRequestCounter(modelName, targetModelName string) {
155166
requestCounter.WithLabelValues(modelName, targetModelName).Inc()
156167
}
157168

169+
// RecordRequestErrCounter records the number of error requests.
170+
func RecordRequestErrCounter(modelName, targetModelName string, code string) {
171+
if code != "" {
172+
requestErrCounter.WithLabelValues(modelName, targetModelName, code).Inc()
173+
}
174+
}
175+
158176
// RecordRequestSizes records the request sizes.
159177
func RecordRequestSizes(modelName, targetModelName string, reqSize int) {
160178
requestSizes.WithLabelValues(modelName, targetModelName).Observe(float64(reqSize))

pkg/ext-proc/metrics/metrics_test.go

+69-8
Original file line numberDiff line numberDiff line change
@@ -24,18 +24,20 @@ import (
2424

2525
"k8s.io/component-base/metrics/legacyregistry"
2626
"k8s.io/component-base/metrics/testutil"
27+
errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error"
2728
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
2829
)
2930

3031
const (
31-
RequestTotalMetric = InferenceModelComponent + "_request_total"
32-
RequestLatenciesMetric = InferenceModelComponent + "_request_duration_seconds"
33-
RequestSizesMetric = InferenceModelComponent + "_request_sizes"
34-
ResponseSizesMetric = InferenceModelComponent + "_response_sizes"
35-
InputTokensMetric = InferenceModelComponent + "_input_tokens"
36-
OutputTokensMetric = InferenceModelComponent + "_output_tokens"
37-
KVCacheAvgUsageMetric = InferencePoolComponent + "_average_kv_cache_utilization"
38-
QueueAvgSizeMetric = InferencePoolComponent + "_average_queue_size"
32+
RequestTotalMetric = InferenceModelComponent + "_request_total"
33+
RequestErrorTotalMetric = InferenceModelComponent + "_request_error_total"
34+
RequestLatenciesMetric = InferenceModelComponent + "_request_duration_seconds"
35+
RequestSizesMetric = InferenceModelComponent + "_request_sizes"
36+
ResponseSizesMetric = InferenceModelComponent + "_response_sizes"
37+
InputTokensMetric = InferenceModelComponent + "_input_tokens"
38+
OutputTokensMetric = InferenceModelComponent + "_output_tokens"
39+
KVCacheAvgUsageMetric = InferencePoolComponent + "_average_kv_cache_utilization"
40+
QueueAvgSizeMetric = InferencePoolComponent + "_average_queue_size"
3941
)
4042

4143
func TestRecordRequestCounterandSizes(t *testing.T) {
@@ -107,6 +109,65 @@ func TestRecordRequestCounterandSizes(t *testing.T) {
107109
}
108110
}
109111

112+
func TestRecordRequestErrorCounter(t *testing.T) {
113+
type requests struct {
114+
modelName string
115+
targetModelName string
116+
error string
117+
}
118+
scenarios := []struct {
119+
name string
120+
reqs []requests
121+
invalid bool
122+
}{{
123+
name: "multiple requests",
124+
reqs: []requests{
125+
{
126+
modelName: "m10",
127+
targetModelName: "t10",
128+
error: errutil.Internal,
129+
},
130+
{
131+
modelName: "m10",
132+
targetModelName: "t10",
133+
error: errutil.Internal,
134+
},
135+
{
136+
modelName: "m10",
137+
targetModelName: "t11",
138+
error: errutil.ModelServerError,
139+
},
140+
{
141+
modelName: "m20",
142+
targetModelName: "t20",
143+
error: errutil.InferencePoolResourceExhausted,
144+
},
145+
},
146+
},
147+
}
148+
Register()
149+
for _, scenario := range scenarios {
150+
t.Run(scenario.name, func(t *testing.T) {
151+
for _, req := range scenario.reqs {
152+
RecordRequestErrCounter(req.modelName, req.targetModelName, req.error)
153+
}
154+
155+
wantRequestErrorCounter, err := os.Open("testdata/request_error_total_metric")
156+
defer func() {
157+
if err := wantRequestErrorCounter.Close(); err != nil {
158+
t.Error(err)
159+
}
160+
}()
161+
if err != nil {
162+
t.Fatal(err)
163+
}
164+
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, wantRequestErrorCounter, RequestErrorTotalMetric); err != nil {
165+
t.Error(err)
166+
}
167+
})
168+
}
169+
}
170+
110171
func TestRecordRequestLatencies(t *testing.T) {
111172
ctx := logutil.NewTestLoggerIntoContext(context.Background())
112173
timeBaseline := time.Now()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# HELP inference_model_request_error_total [ALPHA] Counter of inference model requests errors broken out for each model and target model.
2+
# TYPE inference_model_request_error_total counter
3+
inference_model_request_error_total{error_code="Internal", model_name="m10",target_model_name="t10"} 2
4+
inference_model_request_error_total{error_code="ModelServerError", model_name="m10",target_model_name="t11"} 1
5+
inference_model_request_error_total{error_code="InferencePoolResourceExhausted", model_name="m20",target_model_name="t20"} 1

0 commit comments

Comments
 (0)