@@ -24,15 +24,20 @@ import (
24
24
"errors"
25
25
"fmt"
26
26
"io"
27
+ "net"
28
+ "net/http"
27
29
"os"
28
30
"path/filepath"
31
+ "strconv"
32
+ "strings"
29
33
"testing"
30
34
"time"
31
35
32
36
configPb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
33
37
extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
34
38
envoyTypePb "github.com/envoyproxy/go-control-plane/envoy/type/v3"
35
39
"github.com/google/go-cmp/cmp"
40
+ "github.com/prometheus/client_golang/prometheus/promhttp"
36
41
"github.com/stretchr/testify/assert"
37
42
"google.golang.org/grpc"
38
43
"google.golang.org/grpc/credentials/insecure"
@@ -43,12 +48,16 @@ import (
43
48
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
44
49
k8syaml "k8s.io/apimachinery/pkg/util/yaml"
45
50
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
51
+ "k8s.io/component-base/metrics/legacyregistry"
52
+ metricsutils "k8s.io/component-base/metrics/testutil"
46
53
ctrl "sigs.k8s.io/controller-runtime"
47
54
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
48
55
"sigs.k8s.io/controller-runtime/pkg/envtest"
56
+ "sigs.k8s.io/controller-runtime/pkg/manager"
49
57
"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
50
58
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
51
59
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
60
+ "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
52
61
runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server"
53
62
extprocutils "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/test"
54
63
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
@@ -57,7 +66,8 @@ import (
57
66
)
58
67
59
68
const (
60
- port = runserver .DefaultGrpcPort
69
+ port = runserver .DefaultGrpcPort
70
+ metricsPort = 8888
61
71
)
62
72
63
73
var (
@@ -76,6 +86,7 @@ func TestKubeInferenceModelRequest(t *testing.T) {
76
86
wantHeaders []* configPb.HeaderValueOption
77
87
wantMetadata * structpb.Struct
78
88
wantBody []byte
89
+ wantMetrics string
79
90
wantErr bool
80
91
immediateResponse * extProcPb.ImmediateResponse
81
92
}{
@@ -113,7 +124,12 @@ func TestKubeInferenceModelRequest(t *testing.T) {
113
124
},
114
125
wantMetadata : makeMetadata ("address-1:8000" ),
115
126
wantBody : []byte ("{\" max_tokens\" :100,\" model\" :\" my-model-12345\" ,\" prompt\" :\" test1\" ,\" temperature\" :0}" ),
116
- wantErr : false ,
127
+ wantMetrics : `
128
+ # HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
129
+ # TYPE inference_model_request_total counter
130
+ inference_model_request_total{model_name="my-model",target_model_name="my-model-12345"} 1
131
+ ` ,
132
+ wantErr : false ,
117
133
},
118
134
{
119
135
name : "select active lora, low queue" ,
@@ -161,7 +177,12 @@ func TestKubeInferenceModelRequest(t *testing.T) {
161
177
},
162
178
wantMetadata : makeMetadata ("address-1:8000" ),
163
179
wantBody : []byte ("{\" max_tokens\" :100,\" model\" :\" sql-lora-1fdg2\" ,\" prompt\" :\" test2\" ,\" temperature\" :0}" ),
164
- wantErr : false ,
180
+ wantMetrics : `
181
+ # HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
182
+ # TYPE inference_model_request_total counter
183
+ inference_model_request_total{model_name="sql-lora",target_model_name="sql-lora-1fdg2"} 1
184
+ ` ,
185
+ wantErr : false ,
165
186
},
166
187
{
167
188
name : "select no lora despite active model, avoid excessive queue size" ,
@@ -210,7 +231,12 @@ func TestKubeInferenceModelRequest(t *testing.T) {
210
231
},
211
232
wantMetadata : makeMetadata ("address-2:8000" ),
212
233
wantBody : []byte ("{\" max_tokens\" :100,\" model\" :\" sql-lora-1fdg2\" ,\" prompt\" :\" test3\" ,\" temperature\" :0}" ),
213
- wantErr : false ,
234
+ wantMetrics : `
235
+ # HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
236
+ # TYPE inference_model_request_total counter
237
+ inference_model_request_total{model_name="sql-lora",target_model_name="sql-lora-1fdg2"} 1
238
+ ` ,
239
+ wantErr : false ,
214
240
},
215
241
{
216
242
name : "noncritical and all models past threshold, shed request" ,
@@ -253,6 +279,7 @@ func TestKubeInferenceModelRequest(t *testing.T) {
253
279
Code : envoyTypePb .StatusCode_TooManyRequests ,
254
280
},
255
281
},
282
+ wantMetrics : "" ,
256
283
},
257
284
{
258
285
name : "noncritical, but one server has capacity, do not shed" ,
@@ -301,7 +328,12 @@ func TestKubeInferenceModelRequest(t *testing.T) {
301
328
},
302
329
wantMetadata : makeMetadata ("address-0:8000" ),
303
330
wantBody : []byte ("{\" max_tokens\" :100,\" model\" :\" sql-lora-1fdg3\" ,\" prompt\" :\" test5\" ,\" temperature\" :0}" ),
304
- wantErr : false ,
331
+ wantMetrics : `
332
+ # HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
333
+ # TYPE inference_model_request_total counter
334
+ inference_model_request_total{model_name="sql-lora-sheddable",target_model_name="sql-lora-1fdg3"} 1
335
+ ` ,
336
+ wantErr : false ,
305
337
},
306
338
}
307
339
@@ -345,6 +377,14 @@ func TestKubeInferenceModelRequest(t *testing.T) {
345
377
if diff := cmp .Diff (want , res , protocmp .Transform ()); diff != "" {
346
378
t .Errorf ("Unexpected response, (-want +got): %v" , diff )
347
379
}
380
+
381
+ if test .wantMetrics != "" {
382
+ if err := metricsutils .GatherAndCompare (legacyregistry .DefaultGatherer , strings .NewReader (test .wantMetrics ), "inference_model_request_total" ); err != nil {
383
+ t .Error (err )
384
+ }
385
+ }
386
+
387
+ legacyregistry .Reset ()
348
388
})
349
389
}
350
390
}
@@ -423,6 +463,10 @@ func BeforeSuit(t *testing.T) func() {
423
463
logutil .Fatal (logger , err , "Failed to create controller manager" )
424
464
}
425
465
466
+ if err := registerMetricsHandler (mgr , metricsPort ); err != nil {
467
+ logutil .Fatal (logger , err , "Failed to register metrics handler" )
468
+ }
469
+
426
470
serverRunner = runserver .NewDefaultExtProcServerRunner ()
427
471
// Adjust from defaults
428
472
serverRunner .PoolName = "vllm-llama2-7b-pool"
@@ -543,3 +587,31 @@ func makeMetadata(endpoint string) *structpb.Struct {
543
587
},
544
588
}
545
589
}
590
+
591
+ // registerMetricsHandler is a simplified version of metrics endpoint handler
592
+ // without Authentication for integration tests.
593
+ func registerMetricsHandler (mgr manager.Manager , port int ) error {
594
+ metrics .Register ()
595
+
596
+ // Init HTTP server.
597
+ h := promhttp .HandlerFor (
598
+ legacyregistry .DefaultGatherer ,
599
+ promhttp.HandlerOpts {},
600
+ )
601
+
602
+ mux := http .NewServeMux ()
603
+ mux .Handle ("/metrics" , h )
604
+
605
+ srv := & http.Server {
606
+ Addr : net .JoinHostPort ("" , strconv .Itoa (port )),
607
+ Handler : mux ,
608
+ }
609
+
610
+ if err := mgr .Add (& manager.Server {
611
+ Name : "metrics" ,
612
+ Server : srv ,
613
+ }); err != nil {
614
+ return err
615
+ }
616
+ return nil
617
+ }
0 commit comments