Skip to content

Commit 5d78491

Browse files
committed
Add metrics validation in integration test
Start by adding request total metrics; more validation will be added in a follow-up. kubernetes-sigs#326
1 parent 2ad70e3 commit 5d78491

File tree

1 file changed

+77
-5
lines changed

1 file changed

+77
-5
lines changed

test/integration/hermetic_test.go

+77-5
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,20 @@ import (
2424
"errors"
2525
"fmt"
2626
"io"
27+
"net"
28+
"net/http"
2729
"os"
2830
"path/filepath"
31+
"strconv"
32+
"strings"
2933
"testing"
3034
"time"
3135

3236
configPb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
3337
extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
3438
envoyTypePb "github.com/envoyproxy/go-control-plane/envoy/type/v3"
3539
"github.com/google/go-cmp/cmp"
40+
"github.com/prometheus/client_golang/prometheus/promhttp"
3641
"github.com/stretchr/testify/assert"
3742
"google.golang.org/grpc"
3843
"google.golang.org/grpc/credentials/insecure"
@@ -43,12 +48,16 @@ import (
4348
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
4449
k8syaml "k8s.io/apimachinery/pkg/util/yaml"
4550
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
51+
"k8s.io/component-base/metrics/legacyregistry"
52+
metricsutils "k8s.io/component-base/metrics/testutil"
4653
ctrl "sigs.k8s.io/controller-runtime"
4754
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
4855
"sigs.k8s.io/controller-runtime/pkg/envtest"
56+
"sigs.k8s.io/controller-runtime/pkg/manager"
4957
"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
5058
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
5159
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
60+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
5261
runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server"
5362
extprocutils "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/test"
5463
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
@@ -57,7 +66,8 @@ import (
5766
)
5867

5968
const (
60-
port = runserver.DefaultGrpcPort
69+
port = runserver.DefaultGrpcPort
70+
metricsPort = 8888
6171
)
6272

6373
var (
@@ -76,6 +86,7 @@ func TestKubeInferenceModelRequest(t *testing.T) {
7686
wantHeaders []*configPb.HeaderValueOption
7787
wantMetadata *structpb.Struct
7888
wantBody []byte
89+
wantMetrics string
7990
wantErr bool
8091
immediateResponse *extProcPb.ImmediateResponse
8192
}{
@@ -113,7 +124,12 @@ func TestKubeInferenceModelRequest(t *testing.T) {
113124
},
114125
wantMetadata: makeMetadata("address-1:8000"),
115126
wantBody: []byte("{\"max_tokens\":100,\"model\":\"my-model-12345\",\"prompt\":\"test1\",\"temperature\":0}"),
116-
wantErr: false,
127+
wantMetrics: `
128+
# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
129+
# TYPE inference_model_request_total counter
130+
inference_model_request_total{model_name="my-model",target_model_name="my-model-12345"} 1
131+
`,
132+
wantErr: false,
117133
},
118134
{
119135
name: "select active lora, low queue",
@@ -161,7 +177,12 @@ func TestKubeInferenceModelRequest(t *testing.T) {
161177
},
162178
wantMetadata: makeMetadata("address-1:8000"),
163179
wantBody: []byte("{\"max_tokens\":100,\"model\":\"sql-lora-1fdg2\",\"prompt\":\"test2\",\"temperature\":0}"),
164-
wantErr: false,
180+
wantMetrics: `
181+
# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
182+
# TYPE inference_model_request_total counter
183+
inference_model_request_total{model_name="sql-lora",target_model_name="sql-lora-1fdg2"} 1
184+
`,
185+
wantErr: false,
165186
},
166187
{
167188
name: "select no lora despite active model, avoid excessive queue size",
@@ -210,7 +231,12 @@ func TestKubeInferenceModelRequest(t *testing.T) {
210231
},
211232
wantMetadata: makeMetadata("address-2:8000"),
212233
wantBody: []byte("{\"max_tokens\":100,\"model\":\"sql-lora-1fdg2\",\"prompt\":\"test3\",\"temperature\":0}"),
213-
wantErr: false,
234+
wantMetrics: `
235+
# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
236+
# TYPE inference_model_request_total counter
237+
inference_model_request_total{model_name="sql-lora",target_model_name="sql-lora-1fdg2"} 1
238+
`,
239+
wantErr: false,
214240
},
215241
{
216242
name: "noncritical and all models past threshold, shed request",
@@ -253,6 +279,7 @@ func TestKubeInferenceModelRequest(t *testing.T) {
253279
Code: envoyTypePb.StatusCode_TooManyRequests,
254280
},
255281
},
282+
wantMetrics: "",
256283
},
257284
{
258285
name: "noncritical, but one server has capacity, do not shed",
@@ -301,7 +328,12 @@ func TestKubeInferenceModelRequest(t *testing.T) {
301328
},
302329
wantMetadata: makeMetadata("address-0:8000"),
303330
wantBody: []byte("{\"max_tokens\":100,\"model\":\"sql-lora-1fdg3\",\"prompt\":\"test5\",\"temperature\":0}"),
304-
wantErr: false,
331+
wantMetrics: `
332+
# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
333+
# TYPE inference_model_request_total counter
334+
inference_model_request_total{model_name="sql-lora-sheddable",target_model_name="sql-lora-1fdg3"} 1
335+
`,
336+
wantErr: false,
305337
},
306338
}
307339

@@ -345,6 +377,14 @@ func TestKubeInferenceModelRequest(t *testing.T) {
345377
if diff := cmp.Diff(want, res, protocmp.Transform()); diff != "" {
346378
t.Errorf("Unexpected response, (-want +got): %v", diff)
347379
}
380+
381+
if test.wantMetrics != "" {
382+
if err := metricsutils.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(test.wantMetrics), "inference_model_request_total"); err != nil {
383+
t.Error(err)
384+
}
385+
}
386+
387+
legacyregistry.Reset()
348388
})
349389
}
350390
}
@@ -423,6 +463,10 @@ func BeforeSuit(t *testing.T) func() {
423463
logutil.Fatal(logger, err, "Failed to create controller manager")
424464
}
425465

466+
if err := registerMetricsHandler(mgr, metricsPort); err != nil {
467+
logutil.Fatal(logger, err, "Failed to register metrics handler")
468+
}
469+
426470
serverRunner = runserver.NewDefaultExtProcServerRunner()
427471
// Adjust from defaults
428472
serverRunner.PoolName = "vllm-llama2-7b-pool"
@@ -543,3 +587,31 @@ func makeMetadata(endpoint string) *structpb.Struct {
543587
},
544588
}
545589
}
590+
591+
// registerMetricsHandler is a simplified version of metrics endpoint handler
592+
// without Authentication for integration tests.
593+
func registerMetricsHandler(mgr manager.Manager, port int) error {
594+
metrics.Register()
595+
596+
// Init HTTP server.
597+
h := promhttp.HandlerFor(
598+
legacyregistry.DefaultGatherer,
599+
promhttp.HandlerOpts{},
600+
)
601+
602+
mux := http.NewServeMux()
603+
mux.Handle("/metrics", h)
604+
605+
srv := &http.Server{
606+
Addr: net.JoinHostPort("", strconv.Itoa(port)),
607+
Handler: mux,
608+
}
609+
610+
if err := mgr.Add(&manager.Server{
611+
Name: "metrics",
612+
Server: srv,
613+
}); err != nil {
614+
return err
615+
}
616+
return nil
617+
}

0 commit comments

Comments
 (0)