[Metrics] Add metrics validation in integration test #413

Merged

82 changes: 77 additions & 5 deletions test/integration/hermetic_test.go
@@ -24,15 +24,20 @@ import (
"errors"
"fmt"
"io"
"net"
"net/http"
"os"
"path/filepath"
"strconv"
"strings"
"testing"
"time"

configPb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
envoyTypePb "github.com/envoyproxy/go-control-plane/envoy/type/v3"
"github.com/google/go-cmp/cmp"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/stretchr/testify/assert"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
@@ -43,12 +48,16 @@ import (
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
k8syaml "k8s.io/apimachinery/pkg/util/yaml"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
"k8s.io/component-base/metrics/legacyregistry"
metricsutils "k8s.io/component-base/metrics/testutil"
ctrl "sigs.k8s.io/controller-runtime"
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/envtest"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server"
extprocutils "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/test"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
@@ -57,7 +66,8 @@ import (
)

const (
port        = runserver.DefaultGrpcPort
metricsPort = 8888
Contributor:
nit: Can we define a DefaultMetricsPort as well?

Contributor (Author):
The default port 9090 used by metrics is already in use in the integration test, so I picked a random port.

Contributor:
Ideally we should test the same metrics endpoint as in production. Can you add a TODO here to follow up on setting up correct auth in the integration test and testing the "real" metrics endpoint?

Contributor (Author):
I added a comment in #326 (comment) to make sure we don't close the issue until it is improved.
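
Following up on the DefaultMetricsPort suggestion, a minimal sketch of what such a constant could look like. The name and placement are hypothetical and not part of this PR; the test would still need its own port, since 9090 is already taken in this suite.

// Hypothetical sketch (not in this PR): expose the default metrics port next to
// DefaultGrpcPort in the runserver package, so tests and the binary share one value.
package runserver

// DefaultMetricsPort is the port the /metrics endpoint listens on by default;
// 9090 is the default the author mentions above.
const DefaultMetricsPort = 9090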

)

var (
@@ -76,6 +86,7 @@ func TestKubeInferenceModelRequest(t *testing.T) {
wantHeaders []*configPb.HeaderValueOption
wantMetadata *structpb.Struct
wantBody []byte
wantMetrics string
wantErr bool
immediateResponse *extProcPb.ImmediateResponse
}{
@@ -113,7 +124,12 @@ func TestKubeInferenceModelRequest(t *testing.T) {
},
wantMetadata: makeMetadata("address-1:8000"),
wantBody: []byte("{\"max_tokens\":100,\"model\":\"my-model-12345\",\"prompt\":\"test1\",\"temperature\":0}"),
wantMetrics: `
# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
# TYPE inference_model_request_total counter
inference_model_request_total{model_name="my-model",target_model_name="my-model-12345"} 1
`,
wantErr: false,
},
{
name: "select active lora, low queue",
@@ -161,7 +177,12 @@ func TestKubeInferenceModelRequest(t *testing.T) {
},
wantMetadata: makeMetadata("address-1:8000"),
wantBody: []byte("{\"max_tokens\":100,\"model\":\"sql-lora-1fdg2\",\"prompt\":\"test2\",\"temperature\":0}"),
wantMetrics: `
# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
# TYPE inference_model_request_total counter
inference_model_request_total{model_name="sql-lora",target_model_name="sql-lora-1fdg2"} 1
`,
wantErr: false,
},
{
name: "select no lora despite active model, avoid excessive queue size",
@@ -210,7 +231,12 @@ func TestKubeInferenceModelRequest(t *testing.T) {
},
wantMetadata: makeMetadata("address-2:8000"),
wantBody: []byte("{\"max_tokens\":100,\"model\":\"sql-lora-1fdg2\",\"prompt\":\"test3\",\"temperature\":0}"),
wantMetrics: `
# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
# TYPE inference_model_request_total counter
inference_model_request_total{model_name="sql-lora",target_model_name="sql-lora-1fdg2"} 1
`,
wantErr: false,
},
{
name: "noncritical and all models past threshold, shed request",
@@ -253,6 +279,7 @@ func TestKubeInferenceModelRequest(t *testing.T) {
Code: envoyTypePb.StatusCode_TooManyRequests,
},
},
wantMetrics: "",
},
{
name: "noncritical, but one server has capacity, do not shed",
@@ -301,7 +328,12 @@ func TestKubeInferenceModelRequest(t *testing.T) {
},
wantMetadata: makeMetadata("address-0:8000"),
wantBody: []byte("{\"max_tokens\":100,\"model\":\"sql-lora-1fdg3\",\"prompt\":\"test5\",\"temperature\":0}"),
wantMetrics: `
# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
# TYPE inference_model_request_total counter
inference_model_request_total{model_name="sql-lora-sheddable",target_model_name="sql-lora-1fdg3"} 1
`,
wantErr: false,
},
}

@@ -345,6 +377,14 @@ func TestKubeInferenceModelRequest(t *testing.T) {
if diff := cmp.Diff(want, res, protocmp.Transform()); diff != "" {
t.Errorf("Unexpected response, (-want +got): %v", diff)
}

if test.wantMetrics != "" {
if err := metricsutils.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(test.wantMetrics), "inference_model_request_total"); err != nil {
t.Error(err)
}
}

legacyregistry.Reset()
})
}
}
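
For readers unfamiliar with the k8s.io/component-base testutil helpers used above, here is a standalone sketch of the GatherAndCompare pattern. The demo_request_total counter and its label are invented for illustration and are not metrics defined in this repository.

// Standalone sketch: register a counter, increment it, then compare the registry
// contents against expected Prometheus exposition-format text.
package main

import (
	"strings"

	"k8s.io/component-base/metrics"
	"k8s.io/component-base/metrics/legacyregistry"
	metricsutils "k8s.io/component-base/metrics/testutil"
)

// An ALPHA counter: component-base prefixes its help text with "[ALPHA]" in the
// exposition output, which is why the expected text below includes it.
var demoCounter = metrics.NewCounterVec(
	&metrics.CounterOpts{
		Name:           "demo_request_total",
		Help:           "Counter of demo requests.",
		StabilityLevel: metrics.ALPHA,
	},
	[]string{"model_name"},
)

func main() {
	legacyregistry.MustRegister(demoCounter)
	demoCounter.WithLabelValues("my-model").Inc()

	want := `
# HELP demo_request_total [ALPHA] Counter of demo requests.
# TYPE demo_request_total counter
demo_request_total{model_name="my-model"} 1
`
	// GatherAndCompare gathers from the registry, parses the expected text, and
	// compares only the named metric families.
	if err := metricsutils.GatherAndCompare(
		legacyregistry.DefaultGatherer, strings.NewReader(want), "demo_request_total",
	); err != nil {
		panic(err)
	}
	// Reset between cases so counts do not leak, as the test above does.
	legacyregistry.Reset()
}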
@@ -423,6 +463,10 @@ func BeforeSuit(t *testing.T) func() {
logutil.Fatal(logger, err, "Failed to create controller manager")
}

if err := registerMetricsHandler(mgr, metricsPort); err != nil {
logutil.Fatal(logger, err, "Failed to register metrics handler")
}

serverRunner = runserver.NewDefaultExtProcServerRunner()
// Adjust from defaults
serverRunner.PoolName = "vllm-llama2-7b-pool"
@@ -543,3 +587,31 @@ func makeMetadata(endpoint string) *structpb.Struct {
},
}
}

// registerMetricsHandler is a simplified version of the metrics endpoint handler,
// without authentication, for integration tests.
func registerMetricsHandler(mgr manager.Manager, port int) error {
metrics.Register()

// Init HTTP server.
h := promhttp.HandlerFor(
legacyregistry.DefaultGatherer,
promhttp.HandlerOpts{},
)

mux := http.NewServeMux()
mux.Handle("/metrics", h)

srv := &http.Server{
Addr: net.JoinHostPort("", strconv.Itoa(port)),
Handler: mux,
}

if err := mgr.Add(&manager.Server{
Name: "metrics",
Server: srv,
}); err != nil {
return err
}
return nil
}
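
As a complementary check (not part of this PR), the handler registered above has no authentication, so the endpoint can be scraped with a plain HTTP GET. A sketch, assuming the suite's metricsPort of 8888 and the imports already present in hermetic_test.go; scrapeMetricsEndpoint is a hypothetical helper name.

// Hypothetical helper: scrape the unauthenticated test metrics endpoint and check
// that the request counter is exposed at all.
func scrapeMetricsEndpoint(t *testing.T) {
	resp, err := http.Get(fmt.Sprintf("http://localhost:%d/metrics", metricsPort))
	if err != nil {
		t.Fatalf("failed to scrape metrics endpoint: %v", err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		t.Fatalf("failed to read metrics response: %v", err)
	}
	if !strings.Contains(string(body), "inference_model_request_total") {
		t.Errorf("metrics output does not contain inference_model_request_total:\n%s", string(body))
	}
}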