From 9832ec2cedd8467d19ce5e197fa61df0aa41d002 Mon Sep 17 00:00:00 2001 From: Ondrej Kupka Date: Wed, 19 Feb 2025 17:35:54 +0100 Subject: [PATCH] Rename pkg/ext-proc to pkg/epp --- Dockerfile | 10 ++-- cmd/{ext-proc => epp}/health.go | 4 +- cmd/{ext-proc => epp}/main.go | 12 ++--- docs/dev.md | 2 +- .../003-endpoint-picker-protocol/README.md | 2 +- pkg/{ext-proc => epp}/backend/fake.go | 4 +- pkg/{ext-proc => epp}/backend/provider.go | 6 +-- .../backend/provider_test.go | 2 +- pkg/{ext-proc => epp}/backend/vllm/metrics.go | 4 +- .../backend/vllm/metrics_test.go | 4 +- .../controller/inferencemodel_reconciler.go | 4 +- .../inferencemodel_reconciler_test.go | 4 +- .../controller/inferencepool_reconciler.go | 4 +- .../inferencepool_reconciler_test.go | 4 +- .../controller/pod_reconciler.go | 4 +- .../controller/pod_reconciler_test.go | 2 +- pkg/{ext-proc => epp}/datastore/datastore.go | 2 +- .../datastore/datastore_test.go | 2 +- pkg/{ext-proc => epp}/datastore/types.go | 0 pkg/{ext-proc => epp}/handlers/request.go | 8 +-- pkg/{ext-proc => epp}/handlers/response.go | 4 +- .../handlers/response_test.go | 2 +- pkg/{ext-proc => epp}/handlers/server.go | 11 ++-- pkg/{ext-proc => epp}/metrics/README.md | 0 pkg/{ext-proc => epp}/metrics/metrics.go | 2 +- pkg/{ext-proc => epp}/metrics/metrics_test.go | 51 ++++++++++--------- .../metrics/testdata/input_tokens_metric | 0 .../metrics/testdata/kv_cache_avg_metrics | 0 .../metrics/testdata/output_tokens_metric | 0 .../metrics/testdata/queue_avg_size_metrics | 0 .../testdata/request_duration_seconds_metric | 0 .../testdata/request_error_total_metric | 0 .../metrics/testdata/request_sizes_metric | 0 .../metrics/testdata/request_total_metric | 0 .../metrics/testdata/response_sizes_metric | 0 pkg/{ext-proc => epp}/scheduling/filter.go | 4 +- .../scheduling/filter_test.go | 4 +- pkg/{ext-proc => epp}/scheduling/scheduler.go | 9 ++-- pkg/{ext-proc => epp}/scheduling/types.go | 0 pkg/{ext-proc => epp}/server/runserver.go | 10 ++-- 
.../server/runserver_test.go | 4 +- .../test/benchmark/benchmark.go | 8 +-- pkg/{ext-proc => epp}/test/utils.go | 12 ++--- pkg/{ext-proc => epp}/util/error/error.go | 0 pkg/{ext-proc => epp}/util/logging/fatal.go | 0 pkg/{ext-proc => epp}/util/logging/logger.go | 0 .../util/logging/logging_const.go | 0 .../util/testing/wrappers.go | 0 test/integration/hermetic_test.go | 12 ++--- tools/dashboards/README.md | 3 +- tools/dashboards/inference_gateway.json | 2 +- 51 files changed, 112 insertions(+), 110 deletions(-) rename cmd/{ext-proc => epp}/health.go (91%) rename cmd/{ext-proc => epp}/main.go (95%) rename pkg/{ext-proc => epp}/backend/fake.go (90%) rename pkg/{ext-proc => epp}/backend/provider.go (95%) rename pkg/{ext-proc => epp}/backend/provider_test.go (98%) rename pkg/{ext-proc => epp}/backend/vllm/metrics.go (97%) rename pkg/{ext-proc => epp}/backend/vllm/metrics_test.go (97%) rename pkg/{ext-proc => epp}/controller/inferencemodel_reconciler.go (95%) rename pkg/{ext-proc => epp}/controller/inferencemodel_reconciler_test.go (98%) rename pkg/{ext-proc => epp}/controller/inferencepool_reconciler.go (96%) rename pkg/{ext-proc => epp}/controller/inferencepool_reconciler_test.go (97%) rename pkg/{ext-proc => epp}/controller/pod_reconciler.go (95%) rename pkg/{ext-proc => epp}/controller/pod_reconciler_test.go (99%) rename pkg/{ext-proc => epp}/datastore/datastore.go (98%) rename pkg/{ext-proc => epp}/datastore/datastore_test.go (97%) rename pkg/{ext-proc => epp}/datastore/types.go (100%) rename pkg/{ext-proc => epp}/handlers/request.go (95%) rename pkg/{ext-proc => epp}/handlers/response.go (97%) rename pkg/{ext-proc => epp}/handlers/response_test.go (97%) rename pkg/{ext-proc => epp}/handlers/server.go (95%) rename pkg/{ext-proc => epp}/metrics/README.md (100%) rename pkg/{ext-proc => epp}/metrics/metrics.go (98%) rename pkg/{ext-proc => epp}/metrics/metrics_test.go (93%) rename pkg/{ext-proc => epp}/metrics/testdata/input_tokens_metric (100%) rename pkg/{ext-proc 
=> epp}/metrics/testdata/kv_cache_avg_metrics (100%) rename pkg/{ext-proc => epp}/metrics/testdata/output_tokens_metric (100%) rename pkg/{ext-proc => epp}/metrics/testdata/queue_avg_size_metrics (100%) rename pkg/{ext-proc => epp}/metrics/testdata/request_duration_seconds_metric (100%) rename pkg/{ext-proc => epp}/metrics/testdata/request_error_total_metric (100%) rename pkg/{ext-proc => epp}/metrics/testdata/request_sizes_metric (100%) rename pkg/{ext-proc => epp}/metrics/testdata/request_total_metric (100%) rename pkg/{ext-proc => epp}/metrics/testdata/response_sizes_metric (100%) rename pkg/{ext-proc => epp}/scheduling/filter.go (98%) rename pkg/{ext-proc => epp}/scheduling/filter_test.go (98%) rename pkg/{ext-proc => epp}/scheduling/scheduler.go (94%) rename pkg/{ext-proc => epp}/scheduling/types.go (100%) rename pkg/{ext-proc => epp}/server/runserver.go (95%) rename pkg/{ext-proc => epp}/server/runserver_test.go (87%) rename pkg/{ext-proc => epp}/test/benchmark/benchmark.go (93%) rename pkg/{ext-proc => epp}/test/utils.go (88%) rename pkg/{ext-proc => epp}/util/error/error.go (100%) rename pkg/{ext-proc => epp}/util/logging/fatal.go (100%) rename pkg/{ext-proc => epp}/util/logging/logger.go (100%) rename pkg/{ext-proc => epp}/util/logging/logging_const.go (100%) rename pkg/{ext-proc => epp}/util/testing/wrappers.go (100%) diff --git a/Dockerfile b/Dockerfile index 5d6f08a5..4adc82e4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ ARG BUILDER_IMAGE=golang:1.23-alpine ARG BASE_IMAGE=gcr.io/distroless/base-debian10 ## Multistage build -FROM ${BUILDER_IMAGE} as builder +FROM ${BUILDER_IMAGE} AS builder ENV CGO_ENABLED=0 ENV GOOS=linux ENV GOARCH=amd64 @@ -19,13 +19,13 @@ COPY cmd ./cmd COPY pkg ./pkg COPY internal ./internal COPY api ./api -WORKDIR /src/cmd/ext-proc -RUN go build -o /ext-proc +WORKDIR /src/cmd/epp +RUN go build -o /epp ## Multistage deploy FROM ${BASE_IMAGE} WORKDIR / -COPY --from=builder /ext-proc /ext-proc +COPY --from=builder /epp 
/epp -ENTRYPOINT ["/ext-proc"] \ No newline at end of file +ENTRYPOINT ["/epp"] diff --git a/cmd/ext-proc/health.go b/cmd/epp/health.go similarity index 91% rename from cmd/ext-proc/health.go rename to cmd/epp/health.go index 26a58df8..335c0849 100644 --- a/cmd/ext-proc/health.go +++ b/cmd/epp/health.go @@ -23,8 +23,8 @@ import ( "google.golang.org/grpc/codes" healthPb "google.golang.org/grpc/health/grpc_health_v1" "google.golang.org/grpc/status" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) type healthServer struct { diff --git a/cmd/ext-proc/main.go b/cmd/epp/main.go similarity index 95% rename from cmd/ext-proc/main.go rename to cmd/epp/main.go index 047a1fa7..a189984b 100644 --- a/cmd/ext-proc/main.go +++ b/cmd/epp/main.go @@ -41,12 +41,12 @@ import ( "sigs.k8s.io/controller-runtime/pkg/metrics/filters" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" "sigs.k8s.io/gateway-api-inference-extension/internal/runnable" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend/vllm" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/metrics" - runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/server" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/vllm" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics" + runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server" + 
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) const ( diff --git a/docs/dev.md b/docs/dev.md index 2af39668..d223ed6a 100644 --- a/docs/dev.md +++ b/docs/dev.md @@ -37,7 +37,7 @@ const( ) ``` -The guidelines are written in the context of a k8s controller. Our [ext-proc](../pkg/ext-proc/) does more things such as handling requests and scraping metrics, therefore we adapt the guidelines as follows: +The guidelines are written in the context of a k8s controller. Our [epp](../pkg/epp/) does more things such as handling requests and scraping metrics, therefore we adapt the guidelines as follows: 1. The server startup process and configuration. diff --git a/docs/proposals/003-endpoint-picker-protocol/README.md b/docs/proposals/003-endpoint-picker-protocol/README.md index 8e96a630..6876135d 100644 --- a/docs/proposals/003-endpoint-picker-protocol/README.md +++ b/docs/proposals/003-endpoint-picker-protocol/README.md @@ -2,7 +2,7 @@ The Endpoint Picker, or EPP, is a core component of the inference extension. Ultimately it's responsible for picking an endpoint from the `InferencePool`. A reference implementation can be -found [here](../../../pkg/ext-proc/). +found [here](../../../pkg/epp/). 
## Proxy Protocol diff --git a/pkg/ext-proc/backend/fake.go b/pkg/epp/backend/fake.go similarity index 90% rename from pkg/ext-proc/backend/fake.go rename to pkg/epp/backend/fake.go index 2de34c16..e81b3817 100644 --- a/pkg/ext-proc/backend/fake.go +++ b/pkg/epp/backend/fake.go @@ -22,8 +22,8 @@ import ( "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) type FakePodMetricsClient struct { diff --git a/pkg/ext-proc/backend/provider.go b/pkg/epp/backend/provider.go similarity index 95% rename from pkg/ext-proc/backend/provider.go rename to pkg/epp/backend/provider.go index 974319f7..a12f84d5 100644 --- a/pkg/ext-proc/backend/provider.go +++ b/pkg/epp/backend/provider.go @@ -25,9 +25,9 @@ import ( "github.com/go-logr/logr" "go.uber.org/multierr" "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/metrics" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) const ( diff --git a/pkg/ext-proc/backend/provider_test.go b/pkg/epp/backend/provider_test.go similarity index 98% rename from pkg/ext-proc/backend/provider_test.go rename to pkg/epp/backend/provider_test.go index 7736dd8d..1e11afe2 100644 --- a/pkg/ext-proc/backend/provider_test.go +++ b/pkg/epp/backend/provider_test.go @@ -27,7 +27,7 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" 
"github.com/stretchr/testify/assert" "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" ) var ( diff --git a/pkg/ext-proc/backend/vllm/metrics.go b/pkg/epp/backend/vllm/metrics.go similarity index 97% rename from pkg/ext-proc/backend/vllm/metrics.go rename to pkg/epp/backend/vllm/metrics.go index 59a132c8..8648e24c 100644 --- a/pkg/ext-proc/backend/vllm/metrics.go +++ b/pkg/epp/backend/vllm/metrics.go @@ -30,8 +30,8 @@ import ( "github.com/prometheus/common/expfmt" "go.uber.org/multierr" "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) const ( diff --git a/pkg/ext-proc/backend/vllm/metrics_test.go b/pkg/epp/backend/vllm/metrics_test.go similarity index 97% rename from pkg/ext-proc/backend/vllm/metrics_test.go rename to pkg/epp/backend/vllm/metrics_test.go index 1c9d5448..12aac1a1 100644 --- a/pkg/ext-proc/backend/vllm/metrics_test.go +++ b/pkg/epp/backend/vllm/metrics_test.go @@ -23,8 +23,8 @@ import ( dto "github.com/prometheus/client_model/go" "github.com/stretchr/testify/assert" "google.golang.org/protobuf/proto" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) func TestPromToPodMetrics(t *testing.T) { diff --git a/pkg/ext-proc/controller/inferencemodel_reconciler.go b/pkg/epp/controller/inferencemodel_reconciler.go similarity index 95% rename from pkg/ext-proc/controller/inferencemodel_reconciler.go rename to 
pkg/epp/controller/inferencemodel_reconciler.go index cca05fce..99a1eb26 100644 --- a/pkg/ext-proc/controller/inferencemodel_reconciler.go +++ b/pkg/epp/controller/inferencemodel_reconciler.go @@ -28,8 +28,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) type InferenceModelReconciler struct { diff --git a/pkg/ext-proc/controller/inferencemodel_reconciler_test.go b/pkg/epp/controller/inferencemodel_reconciler_test.go similarity index 98% rename from pkg/ext-proc/controller/inferencemodel_reconciler_test.go rename to pkg/epp/controller/inferencemodel_reconciler_test.go index 583f5f75..cf94b168 100644 --- a/pkg/ext-proc/controller/inferencemodel_reconciler_test.go +++ b/pkg/epp/controller/inferencemodel_reconciler_test.go @@ -29,8 +29,8 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) var ( diff --git a/pkg/ext-proc/controller/inferencepool_reconciler.go b/pkg/epp/controller/inferencepool_reconciler.go similarity index 96% rename from pkg/ext-proc/controller/inferencepool_reconciler.go rename to pkg/epp/controller/inferencepool_reconciler.go index b2cd01c0..f2c56991 100644 --- a/pkg/ext-proc/controller/inferencepool_reconciler.go +++ 
b/pkg/epp/controller/inferencepool_reconciler.go @@ -28,8 +28,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) // InferencePoolReconciler utilizes the controller runtime to reconcile Instance Gateway resources diff --git a/pkg/ext-proc/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go similarity index 97% rename from pkg/ext-proc/controller/inferencepool_reconciler_test.go rename to pkg/epp/controller/inferencepool_reconciler_test.go index 925cb236..6263fa16 100644 --- a/pkg/ext-proc/controller/inferencepool_reconciler_test.go +++ b/pkg/epp/controller/inferencepool_reconciler_test.go @@ -31,8 +31,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - utiltesting "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/testing" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + utiltesting "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing" ) var ( diff --git a/pkg/ext-proc/controller/pod_reconciler.go b/pkg/epp/controller/pod_reconciler.go similarity index 95% rename from pkg/ext-proc/controller/pod_reconciler.go rename to pkg/epp/controller/pod_reconciler.go index 871e1da5..5b0c25c9 100644 --- a/pkg/ext-proc/controller/pod_reconciler.go +++ b/pkg/epp/controller/pod_reconciler.go @@ -28,8 +28,8 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" 
"sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) type PodReconciler struct { diff --git a/pkg/ext-proc/controller/pod_reconciler_test.go b/pkg/epp/controller/pod_reconciler_test.go similarity index 99% rename from pkg/ext-proc/controller/pod_reconciler_test.go rename to pkg/epp/controller/pod_reconciler_test.go index c87ee54d..b3869113 100644 --- a/pkg/ext-proc/controller/pod_reconciler_test.go +++ b/pkg/epp/controller/pod_reconciler_test.go @@ -32,7 +32,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" ) var ( diff --git a/pkg/ext-proc/datastore/datastore.go b/pkg/epp/datastore/datastore.go similarity index 98% rename from pkg/ext-proc/datastore/datastore.go rename to pkg/epp/datastore/datastore.go index 60236496..eecea59c 100644 --- a/pkg/ext-proc/datastore/datastore.go +++ b/pkg/epp/datastore/datastore.go @@ -29,7 +29,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) // The datastore is a local cache of relevant data for the given InferencePool (currently all pulled from k8s-api) diff --git a/pkg/ext-proc/datastore/datastore_test.go b/pkg/epp/datastore/datastore_test.go similarity index 97% rename from pkg/ext-proc/datastore/datastore_test.go rename to 
pkg/epp/datastore/datastore_test.go index f32d8d77..bd5c5020 100644 --- a/pkg/ext-proc/datastore/datastore_test.go +++ b/pkg/epp/datastore/datastore_test.go @@ -21,7 +21,7 @@ import ( v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) func TestHasSynced(t *testing.T) { diff --git a/pkg/ext-proc/datastore/types.go b/pkg/epp/datastore/types.go similarity index 100% rename from pkg/ext-proc/datastore/types.go rename to pkg/epp/datastore/types.go diff --git a/pkg/ext-proc/handlers/request.go b/pkg/epp/handlers/request.go similarity index 95% rename from pkg/ext-proc/handlers/request.go rename to pkg/epp/handlers/request.go index 34db206d..b9ffd0b0 100644 --- a/pkg/ext-proc/handlers/request.go +++ b/pkg/epp/handlers/request.go @@ -26,10 +26,10 @@ import ( extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" "google.golang.org/protobuf/types/known/structpb" "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/scheduling" - errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling" + errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/error" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) // HandleRequestBody handles body of the request to the backend server, such as parsing the "model" diff --git a/pkg/ext-proc/handlers/response.go b/pkg/epp/handlers/response.go similarity index 97% rename from pkg/ext-proc/handlers/response.go rename to 
pkg/epp/handlers/response.go index ed3082c5..f9396acf 100644 --- a/pkg/ext-proc/handlers/response.go +++ b/pkg/epp/handlers/response.go @@ -24,8 +24,8 @@ import ( configPb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" "sigs.k8s.io/controller-runtime/pkg/log" - errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/error" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) // HandleResponseHeaders processes response headers from the backend model server. diff --git a/pkg/ext-proc/handlers/response_test.go b/pkg/epp/handlers/response_test.go similarity index 97% rename from pkg/ext-proc/handlers/response_test.go rename to pkg/epp/handlers/response_test.go index dbb7e700..01f02d09 100644 --- a/pkg/ext-proc/handlers/response_test.go +++ b/pkg/epp/handlers/response_test.go @@ -22,7 +22,7 @@ import ( extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" "github.com/google/go-cmp/cmp" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) const ( diff --git a/pkg/ext-proc/handlers/server.go b/pkg/epp/handlers/server.go similarity index 95% rename from pkg/ext-proc/handlers/server.go rename to pkg/epp/handlers/server.go index 506eaa97..2c61118c 100644 --- a/pkg/ext-proc/handlers/server.go +++ b/pkg/epp/handlers/server.go @@ -27,11 +27,11 @@ import ( "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/metrics" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/scheduling" 
- errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling" + errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/error" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) func NewServer(scheduler Scheduler, targetEndpointKey string, datastore datastore.Datastore) *Server { @@ -185,7 +185,6 @@ func (s *Server) Process(srv extProcPb.ExternalProcessor_ProcessServer) error { return status.Errorf(codes.Unknown, "failed to send response back to Envoy: %v", err) } } - } // RequestContext stores context information during the life time of an HTTP request. diff --git a/pkg/ext-proc/metrics/README.md b/pkg/epp/metrics/README.md similarity index 100% rename from pkg/ext-proc/metrics/README.md rename to pkg/epp/metrics/README.md diff --git a/pkg/ext-proc/metrics/metrics.go b/pkg/epp/metrics/metrics.go similarity index 98% rename from pkg/ext-proc/metrics/metrics.go rename to pkg/epp/metrics/metrics.go index cc21d531..e86ca901 100644 --- a/pkg/ext-proc/metrics/metrics.go +++ b/pkg/epp/metrics/metrics.go @@ -24,7 +24,7 @@ import ( compbasemetrics "k8s.io/component-base/metrics" "k8s.io/component-base/metrics/legacyregistry" "sigs.k8s.io/controller-runtime/pkg/log" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) const ( diff --git a/pkg/ext-proc/metrics/metrics_test.go b/pkg/epp/metrics/metrics_test.go similarity index 93% rename from pkg/ext-proc/metrics/metrics_test.go rename to pkg/epp/metrics/metrics_test.go index 2e891066..c2436bab 100644 --- a/pkg/ext-proc/metrics/metrics_test.go +++ b/pkg/epp/metrics/metrics_test.go @@ -24,8 
+24,8 @@ import ( "k8s.io/component-base/metrics/legacyregistry" "k8s.io/component-base/metrics/testutil" - errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/error" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) const ( @@ -119,31 +119,32 @@ func TestRecordRequestErrorCounter(t *testing.T) { name string reqs []requests invalid bool - }{{ - name: "multiple requests", - reqs: []requests{ - { - modelName: "m10", - targetModelName: "t10", - error: errutil.Internal, - }, - { - modelName: "m10", - targetModelName: "t10", - error: errutil.Internal, - }, - { - modelName: "m10", - targetModelName: "t11", - error: errutil.ModelServerError, - }, - { - modelName: "m20", - targetModelName: "t20", - error: errutil.InferencePoolResourceExhausted, + }{ + { + name: "multiple requests", + reqs: []requests{ + { + modelName: "m10", + targetModelName: "t10", + error: errutil.Internal, + }, + { + modelName: "m10", + targetModelName: "t10", + error: errutil.Internal, + }, + { + modelName: "m10", + targetModelName: "t11", + error: errutil.ModelServerError, + }, + { + modelName: "m20", + targetModelName: "t20", + error: errutil.InferencePoolResourceExhausted, + }, }, }, - }, } Register() for _, scenario := range scenarios { diff --git a/pkg/ext-proc/metrics/testdata/input_tokens_metric b/pkg/epp/metrics/testdata/input_tokens_metric similarity index 100% rename from pkg/ext-proc/metrics/testdata/input_tokens_metric rename to pkg/epp/metrics/testdata/input_tokens_metric diff --git a/pkg/ext-proc/metrics/testdata/kv_cache_avg_metrics b/pkg/epp/metrics/testdata/kv_cache_avg_metrics similarity index 100% rename from pkg/ext-proc/metrics/testdata/kv_cache_avg_metrics rename to pkg/epp/metrics/testdata/kv_cache_avg_metrics diff --git a/pkg/ext-proc/metrics/testdata/output_tokens_metric 
b/pkg/epp/metrics/testdata/output_tokens_metric similarity index 100% rename from pkg/ext-proc/metrics/testdata/output_tokens_metric rename to pkg/epp/metrics/testdata/output_tokens_metric diff --git a/pkg/ext-proc/metrics/testdata/queue_avg_size_metrics b/pkg/epp/metrics/testdata/queue_avg_size_metrics similarity index 100% rename from pkg/ext-proc/metrics/testdata/queue_avg_size_metrics rename to pkg/epp/metrics/testdata/queue_avg_size_metrics diff --git a/pkg/ext-proc/metrics/testdata/request_duration_seconds_metric b/pkg/epp/metrics/testdata/request_duration_seconds_metric similarity index 100% rename from pkg/ext-proc/metrics/testdata/request_duration_seconds_metric rename to pkg/epp/metrics/testdata/request_duration_seconds_metric diff --git a/pkg/ext-proc/metrics/testdata/request_error_total_metric b/pkg/epp/metrics/testdata/request_error_total_metric similarity index 100% rename from pkg/ext-proc/metrics/testdata/request_error_total_metric rename to pkg/epp/metrics/testdata/request_error_total_metric diff --git a/pkg/ext-proc/metrics/testdata/request_sizes_metric b/pkg/epp/metrics/testdata/request_sizes_metric similarity index 100% rename from pkg/ext-proc/metrics/testdata/request_sizes_metric rename to pkg/epp/metrics/testdata/request_sizes_metric diff --git a/pkg/ext-proc/metrics/testdata/request_total_metric b/pkg/epp/metrics/testdata/request_total_metric similarity index 100% rename from pkg/ext-proc/metrics/testdata/request_total_metric rename to pkg/epp/metrics/testdata/request_total_metric diff --git a/pkg/ext-proc/metrics/testdata/response_sizes_metric b/pkg/epp/metrics/testdata/response_sizes_metric similarity index 100% rename from pkg/ext-proc/metrics/testdata/response_sizes_metric rename to pkg/epp/metrics/testdata/response_sizes_metric diff --git a/pkg/ext-proc/scheduling/filter.go b/pkg/epp/scheduling/filter.go similarity index 98% rename from pkg/ext-proc/scheduling/filter.go rename to pkg/epp/scheduling/filter.go index 36691a73..b7881468 
100644 --- a/pkg/ext-proc/scheduling/filter.go +++ b/pkg/epp/scheduling/filter.go @@ -21,8 +21,8 @@ import ( "math" "github.com/go-logr/logr" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) type Filter interface { diff --git a/pkg/ext-proc/scheduling/filter_test.go b/pkg/epp/scheduling/filter_test.go similarity index 98% rename from pkg/ext-proc/scheduling/filter_test.go rename to pkg/epp/scheduling/filter_test.go index 01909fea..ac765b78 100644 --- a/pkg/ext-proc/scheduling/filter_test.go +++ b/pkg/epp/scheduling/filter_test.go @@ -23,8 +23,8 @@ import ( "github.com/go-logr/logr" "github.com/google/go-cmp/cmp" "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) func TestFilter(t *testing.T) { diff --git a/pkg/ext-proc/scheduling/scheduler.go b/pkg/epp/scheduling/scheduler.go similarity index 94% rename from pkg/ext-proc/scheduling/scheduler.go rename to pkg/epp/scheduling/scheduler.go index b5f2f4f2..a969948e 100644 --- a/pkg/ext-proc/scheduling/scheduler.go +++ b/pkg/epp/scheduling/scheduler.go @@ -24,9 +24,9 @@ import ( "github.com/go-logr/logr" "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + errutil 
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/error" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) const ( @@ -102,7 +102,8 @@ var ( filter: func(logger logr.Logger, req *LLMRequest, pods []*datastore.PodMetrics) ([]*datastore.PodMetrics, error) { logger.V(logutil.DEFAULT).Info("Request dropped", "request", req) return []*datastore.PodMetrics{}, errutil.Error{ - Code: errutil.InferencePoolResourceExhausted, Msg: "dropping request due to limited backend resources"} + Code: errutil.InferencePoolResourceExhausted, Msg: "dropping request due to limited backend resources", + } }, }, } diff --git a/pkg/ext-proc/scheduling/types.go b/pkg/epp/scheduling/types.go similarity index 100% rename from pkg/ext-proc/scheduling/types.go rename to pkg/epp/scheduling/types.go diff --git a/pkg/ext-proc/server/runserver.go b/pkg/epp/server/runserver.go similarity index 95% rename from pkg/ext-proc/server/runserver.go rename to pkg/epp/server/runserver.go index 795b242d..92b7be7f 100644 --- a/pkg/ext-proc/server/runserver.go +++ b/pkg/epp/server/runserver.go @@ -36,11 +36,11 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/gateway-api-inference-extension/internal/runnable" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/controller" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/handlers" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/scheduling" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/controller" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/handlers" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling" ) // ExtProcServerRunner provides methods 
to manage an external process server. diff --git a/pkg/ext-proc/server/runserver_test.go b/pkg/epp/server/runserver_test.go similarity index 87% rename from pkg/ext-proc/server/runserver_test.go rename to pkg/epp/server/runserver_test.go index 438dc096..b02688c5 100644 --- a/pkg/ext-proc/server/runserver_test.go +++ b/pkg/epp/server/runserver_test.go @@ -21,8 +21,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/manager" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/server" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) func TestRunnable(t *testing.T) { diff --git a/pkg/ext-proc/test/benchmark/benchmark.go b/pkg/epp/test/benchmark/benchmark.go similarity index 93% rename from pkg/ext-proc/test/benchmark/benchmark.go rename to pkg/epp/test/benchmark/benchmark.go index dc06a27a..10987b47 100644 --- a/pkg/ext-proc/test/benchmark/benchmark.go +++ b/pkg/epp/test/benchmark/benchmark.go @@ -32,10 +32,10 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/server" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/test" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/test" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) var ( diff --git a/pkg/ext-proc/test/utils.go b/pkg/epp/test/utils.go similarity index 88% rename from pkg/ext-proc/test/utils.go rename to pkg/epp/test/utils.go 
index ef83c932..f82084d9 100644 --- a/pkg/ext-proc/test/utils.go +++ b/pkg/epp/test/utils.go @@ -30,12 +30,12 @@ import ( "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/handlers" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/scheduling" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" - utiltesting "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/testing" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/handlers" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" + utiltesting "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing" ) func StartExtProc( diff --git a/pkg/ext-proc/util/error/error.go b/pkg/epp/util/error/error.go similarity index 100% rename from pkg/ext-proc/util/error/error.go rename to pkg/epp/util/error/error.go diff --git a/pkg/ext-proc/util/logging/fatal.go b/pkg/epp/util/logging/fatal.go similarity index 100% rename from pkg/ext-proc/util/logging/fatal.go rename to pkg/epp/util/logging/fatal.go diff --git a/pkg/ext-proc/util/logging/logger.go b/pkg/epp/util/logging/logger.go similarity index 100% rename from pkg/ext-proc/util/logging/logger.go rename to pkg/epp/util/logging/logger.go diff --git a/pkg/ext-proc/util/logging/logging_const.go b/pkg/epp/util/logging/logging_const.go similarity index 100% rename from pkg/ext-proc/util/logging/logging_const.go rename to pkg/epp/util/logging/logging_const.go diff --git a/pkg/ext-proc/util/testing/wrappers.go 
b/pkg/epp/util/testing/wrappers.go similarity index 100% rename from pkg/ext-proc/util/testing/wrappers.go rename to pkg/epp/util/testing/wrappers.go diff --git a/test/integration/hermetic_test.go b/test/integration/hermetic_test.go index 18efe7bf..eb2ca40e 100644 --- a/test/integration/hermetic_test.go +++ b/test/integration/hermetic_test.go @@ -47,12 +47,12 @@ import ( k8sclient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/envtest" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/server" - extprocutils "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/test" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" - utiltesting "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/testing" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server" + extprocutils "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/test" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" + utiltesting "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing" "sigs.k8s.io/yaml" ) diff --git a/tools/dashboards/README.md b/tools/dashboards/README.md index c8258b63..7be2a5b8 100644 --- a/tools/dashboards/README.md +++ b/tools/dashboards/README.md @@ -4,7 +4,7 @@ This documentation provides instructions for setting up grafana dashboards to se ## Requirements -Please follow [metrics](https://github.com/kubernetes-sigs/gateway-api-inference-extension/tree/main/pkg/ext-proc/metrics) page to configure the proxy to enable all metrics. 
+Please follow [metrics](https://github.com/kubernetes-sigs/gateway-api-inference-extension/tree/main/pkg/epp/metrics) page to configure the proxy to enable all metrics. ## Load Inference Extension dashboard into Grafana @@ -21,6 +21,7 @@ If you run the inferece gateway with [Google Managed Prometheus](https://cloud.g Please configure the `scrape_interval` of your prometheus configuration to lower than `15s`, `rate` function returns empty string if data falls too apart. See https://www.robustperception.io/what-range-should-i-use-with-rate/ for more details. Example: + ``` global: scrape_interval: 5s diff --git a/tools/dashboards/inference_gateway.json b/tools/dashboards/inference_gateway.json index 3af66703..4e872739 100644 --- a/tools/dashboards/inference_gateway.json +++ b/tools/dashboards/inference_gateway.json @@ -39,7 +39,7 @@ "showLineNumbers": false, "showMiniMap": false }, - "content": "# Inferece Gateway Dashboard\n\nPlease see https://github.com/kubernetes-sigs/gateway-api-inference-extension/tree/main/pkg/ext-proc/metrics for more details of underlying metrics used in the dashboard.", + "content": "# Inference Gateway Dashboard\n\nPlease see https://github.com/kubernetes-sigs/gateway-api-inference-extension/tree/main/pkg/epp/metrics for more details of underlying metrics used in the dashboard.", "mode": "markdown" }, "pluginVersion": "11.5.0",