From 868a8617f573ffe95cae340e49cd75330542d1eb Mon Sep 17 00:00:00 2001 From: Kellen Swain Date: Wed, 9 Oct 2024 17:43:06 +0000 Subject: [PATCH 1/8] moving all yaml to default namespace --- examples/poc/manifests/installation.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/poc/manifests/installation.yaml b/examples/poc/manifests/installation.yaml index 57ecd185..d6620c20 100644 --- a/examples/poc/manifests/installation.yaml +++ b/examples/poc/manifests/installation.yaml @@ -8,7 +8,7 @@ apiVersion: gateway.envoyproxy.io/v1alpha1 kind: EnvoyProxy metadata: name: llm-route-envoy-config - namespace: llm-gateway + namespace: default spec: provider: type: Kubernetes @@ -103,14 +103,14 @@ spec: group: gateway.envoyproxy.io kind: EnvoyProxy name: llm-route-envoy-config - namespace: llm-gateway + namespace: default --- apiVersion: apps/v1 kind: Deployment metadata: name: llm-route-ext-proc - namespace: llm-gateway + namespace: default labels: app: llm-route-ext-proc spec: @@ -144,7 +144,7 @@ apiVersion: v1 kind: Service metadata: name: llm-route-ext-proc - namespace: llm-gateway + namespace: default spec: selector: app: llm-route-ext-proc From 9fa80a96c5d8bef89396948dc8ca8c1229bcf018 Mon Sep 17 00:00:00 2001 From: Kellen Swain Date: Wed, 9 Oct 2024 17:55:30 +0000 Subject: [PATCH 2/8] adding new files --- .../poc/manifests/enable_patch_policy.yaml | 25 +++++ examples/poc/manifests/installation.yaml | 94 ------------------- examples/poc/manifests/patch_policy.yaml | 78 +++++++++++++++ 3 files changed, 103 insertions(+), 94 deletions(-) create mode 100644 examples/poc/manifests/enable_patch_policy.yaml create mode 100644 examples/poc/manifests/patch_policy.yaml diff --git a/examples/poc/manifests/enable_patch_policy.yaml b/examples/poc/manifests/enable_patch_policy.yaml new file mode 100644 index 00000000..a3bc6160 --- /dev/null +++ b/examples/poc/manifests/enable_patch_policy.yaml @@ -0,0 +1,25 @@ +apiVersion: v1 +kind: ConfigMap 
+metadata: + name: envoy-gateway-config + namespace: envoy-gateway-system +data: +# This manifest's main purpose is to set `enableEnvoyPatchPolicy` to `true`. +# Any field under `admin` is optional, and only for enabling the admin endpoints, for debugging. +# Admin Interface: https://www.envoyproxy.io/docs/envoy/latest/operations/admin +# PatchPolicy docs: https://gateway.envoyproxy.io/docs/tasks/extensibility/envoy-patch-policy/#enable-envoypatchpolicy + envoy-gateway.yaml: | + apiVersion: gateway.envoyproxy.io/v1alpha1 + kind: EnvoyGateway + provider: + type: Kubernetes + gateway: + controllerName: gateway.envoyproxy.io/gatewayclass-controller + extensionApis: + enableEnvoyPatchPolicy: true + admin: + enablePprof: true + address: + host: 127.0.0.1 + port: 19000 + enabledDumpConfig: true diff --git a/examples/poc/manifests/installation.yaml b/examples/poc/manifests/installation.yaml index d6620c20..0fa0d308 100644 --- a/examples/poc/manifests/installation.yaml +++ b/examples/poc/manifests/installation.yaml @@ -1,97 +1,3 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: llm-gateway - ---- -apiVersion: gateway.envoyproxy.io/v1alpha1 -kind: EnvoyProxy -metadata: - name: llm-route-envoy-config - namespace: default -spec: - provider: - type: Kubernetes - kubernetes: - envoyService: - patch: - type: StrategicMerge - value: - spec: - ports: - - name: http-8081 - port: 8081 - protocol: TCP - targetPort: 8081 - bootstrap: - type: Merge - value: | - static_resources: - listeners: - - name: listener_0 - address: - socket_address: - address: 0.0.0.0 - port_value: 8081 - filter_chains: - - filters: - - name: envoy.filters.network.http_connection_manager - typed_config: - "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager - stat_prefix: http - codec_type: AUTO - route_config: - name: local_route - virtual_hosts: - - name: backend - domains: ["*"] - routes: - - match: - prefix: "/" - route: - cluster: 
original_destination_cluster - timeout: 1000s # Increase route timeout - http_filters: - - name: envoy.filters.http.ext_proc - typed_config: - "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor - failure_mode_allow: false - grpc_service: - envoy_grpc: - cluster_name: ext_proc_cluster - processing_mode: - request_header_mode: "SEND" - response_header_mode: "SEND" - request_body_mode: "BUFFERED" - response_body_mode: "NONE" - request_trailer_mode: "SKIP" - response_trailer_mode: "SKIP" - - name: envoy.filters.http.router - typed_config: - "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router - clusters: - - name: original_destination_cluster - type: ORIGINAL_DST - original_dst_lb_config: - use_http_header: true - http_header_name: "target-pod" - connect_timeout: 6s - lb_policy: CLUSTER_PROVIDED - dns_lookup_family: V4_ONLY - - name: ext_proc_cluster - connect_timeout: 1000s - type: LOGICAL_DNS - http2_protocol_options: {} - lb_policy: ROUND_ROBIN - load_assignment: - cluster_name: ext_proc_cluster - endpoints: - - lb_endpoints: - - endpoint: - address: - socket_address: - address: llm-route-ext-proc.llm-gateway.svc.cluster.local - port_value: 9002 --- apiVersion: gateway.networking.k8s.io/v1 kind: GatewayClass diff --git a/examples/poc/manifests/patch_policy.yaml b/examples/poc/manifests/patch_policy.yaml new file mode 100644 index 00000000..1ff0b74d --- /dev/null +++ b/examples/poc/manifests/patch_policy.yaml @@ -0,0 +1,78 @@ +apiVersion: gateway.envoyproxy.io/v1alpha1 +kind: EnvoyPatchPolicy +metadata: + name: custom-response-patch-policy + namespace: default +spec: + targetRef: + group: gateway.networking.k8s.io + kind: Gateway + name: inference-gateway + type: JSONPatch + jsonPatches: + # Necessary to create a cluster of the type: ORIGINAL_DST to allow for + # direct pod scheduling. Which is heavily utilized in our scheduling. 
+ # Specifically the field `original_dst_lb_config` allows us to enable + # `use_http_header` and `http_header_name`. + # Source: https://www.envoyproxy.io/docs/envoy/latest/api-v3/config/cluster/v3/cluster.proto + - type: "type.googleapis.com/envoy.config.cluster.v3.Cluster" + name: original_destination_cluster + operation: + op: add + path: "" + value: + name: original_destination_cluster + type: ORIGINAL_DST + original_dst_lb_config: + use_http_header: true + http_header_name: "target-pod" + connect_timeout: 6s + lb_policy: CLUSTER_PROVIDED + dns_lookup_family: V4_ONLY + + # The listener is required to route requests to the original destination + # cluster we just made. + - type: "type.googleapis.com/envoy.config.listener.v3.Listener" + # The listener name is of the form // + name: default/inference-gateway/http + operation: + op: add + path: "/filter_chains" + value: + - filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + stat_prefix: http + codec_type: AUTO + route_config: + name: local_route + virtual_hosts: + - name: backend + domains: ["*"] + routes: + - match: + prefix: "/" + route: + cluster: original_destination_cluster + timeout: 10s + http_filters: + - name: envoy.filters.http.ext_proc + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor + failure_mode_allow: false + grpc_service: + envoy_grpc: + # This is the cluster name as created by the EnvoyExtensionPolicy + # Name is of the form /// + cluster_name: envoyextensionpolicy/default/ext-proc-policy/0 + processing_mode: + request_header_mode: "SEND" + response_header_mode: "SEND" + request_body_mode: "BUFFERED" + response_body_mode: "NONE" + request_trailer_mode: "SKIP" + response_trailer_mode: "SKIP" + - name: envoy.filters.http.router + typed_config: + "@type": 
type.googleapis.com/envoy.extensions.filters.http.router.v3.Router \ No newline at end of file From c3571bd4812dd67ececadcc56b8fb22cebc787de Mon Sep 17 00:00:00 2001 From: Kellen Swain Date: Wed, 9 Oct 2024 20:13:37 +0000 Subject: [PATCH 3/8] small updates --- examples/poc/README.md | 2 +- examples/poc/manifests/installation.yaml | 23 ++++++++++++++++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/examples/poc/README.md b/examples/poc/README.md index 6c1cbdc9..96ee1f16 100644 --- a/examples/poc/README.md +++ b/examples/poc/README.md @@ -25,7 +25,7 @@ This project sets up an Envoy gateway with a custom external processing which im ``` 2. **Install GatewayClass with Ext Proc** - A custom GatewayClass `llm-gateway` which is configured with the llm routing ext proc will be installed into the `llm-gateway` namespace. It's configured to listen on port 8081 for traffic through ext-proc (in addition to the default 8080), see the `EnvoyProxy` configuration in `installation.yaml`. When you create Gateways, make sure the `llm-gateway` GatewayClass is used. + A custom GatewayClass `llm-gateway` which is configured with the llm routing ext proc will be installed. It's configured to listen on port 8081 for traffic through ext-proc (in addition to the default 8080), see the `EnvoyExtensionPolicy` configuration in `installation.yaml`. When you create Gateways, make sure the `llm-gateway` GatewayClass is used. NOTE: Ensure the `llm-route-ext-proc` deployment is updated with the pod names and internal IP addresses of the vLLM replicas. This step is crucial for the correct routing of requests based on headers. This won't be needed once we make ext proc dynamically read the pods. 
diff --git a/examples/poc/manifests/installation.yaml b/examples/poc/manifests/installation.yaml index 0fa0d308..f1795d0b 100644 --- a/examples/poc/manifests/installation.yaml +++ b/examples/poc/manifests/installation.yaml @@ -10,7 +10,28 @@ spec: kind: EnvoyProxy name: llm-route-envoy-config namespace: default - +--- +apiVersion: gateway.envoyproxy.io/v1alpha1 +kind: EnvoyExtensionPolicy +metadata: + name: ext-proc-policy + namespace: default +spec: + extProc: + - backendRefs: + - group: "" + kind: Service + name: grpc-server-service + port: 9002 + processingMode: + request: + body: Buffered + response: + messageTimeout: 5s + targetRef: + group: gateway.networking.k8s.io + kind: Gateway + name: inference-gateway --- apiVersion: apps/v1 kind: Deployment From c46a4966ed447890b79063887fc84ae5eead41a8 Mon Sep 17 00:00:00 2001 From: Kellen Swain Date: Wed, 9 Oct 2024 21:13:39 +0000 Subject: [PATCH 4/8] Update PoC to support Envoy best Practices --- examples/poc/README.md | 30 +++++--- .../{ => gateway}/enable_patch_policy.yaml | 12 ++-- .../ext_proc.yaml} | 71 ++++++++----------- .../{samples => gateway}/gateway.yaml | 8 +++ .../manifests/{ => gateway}/patch_policy.yaml | 0 .../vllm-lora-deployment.yaml | 0 .../{samples => vllm}/vllm-lora-service.yaml | 0 7 files changed, 64 insertions(+), 57 deletions(-) rename examples/poc/manifests/{ => gateway}/enable_patch_policy.yaml (84%) rename examples/poc/manifests/{installation.yaml => gateway/ext_proc.yaml} (68%) rename examples/poc/manifests/{samples => gateway}/gateway.yaml (53%) rename examples/poc/manifests/{ => gateway}/patch_policy.yaml (100%) rename examples/poc/manifests/{samples => vllm}/vllm-lora-deployment.yaml (100%) rename examples/poc/manifests/{samples => vllm}/vllm-lora-service.yaml (100%) diff --git a/examples/poc/README.md b/examples/poc/README.md index 96ee1f16..460ba1da 100644 --- a/examples/poc/README.md +++ b/examples/poc/README.md @@ -17,29 +17,41 @@ This project sets up an Envoy gateway with a custom 
external processing which im ### Steps 1. **Deploy Sample vLLM Application** + NOTE: Create a HuggingFace API token and store it in a secret named `hf-token` with key `token`. This is configured in the `HUGGING_FACE_HUB_TOKEN` and `HF_TOKEN` environment variables in `./manifests/samples/vllm-lora-deployment.yaml`. ```bash - kubectl apply -f ./manifests/samples/vllm-lora-deployment.yaml - kubectl apply -f ./manifests/samples/vllm-lora-service.yaml + kubectl apply -f ./manifests/vllm/vllm-lora-deployment.yaml + kubectl apply -f ./manifests/vllm/vllm-lora-service.yaml ``` -2. **Install GatewayClass with Ext Proc** - A custom GatewayClass `llm-gateway` which is configured with the llm routing ext proc will be installed. It's configured to listen on port 8081 for traffic through ext-proc (in addition to the default 8080), see the `EnvoyExtensionPolicy` configuration in `installation.yaml`. When you create Gateways, make sure the `llm-gateway` GatewayClass is used. +1. **Update Envoy Gateway Config to enable Patch Policy** + + Our custom LLM Gateway ext-proc is patched into the existing envoy gateway via `EnvoyPatchPolicy`. To enable this feature, we must extend the Envoy Gateway config map. To do this, simply run: + ```bash + kubectl apply -f ./manifests/gateway/enable_patch_policy.yaml + kubectl rollout restart deployment envoy-gateway -n envoy-gateway-system - NOTE: Ensure the `llm-route-ext-proc` deployment is updated with the pod names and internal IP addresses of the vLLM replicas. This step is crucial for the correct routing of requests based on headers. This won't be needed once we make ext proc dynamically read the pods. + ``` + Additionally, if you would like the enable the admin interface, you can uncomment the admin lines and run this again. + + +1. **Deploy Gateway** ```bash - kubectl apply -f ./manifests/installation.yaml + kubectl apply -f ./manifests/gateway/gateway.yaml ``` -3. **Deploy Gateway** +1. 
**Deploy Ext-Proc** ```bash - kubectl apply -f ./manifests/samples/gateway.yaml + kubectl apply -f ./manifests/gateway/ext_proc.yaml + kubectl apply -f ./manifests/gateway/patch_policy.yaml ``` + **NOTE**: Ensure the `instance-gateway-ext-proc` deployment is updated with the pod names and internal IP addresses of the vLLM replicas. This step is crucial for the correct routing of requests based on headers. This won't be needed once we make ext proc dynamically read the pods. + +1. **Try it out** -4. **Try it out** Wait until the gateway is ready. ```bash diff --git a/examples/poc/manifests/enable_patch_policy.yaml b/examples/poc/manifests/gateway/enable_patch_policy.yaml similarity index 84% rename from examples/poc/manifests/enable_patch_policy.yaml rename to examples/poc/manifests/gateway/enable_patch_policy.yaml index a3bc6160..2b72697b 100644 --- a/examples/poc/manifests/enable_patch_policy.yaml +++ b/examples/poc/manifests/gateway/enable_patch_policy.yaml @@ -17,9 +17,9 @@ data: controllerName: gateway.envoyproxy.io/gatewayclass-controller extensionApis: enableEnvoyPatchPolicy: true - admin: - enablePprof: true - address: - host: 127.0.0.1 - port: 19000 - enabledDumpConfig: true +# admin: +# enablePprof: true +# address: +# host: 127.0.0.1 +# port: 19000 +# enabledDumpConfig: true diff --git a/examples/poc/manifests/installation.yaml b/examples/poc/manifests/gateway/ext_proc.yaml similarity index 68% rename from examples/poc/manifests/installation.yaml rename to examples/poc/manifests/gateway/ext_proc.yaml index f1795d0b..ff956640 100644 --- a/examples/poc/manifests/installation.yaml +++ b/examples/poc/manifests/gateway/ext_proc.yaml @@ -1,57 +1,22 @@ ---- -apiVersion: gateway.networking.k8s.io/v1 -kind: GatewayClass -metadata: - name: llm-gateway -spec: - controllerName: gateway.envoyproxy.io/gatewayclass-controller - parametersRef: - group: gateway.envoyproxy.io - kind: EnvoyProxy - name: llm-route-envoy-config - namespace: default ---- -apiVersion: 
gateway.envoyproxy.io/v1alpha1 -kind: EnvoyExtensionPolicy -metadata: - name: ext-proc-policy - namespace: default -spec: - extProc: - - backendRefs: - - group: "" - kind: Service - name: grpc-server-service - port: 9002 - processingMode: - request: - body: Buffered - response: - messageTimeout: 5s - targetRef: - group: gateway.networking.k8s.io - kind: Gateway - name: inference-gateway ---- apiVersion: apps/v1 kind: Deployment metadata: - name: llm-route-ext-proc + name: instance-gateway-ext-proc namespace: default labels: - app: llm-route-ext-proc + app: instance-gateway-ext-proc spec: replicas: 1 selector: matchLabels: - app: llm-route-ext-proc + app: instance-gateway-ext-proc template: metadata: labels: - app: llm-route-ext-proc + app: instance-gateway-ext-proc spec: containers: - - name: llm-route-ext-proc + - name: instance-gateway-ext-proc image: ghcr.io/tomatillo-and-multiverse/ext-proc:demo args: #TODO: specify label selector and dynamically update pods @@ -70,13 +35,35 @@ spec: apiVersion: v1 kind: Service metadata: - name: llm-route-ext-proc + name: instance-gateway-ext-proc namespace: default spec: selector: - app: llm-route-ext-proc + app: instance-gateway-ext-proc ports: - protocol: TCP port: 9002 targetPort: 9002 type: ClusterIP +--- +apiVersion: gateway.envoyproxy.io/v1alpha1 +kind: EnvoyExtensionPolicy +metadata: + name: ext-proc-policy + namespace: default +spec: + extProc: + - backendRefs: + - group: "" + kind: Service + name: instance-gateway-ext-proc + port: 9002 + processingMode: + request: + body: Buffered + response: + messageTimeout: 5s + targetRef: + group: gateway.networking.k8s.io + kind: Gateway + name: llm-gateway \ No newline at end of file diff --git a/examples/poc/manifests/samples/gateway.yaml b/examples/poc/manifests/gateway/gateway.yaml similarity index 53% rename from examples/poc/manifests/samples/gateway.yaml rename to examples/poc/manifests/gateway/gateway.yaml index 0f3f1803..e98f1065 100644 --- 
a/examples/poc/manifests/samples/gateway.yaml +++ b/examples/poc/manifests/gateway/gateway.yaml @@ -10,3 +10,11 @@ spec: - name: http protocol: HTTP port: 8080 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: GatewayClass +metadata: + name: llm-gateway +spec: + controllerName: gateway.envoyproxy.io/gatewayclass-controller +--- \ No newline at end of file diff --git a/examples/poc/manifests/patch_policy.yaml b/examples/poc/manifests/gateway/patch_policy.yaml similarity index 100% rename from examples/poc/manifests/patch_policy.yaml rename to examples/poc/manifests/gateway/patch_policy.yaml diff --git a/examples/poc/manifests/samples/vllm-lora-deployment.yaml b/examples/poc/manifests/vllm/vllm-lora-deployment.yaml similarity index 100% rename from examples/poc/manifests/samples/vllm-lora-deployment.yaml rename to examples/poc/manifests/vllm/vllm-lora-deployment.yaml diff --git a/examples/poc/manifests/samples/vllm-lora-service.yaml b/examples/poc/manifests/vllm/vllm-lora-service.yaml similarity index 100% rename from examples/poc/manifests/samples/vllm-lora-service.yaml rename to examples/poc/manifests/vllm/vllm-lora-service.yaml From e98db98e41c867566c1d08d79ca97be1cbc75983 Mon Sep 17 00:00:00 2001 From: Kellen Swain Date: Thu, 10 Oct 2024 21:07:44 +0000 Subject: [PATCH 5/8] moving llm-gw ext-proc port to 8081 --- examples/poc/manifests/gateway/gateway.yaml | 9 ++++++--- examples/poc/manifests/gateway/patch_policy.yaml | 4 ++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/examples/poc/manifests/gateway/gateway.yaml b/examples/poc/manifests/gateway/gateway.yaml index e98f1065..b7cdf3ff 100644 --- a/examples/poc/manifests/gateway/gateway.yaml +++ b/examples/poc/manifests/gateway/gateway.yaml @@ -3,18 +3,21 @@ apiVersion: gateway.networking.k8s.io/v1 kind: Gateway metadata: - name: llm-gateway + name: spec: - gatewayClassName: llm-gateway + gatewayClassName: listeners: - name: http protocol: HTTP port: 8080 + - name: llm-gw + protocol: HTTP + 
port: 8081 --- apiVersion: gateway.networking.k8s.io/v1 kind: GatewayClass metadata: - name: llm-gateway + name: spec: controllerName: gateway.envoyproxy.io/gatewayclass-controller --- \ No newline at end of file diff --git a/examples/poc/manifests/gateway/patch_policy.yaml b/examples/poc/manifests/gateway/patch_policy.yaml index 1ff0b74d..b45d2afa 100644 --- a/examples/poc/manifests/gateway/patch_policy.yaml +++ b/examples/poc/manifests/gateway/patch_policy.yaml @@ -7,7 +7,7 @@ spec: targetRef: group: gateway.networking.k8s.io kind: Gateway - name: inference-gateway + name: type: JSONPatch jsonPatches: # Necessary to create a cluster of the type: ORIGINAL_DST to allow for @@ -34,7 +34,7 @@ spec: # cluster we just made. - type: "type.googleapis.com/envoy.config.listener.v3.Listener" # The listener name is of the form // - name: default/inference-gateway/http + name: default//llm-gw operation: op: add path: "/filter_chains" From 234a0ac773727cda7a0dafb343ec40a257d5a952 Mon Sep 17 00:00:00 2001 From: Kellen Swain Date: Mon, 14 Oct 2024 15:31:25 +0000 Subject: [PATCH 6/8] Update envoy to patch an HTTPRoute virtual host. 
Also adding the manifests to the top level Ext-Proc implementation --- .../gateway/enable_patch_policy.yaml | 15 ++-- examples/poc/manifests/gateway/ext_proc.yaml | 4 +- examples/poc/manifests/gateway/gateway.yaml | 26 ++++++- .../poc/manifests/gateway/patch_policy.yaml | 48 ++----------- pkg/README.md | 48 +++++++++++++ pkg/manifests/enable_patch_policy.yaml | 26 +++++++ pkg/manifests/ext_proc.yaml | 69 +++++++++++++++++++ pkg/manifests/gateway.yaml | 47 +++++++++++++ pkg/manifests/patch_policy.yaml | 38 ++++++++++ pkg/placeholder.md | 0 10 files changed, 267 insertions(+), 54 deletions(-) create mode 100644 pkg/README.md create mode 100644 pkg/manifests/enable_patch_policy.yaml create mode 100644 pkg/manifests/ext_proc.yaml create mode 100644 pkg/manifests/gateway.yaml create mode 100644 pkg/manifests/patch_policy.yaml delete mode 100644 pkg/placeholder.md diff --git a/examples/poc/manifests/gateway/enable_patch_policy.yaml b/examples/poc/manifests/gateway/enable_patch_policy.yaml index 2b72697b..c1d00e9a 100644 --- a/examples/poc/manifests/gateway/enable_patch_policy.yaml +++ b/examples/poc/manifests/gateway/enable_patch_policy.yaml @@ -16,10 +16,11 @@ data: gateway: controllerName: gateway.envoyproxy.io/gatewayclass-controller extensionApis: - enableEnvoyPatchPolicy: true -# admin: -# enablePprof: true -# address: -# host: 127.0.0.1 -# port: 19000 -# enabledDumpConfig: true + enableEnvoyPatchPolicy: true + enableBackend: true +# admin: +# enablePprof: true +# address: +# host: 127.0.0.1 +# port: 19000 +# enabledDumpConfig: true diff --git a/examples/poc/manifests/gateway/ext_proc.yaml b/examples/poc/manifests/gateway/ext_proc.yaml index ff956640..6112fa9e 100644 --- a/examples/poc/manifests/gateway/ext_proc.yaml +++ b/examples/poc/manifests/gateway/ext_proc.yaml @@ -65,5 +65,5 @@ spec: messageTimeout: 5s targetRef: group: gateway.networking.k8s.io - kind: Gateway - name: llm-gateway \ No newline at end of file + kind: HTTPRoute + name: llm-route \ No newline at 
end of file diff --git a/examples/poc/manifests/gateway/gateway.yaml b/examples/poc/manifests/gateway/gateway.yaml index b7cdf3ff..b964f911 100644 --- a/examples/poc/manifests/gateway/gateway.yaml +++ b/examples/poc/manifests/gateway/gateway.yaml @@ -20,4 +20,28 @@ metadata: name: spec: controllerName: gateway.envoyproxy.io/gatewayclass-controller ---- \ No newline at end of file +--- +apiVersion: gateway.envoyproxy.io/v1alpha1 +kind: Backend +metadata: + name: backend-dummy +spec: + endpoints: + - fqdn: + # Both these values are arbitrary and unused as the PatchPolicy redirects requests. + hostname: 'foo.bar.com' + port: 8080 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: llm-route +spec: + parentRefs: + - name: inference-gateway + sectionName: llm-gw + rules: + - backendRefs: + - group: gateway.envoyproxy.io + kind: Backend + name: backend-dummy \ No newline at end of file diff --git a/examples/poc/manifests/gateway/patch_policy.yaml b/examples/poc/manifests/gateway/patch_policy.yaml index b45d2afa..b7681954 100644 --- a/examples/poc/manifests/gateway/patch_policy.yaml +++ b/examples/poc/manifests/gateway/patch_policy.yaml @@ -30,49 +30,9 @@ spec: lb_policy: CLUSTER_PROVIDED dns_lookup_family: V4_ONLY - # The listener is required to route requests to the original destination - # cluster we just made. 
- - type: "type.googleapis.com/envoy.config.listener.v3.Listener" - # The listener name is of the form // + - type: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration" name: default//llm-gw operation: - op: add - path: "/filter_chains" - value: - - filters: - - name: envoy.filters.network.http_connection_manager - typed_config: - "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager - stat_prefix: http - codec_type: AUTO - route_config: - name: local_route - virtual_hosts: - - name: backend - domains: ["*"] - routes: - - match: - prefix: "/" - route: - cluster: original_destination_cluster - timeout: 10s - http_filters: - - name: envoy.filters.http.ext_proc - typed_config: - "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor - failure_mode_allow: false - grpc_service: - envoy_grpc: - # This is the cluster name as created by the EnvoyExtensionPolicy - # Name is of the form /// - cluster_name: envoyextensionpolicy/default/ext-proc-policy/0 - processing_mode: - request_header_mode: "SEND" - response_header_mode: "SEND" - request_body_mode: "BUFFERED" - response_body_mode: "NONE" - request_trailer_mode: "SKIP" - response_trailer_mode: "SKIP" - - name: envoy.filters.http.router - typed_config: - "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router \ No newline at end of file + op: replace + path: "/virtual_hosts/1/routes/0/route/cluster" + value: original_destination_cluster \ No newline at end of file diff --git a/pkg/README.md b/pkg/README.md new file mode 100644 index 00000000..a89d5489 --- /dev/null +++ b/pkg/README.md @@ -0,0 +1,48 @@ +## Quickstart + +### Steps + +1. 
**Deploy Sample vLLM Application** + + A sample vLLM deployment with the proper protocol to work with LLM Instance Gateway can be found [here](https://github.com/kubernetes-sigs/llm-instance-gateway/blob/6f9869d6595d2d0f8e6febcbec0f348cb44a3012/examples/poc/manifests/samples/vllm-lora-deployment.yaml#L18). + +1. **Update Envoy Gateway Config to enable Patch Policy** + + Our custom LLM Gateway ext-proc is patched into the existing envoy gateway via `EnvoyPatchPolicy`. To enable this feature, we must extend the Envoy Gateway config map. To do this, simply run: + ```bash + kubectl apply -f ./manifests/enable_patch_policy.yaml + kubectl rollout restart deployment envoy-gateway -n envoy-gateway-system + + ``` + Additionally, if you would like the enable the admin interface, you can uncomment the admin lines and run this again. + + +1. **Deploy Gateway** + + ```bash + kubectl apply -f ./manifests/gateway.yaml + ``` + +1. **Deploy Ext-Proc** + + ```bash + kubectl apply -f ./manifests/ext_proc.yaml + kubectl apply -f ./manifests/patch_policy.yaml + ``` + **NOTE**: Ensure the `instance-gateway-ext-proc` deployment is updated with the pod names and internal IP addresses of the vLLM replicas. This step is crucial for the correct routing of requests based on headers. This won't be needed once we make ext proc dynamically read the pods. + +1. **Try it out** + + Wait until the gateway is ready. 
+ + ```bash + IP=$(kubectl get gateway/llm-gateway -o jsonpath='{.status.addresses[0].value}') + PORT=8081 + + curl -i ${IP}:${PORT}/v1/completions -H 'Content-Type: application/json' -d '{ + "model": "tweet-summary", + "prompt": "Write as if you were a critic: San Francisco", + "max_tokens": 100, + "temperature": 0 + }' + ``` \ No newline at end of file diff --git a/pkg/manifests/enable_patch_policy.yaml b/pkg/manifests/enable_patch_policy.yaml new file mode 100644 index 00000000..c1d00e9a --- /dev/null +++ b/pkg/manifests/enable_patch_policy.yaml @@ -0,0 +1,26 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: envoy-gateway-config + namespace: envoy-gateway-system +data: +# This manifest's main purpose is to set `enableEnvoyPatchPolicy` to `true`. +# Any field under `admin` is optional, and only for enabling the admin endpoints, for debugging. +# Admin Interface: https://www.envoyproxy.io/docs/envoy/latest/operations/admin +# PatchPolicy docs: https://gateway.envoyproxy.io/docs/tasks/extensibility/envoy-patch-policy/#enable-envoypatchpolicy + envoy-gateway.yaml: | + apiVersion: gateway.envoyproxy.io/v1alpha1 + kind: EnvoyGateway + provider: + type: Kubernetes + gateway: + controllerName: gateway.envoyproxy.io/gatewayclass-controller + extensionApis: + enableEnvoyPatchPolicy: true + enableBackend: true +# admin: +# enablePprof: true +# address: +# host: 127.0.0.1 +# port: 19000 +# enableDumpConfig: true diff --git a/pkg/manifests/ext_proc.yaml b/pkg/manifests/ext_proc.yaml new file mode 100644 index 00000000..6112fa9e --- /dev/null +++ b/pkg/manifests/ext_proc.yaml @@ -0,0 +1,69 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: instance-gateway-ext-proc + namespace: default + labels: + app: instance-gateway-ext-proc +spec: + replicas: 1 + selector: + matchLabels: + app: instance-gateway-ext-proc + template: + metadata: + labels: + app: instance-gateway-ext-proc + spec: + containers: + - name: instance-gateway-ext-proc + image: 
ghcr.io/tomatillo-and-multiverse/ext-proc:demo + args: + #TODO: specify label selector and dynamically update pods + - -pods + - "vllm-78665f78c4-h4kx4,vllm-78665f78c4-hnz84" + - -podIPs + - "10.24.11.6:8000,10.24.5.7:8000" + - -enable-fairness + - "false" + ports: + - containerPort: 9002 + - name: curl + image: curlimages/curl + command: ["sleep", "3600"] +--- +apiVersion: v1 +kind: Service +metadata: + name: instance-gateway-ext-proc + namespace: default +spec: + selector: + app: instance-gateway-ext-proc + ports: + - protocol: TCP + port: 9002 + targetPort: 9002 + type: ClusterIP +--- +apiVersion: gateway.envoyproxy.io/v1alpha1 +kind: EnvoyExtensionPolicy +metadata: + name: ext-proc-policy + namespace: default +spec: + extProc: + - backendRefs: + - group: "" + kind: Service + name: instance-gateway-ext-proc + port: 9002 + processingMode: + request: + body: Buffered + response: + messageTimeout: 5s + targetRef: + group: gateway.networking.k8s.io + kind: HTTPRoute + name: llm-route \ No newline at end of file diff --git a/pkg/manifests/gateway.yaml b/pkg/manifests/gateway.yaml new file mode 100644 index 00000000..b964f911 --- /dev/null +++ b/pkg/manifests/gateway.yaml @@ -0,0 +1,47 @@ + +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: +spec: + gatewayClassName: + listeners: + - name: http + protocol: HTTP + port: 8080 + - name: llm-gw + protocol: HTTP + port: 8081 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: GatewayClass +metadata: + name: +spec: + controllerName: gateway.envoyproxy.io/gatewayclass-controller +--- +apiVersion: gateway.envoyproxy.io/v1alpha1 +kind: Backend +metadata: + name: backend-dummy +spec: + endpoints: + - fqdn: + # Both these values are arbitrary and unused as the PatchPolicy redirects requests. 
+ hostname: 'foo.bar.com' + port: 8080 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: llm-route +spec: + parentRefs: + - name: inference-gateway + sectionName: llm-gw + rules: + - backendRefs: + - group: gateway.envoyproxy.io + kind: Backend + name: backend-dummy \ No newline at end of file diff --git a/pkg/manifests/patch_policy.yaml b/pkg/manifests/patch_policy.yaml new file mode 100644 index 00000000..b7681954 --- /dev/null +++ b/pkg/manifests/patch_policy.yaml @@ -0,0 +1,38 @@ +apiVersion: gateway.envoyproxy.io/v1alpha1 +kind: EnvoyPatchPolicy +metadata: + name: custom-response-patch-policy + namespace: default +spec: + targetRef: + group: gateway.networking.k8s.io + kind: Gateway + name: + type: JSONPatch + jsonPatches: + # Necessary to create a cluster of the type: ORIGINAL_DST to allow for + # direct pod scheduling. Which is heavily utilized in our scheduling. + # Specifically the field `original_dst_lb_config` allows us to enable + # `use_http_header` and `http_header_name`. 
+ # Source: https://www.envoyproxy.io/docs/envoy/latest/api-v3/config/cluster/v3/cluster.proto + - type: "type.googleapis.com/envoy.config.cluster.v3.Cluster" + name: original_destination_cluster + operation: + op: add + path: "" + value: + name: original_destination_cluster + type: ORIGINAL_DST + original_dst_lb_config: + use_http_header: true + http_header_name: "target-pod" + connect_timeout: 6s + lb_policy: CLUSTER_PROVIDED + dns_lookup_family: V4_ONLY + + - type: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration" + name: default//llm-gw + operation: + op: replace + path: "/virtual_hosts/1/routes/0/route/cluster" + value: original_destination_cluster \ No newline at end of file diff --git a/pkg/placeholder.md b/pkg/placeholder.md deleted file mode 100644 index e69de29b..00000000 From cc9105fa7b76ecb2241af84d73839e37fabd8624 Mon Sep 17 00:00:00 2001 From: Kellen Swain Date: Mon, 14 Oct 2024 15:31:57 +0000 Subject: [PATCH 7/8] Removing image ref so the most recent image is used --- pkg/manifests/ext_proc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/manifests/ext_proc.yaml b/pkg/manifests/ext_proc.yaml index 6112fa9e..07babb8a 100644 --- a/pkg/manifests/ext_proc.yaml +++ b/pkg/manifests/ext_proc.yaml @@ -17,7 +17,7 @@ spec: spec: containers: - name: instance-gateway-ext-proc - image: ghcr.io/tomatillo-and-multiverse/ext-proc:demo + image: args: #TODO: specify label selector and dynamically update pods - -pods From 32f050cd2844a61307a0ade3739acddcb52f1656 Mon Sep 17 00:00:00 2001 From: Kellen Swain Date: Wed, 16 Oct 2024 20:30:47 +0000 Subject: [PATCH 8/8] Grammatical changes --- examples/poc/README.md | 2 +- pkg/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/poc/README.md b/examples/poc/README.md index 460ba1da..739084a9 100644 --- a/examples/poc/README.md +++ b/examples/poc/README.md @@ -33,7 +33,7 @@ This project sets up an Envoy gateway with a custom external processing which im 
kubectl rollout restart deployment envoy-gateway -n envoy-gateway-system ``` - Additionally, if you would like the enable the admin interface, you can uncomment the admin lines and run this again. + Additionally, if you would like to enable the admin interface, you can uncomment the admin lines and run this again. 1. **Deploy Gateway** diff --git a/pkg/README.md b/pkg/README.md index a89d5489..eee9a68e 100644 --- a/pkg/README.md +++ b/pkg/README.md @@ -14,7 +14,7 @@ kubectl rollout restart deployment envoy-gateway -n envoy-gateway-system ``` - Additionally, if you would like the enable the admin interface, you can uncomment the admin lines and run this again. + Additionally, if you would like to enable the admin interface, you can uncomment the admin lines and run this again. 1. **Deploy Gateway**