Envoy update #18

Merged: 8 commits, Oct 16, 2024. Showing changes from 4 commits.
30 changes: 21 additions & 9 deletions examples/poc/README.md
@@ -17,29 +17,41 @@ This project sets up an Envoy gateway with a custom external processing which im
### Steps

1. **Deploy Sample vLLM Application**

NOTE: Create a HuggingFace API token and store it in a secret named `hf-token` with key `token`. This is configured in the `HUGGING_FACE_HUB_TOKEN` and `HF_TOKEN` environment variables in `./manifests/samples/vllm-lora-deployment.yaml`.
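A minimal sketch of creating that secret, assuming the raw token value is exported locally as `HF_TOKEN` (the secret and key names follow the note above):

```bash
# Create the `hf-token` secret with key `token` from a locally exported token.
kubectl create secret generic hf-token \
  --from-literal=token="$HF_TOKEN"
```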

```bash
kubectl apply -f ./manifests/samples/vllm-lora-deployment.yaml
kubectl apply -f ./manifests/samples/vllm-lora-service.yaml
kubectl apply -f ./manifests/vllm/vllm-lora-deployment.yaml
kubectl apply -f ./manifests/vllm/vllm-lora-service.yaml
```
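Before moving on, you can wait for the vLLM deployment to become available. The deployment name below is an assumption (it is not shown in this diff); substitute the name used in `vllm-lora-deployment.yaml`:

```bash
# Wait for the (assumed) `vllm` deployment to report Available.
kubectl wait --for=condition=Available deployment/vllm --timeout=600s
```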

2. **Install GatewayClass with Ext Proc**
A custom GatewayClass, `llm-gateway`, configured with the LLM routing ext-proc, will be installed into the `llm-gateway` namespace. It is configured to listen on port 8081 for traffic through ext-proc (in addition to the default 8080); see the `EnvoyProxy` configuration in `installation.yaml`. When you create Gateways, make sure the `llm-gateway` GatewayClass is used.
1. **Update Envoy Gateway Config to enable Patch Policy**

Our custom LLM Gateway ext-proc is patched into the existing Envoy Gateway via `EnvoyPatchPolicy`. To enable this feature, we must extend the Envoy Gateway ConfigMap. To do this, run:
Contributor:
Do we have instructions for deploying the Envoy gateway controller?

Collaborator (Author):
Yup! The quickstart on line 10 points them there.

```bash
kubectl apply -f ./manifests/gateway/enable_patch_policy.yaml
kubectl rollout restart deployment envoy-gateway -n envoy-gateway-system
```

NOTE: Ensure the `llm-route-ext-proc` deployment is updated with the pod names and internal IP addresses of the vLLM replicas. This step is crucial for the correct routing of requests based on headers. This won't be needed once we make ext proc dynamically read the pods.
Additionally, if you would like to enable the admin interface, you can uncomment the admin lines and run this again.
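One way to confirm the flag is set after applying the ConfigMap and restarting:

```bash
# Should print `enableEnvoyPatchPolicy: true` once the ConfigMap is applied.
kubectl get configmap envoy-gateway-config -n envoy-gateway-system -o yaml \
  | grep enableEnvoyPatchPolicy
```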


1. **Deploy Gateway**

```bash
kubectl apply -f ./manifests/installation.yaml
kubectl apply -f ./manifests/gateway/gateway.yaml
```

3. **Deploy Gateway**
1. **Deploy Ext-Proc**

```bash
kubectl apply -f ./manifests/samples/gateway.yaml
kubectl apply -f ./manifests/gateway/ext_proc.yaml
kubectl apply -f ./manifests/gateway/patch_policy.yaml
```
**NOTE**: Ensure the `instance-gateway-ext-proc` deployment is updated with the pod names and internal IP addresses of the vLLM replicas. This step is crucial for the correct routing of requests based on headers. This won't be needed once we make ext proc dynamically read the pods.
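A sketch of how to gather those values, assuming the vLLM pods carry an `app=vllm` label (the label is not shown in this diff):

```bash
# Print each vLLM pod name and IP, to paste into the `-pods` / `-podIPs` args in ext_proc.yaml.
kubectl get pods -l app=vllm \
  -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.status.podIP}{"\n"}{end}'
```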

1. **Try it out**

4. **Try it out**
Wait until the gateway is ready.

```bash
25 changes: 25 additions & 0 deletions examples/poc/manifests/gateway/enable_patch_policy.yaml
@@ -0,0 +1,25 @@
apiVersion: v1
kind: ConfigMap
metadata:
  name: envoy-gateway-config
  namespace: envoy-gateway-system
data:
  # This manifest's main purpose is to set `enableEnvoyPatchPolicy` to `true`.
  # Any field under `admin` is optional and only enables the admin endpoints, for debugging.
  # Admin Interface: https://www.envoyproxy.io/docs/envoy/latest/operations/admin
  # PatchPolicy docs: https://gateway.envoyproxy.io/docs/tasks/extensibility/envoy-patch-policy/#enable-envoypatchpolicy
  envoy-gateway.yaml: |
    apiVersion: gateway.envoyproxy.io/v1alpha1
    kind: EnvoyGateway
    provider:
      type: Kubernetes
    gateway:
      controllerName: gateway.envoyproxy.io/gatewayclass-controller
    extensionApis:
      enableEnvoyPatchPolicy: true
    # admin:
    #   enablePprof: true
    #   address:
    #     host: 127.0.0.1
    #     port: 19000
    #   enabledDumpConfig: true
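After applying this ConfigMap and restarting the controller (per the README step above), the rollout can be verified with:

```bash
kubectl rollout status deployment/envoy-gateway -n envoy-gateway-system
```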
69 changes: 69 additions & 0 deletions examples/poc/manifests/gateway/ext_proc.yaml
@@ -0,0 +1,69 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: instance-gateway-ext-proc
  namespace: default
  labels:
    app: instance-gateway-ext-proc
spec:
  replicas: 1
  selector:
    matchLabels:
      app: instance-gateway-ext-proc
  template:
    metadata:
      labels:
        app: instance-gateway-ext-proc
    spec:
      containers:
      - name: instance-gateway-ext-proc
        image: ghcr.io/tomatillo-and-multiverse/ext-proc:demo
        args:
        # TODO: specify a label selector and dynamically update pods
Contributor:
we should actually pass the name of the LLMServerPool

Collaborator (Author):
Yeah, totally agree, I plan to have ext-proc pull the selection from the config on the LSP so there is always a single source of truth (once we have that set up)
        - -pods
        - "vllm-78665f78c4-h4kx4,vllm-78665f78c4-hnz84"
        - -podIPs
        - "10.24.11.6:8000,10.24.5.7:8000"
        - -enable-fairness
        - "false"
        ports:
        - containerPort: 9002
      - name: curl
        image: curlimages/curl
        command: ["sleep", "3600"]
---
apiVersion: v1
kind: Service
metadata:
  name: instance-gateway-ext-proc
  namespace: default
spec:
  selector:
    app: instance-gateway-ext-proc
  ports:
  - protocol: TCP
    port: 9002
    targetPort: 9002
  type: ClusterIP
---
apiVersion: gateway.envoyproxy.io/v1alpha1
kind: EnvoyExtensionPolicy
metadata:
  name: ext-proc-policy
  namespace: default
spec:
  extProc:
    - backendRefs:
        - group: ""
          kind: Service
          name: instance-gateway-ext-proc
          port: 9002
      processingMode:
        request:
          body: Buffered
        response:
      messageTimeout: 5s
  targetRef:
    group: gateway.networking.k8s.io
    kind: Gateway
    name: llm-gateway
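Once applied, a quick check that the ext-proc workload is up (suggested commands, not part of the manifest):

```bash
kubectl get deployment instance-gateway-ext-proc
kubectl get service instance-gateway-ext-proc
# Tail the ext-proc container logs to confirm it started cleanly.
kubectl logs deploy/instance-gateway-ext-proc -c instance-gateway-ext-proc --tail=20
```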
@@ -10,3 +10,11 @@ spec:
  - name: http
    protocol: HTTP
    port: 8080
---
apiVersion: gateway.networking.k8s.io/v1
kind: GatewayClass
metadata:
  name: llm-gateway
spec:
  controllerName: gateway.envoyproxy.io/gatewayclass-controller
---
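Once the GatewayClass is applied, one way to check that the controller has accepted it (assuming the Envoy Gateway controller from the quickstart is running):

```bash
kubectl get gatewayclass llm-gateway
kubectl wait --for=condition=Accepted gatewayclass/llm-gateway --timeout=60s
```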
78 changes: 78 additions & 0 deletions examples/poc/manifests/gateway/patch_policy.yaml
@@ -0,0 +1,78 @@
apiVersion: gateway.envoyproxy.io/v1alpha1
kind: EnvoyPatchPolicy
metadata:
  name: custom-response-patch-policy
  namespace: default
spec:
  targetRef:
    group: gateway.networking.k8s.io
    kind: Gateway
    name: inference-gateway
  type: JSONPatch
  jsonPatches:
    # Necessary to create a cluster of type ORIGINAL_DST to allow for direct pod
    # scheduling, which is heavily utilized in our scheduling. Specifically, the
    # `original_dst_lb_config` field allows us to enable `use_http_header` and
    # `http_header_name`.
    # Source: https://www.envoyproxy.io/docs/envoy/latest/api-v3/config/cluster/v3/cluster.proto
    - type: "type.googleapis.com/envoy.config.cluster.v3.Cluster"
      name: original_destination_cluster
      operation:
        op: add
        path: ""
        value:
          name: original_destination_cluster
          type: ORIGINAL_DST
          original_dst_lb_config:
            use_http_header: true
            http_header_name: "target-pod"
          connect_timeout: 6s
          lb_policy: CLUSTER_PROVIDED
          dns_lookup_family: V4_ONLY

    # The listener is required to route requests to the original destination
    # cluster we just made.
    - type: "type.googleapis.com/envoy.config.listener.v3.Listener"
      # The listener name is of the form <GatewayNamespace>/<GatewayName>/<GatewayListenerName>
      name: default/inference-gateway/http
      operation:
        op: add
        path: "/filter_chains"
        value:
          - filters:
              - name: envoy.filters.network.http_connection_manager
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                  stat_prefix: http
                  codec_type: AUTO
                  route_config:
                    name: local_route
                    virtual_hosts:
                      - name: backend
                        domains: ["*"]
                        routes:
                          - match:
                              prefix: "/"
                            route:
                              cluster: original_destination_cluster
                              timeout: 10s
                  http_filters:
                    - name: envoy.filters.http.ext_proc
                      typed_config:
                        "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
                        failure_mode_allow: false
                        grpc_service:
                          envoy_grpc:
                            # This is the cluster name as created by the EnvoyExtensionPolicy.
                            # The name is of the form <CRDKind>/<GatewayNamespace>/<ExtensionPolicyName>/<IndexOfBackend>
                            cluster_name: envoyextensionpolicy/default/ext-proc-policy/0
                        processing_mode:
                          request_header_mode: "SEND"
                          response_header_mode: "SEND"
                          request_body_mode: "BUFFERED"
                          response_body_mode: "NONE"
                          request_trailer_mode: "SKIP"
                          response_trailer_mode: "SKIP"
                    - name: envoy.filters.http.router
                      typed_config:
                        "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
155 changes: 0 additions & 155 deletions examples/poc/manifests/installation.yaml

This file was deleted.