From 868a8617f573ffe95cae340e49cd75330542d1eb Mon Sep 17 00:00:00 2001
From: Kellen Swain <kfswain@google.com>
Date: Wed, 9 Oct 2024 17:43:06 +0000
Subject: [PATCH 1/8] moving all yaml to default namespace

---
 examples/poc/manifests/installation.yaml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/poc/manifests/installation.yaml b/examples/poc/manifests/installation.yaml
index 57ecd185..d6620c20 100644
--- a/examples/poc/manifests/installation.yaml
+++ b/examples/poc/manifests/installation.yaml
@@ -8,7 +8,7 @@ apiVersion: gateway.envoyproxy.io/v1alpha1
 kind: EnvoyProxy
 metadata:
   name: llm-route-envoy-config
-  namespace: llm-gateway
+  namespace: default
 spec:
   provider:
     type: Kubernetes
@@ -103,14 +103,14 @@ spec:
     group: gateway.envoyproxy.io
     kind: EnvoyProxy
     name: llm-route-envoy-config
-    namespace: llm-gateway
+    namespace: default
 
 ---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
   name: llm-route-ext-proc
-  namespace: llm-gateway
+  namespace: default
   labels:
     app: llm-route-ext-proc
 spec:
@@ -144,7 +144,7 @@ apiVersion: v1
 kind: Service
 metadata:
   name: llm-route-ext-proc
-  namespace: llm-gateway
+  namespace: default
 spec:
   selector:
     app: llm-route-ext-proc

From 9fa80a96c5d8bef89396948dc8ca8c1229bcf018 Mon Sep 17 00:00:00 2001
From: Kellen Swain <kfswain@google.com>
Date: Wed, 9 Oct 2024 17:55:30 +0000
Subject: [PATCH 2/8] adding new files

---
 .../poc/manifests/enable_patch_policy.yaml    | 25 +++++
 examples/poc/manifests/installation.yaml      | 94 -------------------
 examples/poc/manifests/patch_policy.yaml      | 78 +++++++++++++++
 3 files changed, 103 insertions(+), 94 deletions(-)
 create mode 100644 examples/poc/manifests/enable_patch_policy.yaml
 create mode 100644 examples/poc/manifests/patch_policy.yaml

diff --git a/examples/poc/manifests/enable_patch_policy.yaml b/examples/poc/manifests/enable_patch_policy.yaml
new file mode 100644
index 00000000..a3bc6160
--- /dev/null
+++ b/examples/poc/manifests/enable_patch_policy.yaml
@@ -0,0 +1,25 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: envoy-gateway-config
+  namespace: envoy-gateway-system
+data:
+# This manifest's main purpose is to set `enableEnvoyPatchPolicy` to `true`.
+# Any field under `admin` is optional, and only for enabling the admin endpoints, for debugging.
+# Admin Interface: https://www.envoyproxy.io/docs/envoy/latest/operations/admin
+# PatchPolicy docs: https://gateway.envoyproxy.io/docs/tasks/extensibility/envoy-patch-policy/#enable-envoypatchpolicy 
+  envoy-gateway.yaml: |
+    apiVersion: gateway.envoyproxy.io/v1alpha1
+    kind: EnvoyGateway
+    provider:
+      type: Kubernetes
+    gateway:
+      controllerName: gateway.envoyproxy.io/gatewayclass-controller
+    extensionApis:
+      enableEnvoyPatchPolicy: true
+    admin:
+      enablePprof: true
+      address:
+        host: 127.0.0.1
+        port: 19000
+      enabledDumpConfig: true
diff --git a/examples/poc/manifests/installation.yaml b/examples/poc/manifests/installation.yaml
index d6620c20..0fa0d308 100644
--- a/examples/poc/manifests/installation.yaml
+++ b/examples/poc/manifests/installation.yaml
@@ -1,97 +1,3 @@
-apiVersion: v1
-kind: Namespace
-metadata:
-  name: llm-gateway
-
----
-apiVersion: gateway.envoyproxy.io/v1alpha1
-kind: EnvoyProxy
-metadata:
-  name: llm-route-envoy-config
-  namespace: default
-spec:
-  provider:
-    type: Kubernetes
-    kubernetes:
-      envoyService:
-        patch:
-          type: StrategicMerge
-          value:
-            spec:
-              ports:
-                - name: http-8081
-                  port: 8081
-                  protocol: TCP
-                  targetPort: 8081
-  bootstrap:
-    type: Merge 
-    value: |
-      static_resources:
-        listeners:
-          - name: listener_0
-            address:
-              socket_address:
-                address: 0.0.0.0
-                port_value: 8081
-            filter_chains:
-              - filters:
-                  - name: envoy.filters.network.http_connection_manager
-                    typed_config:
-                      "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
-                      stat_prefix: http
-                      codec_type: AUTO
-                      route_config:
-                        name: local_route
-                        virtual_hosts:      
-                          - name: backend
-                            domains: ["*"]
-                            routes:
-                              - match:
-                                  prefix: "/"
-                                route:  
-                                  cluster: original_destination_cluster
-                                  timeout: 1000s  # Increase route timeout
-                      http_filters:
-                        - name: envoy.filters.http.ext_proc
-                          typed_config:
-                            "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
-                            failure_mode_allow: false
-                            grpc_service:
-                              envoy_grpc:
-                                cluster_name: ext_proc_cluster
-                            processing_mode:
-                              request_header_mode: "SEND"
-                              response_header_mode: "SEND"
-                              request_body_mode: "BUFFERED"
-                              response_body_mode: "NONE"
-                              request_trailer_mode: "SKIP"
-                              response_trailer_mode: "SKIP"
-                        - name: envoy.filters.http.router
-                          typed_config:
-                            "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
-        clusters:
-          - name: original_destination_cluster
-            type: ORIGINAL_DST
-            original_dst_lb_config:
-              use_http_header: true
-              http_header_name: "target-pod"
-            connect_timeout: 6s
-            lb_policy: CLUSTER_PROVIDED
-            dns_lookup_family: V4_ONLY
-          - name: ext_proc_cluster
-            connect_timeout: 1000s
-            type: LOGICAL_DNS
-            http2_protocol_options: {}
-            lb_policy: ROUND_ROBIN
-            load_assignment:
-              cluster_name: ext_proc_cluster
-              endpoints:
-                - lb_endpoints:
-                    - endpoint:
-                        address:
-                          socket_address:
-                            address: llm-route-ext-proc.llm-gateway.svc.cluster.local
-                            port_value: 9002
 ---
 apiVersion: gateway.networking.k8s.io/v1
 kind: GatewayClass
diff --git a/examples/poc/manifests/patch_policy.yaml b/examples/poc/manifests/patch_policy.yaml
new file mode 100644
index 00000000..1ff0b74d
--- /dev/null
+++ b/examples/poc/manifests/patch_policy.yaml
@@ -0,0 +1,78 @@
+apiVersion: gateway.envoyproxy.io/v1alpha1
+kind: EnvoyPatchPolicy
+metadata:
+  name: custom-response-patch-policy
+  namespace: default
+spec:
+  targetRef:
+    group: gateway.networking.k8s.io
+    kind: Gateway
+    name: inference-gateway
+  type: JSONPatch
+  jsonPatches:
+    # Necessary to create a cluster of the type: ORIGINAL_DST to allow for 
+    # direct pod scheduling, which is heavily utilized in our scheduling.
+    # Specifically the field `original_dst_lb_config` allows us to enable
+    # `use_http_header` and `http_header_name`. 
+    # Source: https://www.envoyproxy.io/docs/envoy/latest/api-v3/config/cluster/v3/cluster.proto
+    - type: "type.googleapis.com/envoy.config.cluster.v3.Cluster"
+      name: original_destination_cluster
+      operation:
+        op: add
+        path: ""
+        value:
+          name: original_destination_cluster
+          type: ORIGINAL_DST
+          original_dst_lb_config:
+            use_http_header: true
+            http_header_name: "target-pod"
+          connect_timeout: 6s
+          lb_policy: CLUSTER_PROVIDED
+          dns_lookup_family: V4_ONLY
+
+    # The listener is required to route requests to the original destination
+    # cluster we just made.
+    - type: "type.googleapis.com/envoy.config.listener.v3.Listener"
+      # The listener name is of the form <GatewayNamespace>/<GatewayName>/<GatewayListenerName>
+      name: default/inference-gateway/http
+      operation:
+        op: add
+        path: "/filter_chains"
+        value:
+          - filters:
+            - name: envoy.filters.network.http_connection_manager
+              typed_config:
+                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
+                stat_prefix: http
+                codec_type: AUTO
+                route_config:
+                  name: local_route
+                  virtual_hosts:      
+                    - name: backend
+                      domains: ["*"]
+                      routes:
+                        - match:
+                            prefix: "/"
+                          route:  
+                            cluster: original_destination_cluster
+                            timeout: 10s
+                http_filters:
+                - name: envoy.filters.http.ext_proc
+                  typed_config:
+                    "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
+                    failure_mode_allow: false
+                    grpc_service:
+                      envoy_grpc:
+                        # This is the cluster name as created by the EnvoyExtensionPolicy
+                        # Name is of the form <CRDKind>/<GatewayNamespace>/<ExtensionPolicyName>/<IndexOfBackend>
+                        cluster_name: envoyextensionpolicy/default/ext-proc-policy/0
+                    processing_mode:
+                      request_header_mode: "SEND"
+                      response_header_mode: "SEND"
+                      request_body_mode: "BUFFERED"
+                      response_body_mode: "NONE"
+                      request_trailer_mode: "SKIP"
+                      response_trailer_mode: "SKIP"
+                - name: envoy.filters.http.router
+                  typed_config:
+                    "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
\ No newline at end of file

From c3571bd4812dd67ececadcc56b8fb22cebc787de Mon Sep 17 00:00:00 2001
From: Kellen Swain <kfswain@google.com>
Date: Wed, 9 Oct 2024 20:13:37 +0000
Subject: [PATCH 3/8] small updates

---
 examples/poc/README.md                   |  2 +-
 examples/poc/manifests/installation.yaml | 23 ++++++++++++++++++++++-
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/examples/poc/README.md b/examples/poc/README.md
index 6c1cbdc9..96ee1f16 100644
--- a/examples/poc/README.md
+++ b/examples/poc/README.md
@@ -25,7 +25,7 @@ This project sets up an Envoy gateway with a custom external processing which im
    ```
 
 2. **Install GatewayClass with Ext Proc**
-   A custom GatewayClass `llm-gateway` which is configured with the llm routing ext proc will be installed into the `llm-gateway` namespace. It's configured to listen on port 8081 for traffic through ext-proc (in addition to the default 8080), see the `EnvoyProxy` configuration in `installation.yaml`. When you create Gateways, make sure the `llm-gateway` GatewayClass is used.
+   A custom GatewayClass `llm-gateway` which is configured with the llm routing ext proc will be installed. It's configured to listen on port 8081 for traffic through ext-proc (in addition to the default 8080), see the `EnvoyExtensionPolicy` configuration in `installation.yaml`. When you create Gateways, make sure the `llm-gateway` GatewayClass is used.
 
    NOTE: Ensure the `llm-route-ext-proc` deployment is updated with the pod names and internal IP addresses of the vLLM replicas. This step is crucial for the correct routing of requests based on headers. This won't be needed once we make ext proc dynamically read the pods.
 
diff --git a/examples/poc/manifests/installation.yaml b/examples/poc/manifests/installation.yaml
index 0fa0d308..f1795d0b 100644
--- a/examples/poc/manifests/installation.yaml
+++ b/examples/poc/manifests/installation.yaml
@@ -10,7 +10,28 @@ spec:
     kind: EnvoyProxy
     name: llm-route-envoy-config
     namespace: default
-
+---
+apiVersion: gateway.envoyproxy.io/v1alpha1
+kind: EnvoyExtensionPolicy
+metadata:
+  name: ext-proc-policy
+  namespace: default
+spec:
+  extProc:
+    - backendRefs:
+      - group: ""
+        kind: Service
+        name: grpc-server-service
+        port: 9002
+      processingMode:
+        request:
+          body: Buffered
+        response:
+      messageTimeout: 5s
+  targetRef:
+    group: gateway.networking.k8s.io
+    kind: Gateway
+    name: inference-gateway
 ---
 apiVersion: apps/v1
 kind: Deployment

From c46a4966ed447890b79063887fc84ae5eead41a8 Mon Sep 17 00:00:00 2001
From: Kellen Swain <kfswain@google.com>
Date: Wed, 9 Oct 2024 21:13:39 +0000
Subject: [PATCH 4/8] Update PoC to support Envoy best Practices

---
 examples/poc/README.md                        | 30 +++++---
 .../{ => gateway}/enable_patch_policy.yaml    | 12 ++--
 .../ext_proc.yaml}                            | 71 ++++++++-----------
 .../{samples => gateway}/gateway.yaml         |  8 +++
 .../manifests/{ => gateway}/patch_policy.yaml |  0
 .../vllm-lora-deployment.yaml                 |  0
 .../{samples => vllm}/vllm-lora-service.yaml  |  0
 7 files changed, 64 insertions(+), 57 deletions(-)
 rename examples/poc/manifests/{ => gateway}/enable_patch_policy.yaml (84%)
 rename examples/poc/manifests/{installation.yaml => gateway/ext_proc.yaml} (68%)
 rename examples/poc/manifests/{samples => gateway}/gateway.yaml (53%)
 rename examples/poc/manifests/{ => gateway}/patch_policy.yaml (100%)
 rename examples/poc/manifests/{samples => vllm}/vllm-lora-deployment.yaml (100%)
 rename examples/poc/manifests/{samples => vllm}/vllm-lora-service.yaml (100%)

diff --git a/examples/poc/README.md b/examples/poc/README.md
index 96ee1f16..460ba1da 100644
--- a/examples/poc/README.md
+++ b/examples/poc/README.md
@@ -17,29 +17,41 @@ This project sets up an Envoy gateway with a custom external processing which im
 ### Steps
 
 1. **Deploy Sample vLLM Application**
+
    NOTE: Create a HuggingFace API token and store it in a secret named `hf-token` with key `token`. This is configured in the `HUGGING_FACE_HUB_TOKEN` and `HF_TOKEN` environment variables in `./manifests/samples/vllm-lora-deployment.yaml`.
 
    ```bash
-   kubectl apply -f ./manifests/samples/vllm-lora-deployment.yaml
-   kubectl apply -f ./manifests/samples/vllm-lora-service.yaml
+   kubectl apply -f ./manifests/vllm/vllm-lora-deployment.yaml
+   kubectl apply -f ./manifests/vllm/vllm-lora-service.yaml
    ```
 
-2. **Install GatewayClass with Ext Proc**
-   A custom GatewayClass `llm-gateway` which is configured with the llm routing ext proc will be installed. It's configured to listen on port 8081 for traffic through ext-proc (in addition to the default 8080), see the `EnvoyExtensionPolicy` configuration in `installation.yaml`. When you create Gateways, make sure the `llm-gateway` GatewayClass is used.
+1. **Update Envoy Gateway Config to enable Patch Policy**
+
+   Our custom LLM Gateway ext-proc is patched into the existing envoy gateway via `EnvoyPatchPolicy`. To enable this feature, we must extend the Envoy Gateway config map. To do this, simply run:
+   ```bash
+   kubectl apply -f ./manifests/gateway/enable_patch_policy.yaml
+   kubectl rollout restart deployment envoy-gateway -n envoy-gateway-system
 
-   NOTE: Ensure the `llm-route-ext-proc` deployment is updated with the pod names and internal IP addresses of the vLLM replicas. This step is crucial for the correct routing of requests based on headers. This won't be needed once we make ext proc dynamically read the pods.
+   ```
+   Additionally, if you would like to enable the admin interface, you can uncomment the admin lines and run this again.
+
+
+1. **Deploy Gateway**
 
    ```bash
-   kubectl apply -f ./manifests/installation.yaml
+   kubectl apply -f ./manifests/gateway/gateway.yaml
    ```
 
-3. **Deploy Gateway**
+1. **Deploy Ext-Proc**
 
    ```bash
-   kubectl apply -f ./manifests/samples/gateway.yaml
+   kubectl apply -f ./manifests/gateway/ext_proc.yaml
+   kubectl apply -f ./manifests/gateway/patch_policy.yaml
    ```
+   **NOTE**: Ensure the `instance-gateway-ext-proc` deployment is updated with the pod names and internal IP addresses of the vLLM replicas. This step is crucial for the correct routing of requests based on headers. This won't be needed once we make ext proc dynamically read the pods.
+
+1. **Try it out**
 
-4. **Try it out**
    Wait until the gateway is ready.
 
    ```bash
diff --git a/examples/poc/manifests/enable_patch_policy.yaml b/examples/poc/manifests/gateway/enable_patch_policy.yaml
similarity index 84%
rename from examples/poc/manifests/enable_patch_policy.yaml
rename to examples/poc/manifests/gateway/enable_patch_policy.yaml
index a3bc6160..2b72697b 100644
--- a/examples/poc/manifests/enable_patch_policy.yaml
+++ b/examples/poc/manifests/gateway/enable_patch_policy.yaml
@@ -17,9 +17,9 @@ data:
       controllerName: gateway.envoyproxy.io/gatewayclass-controller
     extensionApis:
       enableEnvoyPatchPolicy: true
-    admin:
-      enablePprof: true
-      address:
-        host: 127.0.0.1
-        port: 19000
-      enabledDumpConfig: true
+#       admin:
+#        enablePprof: true
+#        address:
+#          host: 127.0.0.1
+#          port: 19000
+#       enabledDumpConfig: true
diff --git a/examples/poc/manifests/installation.yaml b/examples/poc/manifests/gateway/ext_proc.yaml
similarity index 68%
rename from examples/poc/manifests/installation.yaml
rename to examples/poc/manifests/gateway/ext_proc.yaml
index f1795d0b..ff956640 100644
--- a/examples/poc/manifests/installation.yaml
+++ b/examples/poc/manifests/gateway/ext_proc.yaml
@@ -1,57 +1,22 @@
----
-apiVersion: gateway.networking.k8s.io/v1
-kind: GatewayClass
-metadata:
-  name: llm-gateway
-spec:
-  controllerName: gateway.envoyproxy.io/gatewayclass-controller
-  parametersRef:  
-    group: gateway.envoyproxy.io
-    kind: EnvoyProxy
-    name: llm-route-envoy-config
-    namespace: default
----
-apiVersion: gateway.envoyproxy.io/v1alpha1
-kind: EnvoyExtensionPolicy
-metadata:
-  name: ext-proc-policy
-  namespace: default
-spec:
-  extProc:
-    - backendRefs:
-      - group: ""
-        kind: Service
-        name: grpc-server-service
-        port: 9002
-      processingMode:
-        request:
-          body: Buffered
-        response:
-      messageTimeout: 5s
-  targetRef:
-    group: gateway.networking.k8s.io
-    kind: Gateway
-    name: inference-gateway
----
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: llm-route-ext-proc
+  name: instance-gateway-ext-proc
   namespace: default
   labels:
-    app: llm-route-ext-proc
+    app: instance-gateway-ext-proc
 spec:
   replicas: 1
   selector:
     matchLabels:
-      app: llm-route-ext-proc
+      app: instance-gateway-ext-proc
   template:
     metadata:
       labels:
-        app: llm-route-ext-proc
+        app: instance-gateway-ext-proc
     spec:
       containers:
-      - name: llm-route-ext-proc
+      - name: instance-gateway-ext-proc
         image: ghcr.io/tomatillo-and-multiverse/ext-proc:demo
         args:
         #TODO: specify label selector and dynamically update pods
@@ -70,13 +35,35 @@ spec:
 apiVersion: v1
 kind: Service
 metadata:
-  name: llm-route-ext-proc
+  name: instance-gateway-ext-proc
   namespace: default
 spec:
   selector:
-    app: llm-route-ext-proc
+    app: instance-gateway-ext-proc
   ports:
     - protocol: TCP
       port: 9002
       targetPort: 9002
   type: ClusterIP
+---
+apiVersion: gateway.envoyproxy.io/v1alpha1
+kind: EnvoyExtensionPolicy
+metadata:
+  name: ext-proc-policy
+  namespace: default
+spec:
+  extProc:
+    - backendRefs:
+      - group: ""
+        kind: Service
+        name: instance-gateway-ext-proc
+        port: 9002
+      processingMode:
+        request:
+          body: Buffered
+        response:
+      messageTimeout: 5s
+  targetRef:
+    group: gateway.networking.k8s.io
+    kind: Gateway
+    name: llm-gateway
\ No newline at end of file
diff --git a/examples/poc/manifests/samples/gateway.yaml b/examples/poc/manifests/gateway/gateway.yaml
similarity index 53%
rename from examples/poc/manifests/samples/gateway.yaml
rename to examples/poc/manifests/gateway/gateway.yaml
index 0f3f1803..e98f1065 100644
--- a/examples/poc/manifests/samples/gateway.yaml
+++ b/examples/poc/manifests/gateway/gateway.yaml
@@ -10,3 +10,11 @@ spec:
     - name: http
       protocol: HTTP
       port: 8080
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: GatewayClass
+metadata:
+  name: llm-gateway
+spec:
+  controllerName: gateway.envoyproxy.io/gatewayclass-controller
+---
\ No newline at end of file
diff --git a/examples/poc/manifests/patch_policy.yaml b/examples/poc/manifests/gateway/patch_policy.yaml
similarity index 100%
rename from examples/poc/manifests/patch_policy.yaml
rename to examples/poc/manifests/gateway/patch_policy.yaml
diff --git a/examples/poc/manifests/samples/vllm-lora-deployment.yaml b/examples/poc/manifests/vllm/vllm-lora-deployment.yaml
similarity index 100%
rename from examples/poc/manifests/samples/vllm-lora-deployment.yaml
rename to examples/poc/manifests/vllm/vllm-lora-deployment.yaml
diff --git a/examples/poc/manifests/samples/vllm-lora-service.yaml b/examples/poc/manifests/vllm/vllm-lora-service.yaml
similarity index 100%
rename from examples/poc/manifests/samples/vllm-lora-service.yaml
rename to examples/poc/manifests/vllm/vllm-lora-service.yaml

From e98db98e41c867566c1d08d79ca97be1cbc75983 Mon Sep 17 00:00:00 2001
From: Kellen Swain <kfswain@google.com>
Date: Thu, 10 Oct 2024 21:07:44 +0000
Subject: [PATCH 5/8] moving llm-gw ext-proc port to 8081

---
 examples/poc/manifests/gateway/gateway.yaml      | 9 ++++++---
 examples/poc/manifests/gateway/patch_policy.yaml | 4 ++--
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/examples/poc/manifests/gateway/gateway.yaml b/examples/poc/manifests/gateway/gateway.yaml
index e98f1065..b7cdf3ff 100644
--- a/examples/poc/manifests/gateway/gateway.yaml
+++ b/examples/poc/manifests/gateway/gateway.yaml
@@ -3,18 +3,21 @@
 apiVersion: gateway.networking.k8s.io/v1
 kind: Gateway
 metadata:
-  name: llm-gateway
+  name: <GATEWAY-NAME>
 spec:
-  gatewayClassName: llm-gateway
+  gatewayClassName: <GATEWAY-NAME>
   listeners:
     - name: http
       protocol: HTTP
       port: 8080
+    - name: llm-gw
+      protocol: HTTP
+      port: 8081
 ---
 apiVersion: gateway.networking.k8s.io/v1
 kind: GatewayClass
 metadata:
-  name: llm-gateway
+  name: <GATEWAY-NAME>
 spec:
   controllerName: gateway.envoyproxy.io/gatewayclass-controller
 ---
\ No newline at end of file
diff --git a/examples/poc/manifests/gateway/patch_policy.yaml b/examples/poc/manifests/gateway/patch_policy.yaml
index 1ff0b74d..b45d2afa 100644
--- a/examples/poc/manifests/gateway/patch_policy.yaml
+++ b/examples/poc/manifests/gateway/patch_policy.yaml
@@ -7,7 +7,7 @@ spec:
   targetRef:
     group: gateway.networking.k8s.io
     kind: Gateway
-    name: inference-gateway
+    name: <GATEWAY-NAME>
   type: JSONPatch
   jsonPatches:
     # Necessary to create a cluster of the type: ORIGINAL_DST to allow for 
@@ -34,7 +34,7 @@ spec:
     # cluster we just made.
     - type: "type.googleapis.com/envoy.config.listener.v3.Listener"
       # The listener name is of the form <GatewayNamespace>/<GatewayName>/<GatewayListenerName>
-      name: default/inference-gateway/http
+      name: default/<GATEWAY-NAME>/llm-gw
       operation:
         op: add
         path: "/filter_chains"

From 234a0ac773727cda7a0dafb343ec40a257d5a952 Mon Sep 17 00:00:00 2001
From: Kellen Swain <kfswain@google.com>
Date: Mon, 14 Oct 2024 15:31:25 +0000
Subject: [PATCH 6/8] Update envoy to patch an HTTPRoute virtual host. Also
 adding the manifests to the top level Ext-Proc implementation

---
 .../gateway/enable_patch_policy.yaml          | 15 ++--
 examples/poc/manifests/gateway/ext_proc.yaml  |  4 +-
 examples/poc/manifests/gateway/gateway.yaml   | 26 ++++++-
 .../poc/manifests/gateway/patch_policy.yaml   | 48 ++-----------
 pkg/README.md                                 | 48 +++++++++++++
 pkg/manifests/enable_patch_policy.yaml        | 26 +++++++
 pkg/manifests/ext_proc.yaml                   | 69 +++++++++++++++++++
 pkg/manifests/gateway.yaml                    | 47 +++++++++++++
 pkg/manifests/patch_policy.yaml               | 38 ++++++++++
 pkg/placeholder.md                            |  0
 10 files changed, 267 insertions(+), 54 deletions(-)
 create mode 100644 pkg/README.md
 create mode 100644 pkg/manifests/enable_patch_policy.yaml
 create mode 100644 pkg/manifests/ext_proc.yaml
 create mode 100644 pkg/manifests/gateway.yaml
 create mode 100644 pkg/manifests/patch_policy.yaml
 delete mode 100644 pkg/placeholder.md

diff --git a/examples/poc/manifests/gateway/enable_patch_policy.yaml b/examples/poc/manifests/gateway/enable_patch_policy.yaml
index 2b72697b..c1d00e9a 100644
--- a/examples/poc/manifests/gateway/enable_patch_policy.yaml
+++ b/examples/poc/manifests/gateway/enable_patch_policy.yaml
@@ -16,10 +16,11 @@ data:
     gateway:
       controllerName: gateway.envoyproxy.io/gatewayclass-controller
     extensionApis:
-      enableEnvoyPatchPolicy: true
-#       admin:
-#        enablePprof: true
-#        address:
-#          host: 127.0.0.1
-#          port: 19000
-#       enabledDumpConfig: true
+      enableEnvoyPatchPolicy: true      
+      enableBackend: true
+#    admin:
+#      enablePprof: true
+#      address:
+#        host: 127.0.0.1
+#        port: 19000
+#      enableDumpConfig: true
diff --git a/examples/poc/manifests/gateway/ext_proc.yaml b/examples/poc/manifests/gateway/ext_proc.yaml
index ff956640..6112fa9e 100644
--- a/examples/poc/manifests/gateway/ext_proc.yaml
+++ b/examples/poc/manifests/gateway/ext_proc.yaml
@@ -65,5 +65,5 @@ spec:
       messageTimeout: 5s
   targetRef:
     group: gateway.networking.k8s.io
-    kind: Gateway
-    name: llm-gateway
\ No newline at end of file
+    kind: HTTPRoute
+    name: llm-route
\ No newline at end of file
diff --git a/examples/poc/manifests/gateway/gateway.yaml b/examples/poc/manifests/gateway/gateway.yaml
index b7cdf3ff..b964f911 100644
--- a/examples/poc/manifests/gateway/gateway.yaml
+++ b/examples/poc/manifests/gateway/gateway.yaml
@@ -20,4 +20,28 @@ metadata:
   name: <GATEWAY-NAME>
 spec:
   controllerName: gateway.envoyproxy.io/gatewayclass-controller
----
\ No newline at end of file
+---
+apiVersion: gateway.envoyproxy.io/v1alpha1
+kind: Backend
+metadata:
+  name: backend-dummy
+spec:
+  endpoints:
+    - fqdn:
+        # Both these values are arbitrary and unused as the PatchPolicy redirects requests.
+        hostname: 'foo.bar.com'
+        port: 8080
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: llm-route
+spec:
+  parentRefs:
+    - name: inference-gateway
+      sectionName: llm-gw
+  rules:
+  - backendRefs:
+      - group: gateway.envoyproxy.io
+        kind: Backend
+        name: backend-dummy
\ No newline at end of file
diff --git a/examples/poc/manifests/gateway/patch_policy.yaml b/examples/poc/manifests/gateway/patch_policy.yaml
index b45d2afa..b7681954 100644
--- a/examples/poc/manifests/gateway/patch_policy.yaml
+++ b/examples/poc/manifests/gateway/patch_policy.yaml
@@ -30,49 +30,9 @@ spec:
           lb_policy: CLUSTER_PROVIDED
           dns_lookup_family: V4_ONLY
 
-    # The listener is required to route requests to the original destination
-    # cluster we just made.
-    - type: "type.googleapis.com/envoy.config.listener.v3.Listener"
-      # The listener name is of the form <GatewayNamespace>/<GatewayName>/<GatewayListenerName>
+    - type: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration"
       name: default/<GATEWAY-NAME>/llm-gw
       operation:
-        op: add
-        path: "/filter_chains"
-        value:
-          - filters:
-            - name: envoy.filters.network.http_connection_manager
-              typed_config:
-                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
-                stat_prefix: http
-                codec_type: AUTO
-                route_config:
-                  name: local_route
-                  virtual_hosts:      
-                    - name: backend
-                      domains: ["*"]
-                      routes:
-                        - match:
-                            prefix: "/"
-                          route:  
-                            cluster: original_destination_cluster
-                            timeout: 10s
-                http_filters:
-                - name: envoy.filters.http.ext_proc
-                  typed_config:
-                    "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
-                    failure_mode_allow: false
-                    grpc_service:
-                      envoy_grpc:
-                        # This is the cluster name as created by the EnvoyExtensionPolicy
-                        # Name is of the form <CRDKind>/<GatewayNamespace>/<ExtensionPolicyName>/<IndexOfBackend>
-                        cluster_name: envoyextensionpolicy/default/ext-proc-policy/0
-                    processing_mode:
-                      request_header_mode: "SEND"
-                      response_header_mode: "SEND"
-                      request_body_mode: "BUFFERED"
-                      response_body_mode: "NONE"
-                      request_trailer_mode: "SKIP"
-                      response_trailer_mode: "SKIP"
-                - name: envoy.filters.http.router
-                  typed_config:
-                    "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
\ No newline at end of file
+        op: replace
+        path: "/virtual_hosts/1/routes/0/route/cluster"
+        value: original_destination_cluster
\ No newline at end of file
diff --git a/pkg/README.md b/pkg/README.md
new file mode 100644
index 00000000..a89d5489
--- /dev/null
+++ b/pkg/README.md
@@ -0,0 +1,48 @@
+## Quickstart
+
+### Steps
+
+1. **Deploy Sample vLLM Application**
+
+   A sample vLLM deployment with the proper protocol to work with LLM Instance Gateway can be found [here](https://github.com/kubernetes-sigs/llm-instance-gateway/blob/6f9869d6595d2d0f8e6febcbec0f348cb44a3012/examples/poc/manifests/samples/vllm-lora-deployment.yaml#L18).
+
+1. **Update Envoy Gateway Config to enable Patch Policy**
+
+   Our custom LLM Gateway ext-proc is patched into the existing envoy gateway via `EnvoyPatchPolicy`. To enable this feature, we must extend the Envoy Gateway config map. To do this, simply run:
+   ```bash
+   kubectl apply -f ./manifests/gateway/enable_patch_policy.yaml
+   kubectl rollout restart deployment envoy-gateway -n envoy-gateway-system
+
+   ```
+   Additionally, if you would like to enable the admin interface, you can uncomment the admin lines and run this again.
+
+
+1. **Deploy Gateway**
+
+   ```bash
+   kubectl apply -f ./manifests/gateway/gateway.yaml
+   ```
+
+1. **Deploy Ext-Proc**
+
+   ```bash
+   kubectl apply -f ./manifests/gateway/ext_proc.yaml
+   kubectl apply -f ./manifests/gateway/patch_policy.yaml
+   ```
+   **NOTE**: Ensure the `instance-gateway-ext-proc` deployment is updated with the pod names and internal IP addresses of the vLLM replicas. This step is crucial for the correct routing of requests based on headers. This won't be needed once we make ext proc dynamically read the pods.
+
+1. **Try it out**
+
+   Wait until the gateway is ready.
+
+   ```bash
+   IP=$(kubectl get gateway/llm-gateway -o jsonpath='{.status.addresses[0].value}')
+   PORT=8081
+
+   curl -i ${IP}:${PORT}/v1/completions -H 'Content-Type: application/json' -d '{
+   "model": "tweet-summary",
+   "prompt": "Write as if you were a critic: San Francisco",
+   "max_tokens": 100,
+   "temperature": 0
+   }'
+   ```
\ No newline at end of file
diff --git a/pkg/manifests/enable_patch_policy.yaml b/pkg/manifests/enable_patch_policy.yaml
new file mode 100644
index 00000000..c1d00e9a
--- /dev/null
+++ b/pkg/manifests/enable_patch_policy.yaml
@@ -0,0 +1,26 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: envoy-gateway-config
+  namespace: envoy-gateway-system
+data:
+# This manifest's main purpose is to set `extensionApis.enableEnvoyPatchPolicy` to `true`.
+# Any field under `admin` is optional and only enables the admin endpoints for debugging.
+# Admin Interface: https://www.envoyproxy.io/docs/envoy/latest/operations/admin
+# PatchPolicy docs: https://gateway.envoyproxy.io/docs/tasks/extensibility/envoy-patch-policy/#enable-envoypatchpolicy
+  envoy-gateway.yaml: |
+    apiVersion: gateway.envoyproxy.io/v1alpha1
+    kind: EnvoyGateway
+    provider:
+      type: Kubernetes
+    gateway:
+      controllerName: gateway.envoyproxy.io/gatewayclass-controller
+    extensionApis:
+      enableEnvoyPatchPolicy: true
+      enableBackend: true
+#    admin:
+#      enablePprof: true
+#      address:
+#        host: 127.0.0.1
+#        port: 19000
+#      enableDumpConfig: true
diff --git a/pkg/manifests/ext_proc.yaml b/pkg/manifests/ext_proc.yaml
new file mode 100644
index 00000000..6112fa9e
--- /dev/null
+++ b/pkg/manifests/ext_proc.yaml
@@ -0,0 +1,69 @@
+apiVersion: apps/v1
+kind: Deployment  # runs the ext-proc container alongside a `curl` container
+metadata:
+  name: instance-gateway-ext-proc
+  namespace: default
+  labels:
+    app: instance-gateway-ext-proc
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: instance-gateway-ext-proc
+  template:
+    metadata:
+      labels:
+        app: instance-gateway-ext-proc
+    spec:
+      containers:
+      - name: instance-gateway-ext-proc
+        image: ghcr.io/tomatillo-and-multiverse/ext-proc:demo
+        args:
+        #TODO: specify label selector and dynamically update pods
+        - -pods
+        - "vllm-78665f78c4-h4kx4,vllm-78665f78c4-hnz84"  # sample values; replace with the names of your vLLM pods
+        - -podIPs
+        - "10.24.11.6:8000,10.24.5.7:8000"  # sample values; replace with your vLLM pods' internal IP:port pairs
+        - -enable-fairness
+        - "false"
+        ports:
+        - containerPort: 9002  # port the instance-gateway-ext-proc Service targets
+      - name: curl  # NOTE(review): presumably a debugging sidecar (it only sleeps) — confirm it is needed
+        image: curlimages/curl
+        command: ["sleep", "3600"]
+---
+apiVersion: v1
+kind: Service  # cluster-internal endpoint for the ext-proc pods
+metadata:
+  name: instance-gateway-ext-proc
+  namespace: default
+spec:
+  selector:
+    app: instance-gateway-ext-proc  # matches the pod label set by the ext-proc Deployment template
+  ports:
+    - protocol: TCP
+      port: 9002
+      targetPort: 9002  # the ext-proc container's containerPort
+  type: ClusterIP
+---
+apiVersion: gateway.envoyproxy.io/v1alpha1
+kind: EnvoyExtensionPolicy  # wires the ext-proc Service into the `llm-route` HTTPRoute
+metadata:
+  name: ext-proc-policy
+  namespace: default
+spec:
+  extProc:
+    - backendRefs:
+      - group: ""
+        kind: Service
+        name: instance-gateway-ext-proc
+        port: 9002  # same port the instance-gateway-ext-proc Service exposes
+      processingMode:
+        request:
+          body: Buffered
+        response:  # NOTE(review): bare key parses as null — confirm whether `{}` was intended
+      messageTimeout: 5s
+  targetRef:
+    group: gateway.networking.k8s.io
+    kind: HTTPRoute
+    name: llm-route  # HTTPRoute defined in gateway.yaml
\ No newline at end of file
diff --git a/pkg/manifests/gateway.yaml b/pkg/manifests/gateway.yaml
new file mode 100644
index 00000000..b964f911
--- /dev/null
+++ b/pkg/manifests/gateway.yaml
@@ -0,0 +1,47 @@
+
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: Gateway
+metadata:
+  name: <GATEWAY-NAME>
+spec:
+  gatewayClassName: <GATEWAY-NAME>
+  listeners:
+    - name: http
+      protocol: HTTP
+      port: 8080
+    - name: llm-gw  # listener the llm-route HTTPRoute attaches to via sectionName
+      protocol: HTTP
+      port: 8081
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: GatewayClass
+metadata:
+  name: <GATEWAY-NAME>
+spec:
+  controllerName: gateway.envoyproxy.io/gatewayclass-controller
+---
+apiVersion: gateway.envoyproxy.io/v1alpha1
+kind: Backend
+metadata:
+  name: backend-dummy
+spec:
+  endpoints:
+    - fqdn:
+        # Both these values are arbitrary and unused as the PatchPolicy redirects requests.
+        hostname: 'foo.bar.com'
+        port: 8080
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: llm-route
+spec:
+  parentRefs:
+    - name: <GATEWAY-NAME>  # must match the Gateway's metadata.name above
+      sectionName: llm-gw
+  rules:
+  - backendRefs:
+      - group: gateway.envoyproxy.io
+        kind: Backend
+        name: backend-dummy
\ No newline at end of file
diff --git a/pkg/manifests/patch_policy.yaml b/pkg/manifests/patch_policy.yaml
new file mode 100644
index 00000000..b7681954
--- /dev/null
+++ b/pkg/manifests/patch_policy.yaml
@@ -0,0 +1,38 @@
+apiVersion: gateway.envoyproxy.io/v1alpha1
+kind: EnvoyPatchPolicy
+metadata:
+  name: custom-response-patch-policy
+  namespace: default
+spec:
+  targetRef:
+    group: gateway.networking.k8s.io
+    kind: Gateway
+    name: <GATEWAY-NAME>
+  type: JSONPatch
+  jsonPatches:
+    # Creates a cluster of type ORIGINAL_DST, which is necessary to allow
+    # direct pod scheduling; our scheduling relies on this heavily.
+    # Specifically, the field `original_dst_lb_config` allows us to enable
+    # `use_http_header` and `http_header_name`.
+    # Source: https://www.envoyproxy.io/docs/envoy/latest/api-v3/config/cluster/v3/cluster.proto
+    - type: "type.googleapis.com/envoy.config.cluster.v3.Cluster"
+      name: original_destination_cluster
+      operation:
+        op: add
+        path: ""
+        value:
+          name: original_destination_cluster
+          type: ORIGINAL_DST
+          original_dst_lb_config:
+            use_http_header: true
+            http_header_name: "target-pod"  # NOTE(review): presumably set by ext-proc to the chosen pod's address — verify
+          connect_timeout: 6s
+          lb_policy: CLUSTER_PROVIDED
+          dns_lookup_family: V4_ONLY
+
+    - type: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration"
+      name: default/<GATEWAY-NAME>/llm-gw  # appears to follow <namespace>/<gateway name>/<listener name>
+      operation:
+        op: replace
+        path: "/virtual_hosts/1/routes/0/route/cluster"  # NOTE(review): index-based; assumes the llm route is virtual host 1, route 0 — confirm
+        value: original_destination_cluster
\ No newline at end of file
diff --git a/pkg/placeholder.md b/pkg/placeholder.md
deleted file mode 100644
index e69de29b..00000000

From cc9105fa7b76ecb2241af84d73839e37fabd8624 Mon Sep 17 00:00:00 2001
From: Kellen Swain <kfswain@google.com>
Date: Mon, 14 Oct 2024 15:31:57 +0000
Subject: [PATCH 7/8] Removing image ref so the most recent image is used

---
 pkg/manifests/ext_proc.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/manifests/ext_proc.yaml b/pkg/manifests/ext_proc.yaml
index 6112fa9e..07babb8a 100644
--- a/pkg/manifests/ext_proc.yaml
+++ b/pkg/manifests/ext_proc.yaml
@@ -17,7 +17,7 @@ spec:
     spec:
       containers:
       - name: instance-gateway-ext-proc
-        image: ghcr.io/tomatillo-and-multiverse/ext-proc:demo
+        image: <BUILT-IMAGE>
         args:
         #TODO: specify label selector and dynamically update pods
         - -pods

From 32f050cd2844a61307a0ade3739acddcb52f1656 Mon Sep 17 00:00:00 2001
From: Kellen Swain <kfswain@google.com>
Date: Wed, 16 Oct 2024 20:30:47 +0000
Subject: [PATCH 8/8] Grammatical changes

---
 examples/poc/README.md | 2 +-
 pkg/README.md          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/poc/README.md b/examples/poc/README.md
index 460ba1da..739084a9 100644
--- a/examples/poc/README.md
+++ b/examples/poc/README.md
@@ -33,7 +33,7 @@ This project sets up an Envoy gateway with a custom external processing which im
    kubectl rollout restart deployment envoy-gateway -n envoy-gateway-system
 
    ```
-   Additionally, if you would like the enable the admin interface, you can uncomment the admin lines and run this again.
+   Additionally, if you would like to enable the admin interface, you can uncomment the admin lines and run this again.
 
 
 1. **Deploy Gateway**
diff --git a/pkg/README.md b/pkg/README.md
index a89d5489..eee9a68e 100644
--- a/pkg/README.md
+++ b/pkg/README.md
@@ -14,7 +14,7 @@
    kubectl rollout restart deployment envoy-gateway -n envoy-gateway-system
 
    ```
-   Additionally, if you would like the enable the admin interface, you can uncomment the admin lines and run this again.
+   Additionally, if you would like to enable the admin interface, you can uncomment the admin lines and run this again.
 
 
 1. **Deploy Gateway**