From 6bc07c6836892dbdf96322e5b66d6c1f57a4a93e Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Tue, 25 Mar 2025 14:33:50 -0700
Subject: [PATCH 01/49] Create resources.yaml for kgateway

---
 .../manifests/gateway/kgateway/resources.yaml | 40 +++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 config/manifests/gateway/kgateway/resources.yaml

diff --git a/config/manifests/gateway/kgateway/resources.yaml b/config/manifests/gateway/kgateway/resources.yaml
new file mode 100644
index 00000000..2856a6d2
--- /dev/null
+++ b/config/manifests/gateway/kgateway/resources.yaml
@@ -0,0 +1,40 @@
+# Requires Kgateway 2.0.0 or greater.
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: Gateway
+metadata:
+  name: inference-gateway
+spec:
+  gatewayClassName: kgateway
+  listeners:
+    - name: http
+      protocol: HTTP
+      port: 8080
+    - name: llm-gw
+      protocol: HTTP
+      port: 8081
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: llm-route
+spec:
+  parentRefs:
+  - group: gateway.networking.k8s.io
+    kind: Gateway
+    name: inference-gateway
+    sectionName: llm-gw
+  rules:
+  - backendRefs:
+    - group: inference.networking.x-k8s.io
+      kind: InferencePool
+      name: vllm-llama2-7b
+      port: 8000
+      weight: 1
+    matches:
+    - path:
+        type: PathPrefix
+        value: /
+    timeouts:
+      backendRequest: 24h
+      request: 24h

From 63d7c403de61ed3ba4c5b8d0c73a69ed7ae0b123 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Tue, 25 Mar 2025 14:35:29 -0700
Subject: [PATCH 02/49] Update getting started guide for KGateway

---
 site-src/guides/index.md | 124 ++++++++++++++++++++++++++++-----------
 1 file changed, 91 insertions(+), 33 deletions(-)

diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index bcea5f9b..5637a8ee 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -3,7 +3,6 @@
 This quickstart guide is intended for engineers familiar with k8s and model servers (vLLM in this instance). The goal of this guide is to get a first, single InferencePool up and running! 
 
 ## **Prerequisites**
- - Envoy Gateway [v1.3.0](https://gateway.envoyproxy.io/docs/install/install-yaml/#install-with-yaml) or higher
  - A cluster with:
     - Support for services of type `LoadBalancer`. (This can be validated by ensuring your Envoy Gateway is up and running).
    For example, with Kind, you can follow [these steps](https://kind.sigs.k8s.io/docs/user/loadbalancer).
@@ -56,55 +55,114 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml
    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml
    ```
-   
+
 ### Deploy InferenceModel
 
    Deploy the sample InferenceModel which is configured to load balance traffic between the `tweet-summary-0` and `tweet-summary-1`
    [LoRA adapters](https://docs.vllm.ai/en/latest/features/lora.html) of the sample model server.
+
    ```bash
    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencemodel.yaml
    ```
 
-### Update Envoy Gateway Config to enable Patch Policy**
+### Deploy Inference Gateway
 
-   Our custom LLM Gateway ext-proc is patched into the existing envoy gateway via `EnvoyPatchPolicy`. To enable this feature, we must extend the Envoy Gateway config map. To do this, simply run:
-   ```bash
-   kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/enable_patch_policy.yaml
-   kubectl rollout restart deployment envoy-gateway -n envoy-gateway-system
-   ```
-   Additionally, if you would like to enable the admin interface, you can uncomment the admin lines and run this again.
+   Choose one of the following options to deploy an Inference Gateway.
 
-### Deploy Gateway
+=== "Envoy Gateway"
 
-   ```bash
-   kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gateway.yaml
-   ```
-   > **_NOTE:_** This file couples together the gateway infra and the HTTPRoute infra for a convenient, quick startup. Creating additional/different InferencePools on the same gateway will require an additional set of: `Backend`, `HTTPRoute`, the resources included in the `./config/manifests/gateway/ext-proc.yaml` file, and an additional `./config/manifests/gateway/patch_policy.yaml` file. ***Should you choose to experiment, familiarity with xDS and Envoy are very useful.***
+      1. Requirements
 
-   Confirm that the Gateway was assigned an IP address and reports a `Programmed=True` status:
-   ```bash
-   $ kubectl get gateway inference-gateway
-   NAME                CLASS               ADDRESS         PROGRAMMED   AGE
-   inference-gateway   inference-gateway   <MY_ADDRESS>    True         22s
-   ```
-### Deploy the InferencePool and Extension
+         - Envoy Gateway [v1.3.0](https://gateway.envoyproxy.io/docs/install/install-yaml/#install-with-yaml) or higher.
 
-   ```bash
-   kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencepool.yaml
-   ```
-### Deploy Envoy Gateway Custom Policies
+      1. Update Envoy Gateway Config to enable Patch Policy
 
-   ```bash
-   kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/patch_policy.yaml
-   ```
-   > **_NOTE:_** This is also per InferencePool, and will need to be configured to support the new pool should you wish to experiment further.
-   
-### **OPTIONALLY**: Apply Traffic Policy
+         Our custom LLM Gateway ext-proc is patched into the existing Envoy Gateway via `EnvoyPatchPolicy`. To enable this feature, we must extend the
+         Envoy Gateway config map. To do this, apply the following manifest and restart Envoy Gateway:
+
+         ```bash
+         kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/enable_patch_policy.yaml
+         kubectl rollout restart deployment envoy-gateway -n envoy-gateway-system
+         ```
+
+         Additionally, if you would like to enable the admin interface, you can uncomment the admin lines and run this again.
+
+      1. Deploy GatewayClass, Gateway, Backend, and HTTPRoute resources
+
+         ```bash
+         kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gateway.yaml
+         ```
+
+         > **_NOTE:_** This file couples together the gateway infra and the HTTPRoute infra for a convenient, quick startup. Creating additional/different InferencePools on the same gateway will require an additional set of: `Backend`, `HTTPRoute`, the resources included in the `./config/manifests/gateway/ext-proc.yaml` file, and an additional `./config/manifests/gateway/patch_policy.yaml` file. ***Should you choose to experiment, familiarity with xDS and Envoy are very useful.***
+
+         Confirm that the Gateway was assigned an IP address and reports a `Programmed=True` status:
+         ```bash
+         $ kubectl get gateway inference-gateway
+         NAME                CLASS               ADDRESS         PROGRAMMED   AGE
+         inference-gateway   inference-gateway   <MY_ADDRESS>    True         22s
+         ```
 
-   For high-traffic benchmarking you can apply this manifest to avoid any defaults that can cause timeouts/errors.
+      1. Deploy Envoy Gateway Custom Policies
+
+         ```bash
+         kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/patch_policy.yaml
+         ```
+
+         > **_NOTE:_** This is also per InferencePool, and will need to be configured to support the new pool should you wish to experiment further.
+
+      1. Apply Traffic Policy (Optional)
+
+         For high-traffic benchmarking you can apply this manifest to avoid any defaults that can cause timeouts/errors.
+
+         ```bash
+         kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/traffic_policy.yaml
+         ```
+
+=== "Kgateway"
+
+      [Kgateway](https://kgateway.dev/) v2.0.0 adds support for inference extension as a **technical preview**. This means do not
+      run Kgateway with inference extension in production environments. Refer to [Issue 10411](https://github.com/kgateway-dev/kgateway/issues/10411)
+      for the list of caveats, supported features, etc.
+
+      1. Requirements
+
+         - [Helm](https://helm.sh/docs/intro/install/) installed.
+         - Gateway API [CRDs](https://gateway-api.sigs.k8s.io/guides/#installing-gateway-api) installed.
+
+      2. Install Kgateway CRDs
+
+         ```bash
+         helm upgrade -i --create-namespace --namespace kgateway-system --version v2.0.0-main kgateway-crds https://github.com/danehans/toolbox/raw/refs/heads/main/charts/338661f3be-kgateway-crds-1.0.1-dev.tgz
+         ```
+
+      3. Install Kgateway
+
+         ```bash
+         helm upgrade --install kgateway "https://github.com/danehans/toolbox/raw/refs/heads/main/charts/338661f3be-kgateway-1.0.1-dev.tgz" \
+         -n kgateway-system \
+         --set image.registry=danehans \
+         --set image.pullPolicy=Always \
+         --set inferenceExtension.enabled="true" \
+         --version 1.0.1-dev
+         ```
+
+      4. Deploy Gateway and HTTPRoute resources
+
+         ```bash
+         kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/kgateway/resources.yaml
+         ```
+
+         Confirm that the Gateway was assigned an IP address and reports a `Programmed=True` status:
+         ```bash
+         $ kubectl get gateway inference-gateway
+         NAME                CLASS               ADDRESS         PROGRAMMED   AGE
+         inference-gateway   kgateway            <MY_ADDRESS>    True         22s
+         ```
+
+### Deploy the InferencePool and Extension
 
    ```bash
-   kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/traffic_policy.yaml
+   kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencepool.yaml
    ```
 
 ### Try it out

From 048189af7cf50787d1c281d002021e4519284713 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Tue, 25 Mar 2025 14:58:25 -0700
Subject: [PATCH 03/49] Replace Envoy Gateway user guide with GKE user guide

---
 site-src/guides/index.md | 49 ++++++++++++++--------------------------
 1 file changed, 17 insertions(+), 32 deletions(-)

diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index 5637a8ee..f26f0038 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -69,32 +69,33 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
 
    Choose one of the following options to deploy an Inference Gateway.
 
-=== "Envoy Gateway"
+=== "GKE"
 
-      1. Requirements
-
-         - Envoy Gateway [v1.3.0](https://gateway.envoyproxy.io/docs/install/install-yaml/#install-with-yaml) or higher.
-
-      1. Update Envoy Gateway Config to enable Patch Policy
-
-         Our custom LLM Gateway ext-proc is patched into the existing Envoy Gateway via `EnvoyPatchPolicy`. To enable this feature, we must extend the
-         Envoy Gateway config map. To do this, apply the following manifest and restart Envoy Gateway:
+      1. Enable the Gateway API
 
          ```bash
-         kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/enable_patch_policy.yaml
-         kubectl rollout restart deployment envoy-gateway -n envoy-gateway-system
+         gcloud container clusters update <CLUSTER_NAME> \
+             --location=<CLUSTER_LOCATION> \
+             --gateway-api=standard
          ```
 
-         Additionally, if you would like to enable the admin interface, you can uncomment the admin lines and run this again.
+      1. Create the proxy-only subnet
+         A proxy-only subnet provides a set of IP addresses that Google uses to run Envoy proxies on your behalf. 
+         ```
+         gcloud compute networks subnets create proxy-only-subnet \
+             --purpose=REGIONAL_MANAGED_PROXY \
+             --role=ACTIVE \
+             --region=<REGION> \
+             --network=<VPC_NETWORK_NAME> \
+             --range=<CIDR_RANGE>
+         ```
 
-      1. Deploy GatewayClass, Gateway, Backend, and HTTPRoute resources
+      1. Deploy Gateway, HTTPRoute and HealthCheckPolicy resources
 
          ```bash
-         kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gateway.yaml
+         kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gke/resources.yaml
          ```
 
-         > **_NOTE:_** This file couples together the gateway infra and the HTTPRoute infra for a convenient, quick startup. Creating additional/different InferencePools on the same gateway will require an additional set of: `Backend`, `HTTPRoute`, the resources included in the `./config/manifests/gateway/ext-proc.yaml` file, and an additional `./config/manifests/gateway/patch_policy.yaml` file. ***Should you choose to experiment, familiarity with xDS and Envoy are very useful.***
-
          Confirm that the Gateway was assigned an IP address and reports a `Programmed=True` status:
          ```bash
          $ kubectl get gateway inference-gateway
@@ -102,22 +103,6 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
          inference-gateway   inference-gateway   <MY_ADDRESS>    True         22s
          ```
 
-      1. Deploy Envoy Gateway Custom Policies
-
-         ```bash
-         kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/patch_policy.yaml
-         ```
-
-         > **_NOTE:_** This is also per InferencePool, and will need to be configured to support the new pool should you wish to experiment further.
-
-      1. Apply Traffic Policy (Optional)
-
-         For high-traffic benchmarking you can apply this manifest to avoid any defaults that can cause timeouts/errors.
-
-         ```bash
-         kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/traffic_policy.yaml
-         ```
-
 === "Kgateway"
 
       [Kgateway](https://kgateway.dev/) v2.0.0 adds support for inference extension as a **technical preview**. This means do not

From a679070d895361507f7130cdb41f2ff3550f4b4d Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Tue, 25 Mar 2025 15:00:02 -0700
Subject: [PATCH 04/49] Create resources.yaml for GKE Gateway

---
 config/manifests/gateway/gke/resources.yaml | 46 +++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 config/manifests/gateway/gke/resources.yaml

diff --git a/config/manifests/gateway/gke/resources.yaml b/config/manifests/gateway/gke/resources.yaml
new file mode 100644
index 00000000..b4461660
--- /dev/null
+++ b/config/manifests/gateway/gke/resources.yaml
@@ -0,0 +1,46 @@
+kind: Gateway
+apiVersion: gateway.networking.k8s.io/v1beta1
+metadata:
+ name: e2e-inference-gateway
+spec:
+ gatewayClassName: gke-l7-regional-external-managed
+ listeners:
+ - name: http
+   port: 80
+   protocol: HTTP
+   allowedRoutes:
+     kinds:
+     - kind: HTTPRoute
+     namespaces:
+       from: All
+---
+apiVersion: gateway.networking.k8s.io/v1beta1
+kind: HTTPRoute
+metadata:
+ name: mytest-inference-httproute
+spec:
+ parentRefs:
+ - name: e2e-inference-gateway
+   kind: Gateway
+ rules:
+ - backendRefs:
+   - group: inference.networking.x-k8s.io
+     name: vllm-llama2-7b
+     kind: InferencePool
+---
+kind: HealthCheckPolicy
+apiVersion: networking.gke.io/v1
+metadata:
+  name: health-check-policy
+  namespace: default
+spec:
+  targetRef:
+    group: "inference.networking.x-k8s.io"
+    kind: InferencePool
+    name: vllm-llama2-7b
+  default:
+    config:
+      type: HTTP
+      httpHealthCheck:
+          requestPath: /health
+          port: 8000

From a627ea735e255876dfffa6dae9c7c688000a9c87 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Tue, 25 Mar 2025 15:01:08 -0700
Subject: [PATCH 05/49] Delete
 config/manifests/gateway/enable_patch_policy.yaml

---
 .../gateway/enable_patch_policy.yaml          | 27 -------------------
 1 file changed, 27 deletions(-)
 delete mode 100644 config/manifests/gateway/enable_patch_policy.yaml

diff --git a/config/manifests/gateway/enable_patch_policy.yaml b/config/manifests/gateway/enable_patch_policy.yaml
deleted file mode 100644
index 1e9818a1..00000000
--- a/config/manifests/gateway/enable_patch_policy.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: envoy-gateway-config
-  namespace: envoy-gateway-system
-data:
-# This manifest's main purpose is to set `enabledEnvoyPatchPolicy` to `true`.
-# This only needs to be ran once on your cluster (unless you'd like to change anything. i.e. enabling the admin dash)
-# Any field under `admin` is optional, and only for enabling the admin endpoints, for debugging.
-# Admin Interface: https://www.envoyproxy.io/docs/envoy/latest/operations/admin
-# PatchPolicy docs: https://gateway.envoyproxy.io/docs/tasks/extensibility/envoy-patch-policy/#enable-envoypatchpolicy 
-  envoy-gateway.yaml: |
-    apiVersion: gateway.envoyproxy.io/v1alpha1
-    kind: EnvoyGateway
-    provider:
-      type: Kubernetes
-    gateway:
-      controllerName: gateway.envoyproxy.io/gatewayclass-controller
-    extensionApis:
-      enableEnvoyPatchPolicy: true      
-      enableBackend: true
-#    admin:
-#      enablePprof: true
-#      address:
-#        host: 127.0.0.1
-#        port: 19000
-#      enabledDumpConfig: true

From 7b490deede6474de748d67eb204f53f91ef0adc5 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Tue, 25 Mar 2025 15:01:17 -0700
Subject: [PATCH 06/49] Delete config/manifests/gateway/gateway.yaml

---
 config/manifests/gateway/gateway.yaml | 50 ---------------------------
 1 file changed, 50 deletions(-)
 delete mode 100644 config/manifests/gateway/gateway.yaml

diff --git a/config/manifests/gateway/gateway.yaml b/config/manifests/gateway/gateway.yaml
deleted file mode 100644
index 32f5d484..00000000
--- a/config/manifests/gateway/gateway.yaml
+++ /dev/null
@@ -1,50 +0,0 @@
-
----
-apiVersion: gateway.networking.k8s.io/v1
-kind: Gateway
-metadata:
-  name: inference-gateway
-spec:
-  gatewayClassName: inference-gateway
-  listeners:
-    - name: http
-      protocol: HTTP
-      port: 8080
-    - name: llm-gw
-      protocol: HTTP
-      port: 8081
----
-apiVersion: gateway.networking.k8s.io/v1
-kind: GatewayClass
-metadata:
-  name: inference-gateway
-spec:
-  controllerName: gateway.envoyproxy.io/gatewayclass-controller
----
-apiVersion: gateway.envoyproxy.io/v1alpha1
-kind: Backend
-metadata:
-  name: backend-dummy
-spec:
-  endpoints:
-    - fqdn:
-        # Both these values are arbitrary and unused as the PatchPolicy redirects requests.
-        hostname: 'foo.bar.com'
-        port: 8080
----
-apiVersion: gateway.networking.k8s.io/v1
-kind: HTTPRoute
-metadata:
-  name: llm-route
-spec:
-  parentRefs:
-    - name: inference-gateway
-      sectionName: llm-gw
-  rules:
-  - backendRefs:
-      - group: gateway.envoyproxy.io
-        kind: Backend
-        name: backend-dummy
-    timeouts:
-      request: "24h"
-      backendRequest: "24h"

From 9c8d00d204718dc391fb56cb53b811705bde083f Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Tue, 25 Mar 2025 15:01:25 -0700
Subject: [PATCH 07/49] Delete config/manifests/gateway/patch_policy.yaml

---
 config/manifests/gateway/patch_policy.yaml | 123 ---------------------
 1 file changed, 123 deletions(-)
 delete mode 100644 config/manifests/gateway/patch_policy.yaml

diff --git a/config/manifests/gateway/patch_policy.yaml b/config/manifests/gateway/patch_policy.yaml
deleted file mode 100644
index a40c8e27..00000000
--- a/config/manifests/gateway/patch_policy.yaml
+++ /dev/null
@@ -1,123 +0,0 @@
-apiVersion: gateway.envoyproxy.io/v1alpha1
-kind: EnvoyPatchPolicy
-metadata:
-  name: custom-response-patch-policy
-  namespace: default
-spec:
-  targetRef:
-    group: gateway.networking.k8s.io
-    kind: Gateway
-    name: inference-gateway
-  type: JSONPatch
-  jsonPatches:
-    # Necessary to create a cluster of the type: ORIGINAL_DST to allow for 
-    # direct pod scheduling. Which is heavily utilized in our scheduling.
-    # Specifically the field `original_dst_lb_config` allows us to enable
-    # `use_http_header` and `http_header_name`. 
-    # Source: https://www.envoyproxy.io/docs/envoy/latest/api-v3/config/cluster/v3/cluster.proto
-    - type: "type.googleapis.com/envoy.config.cluster.v3.Cluster"
-      name: original_destination_cluster
-      operation:
-        op: add
-        path: ""
-        value:
-          name: original_destination_cluster
-          type: ORIGINAL_DST
-          original_dst_lb_config:
-            use_http_header: true
-            http_header_name: "x-gateway-destination-endpoint"
-          connect_timeout: 1000s
-          lb_policy: CLUSTER_PROVIDED
-          dns_lookup_family: V4_ONLY
-          circuit_breakers:
-            thresholds:
-            - max_connections: 40000
-              max_pending_requests: 40000
-              max_requests: 40000
-
-    # This ensures that envoy accepts untrusted certificates. We tried to explicitly
-    # set TrustChainVerification to ACCEPT_UNSTRUSTED, but that actually didn't work
-    # and what worked is setting the common_tls_context to empty.
-    - type: "type.googleapis.com/envoy.config.cluster.v3.Cluster"
-      name: "envoyextensionpolicy/default/ext-proc-policy/extproc/0"
-      operation:
-        op: add
-        path: "/transport_socket"
-        value:
-          name: "envoy.transport_sockets.tls"
-          typed_config:
-            "@type": "type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext"
-            common_tls_context: {}
-    - type: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration"
-      name: default/inference-gateway/llm-gw
-      operation:
-        op: replace
-        path: "/virtual_hosts/0/routes/0/route/cluster"
-        value: original_destination_cluster
-# Comment the below to disable full duplex streaming
-# NOTE: As of https://github.com/kubernetes-sigs/gateway-api-inference-extension/pull/552
-# FULL_DUPLEX_STREAMED is the primary supported protocol for ext-proc. The buffered variant is no longer
-# being actively developed, may be missing features/fixes, and will soon be removed.
-    - type: "type.googleapis.com/envoy.config.listener.v3.Listener"
-      name: "default/inference-gateway/llm-gw"
-      operation:
-        op: add
-        path: "/default_filter_chain/filters/0/typed_config/http_filters/0/typed_config/processing_mode/request_body_mode"
-        value: FULL_DUPLEX_STREAMED
-    - type: "type.googleapis.com/envoy.config.listener.v3.Listener"
-      name: "default/inference-gateway/llm-gw"
-      operation:
-        op: add
-        path: "/default_filter_chain/filters/0/typed_config/http_filters/0/typed_config/processing_mode/request_trailer_mode"
-        value: SEND
-    - type: "type.googleapis.com/envoy.config.listener.v3.Listener"
-      name: "default/inference-gateway/llm-gw"
-      operation:
-        op: add
-        path: "/default_filter_chain/filters/0/typed_config/http_filters/0/typed_config/processing_mode/response_body_mode"
-        value: FULL_DUPLEX_STREAMED
-    - type: "type.googleapis.com/envoy.config.listener.v3.Listener"
-      name: "default/inference-gateway/llm-gw"
-      operation:
-        op: replace
-        path: "/default_filter_chain/filters/0/typed_config/http_filters/0/typed_config/processing_mode/response_trailer_mode"
-        value: SEND
-    - type: "type.googleapis.com/envoy.config.listener.v3.Listener"
-      name: "default/inference-gateway/llm-gw"
-      operation:
-        op: replace
-        path: "/default_filter_chain/filters/0/typed_config/http_filters/0/typed_config/processing_mode/response_header_mode"
-        value: SEND
----
-apiVersion: gateway.envoyproxy.io/v1alpha1
-kind: EnvoyExtensionPolicy
-metadata:
-  name: ext-proc-policy
-  namespace: default
-spec:
-  extProc:
-    - backendRefs:
-      - group: ""
-        kind: Service
-        name: vllm-llama2-7b-epp
-        port: 9002
-      processingMode:
-        allowModeOverride: true
-        request:
-          body: Buffered
-        response:
-      # The timeouts are likely not needed here. We can experiment with removing/tuning them slowly.
-      # The connection limits are more important and will cause the opaque: ext_proc_gRPC_error_14 error in Envoy GW if not configured correctly. 
-      messageTimeout: 1000s
-      backendSettings:
-        circuitBreaker:
-          maxConnections: 40000
-          maxPendingRequests: 40000
-          maxParallelRequests: 40000
-        timeout:
-          tcp:
-            connectTimeout: 24h
-  targetRef:
-    group: gateway.networking.k8s.io
-    kind: HTTPRoute
-    name: llm-route

From 05199350bcbe5d55b5d1551852663468a2de6b7d Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Tue, 25 Mar 2025 15:01:37 -0700
Subject: [PATCH 08/49] Delete config/manifests/gateway/traffic_policy.yaml

---
 config/manifests/gateway/traffic_policy.yaml | 16 ----------------
 1 file changed, 16 deletions(-)
 delete mode 100644 config/manifests/gateway/traffic_policy.yaml

diff --git a/config/manifests/gateway/traffic_policy.yaml b/config/manifests/gateway/traffic_policy.yaml
deleted file mode 100644
index e110f173..00000000
--- a/config/manifests/gateway/traffic_policy.yaml
+++ /dev/null
@@ -1,16 +0,0 @@
-apiVersion: gateway.envoyproxy.io/v1alpha1
-kind: BackendTrafficPolicy
-metadata:
-  name: high-connection-route-policy
-spec:
-  targetRefs:
-  - group: gateway.networking.k8s.io
-    kind: HTTPRoute
-    name: llm-route
-  circuitBreaker:
-    maxConnections: 40000
-    maxPendingRequests: 40000
-    maxParallelRequests: 40000 
-  timeout:
-    tcp:
-      connectTimeout: 24h
\ No newline at end of file

From 3e7e74ebb07bdc819f2c8536c5ac073af347d8a3 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Tue, 25 Mar 2025 15:51:38 -0700
Subject: [PATCH 09/49] Add http2 appProtocol to EPP service

---
 config/manifests/inferencepool.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/config/manifests/inferencepool.yaml b/config/manifests/inferencepool.yaml
index ca2e4a88..96246cbd 100644
--- a/config/manifests/inferencepool.yaml
+++ b/config/manifests/inferencepool.yaml
@@ -22,6 +22,7 @@ spec:
     - protocol: TCP
       port: 9002
       targetPort: 9002
+      appProtocol: http2
   type: ClusterIP
 ---
 apiVersion: apps/v1

From a140a3e07a8b20869f44e3293a2c25213cfe185b Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Tue, 25 Mar 2025 16:29:46 -0700
Subject: [PATCH 10/49] Add user guide for Istio

---
 site-src/guides/index.md | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index f26f0038..de7c795a 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -80,6 +80,7 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
          ```
 
       1. Create the proxy-only subnet
+      
          A proxy-only subnet provides a set of IP addresses that Google uses to run Envoy proxies on your behalf. 
          ```
          gcloud compute networks subnets create proxy-only-subnet \
@@ -103,6 +104,32 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
          inference-gateway   inference-gateway   <MY_ADDRESS>    True         22s
          ```
 
+=== "Istio"
+
+      1. Install Istio
+      
+         Follow the [Istio installation guide](https://istio.io/latest/docs/setup/install/).
+
+      1. Deploy Gateway and HTTPRoute
+
+         ```bash
+         kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/resources.yaml
+         ```
+
+      1. Label the gateway
+
+         ```bash
+         kubectl label gateway inference-gateway istio.io/enable-inference-extproc=true
+         ```
+
+      1. Confirm that the Gateway was assigned an IP address and reports a `Programmed=True` status:
+      
+         ```bash
+         $ kubectl get gateway inference-gateway
+         NAME                CLASS               ADDRESS         PROGRAMMED   AGE
+         inference-gateway   istio               <MY_ADDRESS>    True         22s
+         ```
+
 === "Kgateway"
 
       [Kgateway](https://kgateway.dev/) v2.0.0 adds support for inference extension as a **technical preview**. This means do not
@@ -114,13 +141,13 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
          - [Helm](https://helm.sh/docs/intro/install/) installed.
          - Gateway API [CRDs](https://gateway-api.sigs.k8s.io/guides/#installing-gateway-api) installed.
 
-      2. Install Kgateway CRDs
+      1. Install Kgateway CRDs
 
          ```bash
          helm upgrade -i --create-namespace --namespace kgateway-system --version v2.0.0-main kgateway-crds https://github.com/danehans/toolbox/raw/refs/heads/main/charts/338661f3be-kgateway-crds-1.0.1-dev.tgz
          ```
 
-      3. Install Kgateway
+      1. Install Kgateway
 
          ```bash
          helm upgrade --install kgateway "https://github.com/danehans/toolbox/raw/refs/heads/main/charts/338661f3be-kgateway-1.0.1-dev.tgz" \
@@ -131,7 +158,7 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
          --version 1.0.1-dev
          ```
 
-      4. Deploy Gateway and HTTPRoute resources
+      1. Deploy Gateway and HTTPRoute resources
 
          ```bash
          kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/kgateway/resources.yaml

From 8a878f89e920aa06dcef89360729021f0209be3f Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Tue, 25 Mar 2025 16:36:48 -0700
Subject: [PATCH 11/49] Create resources.yaml for Istio

---
 config/manifests/gateway/istio/resources.yaml | 35 +++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 config/manifests/gateway/istio/resources.yaml

diff --git a/config/manifests/gateway/istio/resources.yaml b/config/manifests/gateway/istio/resources.yaml
new file mode 100644
index 00000000..f943bd12
--- /dev/null
+++ b/config/manifests/gateway/istio/resources.yaml
@@ -0,0 +1,35 @@
+apiVersion: gateway.networking.k8s.io/v1
+kind: Gateway
+metadata:
+  name: inference-gateway
+spec:
+  gatewayClassName: istio
+  listeners:
+  - allowedRoutes:
+      namespaces:
+        from: Same
+    name: http
+    port: 80
+    protocol: HTTP
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: llm-route
+spec:
+  hostnames:
+  - foo.example.com
+  parentRefs:
+  - group: gateway.networking.k8s.io
+    kind: Gateway
+    name: inference-gateway
+  rules:
+  - backendRefs:
+    - group: inference.networking.x-k8s.io
+      kind: InferencePool
+      name: vllm-llama2-7b
+      weight: 1
+    matches:
+    - path:
+        type: PathPrefix
+        value: /completion

From f0b59e458179e0552ba3245615947e256b2303f1 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Tue, 25 Mar 2025 16:39:31 -0700
Subject: [PATCH 12/49] Fix GKE gateway name to match the user guide

---
 config/manifests/gateway/gke/resources.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/config/manifests/gateway/gke/resources.yaml b/config/manifests/gateway/gke/resources.yaml
index b4461660..c371b2b3 100644
--- a/config/manifests/gateway/gke/resources.yaml
+++ b/config/manifests/gateway/gke/resources.yaml
@@ -1,7 +1,7 @@
 kind: Gateway
 apiVersion: gateway.networking.k8s.io/v1beta1
 metadata:
- name: e2e-inference-gateway
+ name: inference-gateway
 spec:
  gatewayClassName: gke-l7-regional-external-managed
  listeners:
@@ -20,7 +20,7 @@ metadata:
  name: mytest-inference-httproute
 spec:
  parentRefs:
- - name: e2e-inference-gateway
+ - name: inference-gateway
    kind: Gateway
  rules:
  - backendRefs:

From c06cffd20b973e32796c6eb88be9a0b6f2ccd64e Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Tue, 25 Mar 2025 16:52:48 -0700
Subject: [PATCH 13/49] Fix cleanup instructions to refer to up-to-date YAMLs

---
 site-src/guides/index.md | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index de7c795a..ea938ebe 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -198,12 +198,10 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
    The following cleanup assumes you would like to clean ALL resources that were created in this quickstart guide.  
    please be careful not to delete resources you'd like to keep.
    ```bash
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/traffic_policy.yaml --ignore-not-found
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/extension_policy.yaml --ignore-not-found
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/patch_policy.yaml --ignore-not-found
+   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gke/resources.yaml --ignore-not-found
+   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/resources.yaml --ignore-not-found
+   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/kgateway/resources.yaml --ignore-not-found
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencepool.yaml --ignore-not-found
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gateway.yaml --ignore-not-found
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/enable_patch_policy.yaml --ignore-not-found
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencemodel.yaml --ignore-not-found
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml --ignore-not-found
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml --ignore-not-found

From 21100f9e08c23680b9fe7a5dc7464b0d7f009f45 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Wed, 26 Mar 2025 10:25:20 -0700
Subject: [PATCH 14/49] Allow Istio gateway to use HTTPRoute from all
 namespaces

---
 config/manifests/gateway/istio/resources.yaml | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/config/manifests/gateway/istio/resources.yaml b/config/manifests/gateway/istio/resources.yaml
index f943bd12..2435f522 100644
--- a/config/manifests/gateway/istio/resources.yaml
+++ b/config/manifests/gateway/istio/resources.yaml
@@ -5,12 +5,14 @@ metadata:
 spec:
   gatewayClassName: istio
   listeners:
-  - allowedRoutes:
-      namespaces:
-        from: Same
-    name: http
+  - name: http
     port: 80
     protocol: HTTP
+    allowedRoutes:
+     kinds:
+     - kind: HTTPRoute
+     namespaces:
+       from: All
 ---
 apiVersion: gateway.networking.k8s.io/v1
 kind: HTTPRoute

From d8d4666895a23510ad54dffc20dbc76a91b41d15 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Wed, 26 Mar 2025 10:31:14 -0700
Subject: [PATCH 15/49] Update Kgateway port number to 80

---
 config/manifests/gateway/kgateway/resources.yaml | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/config/manifests/gateway/kgateway/resources.yaml b/config/manifests/gateway/kgateway/resources.yaml
index 2856a6d2..fc8dd1da 100644
--- a/config/manifests/gateway/kgateway/resources.yaml
+++ b/config/manifests/gateway/kgateway/resources.yaml
@@ -7,12 +7,14 @@ metadata:
 spec:
   gatewayClassName: kgateway
   listeners:
-    - name: http
-      protocol: HTTP
-      port: 8080
-    - name: llm-gw
-      protocol: HTTP
-      port: 8081
+  - name: http
+    port: 80
+    protocol: HTTP
+    allowedRoutes:
+      kinds:
+      - kind: HTTPRoute
+      namespaces:
+        from: All
 ---
 apiVersion: gateway.networking.k8s.io/v1
 kind: HTTPRoute

From ee7fa97faaa4b1691f64dd59d25658b1eaafd74a Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Wed, 26 Mar 2025 10:34:09 -0700
Subject: [PATCH 16/49] Update gateway port to 80

---
 site-src/guides/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index ea938ebe..bfa851c6 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -183,7 +183,7 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
 
    ```bash
    IP=$(kubectl get gateway/inference-gateway -o jsonpath='{.status.addresses[0].value}')
-   PORT=8081
+   PORT=80
 
    curl -i ${IP}:${PORT}/v1/completions -H 'Content-Type: application/json' -d '{
    "model": "tweet-summary",

From 59cbe2e7ce2b4bfe6fbd7421f975d05ccd58b6d9 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Wed, 26 Mar 2025 11:23:45 -0700
Subject: [PATCH 17/49] Remove the sectionName from Kgateway HTTPRoute

---
 config/manifests/gateway/kgateway/resources.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/config/manifests/gateway/kgateway/resources.yaml b/config/manifests/gateway/kgateway/resources.yaml
index fc8dd1da..b66d47ab 100644
--- a/config/manifests/gateway/kgateway/resources.yaml
+++ b/config/manifests/gateway/kgateway/resources.yaml
@@ -25,7 +25,6 @@ spec:
   - group: gateway.networking.k8s.io
     kind: Gateway
     name: inference-gateway
-    sectionName: llm-gw
   rules:
   - backendRefs:
     - group: inference.networking.x-k8s.io

From afc64dc4c1d475e823f599d2250228cd0bb0be01 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Wed, 26 Mar 2025 13:55:10 -0700
Subject: [PATCH 18/49] Create common httproute YAML

---
 config/manifests/gateway/httproute.yaml | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 config/manifests/gateway/httproute.yaml

diff --git a/config/manifests/gateway/httproute.yaml b/config/manifests/gateway/httproute.yaml
new file mode 100644
index 00000000..500e26fd
--- /dev/null
+++ b/config/manifests/gateway/httproute.yaml
@@ -0,0 +1,19 @@
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: llm-route
+spec:
+  parentRefs:
+  - group: gateway.networking.k8s.io
+    kind: Gateway
+    name: inference-gateway
+  rules:
+  - backendRefs:
+    - group: inference.networking.x-k8s.io
+      kind: InferencePool
+      name: vllm-llama2-7b
+      weight: 1
+    matches:
+    - path:
+        type: PathPrefix
+        value: /

From 8d235f6b59f961702611a861c41e3e5f79e87a7e Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Wed, 26 Mar 2025 13:56:00 -0700
Subject: [PATCH 19/49] Create healthcheck.yaml for GKE gateway

---
 config/manifests/gateway/gke/healthcheck.yaml | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 config/manifests/gateway/gke/healthcheck.yaml

diff --git a/config/manifests/gateway/gke/healthcheck.yaml b/config/manifests/gateway/gke/healthcheck.yaml
new file mode 100644
index 00000000..95f4f2d2
--- /dev/null
+++ b/config/manifests/gateway/gke/healthcheck.yaml
@@ -0,0 +1,16 @@
+kind: HealthCheckPolicy
+apiVersion: networking.gke.io/v1
+metadata:
+  name: health-check-policy
+  namespace: default
+spec:
+  targetRef:
+    group: "inference.networking.x-k8s.io"
+    kind: InferencePool
+    name: vllm-llama2-7b
+  default:
+    config:
+      type: HTTP
+      httpHealthCheck:
+          requestPath: /health
+          port: 8000

From 52318b35ed3336a94d2d7abb006ddba08633f69d Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Wed, 26 Mar 2025 13:57:13 -0700
Subject: [PATCH 20/49] Separate gateway.yaml for GKE gateway

---
 config/manifests/gateway/gke/gateway.yaml   | 15 +++++++
 config/manifests/gateway/gke/resources.yaml | 46 ---------------------
 2 files changed, 15 insertions(+), 46 deletions(-)
 create mode 100644 config/manifests/gateway/gke/gateway.yaml
 delete mode 100644 config/manifests/gateway/gke/resources.yaml

diff --git a/config/manifests/gateway/gke/gateway.yaml b/config/manifests/gateway/gke/gateway.yaml
new file mode 100644
index 00000000..3cf5645f
--- /dev/null
+++ b/config/manifests/gateway/gke/gateway.yaml
@@ -0,0 +1,15 @@
+kind: Gateway
+apiVersion: gateway.networking.k8s.io/v1beta1
+metadata:
+ name: inference-gateway
+spec:
+ gatewayClassName: gke-l7-regional-external-managed
+ listeners:
+ - name: http
+   port: 80
+   protocol: HTTP
+   allowedRoutes:
+     kinds:
+     - kind: HTTPRoute
+     namespaces:
+       from: All
diff --git a/config/manifests/gateway/gke/resources.yaml b/config/manifests/gateway/gke/resources.yaml
deleted file mode 100644
index c371b2b3..00000000
--- a/config/manifests/gateway/gke/resources.yaml
+++ /dev/null
@@ -1,46 +0,0 @@
-kind: Gateway
-apiVersion: gateway.networking.k8s.io/v1beta1
-metadata:
- name: inference-gateway
-spec:
- gatewayClassName: gke-l7-regional-external-managed
- listeners:
- - name: http
-   port: 80
-   protocol: HTTP
-   allowedRoutes:
-     kinds:
-     - kind: HTTPRoute
-     namespaces:
-       from: All
----
-apiVersion: gateway.networking.k8s.io/v1beta1
-kind: HTTPRoute
-metadata:
- name: mytest-inference-httproute
-spec:
- parentRefs:
- - name: inference-gateway
-   kind: Gateway
- rules:
- - backendRefs:
-   - group: inference.networking.x-k8s.io
-     name: vllm-llama2-7b
-     kind: InferencePool
----
-kind: HealthCheckPolicy
-apiVersion: networking.gke.io/v1
-metadata:
-  name: health-check-policy
-  namespace: default
-spec:
-  targetRef:
-    group: "inference.networking.x-k8s.io"
-    kind: InferencePool
-    name: vllm-llama2-7b
-  default:
-    config:
-      type: HTTP
-      httpHealthCheck:
-          requestPath: /health
-          port: 8000

From 9343660f330023a2e71cee0c193693d89cf9e7cf Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Wed, 26 Mar 2025 13:58:01 -0700
Subject: [PATCH 21/49] Separate gateway.yaml for Istio

---
 config/manifests/gateway/istio/gateway.yaml   | 15 ++++++++
 config/manifests/gateway/istio/resources.yaml | 37 -------------------
 2 files changed, 15 insertions(+), 37 deletions(-)
 create mode 100644 config/manifests/gateway/istio/gateway.yaml
 delete mode 100644 config/manifests/gateway/istio/resources.yaml

diff --git a/config/manifests/gateway/istio/gateway.yaml b/config/manifests/gateway/istio/gateway.yaml
new file mode 100644
index 00000000..5376b6b0
--- /dev/null
+++ b/config/manifests/gateway/istio/gateway.yaml
@@ -0,0 +1,15 @@
+apiVersion: gateway.networking.k8s.io/v1
+kind: Gateway
+metadata:
+  name: inference-gateway
+spec:
+  gatewayClassName: istio
+  listeners:
+  - name: http
+    port: 80
+    protocol: HTTP
+    allowedRoutes:
+     kinds:
+     - kind: HTTPRoute
+     namespaces:
+       from: All
diff --git a/config/manifests/gateway/istio/resources.yaml b/config/manifests/gateway/istio/resources.yaml
deleted file mode 100644
index 2435f522..00000000
--- a/config/manifests/gateway/istio/resources.yaml
+++ /dev/null
@@ -1,37 +0,0 @@
-apiVersion: gateway.networking.k8s.io/v1
-kind: Gateway
-metadata:
-  name: inference-gateway
-spec:
-  gatewayClassName: istio
-  listeners:
-  - name: http
-    port: 80
-    protocol: HTTP
-    allowedRoutes:
-     kinds:
-     - kind: HTTPRoute
-     namespaces:
-       from: All
----
-apiVersion: gateway.networking.k8s.io/v1
-kind: HTTPRoute
-metadata:
-  name: llm-route
-spec:
-  hostnames:
-  - foo.example.com
-  parentRefs:
-  - group: gateway.networking.k8s.io
-    kind: Gateway
-    name: inference-gateway
-  rules:
-  - backendRefs:
-    - group: inference.networking.x-k8s.io
-      kind: InferencePool
-      name: vllm-llama2-7b
-      weight: 1
-    matches:
-    - path:
-        type: PathPrefix
-        value: /completion

From 8ef12a843f038717bf2361fb943e0ff75b4688af Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Wed, 26 Mar 2025 13:58:45 -0700
Subject: [PATCH 22/49] Separate gateway.yaml for Kgateway

---
 .../manifests/gateway/kgateway/gateway.yaml   | 17 ++++++++
 .../manifests/gateway/kgateway/resources.yaml | 41 -------------------
 2 files changed, 17 insertions(+), 41 deletions(-)
 create mode 100644 config/manifests/gateway/kgateway/gateway.yaml
 delete mode 100644 config/manifests/gateway/kgateway/resources.yaml

diff --git a/config/manifests/gateway/kgateway/gateway.yaml b/config/manifests/gateway/kgateway/gateway.yaml
new file mode 100644
index 00000000..dccd2889
--- /dev/null
+++ b/config/manifests/gateway/kgateway/gateway.yaml
@@ -0,0 +1,17 @@
+# Requires Kgateway 2.0.0 or greater.
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: Gateway
+metadata:
+  name: inference-gateway
+spec:
+  gatewayClassName: kgateway
+  listeners:
+  - name: http
+    port: 80
+    protocol: HTTP
+    allowedRoutes:
+      kinds:
+      - kind: HTTPRoute
+      namespaces:
+        from: All
diff --git a/config/manifests/gateway/kgateway/resources.yaml b/config/manifests/gateway/kgateway/resources.yaml
deleted file mode 100644
index b66d47ab..00000000
--- a/config/manifests/gateway/kgateway/resources.yaml
+++ /dev/null
@@ -1,41 +0,0 @@
-# Requires Kgateway 2.0.0 or greater.
----
-apiVersion: gateway.networking.k8s.io/v1
-kind: Gateway
-metadata:
-  name: inference-gateway
-spec:
-  gatewayClassName: kgateway
-  listeners:
-  - name: http
-    port: 80
-    protocol: HTTP
-    allowedRoutes:
-      kinds:
-      - kind: HTTPRoute
-      namespaces:
-        from: All
----
-apiVersion: gateway.networking.k8s.io/v1
-kind: HTTPRoute
-metadata:
-  name: llm-route
-spec:
-  parentRefs:
-  - group: gateway.networking.k8s.io
-    kind: Gateway
-    name: inference-gateway
-  rules:
-  - backendRefs:
-    - group: inference.networking.x-k8s.io
-      kind: InferencePool
-      name: vllm-llama2-7b
-      port: 8000
-      weight: 1
-    matches:
-    - path:
-        type: PathPrefix
-        value: /
-    timeouts:
-      backendRequest: 24h
-      request: 24h

From 557c44f47512ebdb4ecc71f8e8ea09ec49341c8d Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Wed, 26 Mar 2025 14:09:19 -0700
Subject: [PATCH 23/49] Update the user guide to use shared HTTPRoute YAML

---
 site-src/guides/index.md | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index bfa851c6..97f58a8a 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -91,10 +91,11 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
              --range=<CIDR_RANGE>
          ```
 
-      1. Deploy Gateway, HTTPRoute and HealthCheckPolicy resources
+      1. Deploy Gateway and HealthCheckPolicy resources
 
          ```bash
-         kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gke/resources.yaml
+         kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gke/gateway.yaml
+         kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gke/healthcheck.yaml
          ```
 
          Confirm that the Gateway was assigned an IP address and reports a `Programmed=True` status:
@@ -106,14 +107,17 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
 
 === "Istio"
 
+      Please note that this feature is currently in an experimental phase and is not intended for production use. 
+      The implementation and user experience are subject to changes as we continue to iterate on this project.
+
       1. Install Istio
       
-      Follow the Istio installation guide https://istio.io/latest/docs/setup/install/
+         Please follow the [Istio installation guide](https://istio.io/latest/docs/setup/install/).
 
-      1. Deploy Gateway and HTTPRoute
+      1. Deploy Gateway
 
          ```bash
-         kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/resources.yaml
+         kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/gateway.yaml
          ```
 
       1. Label the gateway
@@ -158,10 +162,10 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
          --version 1.0.1-dev
          ```
 
-      1. Deploy Gateway and HTTPRoute resources
+      1. Deploy Gateway
 
          ```bash
-         kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/kgateway/resources.yaml
+         kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/kgateway/gateway.yaml
          ```
 
          Confirm that the Gateway was assigned an IP address and reports a `Programmed=True` status:
@@ -177,6 +181,12 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencepool.yaml
    ```
 
+### Deploy the HTTPRoute
+
+   ```bash
+   kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/httproute.yaml
+   ```
+
 ### Try it out
 
    Wait until the gateway is ready.
@@ -198,9 +208,11 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
    The following cleanup assumes you would like to clean ALL resources that were created in this quickstart guide.  
    please be careful not to delete resources you'd like to keep.
    ```bash
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gke/resources.yaml --ignore-not-found
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/resources.yaml --ignore-not-found
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/kgateway/resources.yaml --ignore-not-found
+   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gke/gateway.yaml --ignore-not-found
+   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gke/healthcheck.yaml --ignore-not-found
+   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/gateway.yaml --ignore-not-found
+   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/kgateway/gateway.yaml --ignore-not-found
+   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/httproute.yaml --ignore-not-found
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencepool.yaml --ignore-not-found
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencemodel.yaml --ignore-not-found
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml --ignore-not-found

From 6d48b5bbef37f77ed43ec4b8dd0e918b1d0199ad Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Wed, 26 Mar 2025 17:18:26 -0700
Subject: [PATCH 24/49] Add EPP DestinationRule for Istio

---
 config/manifests/gateway/istio/destination-rule.yaml | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 config/manifests/gateway/istio/destination-rule.yaml

diff --git a/config/manifests/gateway/istio/destination-rule.yaml b/config/manifests/gateway/istio/destination-rule.yaml
new file mode 100644
index 00000000..a295273c
--- /dev/null
+++ b/config/manifests/gateway/istio/destination-rule.yaml
@@ -0,0 +1,10 @@
+apiVersion: networking.istio.io/v1
+kind: DestinationRule
+metadata:
+  name: epp-insecure-tls
+spec:
+  host: vllm-llama2-7b-epp.default.svc.cluster.local
+  trafficPolicy:
+      tls:
+        mode: SIMPLE
+        insecureSkipVerify: true

From e512145fb3d663851ecb3457e46048c79a228329 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Wed, 26 Mar 2025 17:30:16 -0700
Subject: [PATCH 25/49] Add instructions for bypassing TLS verification for
 Istio

---
 site-src/guides/index.md | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index 97f58a8a..110235fb 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -65,6 +65,12 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencemodel.yaml
    ```
 
+### Deploy the InferencePool and Extension
+
+   ```bash
+   kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencepool.yaml
+   ```
+
 ### Deploy Inference Gateway
 
    Choose one of the following options to deploy an Inference Gateway.
@@ -114,6 +120,14 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
       
          Please follow the [Istio installation guide](https://istio.io/latest/docs/setup/install/).
 
+      1. If you run the Endpoint Picker (EPP) with TLS (with `--secureServing=true`), it is currently using a self-signed certificate 
+      and the gateway cannot successfully validate the CA signature and the SAN. Apply the destination rule to bypass verification as 
+      a temporary workaround. A better TLS implementation is being discussed in https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/582.
+
+         ```bash
+         kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/destination-rule.yaml
+         ```
+
       1. Deploy Gateway
 
          ```bash
@@ -175,12 +189,6 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
          inference-gateway   kgateway            <MY_ADDRESS>    True         22s
          ```
 
-### Deploy the InferencePool and Extension
-
-   ```bash
-   kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencepool.yaml
-   ```
-
 ### Deploy the HTTPRoute
 
    ```bash
@@ -211,6 +219,7 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gke/gateway.yaml --ignore-not-found
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gke/healthcheck.yaml --ignore-not-found
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/gateway.yaml --ignore-not-found
+   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/destination-rule.yaml --ignore-not-found
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/kgateway/gateway.yaml --ignore-not-found
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/httproute.yaml --ignore-not-found
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencepool.yaml --ignore-not-found

From e82e074883ddad31ad6c7ac61319aecef0d41bb0 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Wed, 26 Mar 2025 17:32:59 -0700
Subject: [PATCH 26/49] Update CRDs to the latest v0.2.0 release

Co-authored-by: Rob Scott <rob.scott87@gmail.com>
---
 site-src/guides/index.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index 110235fb..e8d528dd 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -52,8 +52,7 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
 ### Install the Inference Extension CRDs
 
    ```bash
-   kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml
-   kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml
+   kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v0.2.0/manifests.yaml
    ```
 
 ### Deploy InferenceModel

From ff8b2a17b951b0b66a5f3ea7e99e8ebac72e1ecb Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Wed, 26 Mar 2025 17:33:53 -0700
Subject: [PATCH 27/49] Update gateway to use the v1 API

Co-authored-by: Rob Scott <rob.scott87@gmail.com>
---
 config/manifests/gateway/gke/gateway.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/manifests/gateway/gke/gateway.yaml b/config/manifests/gateway/gke/gateway.yaml
index 3cf5645f..b231e207 100644
--- a/config/manifests/gateway/gke/gateway.yaml
+++ b/config/manifests/gateway/gke/gateway.yaml
@@ -1,5 +1,5 @@
 kind: Gateway
-apiVersion: gateway.networking.k8s.io/v1beta1
+apiVersion: gateway.networking.k8s.io/v1
 metadata:
  name: inference-gateway
 spec:

From f6f9538cd108648cb0e7742c4443c7eaffa17395 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Wed, 26 Mar 2025 17:34:25 -0700
Subject: [PATCH 28/49] Remove weight from HTTPRoute

Co-authored-by: Rob Scott <rob.scott87@gmail.com>
---
 config/manifests/gateway/httproute.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/config/manifests/gateway/httproute.yaml b/config/manifests/gateway/httproute.yaml
index 500e26fd..5bd8bfb6 100644
--- a/config/manifests/gateway/httproute.yaml
+++ b/config/manifests/gateway/httproute.yaml
@@ -12,7 +12,6 @@ spec:
     - group: inference.networking.x-k8s.io
       kind: InferencePool
       name: vllm-llama2-7b
-      weight: 1
     matches:
     - path:
         type: PathPrefix

From efb8c354437d66a577c8385b0c956f6689f09472 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Wed, 26 Mar 2025 17:36:15 -0700
Subject: [PATCH 29/49] Update gateway.yaml

Remove allowed routes from GKE gateway YAML
---
 config/manifests/gateway/gke/gateway.yaml | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/config/manifests/gateway/gke/gateway.yaml b/config/manifests/gateway/gke/gateway.yaml
index b231e207..942cde5c 100644
--- a/config/manifests/gateway/gke/gateway.yaml
+++ b/config/manifests/gateway/gke/gateway.yaml
@@ -8,8 +8,3 @@ spec:
  - name: http
    port: 80
    protocol: HTTP
-   allowedRoutes:
-     kinds:
-     - kind: HTTPRoute
-     namespaces:
-       from: All

From 5a2677efc737fc6fd4fa2481d8b9bc92fd78279f Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Wed, 26 Mar 2025 17:39:13 -0700
Subject: [PATCH 30/49] Remove allowedRoutes from Istio gateway

---
 config/manifests/gateway/istio/gateway.yaml | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/config/manifests/gateway/istio/gateway.yaml b/config/manifests/gateway/istio/gateway.yaml
index 5376b6b0..dd762678 100644
--- a/config/manifests/gateway/istio/gateway.yaml
+++ b/config/manifests/gateway/istio/gateway.yaml
@@ -8,8 +8,3 @@ spec:
   - name: http
     port: 80
     protocol: HTTP
-    allowedRoutes:
-     kinds:
-     - kind: HTTPRoute
-     namespaces:
-       from: All

From ce19438ac5471591bb79a26d92c6ff9036052c38 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Wed, 26 Mar 2025 17:39:49 -0700
Subject: [PATCH 31/49] Remove allowedRoutes from Kgateway

---
 config/manifests/gateway/kgateway/gateway.yaml | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/config/manifests/gateway/kgateway/gateway.yaml b/config/manifests/gateway/kgateway/gateway.yaml
index dccd2889..fb146b75 100644
--- a/config/manifests/gateway/kgateway/gateway.yaml
+++ b/config/manifests/gateway/kgateway/gateway.yaml
@@ -10,8 +10,3 @@ spec:
   - name: http
     port: 80
     protocol: HTTP
-    allowedRoutes:
-      kinds:
-      - kind: HTTPRoute
-      namespaces:
-        from: All

From d4932587c4a6d5cdf3761d098fc90269332030fd Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Thu, 27 Mar 2025 10:32:19 -0700
Subject: [PATCH 32/49] Update latest instructions for installing Istio and
 address some comments

---
 site-src/guides/index.md | 75 +++++++++++++++++++++++++---------------
 1 file changed, 47 insertions(+), 28 deletions(-)

diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index e8d528dd..31ad86e2 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -51,8 +51,17 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
 
 ### Install the Inference Extension CRDs
 
+=== "Latest Release"
+
+   ```bash
+   VERSION=v0.2.0
+   kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/$VERSION/manifests.yaml
+   ```
+
+=== "Dev Version"
+
    ```bash
-   kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v0.2.0/manifests.yaml
+   kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd
    ```
 
 ### Deploy InferenceModel
@@ -76,25 +85,8 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
 
 === "GKE"
 
-      1. Enable the Gateway API
-
-         ```bash
-         gcloud container clusters update <CLUSTER_NAME> \
-             --location=<CLUSTER_LOCATION> \
-             --gateway-api=standard
-         ```
-
-      1. Create the proxy-only subnet
-      
-         A proxy-only subnet provides a set of IP addresses that Google uses to run Envoy proxies on your behalf. 
-         ```
-         gcloud compute networks subnets create proxy-only-subnet \
-             --purpose=REGIONAL_MANAGED_PROXY \
-             --role=ACTIVE \
-             --region=<REGION> \
-             --network=<VPC_NETWORK_NAME> \
-             --range=<CIDR_RANGE>
-         ```
+      1. Enable the Gateway API and configure proxy-only subnets when necessary. See [Deploy Gateways](https://cloud.google.com/kubernetes-engine/docs/how-to/deploying-gateways) 
+      for detailed instructions.
 
       1. Deploy Gateway and HealthCheckPolicy resources
 
@@ -117,11 +109,24 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
 
       1. Install Istio
       
-         Please follow the [Istio installation guide](https://istio.io/latest/docs/setup/install/).
+         ```
+         TAG=1.26-alpha.80c74f7f43482c226f4f4b10b4dda6261b67a71f
+         # on Linux
+         wget https://storage.googleapis.com/istio-build/dev/$TAG/istioctl-$TAG-linux-amd64.tar.gz
+         tar -xvf istioctl-$TAG-linux-amd64.tar.gz
+         # on macOS
+         wget https://storage.googleapis.com/istio-build/dev/$TAG/istioctl-$TAG-osx.tar.gz
+         tar -xvf istioctl-$TAG-osx.tar.gz
+         # on Windows
+         wget https://storage.googleapis.com/istio-build/dev/$TAG/istioctl-$TAG-win.zip
+         unzip istioctl-$TAG-win.zip
+
+         ./istioctl install --set tag=$TAG --set hub=gcr.io/istio-testing
+         ```
 
       1. If you run the Endpoint Picker (EPP) with TLS (with `--secureServing=true`), it is currently using a self-signed certificate 
       and the gateway cannot successfully validate the CA signature and the SAN. Apply the destination rule to bypass verification as 
-      a temporary workaround. A better TLS implementation is being discussed in https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/582.
+      a temporary workaround. A better TLS implementation is being discussed in [Issue 582](https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/582).
 
          ```bash
          kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/destination-rule.yaml
@@ -213,7 +218,20 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
 ### Cleanup
 
    The following cleanup assumes you would like to clean ALL resources that were created in this quickstart guide.  
-   please be careful not to delete resources you'd like to keep.
+   Please be careful not to delete resources you'd like to keep.
+
+   1. Uninstall the Inference Pool
+
+   ```bash
+   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencepool.yaml --ignore-not-found
+   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencemodel.yaml --ignore-not-found
+   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/cpu-deployment.yaml --ignore-not-found
+   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/gpu-deployment.yaml --ignore-not-found
+   kubectl delete secret hf-token --ignore-not-found
+   ```
+
+   1. Uninstall the Gateway
+
    ```bash
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gke/gateway.yaml --ignore-not-found
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gke/healthcheck.yaml --ignore-not-found
@@ -221,11 +239,12 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/destination-rule.yaml --ignore-not-found
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/kgateway/gateway.yaml --ignore-not-found
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/httproute.yaml --ignore-not-found
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencepool.yaml --ignore-not-found
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencemodel.yaml --ignore-not-found
+   ```
+
+   1. Uninstall the CRDs
+
+   ```bash
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml --ignore-not-found
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml --ignore-not-found
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/cpu-deployment.yaml --ignore-not-found
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/gpu-deployment.yaml --ignore-not-found
-   kubectl delete secret hf-token --ignore-not-found
+   kubectl delete -k https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd --ignore-not-found
    ```

From 9cb25759f1c6c6db6a8608e14b399baf6b616d2b Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Thu, 27 Mar 2025 10:41:58 -0700
Subject: [PATCH 33/49] Fix indentation for installing CRDs

---
 site-src/guides/index.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index 31ad86e2..b7821674 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -53,16 +53,16 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
 
 === "Latest Release"
 
-   ```bash
-   VERSION=v0.2.0
-   kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/$VERSION/manifests.yaml
-   ```
+      ```bash
+      VERSION=v0.2.0
+      kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/$VERSION/manifests.yaml
+      ```
 
 === "Dev Version"
 
-   ```bash
-   kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd
-   ```
+      ```bash
+      kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd
+      ```
 
 ### Deploy InferenceModel
 

From 35a835fa6431f061360179e8fba0b09be52ac0a4 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Fri, 28 Mar 2025 10:39:41 -0700
Subject: [PATCH 34/49] Addressing code review comments

---
 site-src/guides/index.md | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index e2c54250..d3b462a7 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -1,6 +1,10 @@
 # Getting started with Gateway API Inference Extension
 
-This quickstart guide is intended for engineers familiar with k8s and model servers (vLLM in this instance). The goal of this guide is to get a first, single InferencePool up and running! 
+??? example "Experimental"
+
+    This project is still in an alpha state and breaking changes may occur in the future.
+
+This quickstart guide is intended for engineers familiar with k8s and model servers (vLLM in this instance). The goal of this guide is to get an Inference Gateway up and running! 
 
 ## **Prerequisites**
  - A cluster with:
@@ -124,15 +128,15 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
          ./istioctl install --set tag=$TAG --set hub=gcr.io/istio-testing
          ```
 
-      1. If you run the Endpoint Picker (EPP) with TLS (with `--secureServing=true`), it is currently using a self-signed certificate 
-      and the gateway cannot successfully validate the CA signature and the SAN. Apply the destination rule to bypass verification as 
-      a temporary workaround. A better TLS implementation is being discussed in [Issue 582](https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/582).
+      1. Deploy Gateway
 
-         ```bash
-         kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/destination-rule.yaml
-         ```
+         ??? note
 
-      1. Deploy Gateway
+             If you run the Endpoint Picker (EPP) with the `--secureServing` flag set to `true`, it is currently using a self-signed certificate. As a security measure, Istio does not trust self-signed certificates by default. As a temporary workaround, you can apply the destination rule to bypass TLS verification for EPP. A more secure TLS implementation in EPP is being discussed in [Issue 582](https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/582).
+
+             ```bash
+             kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/destination-rule.yaml
+             ```
 
          ```bash
          kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/gateway.yaml
@@ -166,18 +170,14 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
       1. Install Kgateway CRDs
 
          ```bash
-         helm upgrade -i --create-namespace --namespace kgateway-system --version v2.0.0-main kgateway-crds https://github.com/danehans/toolbox/raw/refs/heads/main/charts/338661f3be-kgateway-crds-1.0.1-dev.tgz
+         helm upgrade -i --create-namespace --namespace kgateway-system --version $VERSION kgateway-crds oci://cr.kgateway.dev/kgateway-dev/charts/kgateway-crds
          ```
 
       1. Install Kgateway
 
          ```bash
-         helm upgrade --install kgateway "https://github.com/danehans/toolbox/raw/refs/heads/main/charts/338661f3be-kgateway-1.0.1-dev.tgz" \
-         -n kgateway-system \
-         --set image.registry=danehans \
-         --set image.pullPolicy=Always \
-         --set inferenceExtension.enabled="true" \
-         --version 1.0.1-dev
+         helm upgrade -i --namespace kgateway-system --version $VERSION kgateway oci://cr.kgateway.dev/kgateway-dev/charts/kgateway
+--set inferenceExtension.enabled=true
          ```
 
       1. Deploy Gateway
@@ -244,7 +244,5 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
    1. Uninstall the CRDs
 
    ```bash
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml --ignore-not-found
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml --ignore-not-found
    kubectl delete -k https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd --ignore-not-found
    ```

From 0a24389fc42b4b0a5a1248541050eb7582d849f4 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Fri, 28 Mar 2025 10:46:30 -0700
Subject: [PATCH 35/49] Fix indentation

---
 site-src/guides/index.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index d3b462a7..2e1d3ee9 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -128,15 +128,15 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
          ./istioctl install --set tag=$TAG --set hub=gcr.io/istio-testing
          ```
 
-      1. Deploy Gateway
+      ??? note
 
-         ??? note
+      If you run the Endpoint Picker (EPP) with the `--secureServing` flag set to `true`, it is currently using a self-signed certificate. As a security measure, Istio does not trust self-signed certificates by default. As a temporary workaround, you can apply the destination rule to bypass TLS verification for EPP. A more secure TLS implementation in EPP is being discussed in [Issue 582](https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/582).
 
-             If you run the Endpoint Picker (EPP) with the `--secureServing` flag set to `true`, it is currently using a self-signed certificate. As a security measure, Istio does not trust self-signed certificates by default. As a temporary workaround, you can apply the destination rule to bypass TLS verification for EPP. A more secure TLS implementation in EPP is being discussed in [Issue 582](https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/582).
+      ```bash
+      kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/destination-rule.yaml
+      ```
 
-             ```bash
-             kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/destination-rule.yaml
-             ```
+      1. Deploy Gateway
 
          ```bash
          kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/gateway.yaml
@@ -177,7 +177,7 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
 
          ```bash
          helm upgrade -i --namespace kgateway-system --version $VERSION kgateway oci://cr.kgateway.dev/kgateway-dev/charts/kgateway
---set inferenceExtension.enabled=true
+         --set inferenceExtension.enabled=true
          ```
 
       1. Deploy Gateway

From c1b563b6e5caa984f131280fdd3cf8e7e7f24421 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Fri, 28 Mar 2025 10:54:18 -0700
Subject: [PATCH 36/49] Update Istio installation instructions

---
 site-src/guides/index.md | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index 2e1d3ee9..44ed7953 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -111,6 +111,10 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
       Please note that this feature is currently in an experimental phase and is not intended for production use. 
       The implementation and user experience are subject to changes as we continue to iterate on this project.
 
+      1.  Requirements
+
+         - Gateway API [CRDs](https://gateway-api.sigs.k8s.io/guides/#installing-gateway-api) installed.
+
       1. Install Istio
       
          ```
@@ -128,13 +132,11 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
          ./istioctl install --set tag=$TAG --set hub=gcr.io/istio-testing
          ```
 
-      ??? note
-
-      If you run the Endpoint Picker (EPP) with the `--secureServing` flag set to `true`, it is currently using a self-signed certificate. As a security measure, Istio does not trust self-signed certificates by default. As a temporary workaround, you can apply the destination rule to bypass TLS verification for EPP. A more secure TLS implementation in EPP is being discussed in [Issue 582](https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/582).
+      1. If you run the Endpoint Picker (EPP) with the `--secureServing` flag set to `true`, it is currently using a self-signed certificate. As a security measure, Istio does not trust self-signed certificates by default. As a temporary workaround, you can apply the destination rule to bypass TLS verification for EPP. A more secure TLS implementation in EPP is being discussed in [Issue 582](https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/582).
 
-      ```bash
-      kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/destination-rule.yaml
-      ```
+         ```bash
+         kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/destination-rule.yaml
+         ```
 
       1. Deploy Gateway
 

From 6d3642a40c4c78504ee3f452610696df1cf4e7c9 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Fri, 28 Mar 2025 10:58:42 -0700
Subject: [PATCH 37/49] Fix indentation

---
 site-src/guides/index.md | 37 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index 44ed7953..4ce33229 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -150,8 +150,7 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
          kubectl label gateway llm-gateway istio.io/enable-inference-extproc=true
          ```
 
-      1. Confirm that the Gateway was assigned an IP address and reports a `Programmed=True` status:
-      
+         Confirm that the Gateway was assigned an IP address and reports a `Programmed=True` status:
          ```bash
          $ kubectl get gateway inference-gateway
          NAME                CLASS               ADDRESS         PROGRAMMED   AGE
@@ -224,27 +223,27 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
 
    1. Uninstall the Inference Pool
 
-   ```bash
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencepool.yaml --ignore-not-found
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencemodel.yaml --ignore-not-found
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/cpu-deployment.yaml --ignore-not-found
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/gpu-deployment.yaml --ignore-not-found
-   kubectl delete secret hf-token --ignore-not-found
+      ```bash
+      kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencepool.yaml --ignore-not-found
+      kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencemodel.yaml --ignore-not-found
+      kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/cpu-deployment.yaml --ignore-not-found
+      kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/gpu-deployment.yaml --ignore-not-found
+      kubectl delete secret hf-token --ignore-not-found
    ```
 
    1. Uninstall the Gateway
 
-   ```bash
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gke/gateway.yaml --ignore-not-found
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gke/healthcheck.yaml --ignore-not-found
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/gateway.yaml --ignore-not-found
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/destination-rule.yaml --ignore-not-found
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/kgateway/gateway.yaml --ignore-not-found
-   kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/httproute.yaml --ignore-not-found
-   ```
+      ```bash
+      kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gke/gateway.yaml --ignore-not-found
+      kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gke/healthcheck.yaml --ignore-not-found
+      kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/gateway.yaml --ignore-not-found
+      kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/destination-rule.yaml --ignore-not-found
+      kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/kgateway/gateway.yaml --ignore-not-found
+      kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/httproute.yaml --ignore-not-found
+      ```
 
    1. Uninstall the CRDs
 
-   ```bash
-   kubectl delete -k https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd --ignore-not-found
-   ```
+      ```bash
+      kubectl delete -k https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd --ignore-not-found
+      ```

From 6a9f91a03209f40551b13fe65448959fb2a1dd02 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Fri, 28 Mar 2025 11:00:05 -0700
Subject: [PATCH 38/49] Fix indentation

---
 site-src/guides/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index 4ce33229..34e9cec6 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -229,7 +229,7 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
       kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/cpu-deployment.yaml --ignore-not-found
       kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/gpu-deployment.yaml --ignore-not-found
       kubectl delete secret hf-token --ignore-not-found
-   ```
+      ```
 
    1. Uninstall the Gateway
 

From b6d4c7a8b1082b858b3648a80c759aaa1a1a0a8e Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Fri, 28 Mar 2025 11:07:16 -0700
Subject: [PATCH 39/49] Add more spacing to the CPU based model instructions

---
 site-src/guides/index.md | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index 34e9cec6..57e5a231 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -42,11 +42,10 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
 
       This setup is using the formal `vllm-cpu` image, which according to the documentation can run vLLM on x86 CPU platform.
       For this setup, we use approximately 9.5GB of memory and 12 CPUs for each replica.  
-      While it is possible to deploy the model server with less resources, this is not recommended.  
-      For example, in our tests, loading the model using 8GB of memory and 1 CPU was possible but took almost 3.5 minutes and inference requests took unreasonable time.  
-      In general, there is a tradeoff between the memory and CPU we allocate to our pods and the performance. The more memory and CPU we allocate the better performance we can get.  
-      After running multiple configurations of these values we decided in this sample to use 9.5GB of memory and 12 CPUs for each replica, which gives reasonable response times. You can increase those numbers and potentially may even get better response times.
-      For modifying the allocated resources, adjust the numbers in `./config/manifests/vllm/cpu-deployment.yaml` as needed.  
+      
+      While it is possible to deploy the model server with fewer resources, this is not recommended. For example, in our tests, loading the model using 8GB of memory and 1 CPU was possible but took almost 3.5 minutes and inference requests took an unreasonably long time. In general, there is a tradeoff between the memory and CPU we allocate to our pods and the performance. The more memory and CPU we allocate, the better the performance we can get.
+      
+      After running multiple configurations of these values, we decided in this sample to use 9.5GB of memory and 12 CPUs for each replica, which gives reasonable response times. You can increase those numbers and may get even better response times. To modify the allocated resources, adjust the numbers in [cpu-deployment.yaml](https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/cpu-deployment.yaml) as needed.  
 
       Deploy a sample vLLM deployment with the proper protocol to work with the LLM Instance Gateway.
       ```bash

From e9f2298315ef5e208b7f7da88a482d1a497ffc75 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Fri, 28 Mar 2025 11:25:06 -0700
Subject: [PATCH 40/49] Removing comments from kgateway

---
 config/manifests/gateway/kgateway/gateway.yaml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/config/manifests/gateway/kgateway/gateway.yaml b/config/manifests/gateway/kgateway/gateway.yaml
index fb146b75..7bcd08a6 100644
--- a/config/manifests/gateway/kgateway/gateway.yaml
+++ b/config/manifests/gateway/kgateway/gateway.yaml
@@ -1,5 +1,3 @@
-# Requires Kgateway 2.0.0 or greater.
----
 apiVersion: gateway.networking.k8s.io/v1
 kind: Gateway
 metadata:

From 484f19f22a8d70f1aa928617314b6a308b3dadf6 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Fri, 28 Mar 2025 14:12:44 -0700
Subject: [PATCH 41/49] Add clarification on the EPP secureServing default
 value.

Co-authored-by: Rob Scott <rob.scott87@gmail.com>
---
 site-src/guides/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index 57e5a231..cf4b8dad 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -131,7 +131,7 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
          ./istioctl install --set tag=$TAG --set hub=gcr.io/istio-testing
          ```
 
-      1. If you run the Endpoint Picker (EPP) with the `--secureServing` flag set to `true`, it is currently using a self-signed certificate. As a security measure, Istio does not trust self-signed certificates by default. As a temporary workaround, you can apply the destination rule to bypass TLS verification for EPP. A more secure TLS implementation in EPP is being discussed in [Issue 582](https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/582).
+      1. If you run the Endpoint Picker (EPP) with the `--secureServing` flag set to `true` (the default mode), it is currently using a self-signed certificate. As a security measure, Istio does not trust self-signed certificates by default. As a temporary workaround, you can apply the destination rule to bypass TLS verification for EPP. A more secure TLS implementation in EPP is being discussed in [Issue 582](https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/582).
 
          ```bash
          kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/destination-rule.yaml

From d71f29cb524720ff81e679a538313eae4a033610 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Fri, 28 Mar 2025 14:28:31 -0700
Subject: [PATCH 42/49] Add instructions for configuring timeout

---
 site-src/guides/index.md | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index cf4b8dad..b671461a 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -199,6 +199,28 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/httproute.yaml
    ```
 
+### Configure Timeouts
+
+   Given that the default timeouts for the above implementations may be insufficient for most inference workloads, it is recommended to configure a timeout appropriate for your intended use case.
+
+=== "GKE"
+
+      ```bash
+      kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gke/gcp-backend-policy.yaml
+      ```
+
+=== "Istio"
+
+      ```bash
+      kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/httproute-with-timeout.yaml
+      ```
+
+=== "Kgateway"
+
+      ```bash
+      kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/httproute-with-timeout.yaml
+      ```
+
 ### Try it out
 
    Wait until the gateway is ready.

From 41fc08323fe0287e8fb85305a4ce84c1ccaacac2 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Fri, 28 Mar 2025 14:32:55 -0700
Subject: [PATCH 43/49] Create httproute-with-timeout.yaml

---
 .../gateway/httproute-with-timeout.yaml       | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 config/manifests/gateway/httproute-with-timeout.yaml

diff --git a/config/manifests/gateway/httproute-with-timeout.yaml b/config/manifests/gateway/httproute-with-timeout.yaml
new file mode 100644
index 00000000..060f18c5
--- /dev/null
+++ b/config/manifests/gateway/httproute-with-timeout.yaml
@@ -0,0 +1,20 @@
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: llm-route
+spec:
+  parentRefs:
+  - group: gateway.networking.k8s.io
+    kind: Gateway
+    name: inference-gateway
+  rules:
+  - backendRefs:
+    - group: inference.networking.x-k8s.io
+      kind: InferencePool
+      name: vllm-llama2-7b
+    matches:
+    - path:
+        type: PathPrefix
+        value: /
+    timeouts:
+      request: 300s

From d5fd70fd45fd5a1070f9bf1d71ec755a3d495279 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Fri, 28 Mar 2025 14:39:51 -0700
Subject: [PATCH 44/49] Create gcp-backend-policy.yaml

---
 config/manifests/gateway/gke/gcp-backend-policy.yaml | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 config/manifests/gateway/gke/gcp-backend-policy.yaml

diff --git a/config/manifests/gateway/gke/gcp-backend-policy.yaml b/config/manifests/gateway/gke/gcp-backend-policy.yaml
new file mode 100644
index 00000000..519a5a93
--- /dev/null
+++ b/config/manifests/gateway/gke/gcp-backend-policy.yaml
@@ -0,0 +1,11 @@
+apiVersion: networking.gke.io/v1
+kind: GCPBackendPolicy
+metadata:
+  name: inferencepool-backend-policy
+spec:
+  targetRef:
+    group: "inference.networking.x-k8s.io"
+    kind: InferencePool
+    name: vllm-llama3-8b-instruct
+  default:
+    timeoutSec: 300

From d0ddd165945f2d183c35a523716e050b59191839 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Fri, 28 Mar 2025 14:45:25 -0700
Subject: [PATCH 45/49] Add cleanup for GCPBackendPolicy

---
 site-src/guides/index.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index b671461a..e147f554 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -257,6 +257,7 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
       ```bash
       kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gke/gateway.yaml --ignore-not-found
       kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gke/healthcheck.yaml --ignore-not-found
+      kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gke/gcp-backend-policy.yaml --ignore-not-found
       kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/gateway.yaml --ignore-not-found
       kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/destination-rule.yaml --ignore-not-found
       kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/kgateway/gateway.yaml --ignore-not-found

From e1c0b1d53814a9cf0a2bd9d2d7313af58f4bf07a Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Fri, 28 Mar 2025 15:07:27 -0700
Subject: [PATCH 46/49] Remove namespace from destination-rule.yaml

---
 config/manifests/gateway/istio/destination-rule.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/manifests/gateway/istio/destination-rule.yaml b/config/manifests/gateway/istio/destination-rule.yaml
index a295273c..f9cd0c3c 100644
--- a/config/manifests/gateway/istio/destination-rule.yaml
+++ b/config/manifests/gateway/istio/destination-rule.yaml
@@ -3,7 +3,7 @@ kind: DestinationRule
 metadata:
   name: epp-insecure-tls
 spec:
-  host: vllm-llama2-7b-epp.default.svc.cluster.local
+  host: vllm-llama2-7b-epp  # Short name, no namespace. NOTE(review): presumably resolved in this rule's namespace; confirm.
   trafficPolicy:
       tls:
         mode: SIMPLE

From e4471ec610161b9086d2398bb130e15e5566a7e4 Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Fri, 28 Mar 2025 15:11:51 -0700
Subject: [PATCH 47/49] Rename inferencepool.yaml to
 inferencepool-resources.yaml

---
 .../{inferencepool.yaml => inferencepool-resources.yaml}          | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename config/manifests/{inferencepool.yaml => inferencepool-resources.yaml} (100%)

diff --git a/config/manifests/inferencepool.yaml b/config/manifests/inferencepool-resources.yaml
similarity index 100%
rename from config/manifests/inferencepool.yaml
rename to config/manifests/inferencepool-resources.yaml

From 365d847ccc64331bb09a21a8f026e71da83f978b Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Fri, 28 Mar 2025 15:15:22 -0700
Subject: [PATCH 48/49] Point e2e suite at renamed
 inferencepool-resources.yaml

---
 test/e2e/epp/e2e_suite_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/e2e/epp/e2e_suite_test.go b/test/e2e/epp/e2e_suite_test.go
index f9dea1cc..643bbf75 100644
--- a/test/e2e/epp/e2e_suite_test.go
+++ b/test/e2e/epp/e2e_suite_test.go
@@ -75,7 +75,7 @@ const (
 	// inferModelManifest is the manifest for the inference model CRD.
 	inferModelManifest = "../../../config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml"
 	// inferExtManifest is the manifest for the inference extension test resources.
-	inferExtManifest = "../../../config/manifests/inferencepool.yaml"
+	inferExtManifest = "../../../config/manifests/inferencepool-resources.yaml"
 	// envoyManifest is the manifest for the envoy proxy test resources.
 	envoyManifest = "../../testdata/envoy.yaml"
 	// modelServerManifestFilepathEnvVar is the env var that holds absolute path to the manifest for the model server test resource.

From c82487d6e599e525e69a0ba13b237199e91c266f Mon Sep 17 00:00:00 2001
From: Nicole Xin <nxin@google.com>
Date: Fri, 28 Mar 2025 15:16:27 -0700
Subject: [PATCH 49/49] Update guide links to renamed
 inferencepool-resources.yaml

---
 site-src/guides/index.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index e147f554..4548d5cd 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -79,7 +79,7 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
 ### Deploy the InferencePool and Extension
 
    ```bash
-   kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencepool.yaml
+   kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencepool-resources.yaml
    ```
 
 ### Deploy Inference Gateway
@@ -245,7 +245,7 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
    1. Uninstall the Inference Pool
 
       ```bash
-      kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencepool.yaml --ignore-not-found
+      kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencepool-resources.yaml --ignore-not-found
       kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencemodel.yaml --ignore-not-found
       kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/cpu-deployment.yaml --ignore-not-found
       kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/gpu-deployment.yaml --ignore-not-found