kubernetes-sigs · k8s-ci-robot · Mar 17, 2025 · Mar 17, 2025 · Mar 17, 2025 · Mar 17, 2025
diff --git a/config/manifests/ext_proc.yaml b/config/manifests/inferencepool.yaml
@@ -1,81 +1,53 @@
-kind: ClusterRole
-apiVersion: rbac.authorization.k8s.io/v1
-metadata:
-  name: pod-read
-rules:
-- apiGroups: ["inference.networking.x-k8s.io"]
-  resources: ["inferencemodels"]
-  verbs: ["get", "watch", "list"]
-- apiGroups: [""]
-  resources: ["pods"]
-  verbs: ["get", "watch", "list"]
-- apiGroups: ["inference.networking.x-k8s.io"]
-  resources: ["inferencepools"]
-  verbs: ["get", "watch", "list"]
-- apiGroups: ["discovery.k8s.io"]
-  resources: ["endpointslices"]
-  verbs: ["get", "watch", "list"]
-- apiGroups:
-  - authentication.k8s.io
-  resources:
-  - tokenreviews
-  verbs:
-  - create
-- apiGroups:
-  - authorization.k8s.io
-  resources:
-  - subjectaccessreviews
-  verbs:
-  - create
---- 
-kind: ClusterRoleBinding
-apiVersion: rbac.authorization.k8s.io/v1
-metadata:
-  name: pod-read-binding
-subjects:
-- kind: ServiceAccount
-  name: default
-  namespace: default
-roleRef:
-  kind: ClusterRole
-  name: pod-read
----
 apiVersion: inference.networking.x-k8s.io/v1alpha2
 kind: InferencePool
 metadata:
   labels:
-  name: my-pool
+  name: vllm-llama2-7b
 spec:
   targetPortNumber: 8000
   selector:
-    app: my-pool
+    app: vllm-llama2-7b
   extensionRef:
-    name: inference-gateway-ext-proc
+    name: vllm-llama2-7b-epp
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: vllm-llama2-7b-epp
+  namespace: default
+spec:
+  selector:
+    app: vllm-llama2-7b-epp
+  ports:
+    - protocol: TCP
+      port: 9002
+      targetPort: 9002
+  type: ClusterIP
 ---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: inference-gateway-ext-proc
+  name: vllm-llama2-7b-epp
   namespace: default
   labels:
-    app: inference-gateway-ext-proc
+    app: vllm-llama2-7b-epp
 spec:
   replicas: 1
   selector:
     matchLabels:
-      app: inference-gateway-ext-proc
+      app: vllm-llama2-7b-epp
   template:
     metadata:
       labels:
-        app: inference-gateway-ext-proc
+        app: vllm-llama2-7b-epp
     spec:
       containers:
-      - name: inference-gateway-ext-proc
+      - name: epp
         image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
         imagePullPolicy: Always
         args:
         - -poolName
-        - "my-pool"
+        - "vllm-llama2-7b"
         - -v
         - "4"
         - -grpcPort
@@ -103,16 +75,44 @@ spec:
           initialDelaySeconds: 5
           periodSeconds: 10
 ---
-apiVersion: v1
-kind: Service
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
 metadata:
-  name: inference-gateway-ext-proc
+  name: pod-read
+rules:
+- apiGroups: ["inference.networking.x-k8s.io"]
+  resources: ["inferencemodels"]
+  verbs: ["get", "watch", "list"]
+- apiGroups: [""]
+  resources: ["pods"]
+  verbs: ["get", "watch", "list"]
+- apiGroups: ["inference.networking.x-k8s.io"]
+  resources: ["inferencepools"]
+  verbs: ["get", "watch", "list"]
+- apiGroups: ["discovery.k8s.io"]
+  resources: ["endpointslices"]
+  verbs: ["get", "watch", "list"]
+- apiGroups:
+  - authentication.k8s.io
+  resources:
+  - tokenreviews
+  verbs:
+  - create
+- apiGroups:
+  - authorization.k8s.io
+  resources:
+  - subjectaccessreviews
+  verbs:
+  - create
+---
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: pod-read-binding
+subjects:
+- kind: ServiceAccount
+  name: default
   namespace: default
-spec:
-  selector:
-    app: inference-gateway-ext-proc
-  ports:
-    - protocol: TCP
-      port: 9002
-      targetPort: 9002
-  type: ClusterIP
+roleRef:
+  kind: ClusterRole
+  name: pod-read
diff --git a/config/manifests/vllm/cpu-deployment.yaml b/config/manifests/vllm/cpu-deployment.yaml
@@ -1,16 +1,16 @@
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: my-pool
+  name: vllm-llama2-7b
 spec:
   replicas: 3
   selector:
     matchLabels:
-      app: my-pool
+      app: vllm-llama2-7b
   template:
     metadata:
       labels:
-        app: my-pool
+        app: vllm-llama2-7b
     spec:
       containers:
         - name: lora

diff --git a/config/manifests/vllm/gpu-deployment.yaml b/config/manifests/vllm/gpu-deployment.yaml
@@ -1,16 +1,16 @@
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: my-pool
+  name: vllm-llama2-7b
 spec:
   replicas: 3
   selector:
     matchLabels:
-      app: my-pool
+      app: vllm-llama2-7b
   template:
     metadata:
       labels:
-        app: my-pool
+        app: vllm-llama2-7b
     spec:
       containers:
         - name: lora

diff --git a/site-src/guides/index.md b/site-src/guides/index.md
@@ -80,10 +80,10 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
    NAME                CLASS               ADDRESS         PROGRAMMED   AGE
    inference-gateway   inference-gateway   <MY_ADDRESS>    True         22s
    ```
-### Deploy the Inference Extension and InferencePool
+### Deploy the InferencePool and Extension
 
    ```bash
-   kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/ext_proc.yaml
+   kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencepool.yaml
    ```
 ### Deploy Envoy Gateway Custom Policies
 
@@ -134,4 +134,4 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/cpu-deployment.yaml --ignore-not-found
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/gpu-deployment.yaml --ignore-not-found
    kubectl delete secret hf-token --ignore-not-found
-   ```
+   ```
diff --git a/test/e2e/epp/e2e_suite_test.go b/test/e2e/epp/e2e_suite_test.go
@@ -57,15 +57,15 @@ const (
 	// TODO [danehans]: Must be "default" until https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/227 is fixed
 	nsName = "default"
 	// modelServerName is the name of the model server test resources.
-	modelServerName = "my-pool"
+	modelServerName = "vllm-llama2-7b"
 	// modelName is the test model name.
 	modelName = "tweet-summary"
 	// envoyName is the name of the envoy proxy test resources.
 	envoyName = "envoy"
 	// envoyPort is the listener port number of the test envoy proxy.
 	envoyPort = "8081"
 	// inferExtName is the name of the inference extension test resources.
-	inferExtName = "inference-gateway-ext-proc"
+	inferExtName = "vllm-llama2-7b-epp"
 	// clientManifest is the manifest for the client test resources.
 	clientManifest = "../../testdata/client.yaml"
 	// modelServerSecretManifest is the manifest for the model server secret resource.
@@ -75,7 +75,7 @@ const (
 	// inferModelManifest is the manifest for the inference model CRD.
 	inferModelManifest = "../../../config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml"
 	// inferExtManifest is the manifest for the inference extension test resources.
-	inferExtManifest = "../../../config/manifests/ext_proc.yaml"
+	inferExtManifest = "../../../config/manifests/inferencepool.yaml"
 	// envoyManifest is the manifest for the envoy proxy test resources.
 	envoyManifest = "../../testdata/envoy.yaml"
 	// modelServerManifestFilepathEnvVar is the env var that holds absolute path to the manifest for the model server test resource.

diff --git a/test/testdata/envoy.yaml b/test/testdata/envoy.yaml
@@ -100,7 +100,7 @@ data:
                           grpc_service:
                             envoy_grpc:
                               cluster_name: ext_proc
-                              authority: inference-gateway-ext-proc.default:9002
+                              authority: vllm-llama2-7b-epp.default:9002
                             timeout: 10s
                           processing_mode:
                             request_header_mode: SEND
@@ -194,7 +194,7 @@ data:
                   - endpoint:
                       address:
                         socket_address:
-                          address: inference-gateway-ext-proc.default
+                          address: vllm-llama2-7b-epp.default
                           port_value: 9002
                     health_status: HEALTHY
                     load_balancing_weight: 1