
Commit 953235d

Refactors e2e for manifest approach

Signed-off-by: Daneyon Hansen <[email protected]>

1 parent: c008a95

17 files changed: +512 -1617 lines

api/v1alpha1/inferencemodel_types.go (-7)

@@ -20,13 +20,6 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
-const (
-	// KindInferenceModel is the InferenceModel kind.
-	KindInferenceModel = "InferenceModel"
-	// ResourceInferenceModel is the name of the inferencemodels resource.
-	ResourceInferenceModel = "inferencemodels"
-)
-
 // InferenceModel is the Schema for the InferenceModels API.
 //
 // +kubebuilder:object:root=true

api/v1alpha1/inferencepool_types.go (-7)

@@ -20,13 +20,6 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
-const (
-	// KindInferencePool is the InferencePool kind.
-	KindInferencePool = "InferencePool"
-	// ResourceInferencePool is the name of the inferencepools resource.
-	ResourceInferencePool = "inferencepools"
-)
-
 // InferencePool is the Schema for the InferencePools API.
 //
 // +kubebuilder:object:root=true
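
Both type files drop their exported kind/resource string constants. Callers that still need those strings can discover them from a live cluster instead; a quick check, assuming the project's CRDs are installed:

```bash
# The kind and resource names formerly exposed as Go constants can be
# listed straight from the API server once the CRDs are applied:
kubectl api-resources --api-group=inference.networking.x-k8s.io
```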

examples/poc/manifests/inferencepool-with-model.yaml renamed to examples/poc/manifests/inferencemodel.yaml (-10)

@@ -1,14 +1,4 @@
 apiVersion: inference.networking.x-k8s.io/v1alpha1
-kind: InferencePool
-metadata:
-  labels:
-  name: vllm-llama2-7b-pool
-spec:
-  targetPortNumber: 8000
-  selector:
-    app: vllm-llama2-7b-pool
----
-apiVersion: inference.networking.x-k8s.io/v1alpha1
 kind: InferenceModel
 metadata:
   labels:
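
With the InferencePool document split out of this manifest (the README change below applies it with the ext-proc step), the renamed file should define only the InferenceModel. A quick sanity check from the repo root:

```bash
# List the kinds declared in the renamed manifest; only the
# InferenceModel document is expected to remain:
grep '^kind:' examples/poc/manifests/inferencemodel.yaml
```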

examples/poc/manifests/vllm/vllm-lora-deployment.yaml (+9 -3)

@@ -1,4 +1,13 @@
 apiVersion: v1
+kind: Secret
+metadata:
+  name: hf-token
+  labels:
+    app: vllm
+stringData:
+  token: $HF_TOKEN
+---
+apiVersion: v1
 kind: Service
 metadata:
   name: vllm-llama2-7b-pool
@@ -10,14 +19,11 @@ spec:
       port: 8000
       targetPort: 8000
   type: ClusterIP
-
 ---
-
 apiVersion: apps/v1
 kind: Deployment
 metadata:
   name: vllm-llama2-7b-pool
-  namespace: default
 spec:
   replicas: 3
   selector:
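
The Secret now ships inside the manifest with a literal `$HF_TOKEN` placeholder, so the token has to be substituted before applying. One way to do that, assuming `HF_TOKEN` is exported in the current shell:

```bash
# Replace the placeholder with the real token and apply in one pass:
sed "s|\$HF_TOKEN|${HF_TOKEN}|" examples/poc/manifests/vllm/vllm-lora-deployment.yaml \
  | kubectl apply -f -
```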

pkg/README.md (+10 -8)

@@ -1,24 +1,26 @@
 ## Quickstart
 
 ### Requirements
-The current manifests rely on Envoy Gateway [v1.2.1](https://gateway.envoyproxy.io/docs/install/install-yaml/#install-with-yaml) or higher.
+
+- The current manifests rely on Envoy Gateway [v1.2.1](https://gateway.envoyproxy.io/docs/install/install-yaml/#install-with-yaml) or higher.
+- 3 GPUs are required to run the vLLM deployment. Adjust the number of replicas as needed.
 
 ### Steps
 
 1. **Deploy Sample vLLM Application**
 
-   Create a Hugging Face secret to download the model [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf). Ensure that the token grants access to this model.
-   Deploy a sample vLLM deployment with the proper protocol to work with the LLM Instance Gateway.
+   Create a Hugging Face secret to download the model [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf). Ensure that the token grants access to this model.
+
+   Replace `$HF_TOKEN` in `../examples/poc/manifests/vllm/vllm-lora-deployment.yaml` with your Hugging Face secret and then deploy the sample vLLM deployment.
    ```bash
-   kubectl create secret generic hf-token --from-literal=token=$HF_TOKEN # Your Hugging Face Token with access to Llama2
    kubectl apply -f ../examples/poc/manifests/vllm/vllm-lora-deployment.yaml
    ```
 
-1. **Deploy InferenceModel and InferencePool**
+1. **Deploy InferenceModel**
 
-   Deploy a sample InferenceModel and InferencePool configuration based on the vLLM deployments mentioned above.
+   Deploy a sample InferenceModel configuration based on the vLLM deployments mentioned above.
    ```bash
-   kubectl apply -f ../examples/poc/manifests/inferencepool-with-model.yaml
+   kubectl apply -f ../examples/poc/manifests/inferencemodel.yaml
    ```
 
 1. **Update Envoy Gateway Config to enable Patch Policy**
@@ -36,7 +38,7 @@ The current manifests rely on Envoy Gateway [v1.2.1](https://gateway.envoyproxy.
    kubectl apply -f ./manifests/gateway.yaml
    ```
 
-1. **Deploy Ext-Proc**
+1. **Deploy Ext-Proc and InferencePool**
 
    ```bash
    kubectl apply -f ./manifests/ext_proc.yaml
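
After walking the updated quickstart, a short sanity check; the pod label is taken from the Service selector above, and the resource name assumes the project's CRDs are installed:

```bash
# Confirm the sample objects landed:
kubectl get inferencemodels
kubectl get pods -l app=vllm-llama2-7b-pool
```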

pkg/crd/install.go (-106)

This file was deleted.

pkg/crd/install_test.go (-130)

This file was deleted.
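
Both deletions remove the programmatic in-cluster CRD installer in favor of the manifest approach named in the commit message. A minimal sketch of the replacement flow, assuming kubebuilder's conventional `config/crd` layout (the actual path in this repo may differ):

```bash
# Apply the CRDs as static manifests instead of installing them from Go code:
kubectl apply -f config/crd/bases/
```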
