diff --git a/mkdocs.yml b/mkdocs.yml
index c9bc30e0..a024c16d 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -56,6 +56,7 @@ nav:
   - Guides:
     - User Guides:
       - Getting started: guides/index.md
+      - Adapter Rollout: guides/adapter-rollout.md
       - Implementer's Guide: guides/implementers.md
   - Reference:
     - API Reference: reference/spec.md
diff --git a/pkg/manifests/inferencemodel.yaml b/pkg/manifests/inferencemodel.yaml
index 0085a89d..2a292c16 100644
--- a/pkg/manifests/inferencemodel.yaml
+++ b/pkg/manifests/inferencemodel.yaml
@@ -1,21 +1,12 @@
 apiVersion: inference.networking.x-k8s.io/v1alpha1
 kind: InferenceModel
 metadata:
-  labels:
-    app.kubernetes.io/name: api
-    app.kubernetes.io/managed-by: kustomize
   name: inferencemodel-sample
 spec:
   modelName: tweet-summary
   criticality: Critical
   poolRef:
-    # this is the default val:
-    group: inference.networking.x-k8s.io
-    # this is the default val:
-    kind: InferencePool
     name: vllm-llama2-7b-pool
   targetModels:
-  - name: tweet-summary-0
-    weight: 50
   - name: tweet-summary-1
-    weight: 50
+    weight: 100
diff --git a/pkg/manifests/vllm/deployment-with-syncer.yaml b/pkg/manifests/vllm/deployment-with-syncer.yaml
deleted file mode 100644
index d6110f4b..00000000
--- a/pkg/manifests/vllm/deployment-with-syncer.yaml
+++ /dev/null
@@ -1,145 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: vllm-llama2-7b-pool
-spec:
-  selector:
-    app: vllm-llama2-7b-pool
-  ports:
-    - protocol: TCP
-      port: 8000
-      targetPort: 8000
-  type: ClusterIP
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: vllm-llama2-7b-pool
-spec:
-  replicas: 3
-  selector:
-    matchLabels:
-      app: vllm-llama2-7b-pool
-  template:
-    metadata:
-      labels:
-        app: vllm-llama2-7b-pool
-    spec:
-      containers:
-        - name: lora
-          image: "vllm/vllm-openai:latest"
-          imagePullPolicy: Always
-          command: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
-          args:
-            - "--model"
-            - "meta-llama/Llama-2-7b-hf"
-            - "--tensor-parallel-size"
-            - "1"
-            - "--port"
-            - "8000"
-            - "--enable-lora"
-            - "--max-loras"
-            - "4"
-            - "--max-cpu-loras"
-            - "12"
-            - "--lora-modules"
-            - '{"name": "tweet-summary-0", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
-            - '{"name": "tweet-summary-1", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
-          env:
-            - name: PORT
-              value: "8000"
-            - name: HUGGING_FACE_HUB_TOKEN
-              valueFrom:
-                secretKeyRef:
-                  name: hf-token
-                  key: token
-            - name: VLLM_ALLOW_RUNTIME_LORA_UPDATING
-              value: "true"
-          ports:
-            - containerPort: 8000
-              name: http
-              protocol: TCP
-          livenessProbe:
-            failureThreshold: 240
-            httpGet:
-              path: /health
-              port: http
-              scheme: HTTP
-            initialDelaySeconds: 5
-            periodSeconds: 5
-            successThreshold: 1
-            timeoutSeconds: 1
-          readinessProbe:
-            failureThreshold: 600
-            httpGet:
-              path: /health
-              port: http
-              scheme: HTTP
-            initialDelaySeconds: 5
-            periodSeconds: 5
-            successThreshold: 1
-            timeoutSeconds: 1
-          resources:
-            limits:
-              nvidia.com/gpu: 1
-            requests:
-              nvidia.com/gpu: 1
-          volumeMounts:
-            - mountPath: /data
-              name: data
-            - mountPath: /dev/shm
-              name: shm
-            - name: adapters
-              mountPath: "/adapters"
-      initContainers:
-        - name: lora-adapter-syncer
-          tty: true
-          stdin: true
-          image: us-central1-docker.pkg.dev/ahg-gke-dev/jobset2/lora-syncer:6dc97be
-          restartPolicy: Always
-          imagePullPolicy: Always
-          env:
-            - name: DYNAMIC_LORA_ROLLOUT_CONFIG
-              value: "/config/configmap.yaml"
-          volumeMounts: # DO NOT USE subPath
-            - name: config-volume
-              mountPath: /config
-      restartPolicy: Always
-      schedulerName: default-scheduler
-      terminationGracePeriodSeconds: 30
-      volumes:
-        - name: data
-          emptyDir: {}
-        - name: shm
-          emptyDir:
-            medium: Memory
-        - name: adapters
-          emptyDir: {}
-        - name: config-volume
-          configMap:
-            name: dynamic-lora-config
-
----
-
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: dynamic-lora-config
-data:
-  configmap.yaml: |
-    vLLMLoRAConfig:
-      name: sql-loras-llama
-      port: 8000
-      ensureExist:
-        models:
-        - base-model: meta-llama/Llama-2-7b-hf
-          id: tweet-summary-0
-          source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm
-        - base-model: meta-llama/Llama-2-7b-hf
-          id: tweet-summary-1
-          source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm
-      ensureNotExist:
-        models:
-        - base-model: meta-llama/Llama-2-7b-hf
-          id: tweet-summary-2
-          source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm
\ No newline at end of file
diff --git a/pkg/manifests/vllm/deployment.yaml b/pkg/manifests/vllm/deployment.yaml
index 1d115f4d..a54d99b3 100644
--- a/pkg/manifests/vllm/deployment.yaml
+++ b/pkg/manifests/vllm/deployment.yaml
@@ -1,16 +1,3 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: vllm-llama2-7b-pool
-spec:
-  selector:
-    app: vllm-llama2-7b-pool
-  ports:
-    - protocol: TCP
-      port: 8000
-      targetPort: 8000
-  type: ClusterIP
----
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -39,7 +26,7 @@ spec:
             - "8000"
             - "--enable-lora"
             - "--max-loras"
-            - "4"
+            - "2"
             - "--max-cpu-loras"
             - "12"
             - "--lora-modules"
@@ -53,6 +40,8 @@ spec:
                 secretKeyRef:
                   name: hf-token
                   key: token
+            - name: VLLM_ALLOW_RUNTIME_LORA_UPDATING
+              value: "true"
           ports:
             - containerPort: 8000
               name: http
@@ -89,6 +78,19 @@ spec:
               name: shm
             - name: adapters
              mountPath: "/adapters"
+      initContainers:
+        - name: lora-adapter-syncer
+          tty: true
+          stdin: true
+          image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/lora-syncer:main
+          restartPolicy: Always
+          imagePullPolicy: Always
+          env:
+            - name: DYNAMIC_LORA_ROLLOUT_CONFIG
+              value: "/config/configmap.yaml"
+          volumeMounts: # DO NOT USE subPath, dynamic configmap updates don't work on subPaths
+            - name: config-volume
+              mountPath: /config
       restartPolicy: Always
       schedulerName: default-scheduler
       terminationGracePeriodSeconds: 30
@@ -100,3 +102,22 @@ spec:
             medium: Memory
         - name: adapters
           emptyDir: {}
+        - name: config-volume
+          configMap:
+            name: vllm-llama2-7b-adapters
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: vllm-llama2-7b-adapters
+data:
+  configmap.yaml: |
+    vLLMLoRAConfig:
+      name: vllm-llama2-7b
+      port: 8000
+      ensureExist:
+        models:
+        - base-model: meta-llama/Llama-2-7b-hf
+          id: tweet-summary-1
+          source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm
+
diff --git a/site-src/guides/adapter-rollout.md b/site-src/guides/adapter-rollout.md
new file mode 100644
index 00000000..9ce8c3a4
--- /dev/null
+++ b/site-src/guides/adapter-rollout.md
@@ -0,0 +1,133 @@
+# Adapter Rollout
+
+The goal of this guide is to demonstrate how to roll out a new adapter version.
+
+## **Prerequisites**
+
+Follow the steps in the [main guide](index.md).
+
+
+## **Safely roll out the v2 adapter**
+
+### Load the new adapter version onto the model servers
+
+This guide leverages the LoRA syncer sidecar to dynamically manage adapters within a vLLM deployment, enabling users to add or remove them through a shared ConfigMap.
+
+
+Modify the LoRA syncer ConfigMap to initiate loading of the new adapter version.
+
+
+```bash
+ kubectl edit configmap vllm-llama2-7b-adapters
+```
+
+Change the ConfigMap to match the following (note the new entry under models):
+
+```yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: vllm-llama2-7b-adapters
+data:
+  configmap.yaml: |
+    vLLMLoRAConfig:
+      name: vllm-llama2-7b-adapters
+      port: 8000
+      ensureExist:
+        models:
+        - base-model: meta-llama/Llama-2-7b-hf
+          id: tweet-summary-1
+          source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm
+        - base-model: meta-llama/Llama-2-7b-hf
+          id: tweet-summary-2
+          source: mahimairaja/tweet-summarization-llama-2-finetuned
+```
+
+The new adapter version is applied to the model servers live, without requiring a restart.
+
+
+### Direct traffic to the new adapter version
+
+Modify the InferenceModel to configure a canary rollout with traffic splitting. In this example, 10% of traffic for the `tweet-summary` model will be sent to the new ***tweet-summary-2*** adapter.
+
+
+```bash
+ kubectl edit inferencemodel inferencemodel-sample
+```
+
+Change the targetModels list in InferenceModel to match the following:
+
+
+```yaml
+apiVersion: inference.networking.x-k8s.io/v1alpha1
+kind: InferenceModel
+metadata:
+  name: inferencemodel-sample
+spec:
+  modelName: tweet-summary
+  criticality: Critical
+  poolRef:
+    name: vllm-llama2-7b-pool
+  targetModels:
+  - name: tweet-summary-1
+    weight: 90
+  - name: tweet-summary-2
+    weight: 10
+
+```
+
+The above configuration means that roughly one in every ten requests will be sent to the new adapter version. Try it out:
+
+1. Get the gateway IP:
+```bash
+IP=$(kubectl get gateway/inference-gateway -o jsonpath='{.status.addresses[0].value}'); PORT=8081
+```
+
+2. Send a few requests as follows:
+```bash
+curl -i ${IP}:${PORT}/v1/completions -H 'Content-Type: application/json' -d '{
+"model": "tweet-summary",
+"prompt": "Write as if you were a critic: San Francisco",
+"max_tokens": 100,
+"temperature": 0
+}'
+```
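+
+3. Optionally, check how requests are being split between the two adapter versions. The sketch below sends 20 requests and counts which adapter each response reports; it assumes `jq` is installed and that the `model` field in the response reflects the adapter the gateway selected:
+```bash
+for i in $(seq 1 20); do
+  # Each response's "model" field should name the adapter that served it
+  curl -s ${IP}:${PORT}/v1/completions -H 'Content-Type: application/json' -d '{
+  "model": "tweet-summary",
+  "prompt": "Write as if you were a critic: San Francisco",
+  "max_tokens": 10,
+  "temperature": 0
+  }' | jq -r '.model'
+done | sort | uniq -c
+```
+
+With a 90/10 split, most responses should report ***tweet-summary-1*** and only a few ***tweet-summary-2***.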
+
+### Finish the rollout
+
+
+Modify the InferenceModel to direct 100% of the traffic to the latest version of the adapter:
+
+```yaml
+apiVersion: inference.networking.x-k8s.io/v1alpha1
+kind: InferenceModel
+metadata:
+  name: inferencemodel-sample
+spec:
+  modelName: tweet-summary
+  criticality: Critical
+  poolRef:
+    name: vllm-llama2-7b-pool
+  targetModels:
+  - name: tweet-summary-2
+    weight: 100
+```
+
+Unload the older version from the servers by updating the LoRA syncer ConfigMap to list it under the `ensureNotExist` list:
+
+```yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: vllm-llama2-7b-adapters
+data:
+  configmap.yaml: |
+    vLLMLoRAConfig:
+      name: vllm-llama2-7b-adapters
+      port: 8000
+      ensureExist:
+        models:
+        - base-model: meta-llama/Llama-2-7b-hf
+          id: tweet-summary-2
+          source: mahimairaja/tweet-summarization-llama-2-finetuned
+      ensureNotExist:
+        models:
+        - base-model: meta-llama/Llama-2-7b-hf
+          id: tweet-summary-1
+          source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm
+```
+
+With this, all requests should be served by the new adapter version.
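+
+Optionally, you can confirm that the old adapter was unloaded by listing the models served by one of the pods. This is a sketch; it assumes the `vllm-llama2-7b-pool` deployment from the main guide and that `jq` is installed:
+
+```bash
+# Forward a local port to one of the model server pods
+kubectl port-forward deployment/vllm-llama2-7b-pool 8000:8000 &
+# List the served models (base model plus currently registered LoRA adapters)
+curl -s localhost:8000/v1/models | jq -r '.data[].id'
+# Stop the port-forward
+kill %1
+```
+
+The list should now include `tweet-summary-2` and no longer contain `tweet-summary-1`.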
diff --git a/site-src/guides/dynamic-lora.md b/site-src/guides/dynamic-lora.md
deleted file mode 100644
index ef3c2b0f..00000000
--- a/site-src/guides/dynamic-lora.md
+++ /dev/null
@@ -1,93 +0,0 @@
-# Getting started with Gateway API Inference Extension with Dynamic lora updates on vllm
-
-The goal of this guide is to get a single InferencePool running with vLLM and demonstrate use of dynamic lora updating!
-
-### Requirements
- - Envoy Gateway [v1.2.1](https://gateway.envoyproxy.io/docs/install/install-yaml/#install-with-yaml) or higher
- - A cluster with:
-   - Support for Services of type `LoadBalancer`. (This can be validated by ensuring your Envoy Gateway is up and running). For example, with Kind,
-     you can follow [these steps](https://kind.sigs.k8s.io/docs/user/loadbalancer).
-   - 3 GPUs to run the sample model server. Adjust the number of replicas in `./manifests/vllm/deployment.yaml` as needed.
-
-### Steps
-
-1. **Deploy Sample VLLM Model Server with dynamic lora update enabled and dynamic lora syncer sidecar **
-   [Redeploy the vLLM deployment with Dynamic lora adapter enabled and Lora syncer sidecar and configmap](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/pkg/manifests/vllm/dynamic-lora-sidecar/deployment.yaml)
-
-Rest of the steps are same as [general setup](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/site-src/guides/index.md)
-
-
-### Safely rollout v2 adapter
-
-1. Update the LoRA syncer ConfigMap to make the new adapter version available on the model servers.
-
-```yaml
-  apiVersion: v1
-  kind: ConfigMap
-  metadata:
-    name: dynamic-lora-config
-  data:
-    configmap.yaml: |
-      vLLMLoRAConfig:
-        name: sql-loras-llama
-        port: 8000
-        ensureExist:
-          models:
-          - base-model: meta-llama/Llama-2-7b-hf
-            id: tweet-summary-0
-            source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm
-          - base-model: meta-llama/Llama-2-7b-hf
-            id: tweet-summary-1
-            source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm
-          - base-model: meta-llama/Llama-2-7b-hf
-            id: tweet-summary-2
-            source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm
-2. Configure a canary rollout with traffic split using LLMService. In this example, 40% of traffic for tweet-summary model will be sent to the ***tweet-summary-2*** adapter .
-
-```yaml
-model:
-  name: tweet-summary
-  targetModels:
-  targetModelName: tweet-summary-0
-  weight: 20
-  targetModelName: tweet-summary-1
-  weight: 40
-  targetModelName: tweet-summary-2
-  weight: 40
-
-```
-
-3. Finish rollout by setting the traffic to the new version 100%.
-```yaml
-model:
-  name: tweet-summary
-  targetModels:
-  targetModelName: tweet-summary-2
-  weight: 100
-```
-
-4. Remove v1 from dynamic lora configmap.
-```yaml
-  apiVersion: v1
-  kind: ConfigMap
-  metadata:
-    name: dynamic-lora-config
-  data:
-    configmap.yaml: |
-      vLLMLoRAConfig:
-        name: sql-loras-llama
-        port: 8000
-        ensureExist:
-          models:
-          - base-model: meta-llama/Llama-2-7b-hf
-            id: tweet-summary-2
-            source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm
-        ensureNotExist:
-          models:
-          - base-model: meta-llama/Llama-2-7b-hf
-            id: tweet-summary-1
-            source: gs://[HUGGING FACE PATH]
-          - base-model: meta-llama/Llama-2-7b-hf
-            id: tweet-summary-0
-            source: gs://[HUGGING FACE PATH]
-```
diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index 2cc971c6..b9c38d87 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -2,16 +2,16 @@
 This quickstart guide is intended for engineers familiar with k8s and model servers (vLLM in this instance). The goal of this guide is to get a first, single InferencePool up and running!
 
-### Requirements
+## **Prerequisites**
  - Envoy Gateway [v1.2.1](https://gateway.envoyproxy.io/docs/install/install-yaml/#install-with-yaml) or higher
  - A cluster with:
    - Support for Services of type `LoadBalancer`. (This can be validated by ensuring your Envoy Gateway is up and running). For example, with Kind,
      you can follow [these steps](https://kind.sigs.k8s.io/docs/user/loadbalancer).
    - 3 GPUs to run the sample model server. Adjust the number of replicas in `./manifests/vllm/deployment.yaml` as needed.
 
-### Steps
+## **Steps**
 
-1. **Deploy Sample Model Server**
+### Deploy Sample Model Server
 
    Create a Hugging Face secret to download the model [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf). Ensure that the token grants access to this model.
    Deploy a sample vLLM deployment with the proper protocol to work with the LLM Instance Gateway.
@@ -20,22 +20,20 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/vllm/deployment.yaml
   ```
 
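+   Before moving on, you can wait for the model server pods to become ready; pulling the image and downloading the base model can take a while. This is a sketch that assumes the `app=vllm-llama2-7b-pool` pod label from the sample deployment:
+
+   ```bash
+   kubectl wait --for=condition=Ready pod -l app=vllm-llama2-7b-pool --timeout=30m
+   ```
+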
+### Install the Inference Extension CRDs
-
-
-1. **Install the Inference Extension CRDs:**
-
-   ```sh
+   ```bash
    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v0.1.0/manifests.yaml
+   ```
 
-1. **Deploy InferenceModel**
+### Deploy InferenceModel
 
-   Deploy the sample InferenceModel which is configured to load balance traffic between the `tweet-summary-0` and `tweet-summary-1` [LoRA adapters](https://docs.vllm.ai/en/latest/features/lora.html) of the sample model server.
+   Deploy the sample InferenceModel which is configured to direct traffic to the `tweet-summary-1` [LoRA adapter](https://docs.vllm.ai/en/latest/features/lora.html) of the sample model server.
 
    ```bash
    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/inferencemodel.yaml
    ```
-1. **Update Envoy Gateway Config to enable Patch Policy**
+
+### Update Envoy Gateway Config to enable Patch Policy
 
    Our custom LLM Gateway ext-proc is patched into the existing envoy gateway via `EnvoyPatchPolicy`. To enable this feature, we must extend the Envoy Gateway config map. To do this, simply run:
    ```bash
@@ -43,7 +41,8 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
    kubectl rollout restart deployment envoy-gateway -n envoy-gateway-system
    ```
    Additionally, if you would like to enable the admin interface, you can uncomment the admin lines and run this again.
-1. **Deploy Gateway**
+
+### Deploy Gateway
 
    ```bash
    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/gateway/gateway.yaml
    ```
    Confirm that the Gateway was assigned an IP address and reports a Programmed=True status:
    ```bash
    $ kubectl get gateway inference-gateway
@@ -56,26 +55,28 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
    NAME                CLASS               ADDRESS   PROGRAMMED   AGE
    inference-gateway   inference-gateway             True         22s
    ```
-1. **Deploy the Inference Extension and InferencePool**
+### Deploy the Inference Extension and InferencePool
 
    ```bash
    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/ext_proc.yaml
    ```
-1. **Deploy Envoy Gateway Custom Policies**
+### Deploy Envoy Gateway Custom Policies
 
    ```bash
    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/gateway/extension_policy.yaml
    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/gateway/patch_policy.yaml
    ```
    > **_NOTE:_** This is also per InferencePool, and will need to be configured to support the new pool should you wish to experiment further.
-1. **OPTIONALLY**: Apply Traffic Policy
+
+### **OPTIONALLY**: Apply Traffic Policy
 
    For high-traffic benchmarking you can apply this manifest to avoid any defaults that can cause timeouts/errors.
 
    ```bash
    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/gateway/traffic_policy.yaml
    ```
-1. **Try it out**
+
+### Try it out
 
    Wait until the gateway is ready.
 
    ```bash
    IP=$(kubectl get gateway/inference-gateway -o jsonpath='{.status.addresses[0].value}')
    PORT=8081
 
    curl -i ${IP}:${PORT}/v1/completions -H 'Content-Type: application/json' -d '{
    "model": "tweet-summary",
@@ -89,4 +90,4 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
    "max_tokens": 100,
    "temperature": 0
    }'
-   ```
\ No newline at end of file
+   ```
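+
+   The gateway should return an OpenAI-style completions response. To pull out just the generated text, you can pipe the same request through `jq` (a sketch; it assumes `jq` is installed):
+
+   ```bash
+   curl -s ${IP}:${PORT}/v1/completions -H 'Content-Type: application/json' -d '{
+   "model": "tweet-summary",
+   "prompt": "Write as if you were a critic: San Francisco",
+   "max_tokens": 100,
+   "temperature": 0
+   }' | jq -r '.choices[0].text'
+   ```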