Skip to content

Commit b74aced

Browse files
committed
Make all guides use the same deployment so that we can tell one story as the user navigates through the guides.
1 parent e27c0ea commit b74aced

File tree

2 files changed

+35
-151
lines changed

2 files changed

+35
-151
lines changed

pkg/manifests/vllm/deployment-with-syncer.yaml

-137
This file was deleted.

pkg/manifests/vllm/deployment.yaml

+35-14
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,3 @@
1-
apiVersion: v1
2-
kind: Service
3-
metadata:
4-
name: vllm-llama2-7b-pool
5-
spec:
6-
selector:
7-
app: vllm-llama2-7b-pool
8-
ports:
9-
- protocol: TCP
10-
port: 8000
11-
targetPort: 8000
12-
type: ClusterIP
13-
---
141
apiVersion: apps/v1
152
kind: Deployment
163
metadata:
@@ -39,7 +26,7 @@ spec:
3926
- "8000"
4027
- "--enable-lora"
4128
- "--max-loras"
42-
- "4"
29+
- "2"
4330
- "--max-cpu-loras"
4431
- "12"
4532
- "--lora-modules"
@@ -53,6 +40,8 @@ spec:
5340
secretKeyRef:
5441
name: hf-token
5542
key: token
43+
- name: VLLM_ALLOW_RUNTIME_LORA_UPDATING
44+
value: "true"
5645
ports:
5746
- containerPort: 8000
5847
name: http
@@ -89,6 +78,19 @@ spec:
8978
name: shm
9079
- name: adapters
9180
mountPath: "/adapters"
81+
initContainers:
82+
- name: lora-adapter-syncer
83+
tty: true
84+
stdin: true
85+
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/lora-syncer:main
86+
restartPolicy: Always
87+
imagePullPolicy: Always
88+
env:
89+
- name: DYNAMIC_LORA_ROLLOUT_CONFIG
90+
value: "/config/configmap.yaml"
91+
volumeMounts: # DO NOT USE subPath
92+
- name: config-volume
93+
mountPath: /config
9294
restartPolicy: Always
9395
schedulerName: default-scheduler
9496
terminationGracePeriodSeconds: 30
@@ -100,3 +102,22 @@ spec:
100102
medium: Memory
101103
- name: adapters
102104
emptyDir: {}
105+
- name: config-volume
106+
configMap:
107+
name: vllm-llama2-7b-adapters
108+
---
109+
apiVersion: v1
110+
kind: ConfigMap
111+
metadata:
112+
name: vllm-llama2-7b-adapters
113+
data:
114+
configmap.yaml: |
115+
vLLMLoRAConfig:
116+
name: vllm-llama2-7b
117+
port: 8000
118+
ensureExist:
119+
models:
120+
- base-model: meta-llama/Llama-2-7b-hf
121+
id: tweet-summary-1
122+
source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm
123+

0 commit comments

Comments
 (0)