Skip to content

Commit bacbacc

Browse files
committed
Make all guides use the same deployment so that we can tell one story as the user navigates through the guides
1 parent e27c0ea commit bacbacc

File tree

2 files changed

+38
-139
lines changed

2 files changed

+38
-139
lines changed

pkg/manifests/vllm/deployment-with-syncer.yaml

-137
This file was deleted.

pkg/manifests/vllm/deployment.yaml

+38-2
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,11 @@ spec:
3939
- "8000"
4040
- "--enable-lora"
4141
- "--max-loras"
42-
- "4"
42+
- "2"
4343
- "--max-cpu-loras"
4444
- "12"
4545
- "--lora-modules"
46-
- '{"name": "tweet-summary-0", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
46+
- '{"name": "tweet-summary-0", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
4747
- '{"name": "tweet-summary-1", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
4848
env:
4949
- name: PORT
@@ -53,6 +53,8 @@ spec:
5353
secretKeyRef:
5454
name: hf-token
5555
key: token
56+
- name: VLLM_ALLOW_RUNTIME_LORA_UPDATING
57+
value: "true"
5658
ports:
5759
- containerPort: 8000
5860
name: http
@@ -89,6 +91,19 @@ spec:
8991
name: shm
9092
- name: adapters
9193
mountPath: "/adapters"
94+
initContainers:
95+
- name: lora-adapter-syncer
96+
tty: true
97+
stdin: true
98+
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/lora-syncer:main
99+
restartPolicy: Always
100+
imagePullPolicy: Always
101+
env:
102+
- name: DYNAMIC_LORA_ROLLOUT_CONFIG
103+
value: "/config/configmap.yaml"
104+
volumeMounts: # DO NOT USE subPath
105+
- name: config-volume
106+
mountPath: /config
92107
restartPolicy: Always
93108
schedulerName: default-scheduler
94109
terminationGracePeriodSeconds: 30
@@ -100,3 +115,24 @@ spec:
100115
medium: Memory
101116
- name: adapters
102117
emptyDir: {}
118+
- name: config-volume
119+
configMap:
120+
name: vllm-llama2-7b-adapters
121+
122+
---
123+
124+
apiVersion: v1
125+
kind: ConfigMap
126+
metadata:
127+
name: vllm-llama2-7b-adapters
128+
data:
129+
configmap.yaml: |
130+
vLLMLoRAConfig:
131+
name: vllm-llama2-7b
132+
port: 8000
133+
ensureExist:
134+
models:
135+
- base-model: meta-llama/Llama-2-7b-hf
136+
id: tweet-summary-1
137+
source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm
138+

0 commit comments

Comments
 (0)