Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 675fd47

Browse files
committed Feb 15, 2025
Make all guides use the same deployment so that we can tell one story as the user navigates through the guides
1 parent ac02642 commit 675fd47

File tree

2 files changed

+35
-151
lines changed

2 files changed

+35
-151
lines changed
 

‎pkg/manifests/vllm/deployment-with-syncer.yaml

-137
This file was deleted.

‎pkg/manifests/vllm/deployment.yaml

+35-14
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,3 @@
1-
apiVersion: v1
2-
kind: Service
3-
metadata:
4-
name: vllm-llama2-7b-pool
5-
spec:
6-
selector:
7-
app: vllm-llama2-7b-pool
8-
ports:
9-
- protocol: TCP
10-
port: 8000
11-
targetPort: 8000
12-
type: ClusterIP
13-
---
141
apiVersion: apps/v1
152
kind: Deployment
163
metadata:
@@ -39,7 +26,7 @@ spec:
3926
- "8000"
4027
- "--enable-lora"
4128
- "--max-loras"
42-
- "4"
29+
- "2"
4330
- "--max-cpu-loras"
4431
- "12"
4532
- "--lora-modules"
@@ -53,6 +40,8 @@ spec:
5340
secretKeyRef:
5441
name: hf-token
5542
key: token
43+
- name: VLLM_ALLOW_RUNTIME_LORA_UPDATING
44+
value: "true"
5645
ports:
5746
- containerPort: 8000
5847
name: http
@@ -89,6 +78,19 @@ spec:
8978
name: shm
9079
- name: adapters
9180
mountPath: "/adapters"
81+
initContainers:
82+
- name: lora-adapter-syncer
83+
tty: true
84+
stdin: true
85+
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/lora-syncer:main
86+
restartPolicy: Always
87+
imagePullPolicy: Always
88+
env:
89+
- name: DYNAMIC_LORA_ROLLOUT_CONFIG
90+
value: "/config/configmap.yaml"
91+
volumeMounts: # DO NOT USE subPath
92+
- name: config-volume
93+
mountPath: /config
9294
restartPolicy: Always
9395
schedulerName: default-scheduler
9496
terminationGracePeriodSeconds: 30
@@ -100,3 +102,22 @@ spec:
100102
medium: Memory
101103
- name: adapters
102104
emptyDir: {}
105+
- name: config-volume
106+
configMap:
107+
name: vllm-llama2-7b-adapters
108+
---
109+
apiVersion: v1
110+
kind: ConfigMap
111+
metadata:
112+
name: vllm-llama2-7b-adapters
113+
data:
114+
configmap.yaml: |
115+
vLLMLoRAConfig:
116+
name: vllm-llama2-7b
117+
port: 8000
118+
ensureExist:
119+
models:
120+
- base-model: meta-llama/Llama-2-7b-hf
121+
id: tweet-summary-1
122+
source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm
123+

0 commit comments

Comments
 (0)
Please sign in to comment.