File tree 2 files changed +38
-139
lines changed
2 files changed +38
-139
lines changed Load Diff This file was deleted.
Original file line number Diff line number Diff line change @@ -39,11 +39,11 @@ spec:
39
39
- " 8000"
40
40
- " --enable-lora"
41
41
- " --max-loras"
42
- - " 4 "
42
+ - " 2 "
43
43
- " --max-cpu-loras"
44
44
- " 12"
45
45
- " --lora-modules"
46
- - ' {"name": "tweet-summary-0", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
46
+ - ' {"name": "tweet-summary-0", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
47
47
- ' {"name": "tweet-summary-1", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
48
48
env :
49
49
- name : PORT
53
53
secretKeyRef :
54
54
name : hf-token
55
55
key : token
56
+ - name : VLLM_ALLOW_RUNTIME_LORA_UPDATING
57
+ value : " true"
56
58
ports :
57
59
- containerPort : 8000
58
60
name : http
89
91
name : shm
90
92
- name : adapters
91
93
mountPath : " /adapters"
94
+ initContainers :
95
+ - name : lora-adapter-syncer
96
+ tty : true
97
+ stdin : true
98
+ image : us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/lora-syncer:main
99
+ restartPolicy : Always
100
+ imagePullPolicy : Always
101
+ env :
102
+ - name : DYNAMIC_LORA_ROLLOUT_CONFIG
103
+ value : " /config/configmap.yaml"
104
+ volumeMounts : # DO NOT USE subPath
105
+ - name : config-volume
106
+ mountPath : /config
92
107
restartPolicy : Always
93
108
schedulerName : default-scheduler
94
109
terminationGracePeriodSeconds : 30
@@ -100,3 +115,24 @@ spec:
100
115
medium : Memory
101
116
- name : adapters
102
117
emptyDir : {}
118
+ - name : config-volume
119
+ configMap :
120
+ name : vllm-llama2-7b-adapters
121
+
122
+ ---
123
+
124
+ apiVersion : v1
125
+ kind : ConfigMap
126
+ metadata :
127
+ name : vllm-llama2-7b-adapters
128
+ data :
129
+ configmap.yaml : |
130
+ vLLMLoRAConfig:
131
+ name: vllm-llama2-7b
132
+ port: 8000
133
+ ensureExist:
134
+ models:
135
+ - base-model: meta-llama/Llama-2-7b-hf
136
+ id: tweet-summary-1
137
+ source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm
138
+
You can’t perform that action at this time.
0 commit comments