File tree 2 files changed +35
-151
lines changed
2 files changed +35
-151
lines changed Load Diff This file was deleted.
Original file line number Diff line number Diff line change 1
- apiVersion : v1
2
- kind : Service
3
- metadata :
4
- name : vllm-llama2-7b-pool
5
- spec :
6
- selector :
7
- app : vllm-llama2-7b-pool
8
- ports :
9
- - protocol : TCP
10
- port : 8000
11
- targetPort : 8000
12
- type : ClusterIP
13
- ---
14
1
apiVersion : apps/v1
15
2
kind : Deployment
16
3
metadata :
39
26
- " 8000"
40
27
- " --enable-lora"
41
28
- " --max-loras"
42
- - " 4 "
29
+ - " 2 "
43
30
- " --max-cpu-loras"
44
31
- " 12"
45
32
- " --lora-modules"
53
40
secretKeyRef :
54
41
name : hf-token
55
42
key : token
43
+ - name : VLLM_ALLOW_RUNTIME_LORA_UPDATING
44
+ value : " true"
56
45
ports :
57
46
- containerPort : 8000
58
47
name : http
89
78
name : shm
90
79
- name : adapters
91
80
mountPath : " /adapters"
81
+ initContainers :
82
+ - name : lora-adapter-syncer
83
+ tty : true
84
+ stdin : true
85
+ image : us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/lora-syncer:main
86
+ restartPolicy : Always
87
+ imagePullPolicy : Always
88
+ env :
89
+ - name : DYNAMIC_LORA_ROLLOUT_CONFIG
90
+ value : " /config/configmap.yaml"
91
+ volumeMounts : # DO NOT USE subPath
92
+ - name : config-volume
93
+ mountPath : /config
92
94
restartPolicy : Always
93
95
schedulerName : default-scheduler
94
96
terminationGracePeriodSeconds : 30
@@ -100,3 +102,22 @@ spec:
100
102
medium : Memory
101
103
- name : adapters
102
104
emptyDir : {}
105
+ - name : config-volume
106
+ configMap :
107
+ name : vllm-llama2-7b-adapters
108
+ ---
109
+ apiVersion : v1
110
+ kind : ConfigMap
111
+ metadata :
112
+ name : vllm-llama2-7b-adapters
113
+ data :
114
+ configmap.yaml : |
115
+ vLLMLoRAConfig:
116
+ name: vllm-llama2-7b
117
+ port: 8000
118
+ ensureExist:
119
+ models:
120
+ - base-model: meta-llama/Llama-2-7b-hf
121
+ id: tweet-summary-1
122
+ source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm
123
+
You can’t perform that action at this time.
0 commit comments