Skip to content

Commit 350a5ff

Browse files
committed
update llama-stack manifests
Signed-off-by: sallyom <[email protected]>
1 parent 1505851 commit 350a5ff

File tree

3 files changed

+80
-86
lines changed

3 files changed

+80
-86
lines changed

kubernetes/llama-stack/configmap.yaml

Lines changed: 18 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
kind: ConfigMap
1+
kind: ConfigMap
22
apiVersion: v1
33
metadata:
44
name: run-config
@@ -18,10 +18,17 @@ data:
1818
- vector_io
1919
providers:
2020
inference:
21-
- provider_id: vllm-inference
21+
- provider_id: llama-3b
2222
provider_type: remote::vllm
2323
config:
24-
url: ${env.VLLM_URL}
24+
url: ${env.LLAMA3B_URL}
25+
max_tokens: 128000
26+
api_token: fake
27+
tls_verify: false
28+
- provider_id: granite
29+
provider_type: remote::vllm
30+
config:
31+
url: ${env.GRANITE_URL}
2532
max_tokens: 128000
2633
api_token: fake
2734
tls_verify: false
@@ -39,7 +46,7 @@ data:
3946
- provider_id: milvus
4047
provider_type: inline::milvus
4148
config:
42-
db_path: ${env.MILVUS_DB_PATH}
49+
db_path: ${env.MILVUS_DB_PATH}
4350
safety:
4451
- provider_id: llama-guard
4552
provider_type: inline::llama-guard
@@ -92,9 +99,8 @@ data:
9299
provider_type: inline::meta-reference
93100
config:
94101
service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
95-
sinks: ${env.TELEMETRY_SINKS:console, otel_trace, otel_metric, sqlite}
102+
sinks: ${env.TELEMETRY_SINKS:console, otel_trace, sqlite}
96103
otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:}
97-
otel_metric_endpoint: ${env.OTEL_METRIC_ENDPOINT:}
98104
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
99105
tool_runtime:
100106
- provider_id: brave-search
@@ -121,8 +127,12 @@ data:
121127
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
122128
models:
123129
- metadata: {}
124-
model_id: ${env.INFERENCE_MODEL}
125-
provider_id: vllm-inference
130+
model_id: ${env.LLAMA3B_MODEL}
131+
provider_id: llama-3b
132+
model_type: llm
133+
- metadata: {}
134+
model_id: ${env.GRANITE_MODEL}
135+
provider_id: granite
126136
model_type: llm
127137
- metadata: {}
128138
model_id: ${env.SAFETY_MODEL}

kubernetes/llama-stack/deployment.yaml

Lines changed: 61 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,68 +1,75 @@
1+
kind: Deployment
12
apiVersion: apps/v1
2-
kind: Deployment
33
metadata:
44
name: llamastack-deployment
55
spec:
6-
replicas: 1
76
selector:
87
matchLabels:
98
app: llamastack
109
template:
1110
metadata:
11+
#annotations:
12+
# sidecar.opentelemetry.io/inject: otelsidecar
1213
labels:
1314
app: llamastack
1415
spec:
16+
volumes:
17+
- name: run-config-volume
18+
configMap:
19+
name: run-config
20+
defaultMode: 420
21+
- name: llama-persist
22+
persistentVolumeClaim:
23+
claimName: llama-persist
24+
- name: cache
25+
emptyDir: {}
26+
- name: pythain
27+
emptyDir: {}
1528
containers:
16-
- args:
17-
- --yaml-config
18-
- /app-config/config.yaml
19-
env:
20-
- name: VLLM_MAX_TOKENS
21-
value: "128000"
22-
- name: INFERENCE_MODEL
23-
value: meta-llama/Llama-3.2-3B-Instruct
24-
- name: VLLM_URL
25-
value: http://vllm:8000/v1
26-
- name: VLLM_API_TOKEN
27-
value: fake
28-
- name: SAFETY_MODEL
29-
value: meta-llama/Llama-Guard-3-8B
30-
- name: SAFETY_VLLM_URL
31-
value: http://safety.llama-serve.svc.cluster.local:8000/v1
32-
- name: OTEL_TRACE_ENDPOINT
33-
value: http://otel-collector-collector.observability-hub.svc.cluster.local:4318/v1/traces
34-
- name: OTEL_METRIC_ENDPOINT
35-
value: http://otel-collector-collector.observability-hub.svc.cluster.local:4318/v1/metrics
36-
- name: MILVUS_DB_PATH
37-
value: 'milvus.db'
38-
image: quay.io/redhat-et/llama:vllm-0.1.9
39-
imagePullPolicy: Always
40-
name: llamastack
41-
ports:
42-
- containerPort: 8321
43-
protocol: TCP
44-
resources: {}
45-
terminationMessagePath: /dev/termination-log
46-
terminationMessagePolicy: File
47-
volumeMounts:
48-
- mountPath: /app-config
49-
name: run-config-volume
50-
- mountPath: /.llama
51-
name: llama-persist
52-
- mountPath: /.cache
53-
name: cache
54-
dnsPolicy: ClusterFirst
55-
restartPolicy: Always
56-
schedulerName: default-scheduler
29+
- resources: {}
30+
terminationMessagePath: /dev/termination-log
31+
name: llamastack
32+
env:
33+
- name: MAX_TOKENS
34+
value: '128000'
35+
- name: VLLM_MAX_TOKENS
36+
value: '128000'
37+
- name: LLAMA3B_MODEL
38+
value: meta-llama/Llama-3.2-3B-Instruct
39+
- name: GRANITE_URL
40+
value: 'https://granite-8b-llama-serve.apps.ocp-beta-test.nerc.mghpcc.org/v1'
41+
- name: GRANITE_MODEL
42+
value: ibm-granite/granite-3.2-8b-instruct
43+
- name: LLAMA3B_URL
44+
value: 'https://llama32-3b-llama-serve.apps.ocp-beta-test.nerc.mghpcc.org/v1'
45+
- name: VLLM_API_TOKEN
46+
value: fake
47+
- name: OTEL_SERVICE_NAME
48+
value: om-llamastack
49+
- name: OTEL_TRACE_ENDPOINT
50+
value: 'http://otel-collector-collector.observability-hub.svc.cluster.local:4318/v1/traces'
51+
- name: SAFETY_MODEL
52+
value: meta-llama/Llama-Guard-3-8B
53+
- name: SAFETY_VLLM_URL
54+
value: 'http://safety.llama-serve.svc.cluster.local:8000/v1'
55+
- name: MILVUS_DB_PATH
56+
value: milvus.db
57+
ports:
58+
- containerPort: 8321
59+
protocol: TCP
60+
imagePullPolicy: Always
61+
volumeMounts:
62+
- name: pythain
63+
mountPath: /pythainlp-data
64+
- name: run-config-volume
65+
mountPath: /app-config
66+
- name: llama-persist
67+
mountPath: /.llama
68+
- name: cache
69+
mountPath: /.cache
70+
terminationMessagePolicy: File
71+
image: 'quay.io/redhat-et/llama:vllm-0.1.9'
72+
args:
73+
- '--config'
74+
- /app-config/config.yaml
5775
securityContext: {}
58-
terminationGracePeriodSeconds: 30
59-
volumes:
60-
- configMap:
61-
defaultMode: 420
62-
name: run-config
63-
name: run-config-volume
64-
- persistentVolumeClaim:
65-
claimName: llama-persist
66-
name: llama-persist
67-
- emptyDir: {}
68-
name: cache

kubernetes/llama-stack/template.yaml

Lines changed: 1 addition & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,9 @@
1-
kind: ConfigMap
1+
kind: ConfigMap
22
apiVersion: v1
33
metadata:
44
name: template
5-
uid: 53e66bb5-bebd-4a5d-8471-54728d3adc18
6-
resourceVersion: '602370800'
7-
creationTimestamp: '2025-02-20T18:20:10Z'
85
labels:
96
app: vllm
10-
managedFields:
11-
- manager: kubectl-create
12-
operation: Update
13-
apiVersion: v1
14-
time: '2025-02-20T18:20:10Z'
15-
fieldsType: FieldsV1
16-
fieldsV1:
17-
'f:data': {}
18-
'f:metadata':
19-
'f:labels':
20-
.: {}
21-
'f:app': {}
22-
- manager: Mozilla
23-
operation: Update
24-
apiVersion: v1
25-
time: '2025-02-25T18:54:13Z'
26-
fieldsType: FieldsV1
27-
fieldsV1:
28-
'f:data':
29-
'f:tool_chat_template_llama3.2_json.jinja': {}
307
data:
318
tool_chat_template_llama3.2_json.jinja: |
329
{{- bos_token }} {%- if custom_tools is defined %}

0 commit comments

Comments
 (0)