File tree 5 files changed +20
-20
lines changed
5 files changed +20
-20
lines changed Original file line number Diff line number Diff line change @@ -2,22 +2,22 @@ apiVersion: inference.networking.x-k8s.io/v1alpha2
2
2
kind : InferencePool
3
3
metadata :
4
4
labels :
5
- name : my-pool
5
+ name : vllm-llama2-7b
6
6
spec :
7
7
targetPortNumber : 8000
8
8
selector :
9
- app : my-pool
9
+ app : vllm-llama2-7b
10
10
extensionRef :
11
- name : my-pool -epp
11
+ name : vllm-llama2-7b -epp
12
12
---
13
13
apiVersion : v1
14
14
kind : Service
15
15
metadata :
16
- name : my-pool -epp
16
+ name : vllm-llama2-7b -epp
17
17
namespace : default
18
18
spec :
19
19
selector :
20
- app : my-pool -epp
20
+ app : vllm-llama2-7b -epp
21
21
ports :
22
22
- protocol : TCP
23
23
port : 9002
@@ -27,27 +27,27 @@ spec:
27
27
apiVersion : apps/v1
28
28
kind : Deployment
29
29
metadata :
30
- name : my-pool -epp
30
+ name : vllm-llama2-7b -epp
31
31
namespace : default
32
32
labels :
33
- app : my-pool -epp
33
+ app : vllm-llama2-7b -epp
34
34
spec :
35
35
replicas : 1
36
36
selector :
37
37
matchLabels :
38
- app : my-pool -epp
38
+ app : vllm-llama2-7b -epp
39
39
template :
40
40
metadata :
41
41
labels :
42
- app : my-pool -epp
42
+ app : vllm-llama2-7b -epp
43
43
spec :
44
44
containers :
45
45
- name : epp
46
46
image : us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
47
47
imagePullPolicy : Always
48
48
args :
49
49
- -poolName
50
- - " my-pool "
50
+ - " vllm-llama2-7b "
51
51
- -v
52
52
- " 4"
53
53
- -grpcPort
Original file line number Diff line number Diff line change 1
1
apiVersion : apps/v1
2
2
kind : Deployment
3
3
metadata :
4
- name : my-pool
4
+ name : vllm-llama2-7b
5
5
spec :
6
6
replicas : 3
7
7
selector :
8
8
matchLabels :
9
- app : my-pool
9
+ app : vllm-llama2-7b
10
10
template :
11
11
metadata :
12
12
labels :
13
- app : my-pool
13
+ app : vllm-llama2-7b
14
14
spec :
15
15
containers :
16
16
- name : lora
Original file line number Diff line number Diff line change 1
1
apiVersion : apps/v1
2
2
kind : Deployment
3
3
metadata :
4
- name : my-pool
4
+ name : vllm-llama2-7b
5
5
spec :
6
6
replicas : 3
7
7
selector :
8
8
matchLabels :
9
- app : my-pool
9
+ app : vllm-llama2-7b
10
10
template :
11
11
metadata :
12
12
labels :
13
- app : my-pool
13
+ app : vllm-llama2-7b
14
14
spec :
15
15
containers :
16
16
- name : lora
Original file line number Diff line number Diff line change @@ -57,15 +57,15 @@ const (
57
57
// TODO [danehans]: Must be "default" until https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/227 is fixed
58
58
nsName = "default"
59
59
// modelServerName is the name of the model server test resources.
60
- modelServerName = "my-pool "
60
+ modelServerName = "vllm-llama2-7b "
61
61
// modelName is the test model name.
62
62
modelName = "tweet-summary"
63
63
// envoyName is the name of the envoy proxy test resources.
64
64
envoyName = "envoy"
65
65
// envoyPort is the listener port number of the test envoy proxy.
66
66
envoyPort = "8081"
67
67
// inferExtName is the name of the inference extension test resources.
68
- inferExtName = "my-pool -epp"
68
+ inferExtName = "vllm-llama2-7b -epp"
69
69
// clientManifest is the manifest for the client test resources.
70
70
clientManifest = "../../testdata/client.yaml"
71
71
// modelServerSecretManifest is the manifest for the model server secret resource.
Original file line number Diff line number Diff line change @@ -100,7 +100,7 @@ data:
100
100
grpc_service:
101
101
envoy_grpc:
102
102
cluster_name: ext_proc
103
- authority: my-pool -epp.default:9002
103
+ authority: vllm-llama2-7b -epp.default:9002
104
104
timeout: 10s
105
105
processing_mode:
106
106
request_header_mode: SEND
@@ -194,7 +194,7 @@ data:
194
194
- endpoint:
195
195
address:
196
196
socket_address:
197
- address: my-pool -epp.default
197
+ address: vllm-llama2-7b -epp.default
198
198
port_value: 9002
199
199
health_status: HEALTHY
200
200
load_balancing_weight: 1
You can’t perform that action at this time.
0 commit comments