Skip to content

Commit a591cd0

Browse files
authored
Rename ext_proc.yaml to inferencepool.yaml (#515)
* rename ext_proc.yaml to inferencepool.yaml
* removed ext-proc suffix
* rename my-pool to vllm-llama2-7b
1 parent d7a9dfa commit a591cd0

File tree

6 files changed

+76
-76
lines changed

6 files changed

+76
-76
lines changed
Original file line number | Diff line number | Diff line change
@@ -1,81 +1,53 @@
1-
kind: ClusterRole
2-
apiVersion: rbac.authorization.k8s.io/v1
3-
metadata:
4-
name: pod-read
5-
rules:
6-
- apiGroups: ["inference.networking.x-k8s.io"]
7-
resources: ["inferencemodels"]
8-
verbs: ["get", "watch", "list"]
9-
- apiGroups: [""]
10-
resources: ["pods"]
11-
verbs: ["get", "watch", "list"]
12-
- apiGroups: ["inference.networking.x-k8s.io"]
13-
resources: ["inferencepools"]
14-
verbs: ["get", "watch", "list"]
15-
- apiGroups: ["discovery.k8s.io"]
16-
resources: ["endpointslices"]
17-
verbs: ["get", "watch", "list"]
18-
- apiGroups:
19-
- authentication.k8s.io
20-
resources:
21-
- tokenreviews
22-
verbs:
23-
- create
24-
- apiGroups:
25-
- authorization.k8s.io
26-
resources:
27-
- subjectaccessreviews
28-
verbs:
29-
- create
30-
---
31-
kind: ClusterRoleBinding
32-
apiVersion: rbac.authorization.k8s.io/v1
33-
metadata:
34-
name: pod-read-binding
35-
subjects:
36-
- kind: ServiceAccount
37-
name: default
38-
namespace: default
39-
roleRef:
40-
kind: ClusterRole
41-
name: pod-read
42-
---
431
apiVersion: inference.networking.x-k8s.io/v1alpha2
442
kind: InferencePool
453
metadata:
464
labels:
47-
name: my-pool
5+
name: vllm-llama2-7b
486
spec:
497
targetPortNumber: 8000
508
selector:
51-
app: my-pool
9+
app: vllm-llama2-7b
5210
extensionRef:
53-
name: inference-gateway-ext-proc
11+
name: vllm-llama2-7b-epp
12+
---
13+
apiVersion: v1
14+
kind: Service
15+
metadata:
16+
name: vllm-llama2-7b-epp
17+
namespace: default
18+
spec:
19+
selector:
20+
app: vllm-llama2-7b-epp
21+
ports:
22+
- protocol: TCP
23+
port: 9002
24+
targetPort: 9002
25+
type: ClusterIP
5426
---
5527
apiVersion: apps/v1
5628
kind: Deployment
5729
metadata:
58-
name: inference-gateway-ext-proc
30+
name: vllm-llama2-7b-epp
5931
namespace: default
6032
labels:
61-
app: inference-gateway-ext-proc
33+
app: vllm-llama2-7b-epp
6234
spec:
6335
replicas: 1
6436
selector:
6537
matchLabels:
66-
app: inference-gateway-ext-proc
38+
app: vllm-llama2-7b-epp
6739
template:
6840
metadata:
6941
labels:
70-
app: inference-gateway-ext-proc
42+
app: vllm-llama2-7b-epp
7143
spec:
7244
containers:
73-
- name: inference-gateway-ext-proc
45+
- name: epp
7446
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
7547
imagePullPolicy: Always
7648
args:
7749
- -poolName
78-
- "my-pool"
50+
- "vllm-llama2-7b"
7951
- -v
8052
- "4"
8153
- -grpcPort
@@ -103,16 +75,44 @@ spec:
10375
initialDelaySeconds: 5
10476
periodSeconds: 10
10577
---
106-
apiVersion: v1
107-
kind: Service
78+
kind: ClusterRole
79+
apiVersion: rbac.authorization.k8s.io/v1
10880
metadata:
109-
name: inference-gateway-ext-proc
81+
name: pod-read
82+
rules:
83+
- apiGroups: ["inference.networking.x-k8s.io"]
84+
resources: ["inferencemodels"]
85+
verbs: ["get", "watch", "list"]
86+
- apiGroups: [""]
87+
resources: ["pods"]
88+
verbs: ["get", "watch", "list"]
89+
- apiGroups: ["inference.networking.x-k8s.io"]
90+
resources: ["inferencepools"]
91+
verbs: ["get", "watch", "list"]
92+
- apiGroups: ["discovery.k8s.io"]
93+
resources: ["endpointslices"]
94+
verbs: ["get", "watch", "list"]
95+
- apiGroups:
96+
- authentication.k8s.io
97+
resources:
98+
- tokenreviews
99+
verbs:
100+
- create
101+
- apiGroups:
102+
- authorization.k8s.io
103+
resources:
104+
- subjectaccessreviews
105+
verbs:
106+
- create
107+
---
108+
kind: ClusterRoleBinding
109+
apiVersion: rbac.authorization.k8s.io/v1
110+
metadata:
111+
name: pod-read-binding
112+
subjects:
113+
- kind: ServiceAccount
114+
name: default
110115
namespace: default
111-
spec:
112-
selector:
113-
app: inference-gateway-ext-proc
114-
ports:
115-
- protocol: TCP
116-
port: 9002
117-
targetPort: 9002
118-
type: ClusterIP
116+
roleRef:
117+
kind: ClusterRole
118+
name: pod-read

config/manifests/vllm/cpu-deployment.yaml

+3 -3
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,16 @@
11
apiVersion: apps/v1
22
kind: Deployment
33
metadata:
4-
name: my-pool
4+
name: vllm-llama2-7b
55
spec:
66
replicas: 3
77
selector:
88
matchLabels:
9-
app: my-pool
9+
app: vllm-llama2-7b
1010
template:
1111
metadata:
1212
labels:
13-
app: my-pool
13+
app: vllm-llama2-7b
1414
spec:
1515
containers:
1616
- name: lora

config/manifests/vllm/gpu-deployment.yaml

+3 -3
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,16 @@
11
apiVersion: apps/v1
22
kind: Deployment
33
metadata:
4-
name: my-pool
4+
name: vllm-llama2-7b
55
spec:
66
replicas: 3
77
selector:
88
matchLabels:
9-
app: my-pool
9+
app: vllm-llama2-7b
1010
template:
1111
metadata:
1212
labels:
13-
app: my-pool
13+
app: vllm-llama2-7b
1414
spec:
1515
containers:
1616
- name: lora

site-src/guides/index.md

+3 -3
Original file line number | Diff line number | Diff line change
@@ -80,10 +80,10 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
8080
NAME CLASS ADDRESS PROGRAMMED AGE
8181
inference-gateway inference-gateway <MY_ADDRESS> True 22s
8282
```
83-
### Deploy the Inference Extension and InferencePool
83+
### Deploy the InferencePool and Extension
8484

8585
```bash
86-
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/ext_proc.yaml
86+
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencepool.yaml
8787
```
8888
### Deploy Envoy Gateway Custom Policies
8989

@@ -134,4 +134,4 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
134134
kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/cpu-deployment.yaml --ignore-not-found
135135
kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/gpu-deployment.yaml --ignore-not-found
136136
kubectl delete secret hf-token --ignore-not-found
137-
```
137+
```

test/e2e/epp/e2e_suite_test.go

+3 -3
Original file line number | Diff line number | Diff line change
@@ -57,15 +57,15 @@ const (
5757
// TODO [danehans]: Must be "default" until https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/227 is fixed
5858
nsName = "default"
5959
// modelServerName is the name of the model server test resources.
60-
modelServerName = "my-pool"
60+
modelServerName = "vllm-llama2-7b"
6161
// modelName is the test model name.
6262
modelName = "tweet-summary"
6363
// envoyName is the name of the envoy proxy test resources.
6464
envoyName = "envoy"
6565
// envoyPort is the listener port number of the test envoy proxy.
6666
envoyPort = "8081"
6767
// inferExtName is the name of the inference extension test resources.
68-
inferExtName = "inference-gateway-ext-proc"
68+
inferExtName = "vllm-llama2-7b-epp"
6969
// clientManifest is the manifest for the client test resources.
7070
clientManifest = "../../testdata/client.yaml"
7171
// modelServerSecretManifest is the manifest for the model server secret resource.
@@ -75,7 +75,7 @@ const (
7575
// inferModelManifest is the manifest for the inference model CRD.
7676
inferModelManifest = "../../../config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml"
7777
// inferExtManifest is the manifest for the inference extension test resources.
78-
inferExtManifest = "../../../config/manifests/ext_proc.yaml"
78+
inferExtManifest = "../../../config/manifests/inferencepool.yaml"
7979
// envoyManifest is the manifest for the envoy proxy test resources.
8080
envoyManifest = "../../testdata/envoy.yaml"
8181
// modelServerManifestFilepathEnvVar is the env var that holds absolute path to the manifest for the model server test resource.

test/testdata/envoy.yaml

+2 -2
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ data:
100100
grpc_service:
101101
envoy_grpc:
102102
cluster_name: ext_proc
103-
authority: inference-gateway-ext-proc.default:9002
103+
authority: vllm-llama2-7b-epp.default:9002
104104
timeout: 10s
105105
processing_mode:
106106
request_header_mode: SEND
@@ -194,7 +194,7 @@ data:
194194
- endpoint:
195195
address:
196196
socket_address:
197-
address: inference-gateway-ext-proc.default
197+
address: vllm-llama2-7b-epp.default
198198
port_value: 9002
199199
health_status: HEALTHY
200200
load_balancing_weight: 1

0 commit comments

Comments
 (0)