Skip to content

Commit f2919d8

Browse files
committed
rename ext_proc.yaml to inferencepool.yaml
1 parent e014105 commit f2919d8

File tree

3 files changed

+57
-57
lines changed

3 files changed

+57
-57
lines changed
config/manifests/ext_proc.yaml → config/manifests/inferencepool.yaml

Original file line number | Diff line number | Diff line change
@@ -1,45 +1,3 @@
1-
kind: ClusterRole
2-
apiVersion: rbac.authorization.k8s.io/v1
3-
metadata:
4-
name: pod-read
5-
rules:
6-
- apiGroups: ["inference.networking.x-k8s.io"]
7-
resources: ["inferencemodels"]
8-
verbs: ["get", "watch", "list"]
9-
- apiGroups: [""]
10-
resources: ["pods"]
11-
verbs: ["get", "watch", "list"]
12-
- apiGroups: ["inference.networking.x-k8s.io"]
13-
resources: ["inferencepools"]
14-
verbs: ["get", "watch", "list"]
15-
- apiGroups: ["discovery.k8s.io"]
16-
resources: ["endpointslices"]
17-
verbs: ["get", "watch", "list"]
18-
- apiGroups:
19-
- authentication.k8s.io
20-
resources:
21-
- tokenreviews
22-
verbs:
23-
- create
24-
- apiGroups:
25-
- authorization.k8s.io
26-
resources:
27-
- subjectaccessreviews
28-
verbs:
29-
- create
30-
---
31-
kind: ClusterRoleBinding
32-
apiVersion: rbac.authorization.k8s.io/v1
33-
metadata:
34-
name: pod-read-binding
35-
subjects:
36-
- kind: ServiceAccount
37-
name: default
38-
namespace: default
39-
roleRef:
40-
kind: ClusterRole
41-
name: pod-read
42-
---
43 1
apiVersion: inference.networking.x-k8s.io/v1alpha2
44 2
kind: InferencePool
45 3
metadata:
@@ -52,6 +10,20 @@ spec:
52 10
extensionRef:
53 11
name: inference-gateway-ext-proc
54 12
---
13+
apiVersion: v1
14+
kind: Service
15+
metadata:
16+
name: inference-gateway-ext-proc
17+
namespace: default
18+
spec:
19+
selector:
20+
app: inference-gateway-ext-proc
21+
ports:
22+
- protocol: TCP
23+
port: 9002
24+
targetPort: 9002
25+
type: ClusterIP
26+
---
55 27
apiVersion: apps/v1
56 28
kind: Deployment
57 29
metadata:
@@ -103,16 +75,44 @@ spec:
103 75
initialDelaySeconds: 5
104 76
periodSeconds: 10
105 77
---
106-
apiVersion: v1
107-
kind: Service
78+
kind: ClusterRole
79+
apiVersion: rbac.authorization.k8s.io/v1
108 80
metadata:
109-
name: inference-gateway-ext-proc
81+
name: pod-read
82+
rules:
83+
- apiGroups: ["inference.networking.x-k8s.io"]
84+
resources: ["inferencemodels"]
85+
verbs: ["get", "watch", "list"]
86+
- apiGroups: [""]
87+
resources: ["pods"]
88+
verbs: ["get", "watch", "list"]
89+
- apiGroups: ["inference.networking.x-k8s.io"]
90+
resources: ["inferencepools"]
91+
verbs: ["get", "watch", "list"]
92+
- apiGroups: ["discovery.k8s.io"]
93+
resources: ["endpointslices"]
94+
verbs: ["get", "watch", "list"]
95+
- apiGroups:
96+
- authentication.k8s.io
97+
resources:
98+
- tokenreviews
99+
verbs:
100+
- create
101+
- apiGroups:
102+
- authorization.k8s.io
103+
resources:
104+
- subjectaccessreviews
105+
verbs:
106+
- create
107+
---
108+
kind: ClusterRoleBinding
109+
apiVersion: rbac.authorization.k8s.io/v1
110+
metadata:
111+
name: pod-read-binding
112+
subjects:
113+
- kind: ServiceAccount
114+
name: default
110 115
namespace: default
111-
spec:
112-
selector:
113-
app: inference-gateway-ext-proc
114-
ports:
115-
- protocol: TCP
116-
port: 9002
117-
targetPort: 9002
118-
type: ClusterIP
116+
roleRef:
117+
kind: ClusterRole
118+
name: pod-read

site-src/guides/index.md

+3 -3
Original file line number | Diff line number | Diff line change
@@ -80,10 +80,10 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
80 80
NAME CLASS ADDRESS PROGRAMMED AGE
81 81
inference-gateway inference-gateway <MY_ADDRESS> True 22s
82 82
```
83-
### Deploy the Inference Extension and InferencePool
83+
### Deploy the InferencePool and Extension
84 84

85 85
```bash
86-
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/ext_proc.yaml
86+
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencepool.yaml
87 87
```
88 88
### Deploy Envoy Gateway Custom Policies
89 89

@@ -134,4 +134,4 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
134 134
kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/cpu-deployment.yaml --ignore-not-found
135 135
kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/gpu-deployment.yaml --ignore-not-found
136 136
kubectl delete secret hf-token --ignore-not-found
137-
```
137+
```

test/e2e/epp/e2e_suite_test.go

+1 -1
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,7 @@ const (
75 75
// inferModelManifest is the manifest for the inference model CRD.
76 76
inferModelManifest = "../../../config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml"
77 77
// inferExtManifest is the manifest for the inference extension test resources.
78-
inferExtManifest = "../../../config/manifests/ext_proc.yaml"
78+
inferExtManifest = "../../../config/manifests/inferencepool.yaml"
79 79
// envoyManifest is the manifest for the envoy proxy test resources.
80 80
envoyManifest = "../../testdata/envoy.yaml"
81 81
// modelServerManifestFilepathEnvVar is the env var that holds absolute path to the manifest for the model server test resource.

0 commit comments

Comments
 (0)