|
1 |
| -kind: ClusterRole |
2 |
| -apiVersion: rbac.authorization.k8s.io/v1 |
3 |
| -metadata: |
4 |
| - name: pod-read |
5 |
| -rules: |
6 |
| -- apiGroups: ["inference.networking.x-k8s.io"] |
7 |
| - resources: ["inferencemodels"] |
8 |
| - verbs: ["get", "watch", "list"] |
9 |
| -- apiGroups: [""] |
10 |
| - resources: ["pods"] |
11 |
| - verbs: ["get", "watch", "list"] |
12 |
| -- apiGroups: ["inference.networking.x-k8s.io"] |
13 |
| - resources: ["inferencepools"] |
14 |
| - verbs: ["get", "watch", "list"] |
15 |
| -- apiGroups: ["discovery.k8s.io"] |
16 |
| - resources: ["endpointslices"] |
17 |
| - verbs: ["get", "watch", "list"] |
18 |
| -- apiGroups: |
19 |
| - - authentication.k8s.io |
20 |
| - resources: |
21 |
| - - tokenreviews |
22 |
| - verbs: |
23 |
| - - create |
24 |
| -- apiGroups: |
25 |
| - - authorization.k8s.io |
26 |
| - resources: |
27 |
| - - subjectaccessreviews |
28 |
| - verbs: |
29 |
| - - create |
30 |
| ---- |
31 |
| -kind: ClusterRoleBinding |
32 |
| -apiVersion: rbac.authorization.k8s.io/v1 |
33 |
| -metadata: |
34 |
| - name: pod-read-binding |
35 |
| -subjects: |
36 |
| -- kind: ServiceAccount |
37 |
| - name: default |
38 |
| - namespace: default |
39 |
| -roleRef: |
40 |
| - kind: ClusterRole |
41 |
| - name: pod-read |
42 |
| ---- |
43 | 1 | apiVersion: inference.networking.x-k8s.io/v1alpha2
|
44 | 2 | kind: InferencePool
|
45 | 3 | metadata:
|
46 | 4 | labels:
|
47 |
| - name: my-pool |
| 5 | + name: vllm-llama2-7b |
48 | 6 | spec:
|
49 | 7 | targetPortNumber: 8000
|
50 | 8 | selector:
|
51 |
| - app: my-pool |
| 9 | + app: vllm-llama2-7b |
52 | 10 | extensionRef:
|
53 |
| - name: inference-gateway-ext-proc |
| 11 | + name: vllm-llama2-7b-epp |
| 12 | +--- |
| 13 | +apiVersion: v1 |
| 14 | +kind: Service |
| 15 | +metadata: |
| 16 | + name: vllm-llama2-7b-epp |
| 17 | + namespace: default |
| 18 | +spec: |
| 19 | + selector: |
| 20 | + app: vllm-llama2-7b-epp |
| 21 | + ports: |
| 22 | + - protocol: TCP |
| 23 | + port: 9002 |
| 24 | + targetPort: 9002 |
| 25 | + type: ClusterIP |
54 | 26 | ---
|
55 | 27 | apiVersion: apps/v1
|
56 | 28 | kind: Deployment
|
57 | 29 | metadata:
|
58 |
| - name: inference-gateway-ext-proc |
| 30 | + name: vllm-llama2-7b-epp |
59 | 31 | namespace: default
|
60 | 32 | labels:
|
61 |
| - app: inference-gateway-ext-proc |
| 33 | + app: vllm-llama2-7b-epp |
62 | 34 | spec:
|
63 | 35 | replicas: 1
|
64 | 36 | selector:
|
65 | 37 | matchLabels:
|
66 |
| - app: inference-gateway-ext-proc |
| 38 | + app: vllm-llama2-7b-epp |
67 | 39 | template:
|
68 | 40 | metadata:
|
69 | 41 | labels:
|
70 |
| - app: inference-gateway-ext-proc |
| 42 | + app: vllm-llama2-7b-epp |
71 | 43 | spec:
|
72 | 44 | containers:
|
73 |
| - - name: inference-gateway-ext-proc |
| 45 | + - name: epp |
74 | 46 | image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
|
75 | 47 | imagePullPolicy: Always
|
76 | 48 | args:
|
77 | 49 | - -poolName
|
78 |
| - - "my-pool" |
| 50 | + - "vllm-llama2-7b" |
79 | 51 | - -v
|
80 | 52 | - "4"
|
81 | 53 | - -grpcPort
|
@@ -103,16 +75,44 @@ spec:
|
103 | 75 | initialDelaySeconds: 5
|
104 | 76 | periodSeconds: 10
|
105 | 77 | ---
|
106 |
| -apiVersion: v1 |
107 |
| -kind: Service |
| 78 | +kind: ClusterRole |
| 79 | +apiVersion: rbac.authorization.k8s.io/v1 |
108 | 80 | metadata:
|
109 |
| - name: inference-gateway-ext-proc |
| 81 | + name: pod-read |
| 82 | +rules: |
| 83 | +- apiGroups: ["inference.networking.x-k8s.io"] |
| 84 | + resources: ["inferencemodels"] |
| 85 | + verbs: ["get", "watch", "list"] |
| 86 | +- apiGroups: [""] |
| 87 | + resources: ["pods"] |
| 88 | + verbs: ["get", "watch", "list"] |
| 89 | +- apiGroups: ["inference.networking.x-k8s.io"] |
| 90 | + resources: ["inferencepools"] |
| 91 | + verbs: ["get", "watch", "list"] |
| 92 | +- apiGroups: ["discovery.k8s.io"] |
| 93 | + resources: ["endpointslices"] |
| 94 | + verbs: ["get", "watch", "list"] |
| 95 | +- apiGroups: |
| 96 | + - authentication.k8s.io |
| 97 | + resources: |
| 98 | + - tokenreviews |
| 99 | + verbs: |
| 100 | + - create |
| 101 | +- apiGroups: |
| 102 | + - authorization.k8s.io |
| 103 | + resources: |
| 104 | + - subjectaccessreviews |
| 105 | + verbs: |
| 106 | + - create |
| 107 | +--- |
| 108 | +kind: ClusterRoleBinding |
| 109 | +apiVersion: rbac.authorization.k8s.io/v1 |
| 110 | +metadata: |
| 111 | + name: pod-read-binding |
| 112 | +subjects: |
| 113 | +- kind: ServiceAccount |
| 114 | + name: default |
110 | 115 | namespace: default
|
111 |
| -spec: |
112 |
| - selector: |
113 |
| - app: inference-gateway-ext-proc |
114 |
| - ports: |
115 |
| - - protocol: TCP |
116 |
| - port: 9002 |
117 |
| - targetPort: 9002 |
118 |
| - type: ClusterIP |
| 116 | +roleRef: |
| 117 | + kind: ClusterRole |
| 118 | + name: pod-read |
0 commit comments