Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 4931640

Browse files
committedFeb 27, 2025·
initialize helm template
Signed-off-by: Kuromesi <[email protected]>
1 parent 2ad70e3 commit 4931640

14 files changed

+1601
-0
lines changed
 
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
# Files and directories left out of the packaged chart.
# Patterns use shell-glob and relative-path matching; a leading "!" negates
# a pattern. Write exactly one pattern per line.
.DS_Store

# Version-control metadata
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/

# Editor swap and backup files
*.swp
*.bak
*.tmp
*.orig
*~

# IDE and editor project files
.project
.idea/
*.tmproj
.vscode/
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
# Chart metadata for the gateway-api-inference-extension Helm chart.
apiVersion: v2
name: gateway-api-inference-extension
description: A Helm chart for gateway-api-inference-extension

# Chart type as declared to Helm.
type: application

# Version of the chart itself.
version: 0.1.0

# Version of the packaged application; quoted so it is always read as a string.
# NOTE(review): "1.16.0" looks like the `helm create` scaffold default — confirm.
appVersion: "1.16.0"

‎config/manifests/gateway-api-inference-extension/crds/crds.yaml

+917
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,300 @@
---
# Source: gateway-api-inference-extension/templates/rbac.yaml
# ServiceAccount referenced by the ext-proc Deployment (serviceAccountName)
# and bound to the ClusterRole through the ClusterRoleBinding in this file.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: inference-gateway-ext-proc-release-name
  namespace: default
  labels:
    app: inference-gateway-ext-proc-release-name
---
# Source: gateway-api-inference-extension/templates/enable_patch_policy.yaml
# EnvoyGateway configuration that switches on the EnvoyPatchPolicy and
# Backend extension APIs.
apiVersion: v1
kind: ConfigMap
metadata:
  name: envoy-gateway-config
  namespace: envoy-gateway-system
data:
  # The value below is an embedded YAML document stored as a literal string.
  envoy-gateway.yaml: |
    apiVersion: gateway.envoyproxy.io/v1alpha1
    kind: EnvoyGateway
    provider:
      type: Kubernetes
    gateway:
      controllerName: gateway.envoyproxy.io/gatewayclass-controller
    extensionApis:
      enableEnvoyPatchPolicy: true
      enableBackend: true
---
# Source: gateway-api-inference-extension/templates/rbac.yaml
# Read-only access to the inference CRDs, pods and endpoint slices, plus
# permission to create token and subject-access reviews.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: inference-extension-default-release-name
rules:
  - apiGroups:
      - inference.networking.x-k8s.io
    resources:
      - inferencemodels
    verbs:
      - get
      - watch
      - list
  - apiGroups:
      - ""
    resources:
      - pods
    verbs:
      - get
      - watch
      - list
  - apiGroups:
      - inference.networking.x-k8s.io
    resources:
      - inferencepools
    verbs:
      - get
      - watch
      - list
  - apiGroups:
      - discovery.k8s.io
    resources:
      - endpointslices
    verbs:
      - get
      - watch
      - list
  - apiGroups:
      - authentication.k8s.io
    resources:
      - tokenreviews
    verbs:
      - create
  - apiGroups:
      - authorization.k8s.io
    resources:
      - subjectaccessreviews
    verbs:
      - create
---
# Source: gateway-api-inference-extension/templates/rbac.yaml
# Grants the ClusterRole above to the ext-proc ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: inference-extension-default-release-name
subjects:
  - kind: ServiceAccount
    name: inference-gateway-ext-proc-release-name
    namespace: default
roleRef:
  # apiGroup is a required field of RoleRef; stating it explicitly keeps the
  # manifest valid under schema validation instead of relying on defaulting.
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: inference-extension-default-release-name
---
# Source: gateway-api-inference-extension/templates/ext_proc.yaml
# ClusterIP Service exposing the ext-proc gRPC port and the metrics port of
# the pods selected by the `app` label.
apiVersion: v1
kind: Service
metadata:
  name: inference-gateway-ext-proc-release-name
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: inference-gateway-ext-proc-release-name
  ports:
    - name: grpc
      protocol: TCP
      port: 9002
      targetPort: 9002
    - name: http-metrics
      protocol: TCP
      port: 9090
      targetPort: 9090
---
# Source: gateway-api-inference-extension/templates/ext_proc.yaml
# Single-replica Deployment of the ext-proc container. The container is
# started with the pool name/namespace, log verbosity and its three ports
# (gRPC, gRPC health, metrics) passed as command-line flags.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: inference-gateway-ext-proc-release-name
  namespace: default
  labels:
    app: inference-gateway-ext-proc-release-name
spec:
  replicas: 1
  selector:
    matchLabels:
      app: inference-gateway-ext-proc-release-name
  template:
    metadata:
      labels:
        app: inference-gateway-ext-proc-release-name
    spec:
      serviceAccountName: inference-gateway-ext-proc-release-name
      containers:
        - name: inference-gateway-ext-proc
          image: registry-cn-hangzhou.ack.aliyuncs.com/dev/gateway-api-inference-extension/epp:main
          imagePullPolicy: Always
          # Flag values are quoted where they would otherwise parse as numbers.
          args:
            - -poolName
            - vllm-llama2-7b-pool
            - -poolNamespace
            - default
            - -v
            - '3'
            - -grpcPort
            - '9002'
            - -grpcHealthPort
            - '9003'
            - -metricsPort
            - '9090'
          ports:
            - name: grpc
              containerPort: 9002
            - name: grpc-health
              containerPort: 9003
            - name: metrics
              containerPort: 9090
          # Both probes use the gRPC health port declared above.
          livenessProbe:
            grpc:
              port: 9003
              service: inference-extension
            initialDelaySeconds: 5
            periodSeconds: 10
          readinessProbe:
            grpc:
              port: 9003
              service: inference-extension
            initialDelaySeconds: 5
            periodSeconds: 10
---
# Source: gateway-api-inference-extension/templates/gateway.yaml
# Backend referenced by the HTTPRoute's backendRefs.
apiVersion: gateway.envoyproxy.io/v1alpha1
kind: Backend
metadata:
  name: backend-release-name
  # Pinned explicitly, like the other namespaced resources in this file: the
  # HTTPRoute in "default" references this Backend without a namespace, so
  # the two must live in the same namespace.
  namespace: default
spec:
  endpoints:
    - fqdn:
        hostname: 'foo.bar.com'
        port: 8080
---
# Source: gateway-api-inference-extension/templates/traffic_policy.yaml
# Raises circuit-breaker limits and the TCP connect timeout for traffic
# routed through the llm-route HTTPRoute.
apiVersion: gateway.envoyproxy.io/v1alpha1
kind: BackendTrafficPolicy
metadata:
  name: high-connection-route-policy-release-name  # make sure the reference has the "." prefix
  # Was rendered empty (null); the targeted HTTPRoute lives in "default" and
  # targetRefs carries no namespace, so the policy belongs there too.
  namespace: default
spec:
  targetRefs:
    - group: gateway.networking.k8s.io
      kind: HTTPRoute
      name: llm-route-release-name
  circuitBreaker:
    maxConnections: 40000
    maxPendingRequests: 40000
    maxParallelRequests: 40000
  timeout:
    tcp:
      connectTimeout: 24h
---
# Source: gateway-api-inference-extension/templates/extension_policy.yaml
# Attaches the ext-proc Service (port 9002) as an external processor to the
# llm-route HTTPRoute, with buffered request bodies.
apiVersion: gateway.envoyproxy.io/v1alpha1
kind: EnvoyExtensionPolicy
metadata:
  name: ext-proc-policy-release-name
  namespace: default
spec:
  extProc:
    - backendRefs:
        - group: ""
          kind: Service
          name: inference-gateway-ext-proc-release-name
          port: 9002
      processingMode:
        request:
          body: Buffered
        # NOTE(review): `response` carries no value in the source (null);
        # nesting reconstructed on the assumption that messageTimeout is a
        # sibling of processingMode — confirm against the CRD.
        response:
      messageTimeout: 1000s
      backendSettings:
        circuitBreaker:
          maxConnections: 40000
          maxPendingRequests: 40000
          maxParallelRequests: 40000
        timeout:
          tcp:
            connectTimeout: 24h
  targetRef:
    group: gateway.networking.k8s.io
    kind: HTTPRoute
    name: llm-route-release-name
---
# Source: gateway-api-inference-extension/templates/patch_policy.yaml
# Three JSON patches applied to the configuration generated for the Gateway:
#   1. add an ORIGINAL_DST cluster that takes its destination from the
#      "x-gateway-destination-endpoint" request header;
#   2. add an upstream TLS transport socket to the ext-proc cluster;
#   3. replace the cluster of the first route in the llm-gw route
#      configuration with the ORIGINAL_DST cluster.
apiVersion: gateway.envoyproxy.io/v1alpha1
kind: EnvoyPatchPolicy
metadata:
  name: custom-response-patch-policy-release-name
  namespace: default
spec:
  targetRef:
    group: gateway.networking.k8s.io
    kind: Gateway
    name: inference-gateway-release-name
  type: JSONPatch
  jsonPatches:
    - type: 'type.googleapis.com/envoy.config.cluster.v3.Cluster'
      name: original_destination_cluster
      operation:
        op: add
        path: ''
        value:
          name: original_destination_cluster
          type: ORIGINAL_DST
          original_dst_lb_config:
            use_http_header: true
            http_header_name: 'x-gateway-destination-endpoint'
          connect_timeout: 1000s
          lb_policy: CLUSTER_PROVIDED
          dns_lookup_family: V4_ONLY
          circuit_breakers:
            thresholds:
              - max_connections: 40000
                max_pending_requests: 40000
                max_requests: 40000
    - type: 'type.googleapis.com/envoy.config.cluster.v3.Cluster'
      name: 'envoyextensionpolicy/default/ext-proc-policy-release-name/extproc/0'
      operation:
        op: add
        path: '/transport_socket'
        value:
          name: 'envoy.transport_sockets.tls'
          typed_config:
            '@type': 'type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext'
            common_tls_context: {}
    - type: 'type.googleapis.com/envoy.config.route.v3.RouteConfiguration'
      name: default/inference-gateway-release-name/llm-gw
      operation:
        op: replace
        path: '/virtual_hosts/0/routes/0/route/cluster'
        value: original_destination_cluster
---
# Source: gateway-api-inference-extension/templates/gateway.yaml
# Gateway with two HTTP listeners: "http" on 8080 and "llm-gw" on 8081.
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: inference-gateway-release-name
  namespace: default
spec:
  gatewayClassName: inference-gateway-release-name
  listeners:
    - name: http
      port: 8080
      protocol: HTTP
    - name: llm-gw
      port: 8081
      protocol: HTTP
---
# Source: gateway-api-inference-extension/templates/gateway.yaml
# GatewayClass naming the Envoy Gateway controller; the Gateway in this
# file refers to it through gatewayClassName.
apiVersion: gateway.networking.k8s.io/v1
kind: GatewayClass
metadata:
  name: inference-gateway-release-name
spec:
  controllerName: gateway.envoyproxy.io/gatewayclass-controller
---
# Source: gateway-api-inference-extension/templates/gateway.yaml
# Route attached to the Gateway's "llm-gw" listener that forwards to the
# Backend resource, with 24h request and backend-request timeouts.
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: llm-route-release-name
  namespace: default
spec:
  parentRefs:
    - name: inference-gateway-release-name
      sectionName: llm-gw
  rules:
    - backendRefs:
        - group: gateway.envoyproxy.io
          kind: Backend
          name: backend-release-name
      timeouts:
        request: '24h'
        backendRequest: '24h'
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Gateway API inference extension deployed.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
{{/*
Name of the HTTPRoute: "llm-route-" followed by the release name.
*/}}
{{- define "httpRoute.name" -}}
{{- printf "llm-route-%s" .Release.Name -}}
{{- end -}}

{{/*
Name of the Backend resource: "backend-" followed by the release name.
*/}}
{{- define "backend.name" -}}
{{- printf "backend-%s" .Release.Name -}}
{{- end -}}

{{/*
Name of the GatewayClass: "inference-gateway-" followed by the release name.
*/}}
{{- define "gatewayClass.name" -}}
{{- printf "inference-gateway-%s" .Release.Name -}}
{{- end -}}

{{/*
Name of the Gateway: "inference-gateway-" followed by the release name.
*/}}
{{- define "gateway.name" -}}
{{- printf "inference-gateway-%s" .Release.Name -}}
{{- end -}}

{{/*
Name of the EnvoyExtensionPolicy: "ext-proc-policy-" followed by the release name.
*/}}
{{- define "envoyExtensionPolicy.name" -}}
{{- printf "ext-proc-policy-%s" .Release.Name -}}
{{- end -}}

{{/*
Name of the EnvoyPatchPolicy: "custom-response-patch-policy-" followed by
the release name.
*/}}
{{- define "envoyPatchPolicy.name" -}}
{{- printf "custom-response-patch-policy-%s" .Release.Name -}}
{{- end -}}

{{/*
Selector labels.
Renders a single `app` label whose value is the
"gateway-api-inference-extension.name" helper evaluated in the given context.
*/}}
{{- define "gateway-api-inference-extension.selectorLabels" -}}
app: {{ include "gateway-api-inference-extension.name" . }}
{{- end -}}

{{/*
Name shared by the ClusterRole and its ClusterRoleBinding:
"inference-extension-<release namespace>-<release name>".
*/}}
{{- define "clusterRole.name" -}}
{{- printf "inference-extension-%s-%s" .Release.Namespace .Release.Name -}}
{{- end -}}

{{/*
Name of the BackendTrafficPolicy: "high-connection-route-policy-" followed
by the release name.
*/}}
{{- define "backendTrafficPolicy.name" -}}
{{- printf "high-connection-route-policy-%s" .Release.Name -}}
{{- end -}}

{{/*
Base name of the ext-proc workload: "inference-gateway-ext-proc-" followed by
the release name. The templates use it as the Deployment, Service and
ServiceAccount name and as the value of the `app` label.
Service names and label values are limited to 63 characters, so the result is
truncated to 63 and any trailing "-" left by the cut is removed. Names that
already fit are returned unchanged.
*/}}
{{- define "gateway-api-inference-extension.name" -}}
{{- printf "inference-gateway-ext-proc-%s" .Release.Name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
{{- /*
ConfigMap carrying an EnvoyGateway configuration that enables the
EnvoyPatchPolicy and Backend extension APIs. Rendered only when
.Values.envoy.enablePatchPolicy is truthy; the namespace comes from
.Values.envoy.namespace and falls back to "envoy-gateway-system".
NOTE(review): presumably the Envoy Gateway installation already owns a
ConfigMap with this name — confirm how the two are meant to coexist.
*/ -}}
{{- if .Values.envoy.enablePatchPolicy }}
apiVersion: v1
kind: ConfigMap
metadata:
  name: envoy-gateway-config
  namespace: {{ .Values.envoy.namespace | default "envoy-gateway-system" }}
data:
  envoy-gateway.yaml: |
    apiVersion: gateway.envoyproxy.io/v1alpha1
    kind: EnvoyGateway
    provider:
      type: Kubernetes
    gateway:
      controllerName: gateway.envoyproxy.io/gatewayclass-controller
    extensionApis:
      enableEnvoyPatchPolicy: true
      enableBackend: true
{{- end }}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
{{- /*
Deployment of the ext-proc container.

Values read (all under .Values.inferenceExtension unless noted):
  replicas        pod count, 1 when unset (an explicit 0 is honoured)
  image.hub/tag   image reference, rendered as "<hub>:<tag>"
  image.pullPolicy  defaults to "Always"
  logLevel        passed as -v, 3 when unset (an explicit 0 is honoured)
  grpcPort        passed as -grpcPort, defaults to 9002
  grpcHealthPort  passed as -grpcHealthPort and used by both probes,
                  defaults to 9003
  metricsPort     passed as -metricsPort, defaults to 9090
  .Values.inferencePool.name  passed as -poolName
The release namespace is passed as -poolNamespace.
*/ -}}
{{- $ext := .Values.inferenceExtension }}
{{- $grpcPort := $ext.grpcPort | default 9002 }}
{{- $healthPort := $ext.grpcHealthPort | default 9003 }}
{{- $metricsPort := $ext.metricsPort | default 9090 }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "gateway-api-inference-extension.name" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ include "gateway-api-inference-extension.name" . }}
spec:
  # "default" treats 0 as empty, which would turn "replicas: 0" into 1;
  # only fall back to 1 when the value is not set at all.
  replicas: {{ ternary 1 $ext.replicas (kindIs "invalid" $ext.replicas) }}
  selector:
    matchLabels:
      {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}
    spec:
      serviceAccountName: {{ include "gateway-api-inference-extension.name" . }}
      containers:
        - name: inference-gateway-ext-proc
          image: "{{ $ext.image.hub }}:{{ $ext.image.tag }}"
          imagePullPolicy: {{ $ext.image.pullPolicy | default "Always" }}
          # Every templated argument is quoted so that numeric or
          # boolean-looking values are still emitted as strings.
          args:
            - -poolName
            - {{ .Values.inferencePool.name | quote }}
            - -poolNamespace
            - {{ .Release.Namespace | quote }}
            - -v
            # Same zero-handling as replicas: "logLevel: 0" must stay 0.
            - {{ ternary 3 $ext.logLevel (kindIs "invalid" $ext.logLevel) | quote }}
            - -grpcPort
            - {{ $grpcPort | quote }}
            - -grpcHealthPort
            - {{ $healthPort | quote }}
            - -metricsPort
            - {{ $metricsPort | quote }}
          ports:
            - name: grpc
              containerPort: {{ $grpcPort }}
            - name: grpc-health
              containerPort: {{ $healthPort }}
            - name: metrics
              containerPort: {{ $metricsPort }}
          livenessProbe:
            grpc:
              port: {{ $healthPort }}
              service: inference-extension
            initialDelaySeconds: 5
            periodSeconds: 10
          readinessProbe:
            grpc:
              port: {{ $healthPort }}
              service: inference-extension
            initialDelaySeconds: 5
            periodSeconds: 10
---
{{- /*
ClusterIP Service in front of the ext-proc pods. Exposes the gRPC port
(.Values.inferenceExtension.grpcPort, default 9002) and the metrics port
(.Values.inferenceExtension.metricsPort, default 9090); port and targetPort
are always equal.
*/}}
{{- $grpcPort := .Values.inferenceExtension.grpcPort | default 9002 }}
{{- $metricsPort := .Values.inferenceExtension.metricsPort | default 9090 }}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "gateway-api-inference-extension.name" . }}
  namespace: {{ .Release.Namespace }}
spec:
  type: ClusterIP
  selector:
    {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 4 }}
  ports:
    - name: grpc
      protocol: TCP
      port: {{ $grpcPort }}
      targetPort: {{ $grpcPort }}
    - name: http-metrics
      protocol: TCP
      port: {{ $metricsPort }}
      targetPort: {{ $metricsPort }}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
{{- /*
EnvoyExtensionPolicy that registers the ext-proc Service as an external
processor for the chart's HTTPRoute, with buffered request bodies.
*/ -}}
apiVersion: gateway.envoyproxy.io/v1alpha1
kind: EnvoyExtensionPolicy
metadata:
  name: {{ include "envoyExtensionPolicy.name" . }}
  namespace: {{ .Release.Namespace }}
spec:
  extProc:
    - backendRefs:
        - group: ""
          kind: Service
          name: {{ include "gateway-api-inference-extension.name" . }}
          # Must be the port the Service exposes for gRPC. The Service and
          # Deployment read inferenceExtension.grpcPort; the previous key
          # (inferenceExtension.port) is not defined in values.yaml, so a
          # customised gRPC port left this reference stuck on 9002.
          port: {{ .Values.inferenceExtension.grpcPort | default 9002 }}
      processingMode:
        request:
          body: Buffered
        # NOTE(review): `response` carries no value in the source (null);
        # nesting reconstructed on the assumption that messageTimeout is a
        # sibling of processingMode — confirm against the CRD.
        response:
      messageTimeout: 1000s
      backendSettings:
        circuitBreaker:
          maxConnections: 40000
          maxPendingRequests: 40000
          maxParallelRequests: 40000
        timeout:
          tcp:
            connectTimeout: 24h
  targetRef:
    group: gateway.networking.k8s.io
    kind: HTTPRoute
    name: {{ include "httpRoute.name" . }}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
---
{{- /*
Gateway with two HTTP listeners: "http" on the fixed port 8080 and "llm-gw"
on .Values.gateway.port.
*/}}
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: {{ include "gateway.name" . }}
  namespace: {{ .Release.Namespace }}
spec:
  gatewayClassName: {{ include "gatewayClass.name" . }}
  listeners:
    - name: http
      protocol: HTTP
      port: 8080
    - name: llm-gw
      protocol: HTTP
      # Falls back to 8081 (the value shipped in values.yaml) so an unset
      # gateway.port no longer renders an empty, invalid port.
      port: {{ .Values.gateway.port | default 8081 }}
---
{{- /*
GatewayClass naming the Envoy Gateway controller; the chart's Gateway
refers to it through gatewayClassName.
*/}}
apiVersion: gateway.networking.k8s.io/v1
kind: GatewayClass
metadata:
  name: {{ include "gatewayClass.name" . }}
spec:
  controllerName: 'gateway.envoyproxy.io/gatewayclass-controller'
---
{{- /*
Backend referenced by the HTTPRoute's backendRefs.
*/}}
apiVersion: gateway.envoyproxy.io/v1alpha1
kind: Backend
metadata:
  name: {{ include "backend.name" . }}
  # Pinned explicitly, like the other namespaced resources of this chart:
  # the HTTPRoute references this Backend without a namespace, so both must
  # end up in the release namespace however the manifest is applied.
  namespace: {{ .Release.Namespace }}
spec:
  endpoints:
    - fqdn:
        hostname: 'foo.bar.com'
        port: 8080
---
{{- /*
HTTPRoute attached to the Gateway's "llm-gw" listener. Its single rule
forwards to the chart's Backend with 24h request and backend-request timeouts.
*/}}
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: {{ include "httpRoute.name" . }}
  namespace: {{ .Release.Namespace }}
spec:
  parentRefs:
    - name: {{ include "gateway.name" . }}
      sectionName: llm-gw
  rules:
    - backendRefs:
        - group: gateway.envoyproxy.io
          kind: Backend
          name: {{ include "backend.name" . }}
      timeouts:
        request: '24h'
        backendRequest: '24h'
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
{{- /*
EnvoyPatchPolicy targeting the chart's Gateway. It applies three JSON patches:
  1. add an ORIGINAL_DST cluster that takes its destination from the
     "x-gateway-destination-endpoint" request header;
  2. add an upstream TLS transport socket to the cluster named after the
     chart's EnvoyExtensionPolicy;
  3. replace the cluster of the first route in the
     "<namespace>/<gateway>/llm-gw" route configuration with the
     ORIGINAL_DST cluster.
*/ -}}
{{- $ns := .Release.Namespace }}
{{- $gateway := include "gateway.name" . }}
{{- $extPolicy := include "envoyExtensionPolicy.name" . }}
apiVersion: gateway.envoyproxy.io/v1alpha1
kind: EnvoyPatchPolicy
metadata:
  name: {{ include "envoyPatchPolicy.name" . }}
  namespace: {{ $ns }}
spec:
  targetRef:
    group: gateway.networking.k8s.io
    kind: Gateway
    name: {{ $gateway }}
  type: JSONPatch
  jsonPatches:
    - type: "type.googleapis.com/envoy.config.cluster.v3.Cluster"
      name: original_destination_cluster
      operation:
        op: add
        path: ""
        value:
          name: original_destination_cluster
          type: ORIGINAL_DST
          original_dst_lb_config:
            use_http_header: true
            http_header_name: "x-gateway-destination-endpoint"
          connect_timeout: 1000s
          lb_policy: CLUSTER_PROVIDED
          dns_lookup_family: V4_ONLY
          circuit_breakers:
            thresholds:
              - max_connections: 40000
                max_pending_requests: 40000
                max_requests: 40000
    - type: "type.googleapis.com/envoy.config.cluster.v3.Cluster"
      name: "envoyextensionpolicy/{{ $ns }}/{{ $extPolicy }}/extproc/0"
      operation:
        op: add
        path: "/transport_socket"
        value:
          name: "envoy.transport_sockets.tls"
          typed_config:
            "@type": "type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext"
            common_tls_context: {}
    - type: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration"
      name: {{ $ns }}/{{ $gateway }}/llm-gw
      operation:
        op: replace
        path: "/virtual_hosts/0/routes/0/route/cluster"
        value: original_destination_cluster
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
{{- /*
ClusterRole for the ext-proc workload: read-only access to the inference
CRDs, pods and endpoint slices, plus permission to create token and
subject-access reviews.
*/ -}}
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "clusterRole.name" . }}
rules:
  - apiGroups:
      - inference.networking.x-k8s.io
    resources:
      - inferencemodels
    verbs:
      - get
      - watch
      - list
  - apiGroups:
      - ""
    resources:
      - pods
    verbs:
      - get
      - watch
      - list
  - apiGroups:
      - inference.networking.x-k8s.io
    resources:
      - inferencepools
    verbs:
      - get
      - watch
      - list
  - apiGroups:
      - discovery.k8s.io
    resources:
      - endpointslices
    verbs:
      - get
      - watch
      - list
  - apiGroups:
      - authentication.k8s.io
    resources:
      - tokenreviews
    verbs:
      - create
  - apiGroups:
      - authorization.k8s.io
    resources:
      - subjectaccessreviews
    verbs:
      - create
---
{{- /*
Binds the chart's ClusterRole to the ext-proc ServiceAccount in the release
namespace.
*/}}
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ include "clusterRole.name" . }}
subjects:
  - kind: ServiceAccount
    name: {{ include "gateway-api-inference-extension.name" . }}
    namespace: {{ .Release.Namespace }}
roleRef:
  # apiGroup is a required field of RoleRef; stating it explicitly keeps the
  # manifest valid under schema validation instead of relying on defaulting.
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: {{ include "clusterRole.name" . }}
---
{{- /*
ServiceAccount named by the "gateway-api-inference-extension.name" helper;
it is the subject of the ClusterRoleBinding in this file.
*/}}
{{- $name := include "gateway-api-inference-extension.name" . }}
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ $name }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ $name }}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
{{- /*
BackendTrafficPolicy raising circuit-breaker limits and the TCP connect
timeout for traffic routed through the chart's HTTPRoute.
*/ -}}
apiVersion: gateway.envoyproxy.io/v1alpha1
kind: BackendTrafficPolicy
metadata:
  name: {{ include "backendTrafficPolicy.name" . }}
  # Built-in object fields are case-sensitive: ".Release.namespace" does not
  # exist and rendered an empty value; the field is ".Release.Namespace".
  namespace: {{ .Release.Namespace }}
spec:
  targetRefs:
    - group: gateway.networking.k8s.io
      kind: HTTPRoute
      name: {{ include "httpRoute.name" . }}
  circuitBreaker:
    maxConnections: 40000
    maxPendingRequests: 40000
    maxParallelRequests: 40000
  timeout:
    tcp:
      connectTimeout: 24h
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
# Default values for the gateway-api-inference-extension chart.

# Settings for the ext-proc Deployment and its Service.
inferenceExtension:
  # Number of pod replicas.
  replicas: 1
  # Container image, rendered by the Deployment template as "<hub>:<tag>".
  image:
    hub: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp
    tag: main
    pullPolicy: Always

  # NOTE(review): `name` and `serviceName` are not read by any template
  # visible in this chart (resource names come from _helpers.tpl) — confirm
  # whether they are still needed.
  name: inference-gateway-ext-proc
  serviceName: inference-gateway-ext-proc
  # Port passed as -grpcPort and exposed as the "grpc" port.
  grpcPort: 9002
  # Port passed as -metricsPort and exposed as the metrics port.
  metricsPort: 9090
  # Verbosity passed to the container as -v.
  logLevel: 3

# Passed to the container as -poolName.
inferencePool:
  name: vllm-llama2-7b-pool

# Port of the Gateway's "llm-gw" listener.
gateway:
  port: 8081

envoy:
  # envoy gateway system namespace
  namespace: envoy-gateway-system

  # enabling the Envoy Patch Policy feature
  enablePatchPolicy: true

0 commit comments

Comments
 (0)
Please sign in to comment.