File tree 4 files changed +37
-37
lines changed
4 files changed +37
-37
lines changed Load Diff This file was deleted.
Original file line number Diff line number Diff line change 6
6
modelName : tweet-summary
7
7
criticality : Critical
8
8
poolRef :
9
- name : my-pool
9
+ name : vllm-llama2-7b
10
10
targetModels :
11
11
- name : tweet-summary-1
12
12
weight : 100
20
20
modelName : meta-llama/Llama-2-7b-hf
21
21
criticality : Critical
22
22
poolRef :
23
- name : my-pool
23
+ name : vllm-llama2-7b
24
24
25
25
---
26
26
apiVersion : inference.networking.x-k8s.io/v1alpha2
31
31
modelName : Qwen/Qwen2.5-1.5B-Instruct
32
32
criticality : Critical
33
33
poolRef :
34
- name : my-pool
34
+ name : vllm-llama2-7b
Original file line number Diff line number Diff line change 75
75
initialDelaySeconds : 5
76
76
periodSeconds : 10
77
77
---
78
+ apiVersion : gateway.envoyproxy.io/v1alpha1
79
+ kind : EnvoyExtensionPolicy
80
+ metadata :
81
+ name : ext-proc-policy
82
+ namespace : default
83
+ spec :
84
+ extProc :
85
+ - backendRefs :
86
+ - group : ""
87
+ kind : Service
88
+ name : vllm-llama2-7b-epp
89
+ port : 9002
90
+ processingMode :
91
+ allowModeOverride : true
92
+ request :
93
+ body : Buffered
94
+ response :
95
+ # The timeouts are likely not needed here. We can experiment with removing/tuning them slowly.
96
+ # The connection limits are more important: if they are not configured correctly, Envoy GW fails with the opaque error ext_proc_gRPC_error_14.
97
+ messageTimeout : 1000s
98
+ backendSettings :
99
+ circuitBreaker :
100
+ maxConnections : 40000
101
+ maxPendingRequests : 40000
102
+ maxParallelRequests : 40000
103
+ timeout :
104
+ tcp :
105
+ connectTimeout : 24h
106
+ targetRef :
107
+ group : gateway.networking.k8s.io
108
+ kind : HTTPRoute
109
+ name : llm-route
110
+ ---
78
111
kind : ClusterRole
79
112
apiVersion : rbac.authorization.k8s.io/v1
80
113
metadata :
Original file line number Diff line number Diff line change @@ -88,7 +88,6 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
88
88
### Deploy Envoy Gateway Custom Policies
89
89
90
90
``` bash
91
- kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/extension_policy.yaml
92
91
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/patch_policy.yaml
93
92
```
94
93
> **_NOTE:_** This is also per InferencePool, and will need to be configured to support the new pool should you wish to experiment further.
@@ -125,7 +124,7 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
125
124
kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/traffic_policy.yaml --ignore-not-found
126
125
kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/extension_policy.yaml --ignore-not-found
127
126
kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/patch_policy.yaml --ignore-not-found
128
- kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/ext_proc.yaml --ignore-not-found
127
+ kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencepool.yaml --ignore-not-found
129
128
kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gateway.yaml --ignore-not-found
130
129
kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/enable_patch_policy.yaml --ignore-not-found
131
130
kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencemodel.yaml --ignore-not-found
You can’t perform that action at this time.
0 commit comments