File tree 3 files changed +33
-35
lines changed
3 files changed +33
-35
lines changed Load Diff This file was deleted.
Original file line number Diff line number Diff line change 85
85
# op: replace
86
86
# path: "/default_filter_chain/filters/0/typed_config/http_filters/0/typed_config/processing_mode/response_header_mode"
87
87
# value: SEND
88
-
88
+ ---
89
+ apiVersion : gateway.envoyproxy.io/v1alpha1
90
+ kind : EnvoyExtensionPolicy
91
+ metadata :
92
+ name : ext-proc-policy
93
+ namespace : default
94
+ spec :
95
+ extProc :
96
+ - backendRefs :
97
+ - group : " "
98
+ kind : Service
99
+ name : vllm-llama2-7b-epp
100
+ port : 9002
101
+ processingMode :
102
+ allowModeOverride : true
103
+ request :
104
+ body : Buffered
105
+ response :
106
+ # The timeouts are likely not needed here. We can experiment with removing/tuning them slowly.
107
+ # The connection limits are more important: if they are not configured correctly, Envoy GW fails with the opaque error ext_proc_gRPC_error_14.
108
+ messageTimeout : 1000s
109
+ backendSettings :
110
+ circuitBreaker :
111
+ maxConnections : 40000
112
+ maxPendingRequests : 40000
113
+ maxParallelRequests : 40000
114
+ timeout :
115
+ tcp :
116
+ connectTimeout : 24h
117
+ targetRef :
118
+ group : gateway.networking.k8s.io
119
+ kind : HTTPRoute
120
+ name : llm-route
Original file line number Diff line number Diff line change @@ -88,7 +88,6 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
88
88
### Deploy Envoy Gateway Custom Policies
89
89
90
90
``` bash
91
- kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/extension_policy.yaml
92
91
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/patch_policy.yaml
93
92
```
94
93
> **_NOTE:_** This is also per InferencePool, and will need to be configured to support the new pool should you wish to experiment further.
You can’t perform that action at this time.
0 commit comments