# patch_policy.yaml
# Forked from kubernetes-sigs/gateway-api-inference-extension.
---
# EnvoyPatchPolicy that applies two JSON patches to the xDS resources
# generated for a Gateway:
#   1. adds an ORIGINAL_DST cluster named `original_destination_cluster`;
#   2. points the first route of the first virtual host at that cluster.
#
# Replace every occurrence of <GATEWAY-NAME> with the name of the target
# Gateway before applying this manifest.
apiVersion: gateway.envoyproxy.io/v1alpha1
kind: EnvoyPatchPolicy
metadata:
  name: custom-response-patch-policy
  namespace: default
spec:
  targetRef:
    group: gateway.networking.k8s.io
    kind: Gateway
    name: <GATEWAY-NAME>
  type: JSONPatch
  jsonPatches:
    # Patch 1: create a cluster of type ORIGINAL_DST, which is required for
    # direct pod scheduling (heavily used by the scheduler). The
    # `original_dst_lb_config` field is what lets us enable `use_http_header`
    # and set `http_header_name`, so the upstream address is read from the
    # `target-pod` request header.
    # Source: https://www.envoyproxy.io/docs/envoy/latest/api-v3/config/cluster/v3/cluster.proto
    - type: "type.googleapis.com/envoy.config.cluster.v3.Cluster"
      name: original_destination_cluster
      operation:
        op: add
        # An empty path targets the whole resource, i.e. the cluster itself.
        path: ""
        value:
          name: original_destination_cluster
          type: ORIGINAL_DST
          original_dst_lb_config:
            use_http_header: true
            http_header_name: "target-pod"
          # NOTE(review): 1000s is an unusually long connect timeout;
          # confirm this is intentional.
          connect_timeout: 1000s
          lb_policy: CLUSTER_PROVIDED
          dns_lookup_family: V4_ONLY
          circuit_breakers:
            thresholds:
              - max_connections: 40000
                max_pending_requests: 40000
                max_requests: 40000

    # Patch 2: send traffic for the first route of the first virtual host to
    # the cluster added above. The resource name presumably follows
    # `<namespace>/<gateway>/<listener>`; verify it matches the generated
    # RouteConfiguration name for your Gateway.
    - type: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration"
      name: default/<GATEWAY-NAME>/llm-gw
      operation:
        op: replace
        path: "/virtual_hosts/0/routes/0/route/cluster"
        value: original_destination_cluster