diff --git a/pkg/README.md b/pkg/README.md index 7ca88b6f..9e28a3a5 100644 --- a/pkg/README.md +++ b/pkg/README.md @@ -1,5 +1,8 @@ ## Quickstart +### Requirements +The current manifests require Envoy Gateway [v1.2.1](https://gateway.envoyproxy.io/docs/install/install-yaml/#install-with-yaml) or later. + ### Steps 1. **Deploy Sample vLLM Application** diff --git a/pkg/manifests/ext_proc.yaml b/pkg/manifests/ext_proc.yaml index ab7dec39..462240b4 100644 --- a/pkg/manifests/ext_proc.yaml +++ b/pkg/manifests/ext_proc.yaml @@ -61,7 +61,17 @@ spec: request: body: Buffered response: - messageTimeout: 5s + # The timeouts are likely not needed here; we can experiment with tuning or removing them incrementally. + # The connection limits matter more: if they are not configured correctly, Envoy Gateway fails with the opaque ext_proc_gRPC_error_14 error. + messageTimeout: 1000s + backendSettings: + circuitBreaker: + maxConnections: 40000 + maxPendingRequests: 40000 + maxParallelRequests: 40000 + timeout: + tcp: + connectTimeout: 24h targetRef: group: gateway.networking.k8s.io kind: HTTPRoute diff --git a/pkg/manifests/gateway.yaml b/pkg/manifests/gateway.yaml index 6fa88e05..621d73a5 100644 --- a/pkg/manifests/gateway.yaml +++ b/pkg/manifests/gateway.yaml @@ -44,4 +44,7 @@ spec: - backendRefs: - group: gateway.envoyproxy.io kind: Backend - name: backend-dummy \ No newline at end of file + name: backend-dummy + timeouts: + request: "24h" + backendRequest: "24h" diff --git a/pkg/manifests/patch_policy.yaml b/pkg/manifests/patch_policy.yaml index 4d227124..6003dc1b 100644 --- a/pkg/manifests/patch_policy.yaml +++ b/pkg/manifests/patch_policy.yaml @@ -26,9 +26,14 @@ spec: original_dst_lb_config: use_http_header: true http_header_name: "target-pod" - connect_timeout: 6s + connect_timeout: 1000s lb_policy: CLUSTER_PROVIDED dns_lookup_family: V4_ONLY + circuit_breakers: + thresholds: + - max_connections: 40000 + max_pending_requests: 40000 + max_requests: 40000 - type: 
"type.googleapis.com/envoy.config.route.v3.RouteConfiguration" name: default//llm-gw diff --git a/pkg/manifests/traffic_policy.yaml b/pkg/manifests/traffic_policy.yaml new file mode 100644 index 00000000..e110f173 --- /dev/null +++ b/pkg/manifests/traffic_policy.yaml @@ -0,0 +1,16 @@ +apiVersion: gateway.envoyproxy.io/v1alpha1 +kind: BackendTrafficPolicy +metadata: + name: high-connection-route-policy +spec: + targetRefs: + - group: gateway.networking.k8s.io + kind: HTTPRoute + name: llm-route + circuitBreaker: + maxConnections: 40000 + maxPendingRequests: 40000 + maxParallelRequests: 40000 + timeout: + tcp: + connectTimeout: 24h \ No newline at end of file