diff --git a/pkg/manifests/ext_proc.yaml b/config/manifests/ext_proc.yaml similarity index 100% rename from pkg/manifests/ext_proc.yaml rename to config/manifests/ext_proc.yaml diff --git a/pkg/manifests/gateway/enable_patch_policy.yaml b/config/manifests/gateway/enable_patch_policy.yaml similarity index 100% rename from pkg/manifests/gateway/enable_patch_policy.yaml rename to config/manifests/gateway/enable_patch_policy.yaml diff --git a/pkg/manifests/gateway/extension_policy.yaml b/config/manifests/gateway/extension_policy.yaml similarity index 100% rename from pkg/manifests/gateway/extension_policy.yaml rename to config/manifests/gateway/extension_policy.yaml diff --git a/pkg/manifests/gateway/gateway.yaml b/config/manifests/gateway/gateway.yaml similarity index 100% rename from pkg/manifests/gateway/gateway.yaml rename to config/manifests/gateway/gateway.yaml diff --git a/pkg/manifests/gateway/patch_policy.yaml b/config/manifests/gateway/patch_policy.yaml similarity index 100% rename from pkg/manifests/gateway/patch_policy.yaml rename to config/manifests/gateway/patch_policy.yaml diff --git a/pkg/manifests/gateway/traffic_policy.yaml b/config/manifests/gateway/traffic_policy.yaml similarity index 100% rename from pkg/manifests/gateway/traffic_policy.yaml rename to config/manifests/gateway/traffic_policy.yaml diff --git a/pkg/manifests/inferencemodel.yaml b/config/manifests/inferencemodel.yaml similarity index 100% rename from pkg/manifests/inferencemodel.yaml rename to config/manifests/inferencemodel.yaml diff --git a/pkg/manifests/vllm/deployment.yaml b/config/manifests/vllm/deployment.yaml similarity index 100% rename from pkg/manifests/vllm/deployment.yaml rename to config/manifests/vllm/deployment.yaml diff --git a/hack/release-quickstart.sh b/hack/release-quickstart.sh index f4701508..a21047c3 100755 --- a/hack/release-quickstart.sh +++ b/hack/release-quickstart.sh @@ -36,9 +36,9 @@ sed -i.bak -E 
"s|(releases/download/)v[0-9]+\.[0-9]+\.0-rc\.?[0-9]+|\1${RELEASE_TAG}|g" "$README" sed -i.bak "s|kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd|kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/${RELEASE_TAG}/manifests.yaml|g" "$README" # ----------------------------------------------------------------------------- -# Update pkg/manifests/ext_proc.yaml +# Update config/manifests/ext_proc.yaml # ----------------------------------------------------------------------------- -EXT_PROC="pkg/manifests/ext_proc.yaml" +EXT_PROC="config/manifests/ext_proc.yaml" echo "Updating ${EXT_PROC} ..." # Update the EPP container tag. @@ -51,9 +51,9 @@ sed -i.bak '/us-central1-docker.pkg.dev\/k8s-staging-images\/gateway-api-inferen sed -i.bak -E "s|us-central1-docker\.pkg\.dev/k8s-staging-images|registry.k8s.io|g" "$EXT_PROC" # ----------------------------------------------------------------------------- -# Update pkg/manifests/vllm/deployment.yaml +# Update config/manifests/vllm/deployment.yaml # ----------------------------------------------------------------------------- -VLLM_DEPLOY="pkg/manifests/vllm/deployment.yaml" +VLLM_DEPLOY="config/manifests/vllm/deployment.yaml" echo "Updating ${VLLM_DEPLOY} ..." # Update the vLLM image version diff --git a/site-src/guides/index.md b/site-src/guides/index.md index 34fff20c..4478128f 100644 --- a/site-src/guides/index.md +++ b/site-src/guides/index.md @@ -17,7 +17,7 @@ This quickstart guide is intended for engineers familiar with k8s and model serv Deploy a sample vLLM deployment with the proper protocol to work with the LLM Instance Gateway. 
```bash kubectl create secret generic hf-token --from-literal=token=$HF_TOKEN # Your Hugging Face Token with access to Llama2 - kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/vllm/deployment.yaml + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/deployment.yaml ``` ### Install the Inference Extension CRDs @@ -31,14 +31,14 @@ This quickstart guide is intended for engineers familiar with k8s and model serv Deploy the sample InferenceModel which is configured to load balance traffic between the `tweet-summary-0` and `tweet-summary-1` [LoRA adapters](https://docs.vllm.ai/en/latest/features/lora.html) of the sample model server. ```bash - kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/inferencemodel.yaml + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencemodel.yaml ``` ### Update Envoy Gateway Config to enable Patch Policy** Our custom LLM Gateway ext-proc is patched into the existing envoy gateway via `EnvoyPatchPolicy`. To enable this feature, we must extend the Envoy Gateway config map. To do this, simply run: ```bash - kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/gateway/enable_patch_policy.yaml + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/enable_patch_policy.yaml kubectl rollout restart deployment envoy-gateway -n envoy-gateway-system ``` Additionally, if you would like to enable the admin interface, you can uncomment the admin lines and run this again. 
@@ -46,7 +46,7 @@ This quickstart guide is intended for engineers familiar with k8s and model serv ### Deploy Gateway ```bash - kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/gateway/gateway.yaml + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gateway.yaml ``` > **_NOTE:_** This file couples together the gateway infra and the HTTPRoute infra for a convenient, quick startup. Creating additional/different InferencePools on the same gateway will require an additional set of: `Backend`, `HTTPRoute`, the resources included in the `./manifests/gateway/ext-proc.yaml` file, and an additional `./manifests/gateway/patch_policy.yaml` file. ***Should you choose to experiment, familiarity with xDS and Envoy are very useful.*** @@ -59,13 +59,13 @@ This quickstart guide is intended for engineers familiar with k8s and model serv ### Deploy the Inference Extension and InferencePool ```bash - kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/ext_proc.yaml + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/ext_proc.yaml ``` ### Deploy Envoy Gateway Custom Policies ```bash - kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/gateway/extension_policy.yaml - kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/gateway/patch_policy.yaml + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/extension_policy.yaml + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/patch_policy.yaml ``` > **_NOTE:_** This is also per InferencePool, and will need to be configured to support the new pool should you wish to 
experiment further. @@ -74,7 +74,7 @@ This quickstart guide is intended for engineers familiar with k8s and model serv For high-traffic benchmarking you can apply this manifest to avoid any defaults that can cause timeouts/errors. ```bash - kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/gateway/traffic_policy.yaml + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/traffic_policy.yaml ``` ### Try it out diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index 4a0dd2a8..c4342775 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -69,7 +69,7 @@ const ( // clientManifest is the manifest for the client test resources. clientManifest = "../testdata/client.yaml" // modelServerManifest is the manifest for the model server test resources. - modelServerManifest = "../../pkg/manifests/vllm/deployment.yaml" + modelServerManifest = "../../config/manifests/vllm/deployment.yaml" // modelServerSecretManifest is the manifest for the model server secret resource. modelServerSecretManifest = "../testdata/model-secret.yaml" // inferPoolManifest is the manifest for the inference pool CRD. @@ -77,7 +77,7 @@ const ( // inferModelManifest is the manifest for the inference model CRD. inferModelManifest = "../../config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml" // inferExtManifest is the manifest for the inference extension test resources. - inferExtManifest = "../../pkg/manifests/ext_proc.yaml" + inferExtManifest = "../../config/manifests/ext_proc.yaml" // envoyManifest is the manifest for the envoy proxy test resources. envoyManifest = "../testdata/envoy.yaml" )