From ff4f6fbe406fab9694cc4da1b43e2dfb5f4ad91f Mon Sep 17 00:00:00 2001 From: Kellen Swain Date: Mon, 17 Mar 2025 19:52:56 +0000 Subject: [PATCH] Revert name change to make pool name far more descriptive. --- config/manifests/ext_proc.yaml | 6 +++--- config/manifests/inferencemodel.yaml | 6 +++--- config/manifests/vllm/cpu-deployment.yaml | 6 +++--- config/manifests/vllm/gpu-deployment.yaml | 6 +++--- test/e2e/epp/e2e_suite_test.go | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/config/manifests/ext_proc.yaml b/config/manifests/ext_proc.yaml index d70467ee..ae189684 100644 --- a/config/manifests/ext_proc.yaml +++ b/config/manifests/ext_proc.yaml @@ -44,11 +44,11 @@ apiVersion: inference.networking.x-k8s.io/v1alpha2 kind: InferencePool metadata: labels: - name: my-pool + name: vllm-llama2-7b-pool spec: targetPortNumber: 8000 selector: - app: my-pool + app: vllm-llama2-7b-pool extensionRef: name: inference-gateway-ext-proc --- @@ -75,7 +75,7 @@ spec: imagePullPolicy: Always args: - -poolName - - "my-pool" + - "vllm-llama2-7b-pool" - -v - "4" - -grpcPort diff --git a/config/manifests/inferencemodel.yaml b/config/manifests/inferencemodel.yaml index 8374c5b3..83868a99 100644 --- a/config/manifests/inferencemodel.yaml +++ b/config/manifests/inferencemodel.yaml @@ -6,7 +6,7 @@ spec: modelName: tweet-summary criticality: Critical poolRef: - name: my-pool + name: vllm-llama2-7b-pool targetModels: - name: tweet-summary-1 weight: 100 @@ -20,7 +20,7 @@ spec: modelName: meta-llama/Llama-2-7b-hf criticality: Critical poolRef: - name: my-pool + name: vllm-llama2-7b-pool --- apiVersion: inference.networking.x-k8s.io/v1alpha2 @@ -31,4 +31,4 @@ spec: modelName: Qwen/Qwen2.5-1.5B-Instruct criticality: Critical poolRef: - name: my-pool + name: vllm-llama2-7b-pool diff --git a/config/manifests/vllm/cpu-deployment.yaml b/config/manifests/vllm/cpu-deployment.yaml index a0925c83..3bd32812 100644 --- a/config/manifests/vllm/cpu-deployment.yaml +++ b/config/manifests/vllm/cpu-deployment.yaml @@ -1,16 +1,16 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: my-pool + name: vllm-llama2-7b-pool spec: replicas: 3 selector: matchLabels: - app: my-pool + app: vllm-llama2-7b-pool template: metadata: labels: - app: my-pool + app: vllm-llama2-7b-pool spec: containers: - name: lora diff --git a/config/manifests/vllm/gpu-deployment.yaml b/config/manifests/vllm/gpu-deployment.yaml index d16a46a4..51689c9f 100644 --- a/config/manifests/vllm/gpu-deployment.yaml +++ b/config/manifests/vllm/gpu-deployment.yaml @@ -1,16 +1,16 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: my-pool + name: vllm-llama2-7b-pool spec: replicas: 3 selector: matchLabels: - app: my-pool + app: vllm-llama2-7b-pool template: metadata: labels: - app: my-pool + app: vllm-llama2-7b-pool spec: containers: - name: lora diff --git a/test/e2e/epp/e2e_suite_test.go b/test/e2e/epp/e2e_suite_test.go index bc7dc87a..a62b7796 100644 --- a/test/e2e/epp/e2e_suite_test.go +++ b/test/e2e/epp/e2e_suite_test.go @@ -57,7 +57,7 @@ const ( // TODO [danehans]: Must be "default" until https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/227 is fixed nsName = "default" // modelServerName is the name of the model server test resources. - modelServerName = "my-pool" + modelServerName = "vllm-llama2-7b-pool" // modelName is the test model name. modelName = "tweet-summary" // envoyName is the name of the envoy proxy test resources.