From cfed5d235b2c52fb90fd29f6cdfe86432c8bfdaf Mon Sep 17 00:00:00 2001 From: ahg-g Date: Fri, 14 Mar 2025 21:52:12 +0000 Subject: [PATCH] Create inference model/pool objects in memory instead of reading them from files --- pkg/epp/util/testing/wrappers.go | 10 ++ test/integration/epp/hermetic_test.go | 95 ++++++++----------- .../inferencepool-with-model-hermetic.yaml | 63 ------------ 3 files changed, 51 insertions(+), 117 deletions(-) delete mode 100644 test/testdata/inferencepool-with-model-hermetic.yaml diff --git a/pkg/epp/util/testing/wrappers.go b/pkg/epp/util/testing/wrappers.go index c4018631..ed57d01f 100644 --- a/pkg/epp/util/testing/wrappers.go +++ b/pkg/epp/util/testing/wrappers.go @@ -129,6 +129,11 @@ func (m *InferenceModelWrapper) ModelName(modelName string) *InferenceModelWrapp return m } +func (m *InferenceModelWrapper) TargetModel(modelName string) *InferenceModelWrapper { + m.Spec.TargetModels = append(m.Spec.TargetModels, v1alpha2.TargetModel{Name: modelName}) + return m +} + func (m *InferenceModelWrapper) PoolName(poolName string) *InferenceModelWrapper { m.Spec.PoolRef = v1alpha2.PoolObjectReference{Name: v1alpha2.ObjectName(poolName)} return m @@ -187,6 +192,11 @@ func (m *InferencePoolWrapper) TargetPortNumber(p int32) *InferencePoolWrapper { return m } +func (m *InferencePoolWrapper) ExtensionRef(name string) *InferencePoolWrapper { + m.Spec.ExtensionRef = &v1alpha2.Extension{ExtensionReference: v1alpha2.ExtensionReference{Name: v1alpha2.ObjectName(name)}} + return m +} + // Obj returns the wrapped InferencePool. func (m *InferencePoolWrapper) ObjRef() *v1alpha2.InferencePool { return &m.InferencePool diff --git a/test/integration/epp/hermetic_test.go b/test/integration/epp/hermetic_test.go index 7dc9bdb8..2962655e 100644 --- a/test/integration/epp/hermetic_test.go +++ b/test/integration/epp/hermetic_test.go @@ -18,10 +18,7 @@ limitations under the License. package epp import ( - "bufio" - "bytes" "context" - "errors" "fmt" "io" "net" @@ -48,7 +45,6 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" utilruntime "k8s.io/apimachinery/pkg/util/runtime" - k8syaml "k8s.io/apimachinery/pkg/util/yaml" clientgoscheme "k8s.io/client-go/kubernetes/scheme" "k8s.io/component-base/metrics/legacyregistry" metricsutils "k8s.io/component-base/metrics/testutil" @@ -67,7 +63,6 @@ import ( runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server" logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" utiltesting "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing" - "sigs.k8s.io/yaml" ) const ( @@ -1545,35 +1540,50 @@ func BeforeSuite() func() { logger.Info("Setting up hermetic ExtProc server") - // Unmarshal CRDs from file into structs - manifestsPath := filepath.Join("..", "..", "testdata", "inferencepool-with-model-hermetic.yaml") - docs, err := readDocuments(manifestsPath) - if err != nil { - logutil.Fatal(logger, err, "Can't read object manifests", "path", manifestsPath) + ns := "default" + pool := utiltesting.MakeInferencePool("vllm-llama2-7b-pool"). + Namespace(ns). + TargetPortNumber(8000). + Selector(map[string]string{"app": "vllm-llama2-7b-pool"}). + ExtensionRef("epp"). 
+		ObjRef()
+	if err := k8sClient.Create(context.Background(), pool); err != nil {
+		logutil.Fatal(logger, err, "Unable to create inferencePool", "poolName", pool.Name)
 	}
-	for _, doc := range docs {
-		inferenceModel := &v1alpha2.InferenceModel{}
-		if err = yaml.Unmarshal(doc, inferenceModel); err != nil {
-			logutil.Fatal(logger, err, "Can't unmarshal object", "document", doc)
-		}
-		if inferenceModel.Kind == "InferenceModel" {
-			logger.Info("Creating inference model", "model", inferenceModel)
-			if err := k8sClient.Create(context.Background(), inferenceModel); err != nil {
-				logutil.Fatal(logger, err, "Unable to create inferenceModel", "modelName", inferenceModel.Name)
-			}
-		}
+	models := []*v1alpha2.InferenceModel{
+		utiltesting.MakeInferenceModel("sample").
+			Namespace(ns).
+			ModelName("sql-lora").
+			Criticality(v1alpha2.Critical).
+			PoolName(pool.Name).
+			TargetModel("sql-lora-1fdg2").
+			ObjRef(),
+		utiltesting.MakeInferenceModel("sheddable").
+			Namespace(ns).
+			ModelName("sql-lora-sheddable").
+			Criticality(v1alpha2.Sheddable).
+			PoolName(pool.Name).
+			TargetModel("sql-lora-1fdg3").
+			ObjRef(),
+		utiltesting.MakeInferenceModel("generic").
+			Namespace(ns).
+			ModelName("my-model").
+			Criticality(v1alpha2.Critical).
+			PoolName(pool.Name).
+			TargetModel("my-model-12345").
+			ObjRef(),
+		utiltesting.MakeInferenceModel("direct-model").
+			Namespace(ns).
+			ModelName("direct-model").
+			Criticality(v1alpha2.Critical).
+			PoolName(pool.Name).
+			ObjRef(),
 	}
-	for _, doc := range docs {
-		inferencePool := &v1alpha2.InferencePool{}
-		if err = yaml.Unmarshal(doc, inferencePool); err != nil {
-			logutil.Fatal(logger, err, "Can't unmarshal object", "document", doc)
-		}
-		if inferencePool.Kind == "InferencePool" {
-			logger.Info("Creating inference pool", "pool", inferencePool)
-			if err := k8sClient.Create(context.Background(), inferencePool); err != nil {
-				logutil.Fatal(logger, err, "Unable to create inferencePool", "poolName", inferencePool.Name)
-			}
 		}
 	}
 
@@ -1644,29 +1654,6 @@ func streamedRequest(t *testing.T, client extProcPb.ExternalProcessor_ProcessCli
 	return responses, nil
 }
 
-// readDocuments reads documents from file.
-func readDocuments(fp string) ([][]byte, error) { - b, err := os.ReadFile(fp) - if err != nil { - return nil, err - } - - docs := [][]byte{} - reader := k8syaml.NewYAMLReader(bufio.NewReader(bytes.NewReader(b))) - for { - // Read document - doc, err := reader.Read() - if err != nil { - if errors.Is(err, io.EOF) { - break - } - return nil, err - } - docs = append(docs, doc) - } - return docs, nil -} - func makeMetadata(endpoint string) *structpb.Struct { return &structpb.Struct{ Fields: map[string]*structpb.Value{ diff --git a/test/testdata/inferencepool-with-model-hermetic.yaml b/test/testdata/inferencepool-with-model-hermetic.yaml deleted file mode 100644 index 36b6e539..00000000 --- a/test/testdata/inferencepool-with-model-hermetic.yaml +++ /dev/null @@ -1,63 +0,0 @@ -apiVersion: inference.networking.x-k8s.io/v1alpha2 -kind: InferencePool -metadata: - name: vllm-llama2-7b-pool - namespace: default -spec: - targetPortNumber: 8000 - selector: - app: vllm-llama2-7b-pool - extensionRef: - name: epp ---- -apiVersion: inference.networking.x-k8s.io/v1alpha2 -kind: InferenceModel -metadata: - name: inferencemodel-sample - namespace: default -spec: - modelName: sql-lora - criticality: Critical - poolRef: - name: vllm-llama2-7b-pool - targetModels: - - name: sql-lora-1fdg2 - weight: 100 ---- -apiVersion: inference.networking.x-k8s.io/v1alpha2 -kind: InferenceModel -metadata: - name: inferencemodel-sheddable - namespace: default -spec: - modelName: sql-lora-sheddable - poolRef: - name: vllm-llama2-7b-pool - targetModels: - - name: sql-lora-1fdg3 - weight: 100 ---- -apiVersion: inference.networking.x-k8s.io/v1alpha2 -kind: InferenceModel -metadata: - name: inferencemodel-generic - namespace: default -spec: - modelName: my-model - criticality: Critical - poolRef: - name: vllm-llama2-7b-pool - targetModels: - - name: my-model-12345 - weight: 100 ---- -apiVersion: inference.networking.x-k8s.io/v1alpha2 -kind: InferenceModel -metadata: - name: inferencemodel-direct-model-name - namespace: default -spec: - modelName: direct-model - criticality: Critical - poolRef: - name: vllm-llama2-7b-pool \ No newline at end of file
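
Usage sketch (reviewer aid, not part of the patch): the two new builder methods,
TargetModel and ExtensionRef, compose with the existing wrappers as shown below.
This is a minimal example under stated assumptions: the object names ("my-pool",
"chat", "chat-lora-v1") are hypothetical, and the v1alpha2 import path is assumed
from the repository layout; everything else uses only constructors and methods
exercised in the diff above.

package example

import (
	"context"

	"sigs.k8s.io/controller-runtime/pkg/client"

	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
	utiltesting "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing"
)

// createFixtures builds an InferencePool and a routed InferenceModel entirely
// in memory, then creates both through a controller-runtime client, mirroring
// the BeforeSuite flow introduced by this patch.
func createFixtures(ctx context.Context, c client.Client) error {
	pool := utiltesting.MakeInferencePool("my-pool"). // hypothetical name
		Namespace("default").
		TargetPortNumber(8000).
		Selector(map[string]string{"app": "my-pool"}).
		ExtensionRef("epp"). // new in this patch: sets spec.extensionRef.name
		ObjRef()
	if err := c.Create(ctx, pool); err != nil {
		return err
	}

	model := utiltesting.MakeInferenceModel("chat"). // hypothetical name
		Namespace("default").
		ModelName("chat-lora").
		Criticality(v1alpha2.Critical).
		PoolName(pool.Name).
		TargetModel("chat-lora-v1"). // new in this patch: appends an entry to spec.targetModels
		ObjRef()
	return c.Create(ctx, model)
}

Compared with the deleted YAML fixtures, the in-memory builders are checked at
compile time, so a field rename in v1alpha2 fails the build instead of failing at
unmarshal time, and the hermetic suite no longer depends on test/testdata paths.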