Skip to content

Create inference model/pool objects in memory instead of reading them from files #505

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions pkg/epp/util/testing/wrappers.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,11 @@ func (m *InferenceModelWrapper) ModelName(modelName string) *InferenceModelWrapp
return m
}

// TargetModel appends a TargetModel entry with the given name to the wrapped
// InferenceModel's spec and returns the wrapper to allow call chaining.
func (m *InferenceModelWrapper) TargetModel(modelName string) *InferenceModelWrapper {
	target := v1alpha2.TargetModel{Name: modelName}
	m.Spec.TargetModels = append(m.Spec.TargetModels, target)
	return m
}

func (m *InferenceModelWrapper) PoolName(poolName string) *InferenceModelWrapper {
m.Spec.PoolRef = v1alpha2.PoolObjectReference{Name: v1alpha2.ObjectName(poolName)}
return m
Expand Down Expand Up @@ -187,6 +192,11 @@ func (m *InferencePoolWrapper) TargetPortNumber(p int32) *InferencePoolWrapper {
return m
}

// ExtensionRef points the wrapped InferencePool's spec at the named extension
// and returns the wrapper to allow call chaining.
func (m *InferencePoolWrapper) ExtensionRef(name string) *InferencePoolWrapper {
	ref := v1alpha2.ExtensionReference{Name: v1alpha2.ObjectName(name)}
	m.Spec.ExtensionRef = &v1alpha2.Extension{ExtensionReference: ref}
	return m
}

// Obj returns the wrapped InferencePool.
func (m *InferencePoolWrapper) ObjRef() *v1alpha2.InferencePool {
return &m.InferencePool
Expand Down
95 changes: 41 additions & 54 deletions test/integration/epp/hermetic_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,7 @@ limitations under the License.
package epp

import (
"bufio"
"bytes"
"context"
"errors"
"fmt"
"io"
"net"
Expand All @@ -48,7 +45,6 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
k8syaml "k8s.io/apimachinery/pkg/util/yaml"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
"k8s.io/component-base/metrics/legacyregistry"
metricsutils "k8s.io/component-base/metrics/testutil"
Expand All @@ -67,7 +63,6 @@ import (
runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
utiltesting "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing"
"sigs.k8s.io/yaml"
)

const (
Expand Down Expand Up @@ -1545,35 +1540,50 @@ func BeforeSuite() func() {

logger.Info("Setting up hermetic ExtProc server")

// Unmarshal CRDs from file into structs
manifestsPath := filepath.Join("..", "..", "testdata", "inferencepool-with-model-hermetic.yaml")
docs, err := readDocuments(manifestsPath)
if err != nil {
logutil.Fatal(logger, err, "Can't read object manifests", "path", manifestsPath)
ns := "default"
pool := utiltesting.MakeInferencePool("vllm-llama2-7b-pool").
Namespace(ns).
TargetPortNumber(8000).
Selector(map[string]string{"app": "vllm-llama2-7b-pool"}).
ExtensionRef("epp").
ObjRef()
if err := k8sClient.Create(context.Background(), pool); err != nil {
logutil.Fatal(logger, err, "Unable to create inferencePool", "pool", pool.Name)
}

for _, doc := range docs {
inferenceModel := &v1alpha2.InferenceModel{}
if err = yaml.Unmarshal(doc, inferenceModel); err != nil {
logutil.Fatal(logger, err, "Can't unmarshal object", "document", doc)
}
if inferenceModel.Kind == "InferenceModel" {
logger.Info("Creating inference model", "model", inferenceModel)
if err := k8sClient.Create(context.Background(), inferenceModel); err != nil {
logutil.Fatal(logger, err, "Unable to create inferenceModel", "modelName", inferenceModel.Name)
}
}
models := []*v1alpha2.InferenceModel{
utiltesting.MakeInferenceModel("sample").
Namespace(ns).
ModelName("sql-lora").
Criticality(v1alpha2.Critical).
PoolName(pool.Name).
TargetModel("sql-lora-1fdg2").
ObjRef(),
utiltesting.MakeInferenceModel("sheddable").
Namespace(ns).
ModelName("sql-lora-sheddable").
Criticality(v1alpha2.Sheddable).
PoolName(pool.Name).
TargetModel("sql-lora-1fdg3").
ObjRef(),
utiltesting.MakeInferenceModel("generic").
Namespace(ns).
ModelName("my-model").
Criticality(v1alpha2.Critical).
PoolName(pool.Name).
TargetModel("my-model-12345").
ObjRef(),
utiltesting.MakeInferenceModel("direct-model").
Namespace(ns).
ModelName("direct-model").
Criticality(v1alpha2.Critical).
PoolName(pool.Name).
ObjRef(),
}
for _, doc := range docs {
inferencePool := &v1alpha2.InferencePool{}
if err = yaml.Unmarshal(doc, inferencePool); err != nil {
logutil.Fatal(logger, err, "Can't unmarshal object", "document", doc)
}
if inferencePool.Kind == "InferencePool" {
logger.Info("Creating inference pool", "pool", inferencePool)
if err := k8sClient.Create(context.Background(), inferencePool); err != nil {
logutil.Fatal(logger, err, "Unable to create inferencePool", "poolName", inferencePool.Name)
}
for i := range models {
logger.Info("Creating inference model", "model", models[i])
if err := k8sClient.Create(context.Background(), models[i]); err != nil {
logutil.Fatal(logger, err, "Unable to create inferenceModel", "modelName", models[i].Name)
}
}

Expand Down Expand Up @@ -1644,29 +1654,6 @@ func streamedRequest(t *testing.T, client extProcPb.ExternalProcessor_ProcessCli
return responses, nil
}

// readDocuments loads the file at fp and splits its contents into the
// individual YAML documents it contains, returned as raw byte slices.
func readDocuments(fp string) ([][]byte, error) {
	raw, err := os.ReadFile(fp)
	if err != nil {
		return nil, err
	}

	reader := k8syaml.NewYAMLReader(bufio.NewReader(bytes.NewReader(raw)))
	docs := [][]byte{}
	for {
		doc, readErr := reader.Read()
		// A clean EOF means every document has been consumed.
		if errors.Is(readErr, io.EOF) {
			return docs, nil
		}
		if readErr != nil {
			return nil, readErr
		}
		docs = append(docs, doc)
	}
}

func makeMetadata(endpoint string) *structpb.Struct {
return &structpb.Struct{
Fields: map[string]*structpb.Value{
Expand Down
63 changes: 0 additions & 63 deletions test/testdata/inferencepool-with-model-hermetic.yaml

This file was deleted.