@@ -31,11 +31,42 @@ import (
31
31
"google.golang.org/protobuf/types/known/structpb"
32
32
"k8s.io/component-base/metrics/legacyregistry"
33
33
metricsutils "k8s.io/component-base/metrics/testutil"
34
+ "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
34
35
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
35
36
runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server"
36
37
utiltesting "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing"
37
38
)
38
39
40
+ var models = []* v1alpha2.InferenceModel {
41
+ utiltesting .MakeInferenceModel ("sample" ).
42
+ Namespace (pool .Namespace ).
43
+ ModelName ("sql-lora" ).
44
+ Criticality (v1alpha2 .Critical ).
45
+ PoolName (pool .Name ).
46
+ TargetModel ("sql-lora-1fdg2" ).
47
+ ObjRef (),
48
+ utiltesting .MakeInferenceModel ("sheddable" ).
49
+ Namespace (pool .Namespace ).
50
+ ModelName ("sql-lora-sheddable" ).
51
+ Criticality (v1alpha2 .Sheddable ).
52
+ PoolName (pool .Name ).
53
+ TargetModel ("sql-lora-1fdg3" ).
54
+ ObjRef (),
55
+ utiltesting .MakeInferenceModel ("generic" ).
56
+ Namespace (pool .Namespace ).
57
+ ModelName ("my-model" ).
58
+ Criticality (v1alpha2 .Critical ).
59
+ PoolName (pool .Name ).
60
+ TargetModel ("my-model-12345" ).
61
+ ObjRef (),
62
+ utiltesting .MakeInferenceModel ("direct-model" ).
63
+ Namespace (pool .Namespace ).
64
+ ModelName ("direct-model" ).
65
+ Criticality (v1alpha2 .Critical ).
66
+ PoolName (pool .Name ).
67
+ ObjRef (),
68
+ }
69
+
39
70
func TestMain (m * testing.M ) {
40
71
cleanup := BeforeSuite ()
41
72
code := m .Run ()
@@ -304,7 +335,7 @@ func TestKubeInferenceModelRequest(t *testing.T) {
304
335
}
305
336
for _ , test := range tests {
306
337
t .Run (test .name , func (t * testing.T ) {
307
- client , cleanup := setUpHermeticServer (t , test .pods , false )
338
+ client , cleanup := startEPPServer (t , & eppOptions { podMetrics : test .pods , models : models } )
308
339
t .Cleanup (cleanup )
309
340
want := & extProcPb.ProcessingResponse {
310
341
Response : & extProcPb.ProcessingResponse_RequestBody {
@@ -1336,7 +1367,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
1336
1367
1337
1368
for _ , test := range tests {
1338
1369
t .Run (test .name , func (t * testing.T ) {
1339
- client , cleanup := setUpHermeticServer (t , test .pods , true )
1370
+ client , cleanup := startEPPServer (t , & eppOptions { podMetrics : test .pods , models : models , streamed : true } )
1340
1371
t .Cleanup (cleanup )
1341
1372
responses , err := streamedRequest (t , client , test .requests , len (test .wantResponses ))
1342
1373
0 commit comments