forked from kubernetes-sigs/gateway-api-inference-extension
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.go
90 lines (76 loc) · 2.45 KB
/
utils.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
package test
import (
"encoding/json"
"fmt"
"net"
"sync"
"time"
"google.golang.org/grpc"
"google.golang.org/grpc/reflection"
klog "k8s.io/klog/v2"
extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
"inference.networking.x-k8s.io/llm-instance-gateway/api/v1alpha1"
"inference.networking.x-k8s.io/llm-instance-gateway/pkg/ext-proc/backend"
"inference.networking.x-k8s.io/llm-instance-gateway/pkg/ext-proc/handlers"
"inference.networking.x-k8s.io/llm-instance-gateway/pkg/ext-proc/scheduling"
)
// StartExtProc builds a backend.Provider from the given pod metrics and starts
// an extProc gRPC server on port via startExtProc, returning that server.
//
// The supplied pods are used twice: as the canned responses of a
// backend.FakePodMetricsClient (keyed by each entry's Pod), and as the contents
// of the K8sDatastore pod set handed to the provider. The provider is
// initialized with refreshPodsInterval and refreshMetricsInterval; if Init
// returns an error the process exits via klog.Fatalf. models is passed through
// unchanged to startExtProc.
func StartExtProc(port int, refreshPodsInterval, refreshMetricsInterval time.Duration, pods []*backend.PodMetrics, models map[string]*v1alpha1.Model) *grpc.Server {
	// Index the metrics by pod so the fake metrics client can serve them.
	pms := make(map[backend.Pod]*backend.PodMetrics, len(pods))
	for _, pod := range pods {
		pms[pod.Pod] = pod
	}

	pmc := &backend.FakePodMetricsClient{Res: pms}
	pp := backend.NewProvider(pmc, &backend.K8sDatastore{Pods: populatePodDatastore(pods)})
	if err := pp.Init(refreshPodsInterval, refreshMetricsInterval); err != nil {
		klog.Fatalf("failed to initialize: %v", err)
	}
	return startExtProc(port, pp, models)
}
// startExtProc starts an extProc gRPC server listening on the given TCP port
// and returns it without blocking.
//
// The server is registered with an external-processor handler built from pp, a
// scheduler created from pp, the literal target-pod header name "target-pod",
// and a backend.FakeDataStore wrapping models. gRPC reflection is also
// registered. If the listener cannot be bound the process exits via
// klog.Fatalf. Serving runs on a background goroutine; this function never
// stops the server itself, so callers keep the returned handle to do that.
func startExtProc(port int, pp *backend.Provider, models map[string]*v1alpha1.Model) *grpc.Server {
	lis, err := net.Listen("tcp", fmt.Sprintf(":%d", port))
	if err != nil {
		klog.Fatalf("failed to listen: %v", err)
	}

	s := grpc.NewServer()
	extProcPb.RegisterExternalProcessorServer(s, handlers.NewServer(pp, scheduling.NewScheduler(pp), "target-pod", &backend.FakeDataStore{Res: models}))
	klog.Infof("Starting gRPC server on port :%v", port)
	reflection.Register(s)

	go func() {
		// Report serve failures instead of silently discarding them.
		// NOTE(review): grpc-go documents that Serve returns nil once Stop or
		// GracefulStop has been called, so a normal shutdown should not log.
		if serveErr := s.Serve(lis); serveErr != nil {
			klog.Errorf("gRPC server on port :%v stopped serving: %v", port, serveErr)
		}
	}()
	return s
}
// GenerateRequest returns an ext_proc ProcessingRequest whose request body is
// a JSON object with the given model name and fixed values for "prompt",
// "max_tokens" and "temperature". If the JSON encoding fails the process exits
// via klog.Fatal.
func GenerateRequest(model string) *extProcPb.ProcessingRequest {
	payload := map[string]interface{}{
		"model":       model,
		"prompt":      "hello",
		"max_tokens":  100,
		"temperature": 0,
	}

	encoded, err := json.Marshal(payload)
	if err != nil {
		klog.Fatal(err)
	}

	return &extProcPb.ProcessingRequest{
		Request: &extProcPb.ProcessingRequest_RequestBody{
			RequestBody: &extProcPb.HttpBody{Body: encoded},
		},
	}
}
// FakePod returns a backend.Pod whose Name is "pod-<index>" and whose Address
// is "address-<index>".
func FakePod(index int) backend.Pod {
	return backend.Pod{
		Name:    fmt.Sprintf("pod-%v", index),
		Address: fmt.Sprintf("address-%v", index),
	}
}
// populatePodDatastore returns a new sync.Map containing one entry per element
// of pods, keyed by that element's Pod with the value true.
func populatePodDatastore(pods []*backend.PodMetrics) *sync.Map {
	store := new(sync.Map)
	for i := range pods {
		store.Store(pods[i].Pod, true)
	}
	return store
}