forked from kubernetes-sigs/gateway-api-inference-extension
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbenchmark.go
97 lines (82 loc) · 2.65 KB
/
benchmark.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
package main
import (
"flag"
"fmt"
"os"
"time"
"github.com/bojand/ghz/printer"
"github.com/bojand/ghz/runner"
"github.com/jhump/protoreflect/desc"
"google.golang.org/protobuf/proto"
klog "k8s.io/klog/v2"
"ext-proc/backend"
"ext-proc/test"
)
var (
svrAddr = flag.String("server_address", "localhost:9002", "Address of the ext proc server")
totalRequests = flag.Int("total_requests", 100000, "number of requests to be sent for load test")
// Flags when running a local ext proc server.
numFakePods = flag.Int("num_fake_pods", 200, "number of fake pods when running a local ext proc server")
numModelsPerPod = flag.Int("num_models_per_pod", 5, "number of fake models per pod when running a local ext proc server")
localServer = flag.Bool("local_server", true, "whether to start a local ext proc server")
refreshPodsInterval = flag.Duration("refreshPodsInterval", 10*time.Second, "interval to refresh pods")
refreshMetricsInterval = flag.Duration("refreshMetricsInterval", 50*time.Millisecond, "interval to refresh metrics")
)
const (
port = 9002
)
func main() {
klog.InitFlags(nil)
flag.Parse()
if *localServer {
test.StartExtProc(port, *refreshPodsInterval, *refreshMetricsInterval, fakePods())
time.Sleep(time.Second) // wait until server is up
klog.Info("Server started")
}
report, err := runner.Run(
"envoy.service.ext_proc.v3.ExternalProcessor.Process",
*svrAddr,
runner.WithInsecure(true),
runner.WithBinaryDataFunc(generateRequest),
runner.WithTotalRequests(uint(*totalRequests)),
)
if err != nil {
klog.Fatal(err)
}
printer := printer.ReportPrinter{
Out: os.Stdout,
Report: report,
}
printer.Print("summary")
}
func generateRequest(mtd *desc.MethodDescriptor, callData *runner.CallData) []byte {
numModels := *numFakePods * (*numModelsPerPod)
req := test.GenerateRequest(modelName(int(callData.RequestNumber) % numModels))
data, err := proto.Marshal(req)
if err != nil {
klog.Fatal("marshaling error: ", err)
}
return data
}
func fakePods() []*backend.PodMetrics {
pms := make([]*backend.PodMetrics, 0, *numFakePods)
for i := 0; i < *numFakePods; i++ {
metrics := fakeMetrics(i)
pod := test.FakePod(i)
pms = append(pms, &backend.PodMetrics{Pod: pod, Metrics: metrics})
}
return pms
}
// fakeMetrics adds numModelsPerPod number of adapters to the pod metrics.
func fakeMetrics(podNumber int) backend.Metrics {
metrics := backend.Metrics{
ActiveModels: make(map[string]int),
}
for i := 0; i < *numModelsPerPod; i++ {
metrics.ActiveModels[modelName(podNumber*(*numModelsPerPod)+i)] = 0
}
return metrics
}
func modelName(i int) string {
return fmt.Sprintf("adapter-%v", i)
}