diff --git a/pkg/ext-proc/Dockerfile b/pkg/ext-proc/Dockerfile new file mode 100644 index 00000000..3a35c1cc --- /dev/null +++ b/pkg/ext-proc/Dockerfile @@ -0,0 +1,19 @@ +## Multistage build +FROM golang:1.22.5-alpine as build +ENV CGO_ENABLED=0 +ENV GOOS=linux +ENV GOARCH=amd64 + +WORKDIR /src +COPY . . +RUN go mod download +RUN go build -o /ext-proc +FROM alpine:latest +## Multistage deploy +FROM gcr.io/distroless/base-debian10 +# Install bash + +WORKDIR / +COPY --from=build /ext-proc /ext-proc + +ENTRYPOINT ["/ext-proc"] diff --git a/pkg/ext-proc/backend/fake.go b/pkg/ext-proc/backend/fake.go new file mode 100644 index 00000000..61d4b619 --- /dev/null +++ b/pkg/ext-proc/backend/fake.go @@ -0,0 +1,26 @@ +package backend + +import ( + dto "github.com/prometheus/client_model/go" +) + +type FakePodLister struct { + Err error + Pods PodSet +} + +type FakePodMetricsClient struct { + Err map[Pod]error + Res map[Pod]map[string]*dto.MetricFamily +} + +func (f *FakePodMetricsClient) FetchMetrics(pod Pod) (map[string]*dto.MetricFamily, error) { + if err, ok := f.Err[pod]; ok { + return nil, err + } + return f.Res[pod], nil +} + +func (fpl *FakePodLister) List() (PodSet, error) { + return fpl.Pods, fpl.Err +} diff --git a/pkg/ext-proc/backend/metrics.go b/pkg/ext-proc/backend/metrics.go new file mode 100644 index 00000000..7d7b5470 --- /dev/null +++ b/pkg/ext-proc/backend/metrics.go @@ -0,0 +1,135 @@ +package backend + +import ( + "fmt" + "strings" + "sync" + "time" + + dto "github.com/prometheus/client_model/go" + "go.uber.org/multierr" + klog "k8s.io/klog/v2" +) + +const ( + ActiveLoRAAdaptersMetricName = "vllm:info_active_adapters_info" + LoRAAdapterPendingRequestMetricName = "vllm:active_lora_adapters" + // TODO: Replace these with the num_tokens_running/waiting below once we add those to the fork. + RunningQueueSizeMetricName = "vllm:num_requests_running" + WaitingQueueSizeMetricName = "vllm:num_requests_waiting" + /* TODO: Uncomment this once the following are added to the fork. + RunningQueueSizeMetricName = "vllm:num_tokens_running" + WaitingQueueSizeMetricName = "vllm:num_tokens_waiting" + */ + KVCacheUsagePercentMetricName = "vllm:gpu_cache_usage_perc" + KvCacheMaxTokenCapacityMetricName = "vllm:gpu_cache_max_token_capacity" +) + +func (p *Provider) refreshMetricsOnce() error { + start := time.Now() + defer func() { + d := time.Now().Sub(start) + // TODO: add a metric instead of logging + klog.V(3).Infof("Refreshed metrics in %v", d) + }() + var wg sync.WaitGroup + var errs error + processOnePod := func(key, value any) bool { + pod := key.(Pod) + metrics := value.(*PodMetrics) + wg.Add(1) + go func() { + defer wg.Done() + metricFamilies, err := p.pmc.FetchMetrics(pod) + if err != nil { + multierr.Append(errs, fmt.Errorf("failed to parse metrics from %s: %v", pod, err)) + return + } + updated, err := promToPodMetrics(metricFamilies, metrics) + klog.V(3).Infof("Updated metrics for pod %s: %v", pod, updated.Metrics) + if err != nil { + multierr.Append(errs, fmt.Errorf("failed to get all pod metrics updated from prometheus: %v", err)) + } + p.UpdatePodMetrics(pod, updated) + }() + return true + } + p.podMetrics.Range(processOnePod) + wg.Wait() + return errs +} + +// promToPodMetrics updates internal pod metrics with scraped prometheus metrics. +// A combined error is returned if errors occur in one or more metric processing. +// it returns a new PodMetrics pointer which can be used to atomically update the pod metrics map. +func promToPodMetrics(metricFamilies map[string]*dto.MetricFamily, existing *PodMetrics) (*PodMetrics, error) { + var errs error + updated := existing.Clone() + runningQueueSize, _, err := getLatestMetric(metricFamilies, RunningQueueSizeMetricName) + multierr.Append(errs, err) + if err != nil { + updated.RunningQueueSize = int(runningQueueSize.GetCounter().GetValue()) + } + waitingQueueSize, _, err := getLatestMetric(metricFamilies, WaitingQueueSizeMetricName) + multierr.Append(errs, err) + if err != nil { + updated.WaitingQueueSize = int(waitingQueueSize.GetGauge().GetValue()) + } + cachePercent, _, err := getLatestMetric(metricFamilies, KVCacheUsagePercentMetricName) + multierr.Append(errs, err) + if err != nil { + updated.KVCacheUsagePercent = cachePercent.GetGauge().GetValue() + } + /* TODO: uncomment once this is available in vllm. + kvCap, _, err := getGaugeLatestValue(metricFamilies, KvCacheMaxTokenCapacityMetricName) + multierr.Append(errs, err) + if err != nil { + updated.KvCacheMaxTokenCapacity = int(kvCap) + } + */ + + // Update active loras + mf, ok := metricFamilies[ActiveLoRAAdaptersMetricName] + if ok { + // IMPORTANT: replace the map entries instead of appending to it. + updated.CachedModels = make(map[string]int) + for _, metric := range mf.GetMetric() { + for _, label := range metric.GetLabel() { + if label.GetName() == "active_adapters" { + if label.GetValue() != "" { + adapterList := strings.Split(label.GetValue(), ",") + for _, adapter := range adapterList { + updated.CachedModels[adapter] = 0 + } + } + } + } + } + } else { + klog.Warningf("metric family %q not found", ActiveLoRAAdaptersMetricName) + multierr.Append(errs, fmt.Errorf("metric family %q not found", ActiveLoRAAdaptersMetricName)) + } + + return updated, errs +} + +// getLatestMetric gets the latest metric of a family. This should be used to get the latest Gauge metric. +func getLatestMetric(metricFamilies map[string]*dto.MetricFamily, metricName string) (*dto.Metric, time.Time, error) { + mf, ok := metricFamilies[metricName] + if !ok { + klog.Warningf("metric family %q not found", metricName) + return nil, time.Time{}, fmt.Errorf("metric family %q not found", metricName) + } + if len(mf.GetMetric()) == 0 { + return nil, time.Time{}, fmt.Errorf("no metrics available for %q", metricName) + } + var latestTs int64 + var latest *dto.Metric + for _, m := range mf.GetMetric() { + if m.GetTimestampMs() > latestTs { + latestTs = m.GetTimestampMs() + latest = m + } + } + return latest, time.Unix(0, latestTs*1000), nil +} diff --git a/pkg/ext-proc/backend/pod_client.go b/pkg/ext-proc/backend/pod_client.go new file mode 100644 index 00000000..ee36a2c6 --- /dev/null +++ b/pkg/ext-proc/backend/pod_client.go @@ -0,0 +1,34 @@ +package backend + +import ( + "fmt" + "net/http" + + dto "github.com/prometheus/client_model/go" + "github.com/prometheus/common/expfmt" + klog "k8s.io/klog/v2" +) + +type PodMetricsClientImpl struct { +} + +// FetchMetrics fetches metrics from a given pod. +func (p *PodMetricsClientImpl) FetchMetrics(pod Pod) (map[string]*dto.MetricFamily, error) { + // Currently the metrics endpoint is hard-coded, which works with vLLM. + // TODO(https://github.com/kubernetes-sigs/llm-instance-gateway/issues/16): Consume this from LLMServerPool config. + url := fmt.Sprintf("http://%s/metrics", pod.Address) + resp, err := http.Get(url) + if err != nil { + klog.Errorf("failed to fetch metrics from %s: %v", pod, err) + return nil, fmt.Errorf("failed to fetch metrics from %s: %w", pod, err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + klog.Errorf("unexpected status code from %s: %v", pod, resp.StatusCode) + return nil, fmt.Errorf("unexpected status code from %s: %v", pod, resp.StatusCode) + } + + parser := expfmt.TextParser{} + return parser.TextToMetricFamilies(resp.Body) +} diff --git a/pkg/ext-proc/backend/provider.go b/pkg/ext-proc/backend/provider.go new file mode 100644 index 00000000..60fb627c --- /dev/null +++ b/pkg/ext-proc/backend/provider.go @@ -0,0 +1,122 @@ +package backend + +import ( + "fmt" + "sync" + "time" + + dto "github.com/prometheus/client_model/go" + klog "k8s.io/klog/v2" +) + +func NewProvider(pmc PodMetricsClient, pl PodLister) *Provider { + p := &Provider{ + podMetrics: sync.Map{}, + pmc: pmc, + pl: pl, + } + return p +} + +// Provider provides backend pods and information such as metrics. +type Provider struct { + // key: Pod, value: *PodMetrics + podMetrics sync.Map + pmc PodMetricsClient + pl PodLister +} + +type PodMetricsClient interface { + FetchMetrics(pod Pod) (map[string]*dto.MetricFamily, error) +} + +type PodLister interface { + List() (PodSet, error) +} + +func (p *Provider) AllPodMetrics() []*PodMetrics { + res := []*PodMetrics{} + fn := func(k, v any) bool { + res = append(res, v.(*PodMetrics)) + return true + } + p.podMetrics.Range(fn) + return res +} + +func (p *Provider) UpdatePodMetrics(pod Pod, pm *PodMetrics) { + p.podMetrics.Store(pod, pm) +} + +func (p *Provider) GetPodMetrics(pod Pod) (*PodMetrics, bool) { + val, ok := p.podMetrics.Load(pod) + if ok { + return val.(*PodMetrics), true + } + return nil, false +} + +func (p *Provider) Init(refreshPodsInterval, refreshMetricsInterval time.Duration) error { + if err := p.refreshPodsOnce(); err != nil { + return fmt.Errorf("failed to init pods: %v", err) + } + if err := p.refreshMetricsOnce(); err != nil { + return fmt.Errorf("failed to init metrics: %v", err) + } + + klog.V(2).Infof("Initialized pods and metrics: %+v", p.AllPodMetrics()) + + // periodically refresh pods + go func() { + for { + time.Sleep(refreshPodsInterval) + if err := p.refreshPodsOnce(); err != nil { + klog.V(1).Infof("Failed to refresh podslist pods: %v", err) + } + } + }() + + // periodically refresh metrics + go func() { + for { + time.Sleep(refreshMetricsInterval) + if err := p.refreshMetricsOnce(); err != nil { + klog.V(1).Infof("Failed to refresh metrics: %v", err) + } + } + }() + + return nil +} + +// refreshPodsOnce lists pods and updates keys in the podMetrics map. +// Note this function doesn't update the PodMetrics value, it's done separately. +func (p *Provider) refreshPodsOnce() error { + pods, err := p.pl.List() + if err != nil { + return err + } + // merge new pods with cached ones. + // add new pod to the map + for pod := range pods { + if _, ok := p.podMetrics.Load(pod); !ok { + new := &PodMetrics{ + Pod: pod, + Metrics: Metrics{ + CachedModels: make(map[string]int), + }, + } + p.podMetrics.Store(pod, new) + } + } + // remove pods that don't exist any more. + mergeFn := func(k, v any) bool { + pod := k.(Pod) + if _, ok := pods[pod]; !ok { + p.podMetrics.Delete(pod) + } + return true + } + p.podMetrics.Range(mergeFn) + return nil +} diff --git a/pkg/ext-proc/backend/types.go b/pkg/ext-proc/backend/types.go new file mode 100644 index 00000000..7d5af51a --- /dev/null +++ b/pkg/ext-proc/backend/types.go @@ -0,0 +1,52 @@ +// Package backend is a library to interact with backend model servers such as probing metrics. +package backend + +import "fmt" + +type PodSet map[Pod]bool + +type Pod struct { + Namespace string + Name string + Address string +} + +func (p Pod) String() string { + return p.Namespace + "." + p.Name +} + +type Metrics struct { + // CachedModels is a set of models(including LoRA adapters) that are currently cached to GPU. + CachedModels map[string]int + RunningQueueSize int + WaitingQueueSize int + KVCacheUsagePercent float64 + KvCacheMaxTokenCapacity int +} + +type PodMetrics struct { + Pod + Metrics +} + +func (pm *PodMetrics) String() string { + return fmt.Sprintf("Pod: %+v; Metrics: %+v", pm.Pod, pm.Metrics) +} + +func (pm *PodMetrics) Clone() *PodMetrics { + cm := make(map[string]int, len(pm.CachedModels)) + for k, v := range pm.CachedModels { + cm[k] = v + } + clone := &PodMetrics{ + Pod: pm.Pod, + Metrics: Metrics{ + CachedModels: cm, + RunningQueueSize: pm.RunningQueueSize, + WaitingQueueSize: pm.WaitingQueueSize, + KVCacheUsagePercent: pm.KVCacheUsagePercent, + KvCacheMaxTokenCapacity: pm.KvCacheMaxTokenCapacity, + }, + } + return clone +} diff --git a/pkg/ext-proc/benchmark/benchmark.go b/pkg/ext-proc/benchmark/benchmark.go new file mode 100644 index 00000000..fe617345 --- /dev/null +++ b/pkg/ext-proc/benchmark/benchmark.go @@ -0,0 +1,212 @@ +package main + +import ( + "encoding/json" + "flag" + "fmt" + "net" + "os" + "time" + + "github.com/bojand/ghz/printer" + "github.com/bojand/ghz/runner" + extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" + "github.com/jhump/protoreflect/desc" + dto "github.com/prometheus/client_model/go" + "google.golang.org/grpc" + "google.golang.org/grpc/reflection" + "google.golang.org/protobuf/proto" + klog "k8s.io/klog/v2" + + "ext-proc/backend" + "ext-proc/handlers" + "ext-proc/scheduling" +) + +var ( + svrAddr = flag.String("server_address", "localhost:9002", "Address of the ext proc server") + totalRequests = flag.Int("total_requests", 100000, "number of requests to be sent for load test") + targetPodHeader = flag.String("targetPodHeader", "target-pod", "the header key for the target pod address to instruct Envoy to send the request to. This must match Envoy configuration.") + + // Flags when running a local ext proc server. + numFakePods = flag.Int("num_fake_pods", 200, "number of fake pods when running a local ext proc server") + numModelsPerPod = flag.Int("num_models_per_pod", 5, "number of fake models per pod when running a local ext proc server") + localServer = flag.Bool("local_server", true, "whether to start a local ext proc server") + refreshPodsInterval = flag.Duration("refreshPodsInterval", 10*time.Second, "interval to refresh pods") + refreshMetricsInterval = flag.Duration("refreshMetricsInterval", 50*time.Millisecond, "interval to refresh metrics") +) + +const ( + port = 9002 +) + +func main() { + klog.InitFlags(nil) + flag.Parse() + + if *localServer { + go startExtProc() + time.Sleep(time.Second) // wait until server is up + klog.Info("Server started") + } + + report, err := runner.Run( + "envoy.service.ext_proc.v3.ExternalProcessor.Process", + *svrAddr, + runner.WithInsecure(true), + runner.WithBinaryDataFunc(generateRequest), + runner.WithTotalRequests(uint(*totalRequests)), + ) + if err != nil { + klog.Fatal(err) + } + + printer := printer.ReportPrinter{ + Out: os.Stdout, + Report: report, + } + + printer.Print("summary") +} + +func generateRequest(mtd *desc.MethodDescriptor, callData *runner.CallData) []byte { + numModels := *numFakePods * (*numModelsPerPod) + j := map[string]interface{}{ + "model": modelName(int(callData.RequestNumber) % numModels), + "prompt": "Write as if you were a critic: San Francisco", + "max_tokens": 100, + "temperature": 0, + } + + llmReq, err := json.Marshal(j) + if err != nil { + klog.Fatal(err) + } + req := &extProcPb.ProcessingRequest{ + Request: &extProcPb.ProcessingRequest_RequestBody{ + RequestBody: &extProcPb.HttpBody{Body: llmReq}, + }, + } + data, err := proto.Marshal(req) + if err != nil { + klog.Fatal("marshaling error: ", err) + } + return data +} + +// startExtProc starts an extProc server with fake pods. +func startExtProc() { + pods, fm := fakePods() + pmc := &backend.FakePodMetricsClient{Res: fm} + + lis, err := net.Listen("tcp", fmt.Sprintf(":%d", port)) + if err != nil { + klog.Fatalf("failed to listen: %v", err) + } + + s := grpc.NewServer() + + pp := backend.NewProvider(pmc, &backend.FakePodLister{Pods: pods}) + if err := pp.Init(*refreshPodsInterval, *refreshMetricsInterval); err != nil { + klog.Fatalf("failed to initialize: %v", err) + } + extProcPb.RegisterExternalProcessorServer(s, handlers.NewServer(pp, scheduling.NewScheduler(pp), *targetPodHeader)) + + klog.Infof("Starting gRPC server on port :%v", port) + reflection.Register(s) + s.Serve(lis) +} + +func fakePods() (backend.PodSet, map[backend.Pod]map[string]*dto.MetricFamily) { + pods := make(backend.PodSet) + metrics := make(map[backend.Pod]map[string]*dto.MetricFamily, *numFakePods) + for i := 0; i < *numFakePods; i++ { + address := fmt.Sprintf("address-%v", i) + pod := backend.Pod{ + Namespace: "default", + Name: fmt.Sprintf("pod-%v", i), + Address: address, + } + pods[pod] = true + metrics[pod] = fakeMetrics(i) + } + + return pods, metrics +} + +// fakeMetrics adds numModelsPerPod number of adapters to the pod metrics. +func fakeMetrics(podNumber int) map[string]*dto.MetricFamily { + metrics := make(map[string]*dto.MetricFamily) + metrics["vllm:active_lora_adapters"] = &dto.MetricFamily{ + Metric: []*dto.Metric{}, + } + metrics["vllm:info_active_adapters_info"] = &dto.MetricFamily{ + Metric: []*dto.Metric{ + { + Label: []*dto.LabelPair{ + { + Name: ptrString("active_adapters"), + Value: ptrString(""), + }, + }, + }, + }, + } + for i := 0; i < *numModelsPerPod; i++ { + mn := modelName(podNumber*(*numModelsPerPod) + i) + one := &dto.Metric{ + Label: []*dto.LabelPair{ + { + Name: ptrString("active_lora_adapters"), + Value: ptrString(mn), + }, + }, + Gauge: &dto.Gauge{Value: ptrFloat64(0)}, + } + metrics["vllm:active_lora_adapters"].Metric = append(metrics["vllm:active_lora_adapters"].Metric, one) + + original := metrics["vllm:info_active_adapters_info"].Metric[0].Label[0].Value + metrics["vllm:info_active_adapters_info"].Metric[0].Label[0].Value = ptrString(*original + "," + mn) + } + metrics[backend.RunningQueueSizeMetricName] = &dto.MetricFamily{ + Metric: []*dto.Metric{ + { + Gauge: &dto.Gauge{Value: ptrFloat64(0)}, + }, + }, + } + metrics[backend.WaitingQueueSizeMetricName] = &dto.MetricFamily{ + Metric: []*dto.Metric{ + { + Gauge: &dto.Gauge{Value: ptrFloat64(0)}, + }, + }, + } + metrics[backend.KVCacheUsagePercentMetricName] = &dto.MetricFamily{ + Metric: []*dto.Metric{ + { + Gauge: &dto.Gauge{Value: ptrFloat64(0)}, + }, + }, + } + metrics[backend.KvCacheMaxTokenCapacityMetricName] = &dto.MetricFamily{ + Metric: []*dto.Metric{ + { + Gauge: &dto.Gauge{Value: ptrFloat64(0)}, + }, + }, + } + return metrics +} + +func modelName(i int) string { + return fmt.Sprintf("adapter-%v", i) +} + +func ptrString(s string) *string { + return &s +} + +func ptrFloat64(f float64) *float64 { + return &f +} diff --git a/pkg/ext-proc/go.mod b/pkg/ext-proc/go.mod new file mode 100644 index 00000000..7a1a1f88 --- /dev/null +++ b/pkg/ext-proc/go.mod @@ -0,0 +1,56 @@ +module ext-proc + +go 1.21 + +require ( + github.com/bojand/ghz v0.120.0 + github.com/envoyproxy/go-control-plane v0.13.0 + github.com/jhump/protoreflect v1.15.1 + github.com/prometheus/client_model v0.6.1 + github.com/prometheus/common v0.55.0 + go.uber.org/multierr v1.9.0 + google.golang.org/grpc v1.67.0 + google.golang.org/protobuf v1.34.2 + k8s.io/klog/v2 v2.130.1 +) + +require ( + cel.dev/expr v0.16.0 // indirect + cloud.google.com/go/compute/metadata v0.5.0 // indirect + github.com/BurntSushi/toml v1.1.0 // indirect + github.com/Masterminds/goutils v1.1.1 // indirect + github.com/Masterminds/semver/v3 v3.2.0 // indirect + github.com/Masterminds/sprig/v3 v3.2.3 // indirect + github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect + github.com/bufbuild/protocompile v0.4.0 // indirect + github.com/census-instrumentation/opencensus-proto v0.4.1 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect + github.com/envoyproxy/protoc-gen-validate v1.1.0 // indirect + github.com/go-logr/logr v1.4.1 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/huandu/xstrings v1.3.3 // indirect + github.com/imdario/mergo v0.3.11 // indirect + github.com/jinzhu/configor v1.2.1 // indirect + github.com/kr/text v0.2.0 // indirect + github.com/mitchellh/copystructure v1.0.0 // indirect + github.com/mitchellh/reflectwalk v1.0.1 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect + github.com/shopspring/decimal v1.2.0 // indirect + github.com/spf13/cast v1.4.1 // indirect + go.uber.org/atomic v1.7.0 // indirect + golang.org/x/crypto v0.26.0 // indirect + golang.org/x/net v0.28.0 // indirect + golang.org/x/oauth2 v0.22.0 // indirect + golang.org/x/sync v0.8.0 // indirect + golang.org/x/sys v0.24.0 // indirect + golang.org/x/text v0.17.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect +) diff --git a/pkg/ext-proc/go.sum b/pkg/ext-proc/go.sum new file mode 100644 index 00000000..3e06181a --- /dev/null +++ b/pkg/ext-proc/go.sum @@ -0,0 +1,171 @@ +cel.dev/expr v0.16.0 h1:yloc84fytn4zmJX2GU3TkXGsaieaV7dQ057Qs4sIG2Y= +cel.dev/expr v0.16.0/go.mod h1:TRSuuV7DlVCE/uwv5QbAiW/v8l5O8C4eEPHeu7gf7Sg= +cloud.google.com/go/compute/metadata v0.5.0 h1:Zr0eK8JbFv6+Wi4ilXAR8FJ3wyNdpxHKJNPos6LTZOY= +cloud.google.com/go/compute/metadata v0.5.0/go.mod h1:aHnloV2TPI38yx4s9+wAZhHykWvVCfu7hQbF+9CWoiY= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/BurntSushi/toml v1.1.0 h1:ksErzDEI1khOiGPgpwuI7x2ebx/uXQNw7xJpn9Eq1+I= +github.com/BurntSushi/toml v1.1.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= +github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= +github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= +github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g= +github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= +github.com/Masterminds/sprig/v3 v3.2.3 h1:eL2fZNezLomi0uOLqjQoN6BfsDD+fyLtgbJMAj9n6YA= +github.com/Masterminds/sprig/v3 v3.2.3/go.mod h1:rXcFaZ2zZbLRJv/xSysmlgIM1u11eBaRMhvYXJNkGuM= +github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafoB+tBA3gMyHYHrpOtNuDiK/uB5uXxq5wM= +github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/bojand/ghz v0.120.0 h1:6F4wsmZVwFg5UnD+/R+IABWk6sKE/0OKIBdUQUZnOdo= +github.com/bojand/ghz v0.120.0/go.mod h1:HfECuBZj1v02XObGnRuoZgyB1PR24/25dIYiJIMjJnE= +github.com/bufbuild/protocompile v0.4.0 h1:LbFKd2XowZvQ/kajzguUp2DC9UEIQhIq77fZZlaQsNA= +github.com/bufbuild/protocompile v0.4.0/go.mod h1:3v93+mbWn/v3xzN+31nwkJfrEpAUwp+BagBSZWx+TP8= +github.com/census-instrumentation/opencensus-proto v0.4.1 h1:iKLQ0xPNFxR/2hzXZMrBo8f1j86j5WHzznCCQxV/b8g= +github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20 h1:N+3sFI5GUjRKBi+i0TxYVST9h4Ie192jJWpHvthBBgg= +github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/envoyproxy/go-control-plane v0.13.0 h1:HzkeUz1Knt+3bK+8LG1bxOO/jzWZmdxpwC51i202les= +github.com/envoyproxy/go-control-plane v0.13.0/go.mod h1:GRaKG3dwvFoTg4nj7aXdZnvMg4d7nvT/wl9WgVXn3Q8= +github.com/envoyproxy/protoc-gen-validate v1.1.0 h1:tntQDh69XqOCOZsDz0lVJQez/2L6Uu2PdjCQwWCJ3bM= +github.com/envoyproxy/protoc-gen-validate v1.1.0/go.mod h1:sXRDRVmzEbkM7CVcM06s9shE/m23dg3wzjl0UWqJ2q4= +github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= +github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4= +github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= +github.com/imdario/mergo v0.3.11 h1:3tnifQM4i+fbajXKBHXWEH+KvNHqojZ778UH75j3bGA= +github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= +github.com/jhump/protoreflect v1.15.1 h1:HUMERORf3I3ZdX05WaQ6MIpd/NJ434hTp5YiKgfCL6c= +github.com/jhump/protoreflect v1.15.1/go.mod h1:jD/2GMKKE6OqX8qTjhADU1e6DShO+gavG9e0Q693nKo= +github.com/jinzhu/configor v1.2.1 h1:OKk9dsR8i6HPOCZR8BcMtcEImAFjIhbJFZNyn5GCZko= +github.com/jinzhu/configor v1.2.1/go.mod h1:nX89/MOmDba7ZX7GCyU/VIaQ2Ar2aizBl2d3JLF/rDc= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mitchellh/copystructure v1.0.0 h1:Laisrj+bAB6b/yJwB5Bt3ITZhGJdqmxquMKeZ+mmkFQ= +github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw= +github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= +github.com/mitchellh/reflectwalk v1.0.1 h1:FVzMWA5RllMAKIdUSC8mdWo3XtwoecrH79BY70sEEpE= +github.com/mitchellh/reflectwalk v1.0.1/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= +github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ= +github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= +github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= +github.com/spf13/cast v1.4.1 h1:s0hze+J0196ZfEMTs80N7UlFt0BDuQ7Q+JDnHiMWKdA= +github.com/spf13/cast v1.4.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= +go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= +go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI= +go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= +golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= +golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= +golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= +golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= +golang.org/x/oauth2 v0.22.0 h1:BzDx2FehcG7jJwgWLELCdmLuxk2i+x9UDpSiss2u0ZA= +golang.org/x/oauth2 v0.22.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg= +golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= +golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 h1:wKguEg1hsxI2/L3hUYrpo1RVi48K+uTyzKqprwLXsb8= +google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142/go.mod h1:d6be+8HhtEtucleCbxpPW9PA9XwISACu8nvpPqF0BVo= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142 h1:e7S5W7MGGLaSu8j3YjdezkZ+m1/Nm0uRVRMEMGk26Xs= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= +google.golang.org/grpc v1.67.0 h1:IdH9y6PF5MPSdAntIcpjQ+tXO41pcQsfZV2RxtQgVcw= +google.golang.org/grpc v1.67.0/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA= +google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= +google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= diff --git a/pkg/ext-proc/handlers/request.go b/pkg/ext-proc/handlers/request.go new file mode 100644 index 00000000..6fe06303 --- /dev/null +++ b/pkg/ext-proc/handlers/request.go @@ -0,0 +1,115 @@ +package handlers + +import ( + "encoding/json" + "fmt" + + configPb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" + extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" + klog "k8s.io/klog/v2" + + "ext-proc/scheduling" +) + +// HandleRequestBody handles body of the request to the backend server, such as parsing the "model" +// parameter. +// Envoy sends the request body to ext proc before sending the request to the backend server. +func (s *Server) HandleRequestBody(reqCtx *RequestContext, req *extProcPb.ProcessingRequest) (*extProcPb.ProcessingResponse, error) { + klog.V(2).Infof("Handling request body") + + // Unmarshal request body (must be JSON). + v := req.Request.(*extProcPb.ProcessingRequest_RequestBody) + var rb map[string]interface{} + if err := json.Unmarshal(v.RequestBody.Body, &rb); err != nil { + klog.Errorf("Error unmarshaling request body: %v", err) + return nil, fmt.Errorf("error unmarshaling request body: %v", err) + } + klog.V(2).Infof("Request body: %v", rb) + + // Resolve target models. + model, ok := rb["model"].(string) + if !ok { + return nil, fmt.Errorf("model not found in request") + } + klog.V(2).Infof("Model requested: %v", model) + llmReq := &scheduling.LLMRequest{ + Model: model, + // For now use the model as the target model. + // TODO: Once the API is approved, read the "LLMUseCase" configuration and apply traffic split. + TargetModels: map[string]int{model: 100}, + ResolvedTargetModel: model, + } + + // Update target models in the body. + rb["model"] = llmReq.ResolvedTargetModel + updatedBody, err := json.Marshal(rb) + if err != nil { + klog.Errorf("Error marshaling request body: %v", err) + return nil, fmt.Errorf("error marshaling request body: %v", err) + } + klog.V(2).Infof("Updated body: %v", updatedBody) + + targetPod, err := s.scheduler.Schedule(llmReq) + if err != nil { + return nil, fmt.Errorf("failed to find target pod: %v", err) + } + klog.V(2).Infof("Selected target model %v in target pod: %v\n", llmReq.ResolvedTargetModel, targetPod) + + reqCtx.Model = llmReq.Model + reqCtx.TargetPod = targetPod + + // Insert "target-pod" to instruct Envoy to route requests to the specified target pod. + headers := []*configPb.HeaderValueOption{ + { + Header: &configPb.HeaderValue{ + Key: s.targetPodHeader, + RawValue: []byte(targetPod.Address), + }, + }, + } + // Print headers for debugging + for _, header := range headers { + klog.V(2).Infof("[request_body] Header Key: %s, Header Value: %s\n", header.Header.Key, header.Header.RawValue) + } + + resp := &extProcPb.ProcessingResponse{ + Response: &extProcPb.ProcessingResponse_RequestBody{ + RequestBody: &extProcPb.BodyResponse{ + Response: &extProcPb.CommonResponse{ + HeaderMutation: &extProcPb.HeaderMutation{ + SetHeaders: headers, + }, + // TODO: Enable body mutation + // BodyMutation: &extProcPb.BodyMutation{ + // Mutation: &extProcPb.BodyMutation_Body{ + // Body: updatedBody, + // }, + // }, + }, + }, + }, + } + return resp, nil +} + +func HandleRequestHeaders(reqCtx *RequestContext, req *extProcPb.ProcessingRequest) *extProcPb.ProcessingResponse { + klog.V(2).Info("--- In RequestHeaders processing ...") + r := req.Request + h := r.(*extProcPb.ProcessingRequest_RequestHeaders) + klog.V(2).Infof("Headers: %+v\n", h) + + resp := &extProcPb.ProcessingResponse{ + Response: &extProcPb.ProcessingResponse_RequestHeaders{ + RequestHeaders: &extProcPb.HeadersResponse{ + Response: &extProcPb.CommonResponse{ + // Set `clear_route_cache = true` to force Envoy to recompute the target cluster + // based on the new "target-pod" header. + // See https://www.envoyproxy.io/docs/envoy/latest/api-v3/service/ext_proc/v3/external_processor.proto#service-ext-proc-v3-commonresponse. + ClearRouteCache: true, + }, + }, + }, + } + + return resp +} diff --git a/pkg/ext-proc/handlers/response.go b/pkg/ext-proc/handlers/response.go new file mode 100644 index 00000000..1719b45a --- /dev/null +++ b/pkg/ext-proc/handlers/response.go @@ -0,0 +1,35 @@ +package handlers + +import ( + configPb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" + extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" + klog "k8s.io/klog/v2" +) + +// HandleResponseHeaders processes response headers from the backend model server. +func (s *Server) HandleResponseHeaders(reqCtx *RequestContext, req *extProcPb.ProcessingRequest) (*extProcPb.ProcessingResponse, error) { + klog.V(2).Info("Processing ResponseHeaders") + h := req.Request.(*extProcPb.ProcessingRequest_ResponseHeaders) + klog.V(2).Infof("Headers before: %+v\n", h) + + resp := &extProcPb.ProcessingResponse{ + Response: &extProcPb.ProcessingResponse_ResponseHeaders{ + ResponseHeaders: &extProcPb.HeadersResponse{ + Response: &extProcPb.CommonResponse{ + HeaderMutation: &extProcPb.HeaderMutation{ + SetHeaders: []*configPb.HeaderValueOption{ + { + Header: &configPb.HeaderValue{ + // This is for debugging purpose only. + Key: "x-went-into-resp-headers", + RawValue: []byte("true"), + }, + }, + }, + }, + }, + }, + }, + } + return resp, nil +} diff --git a/pkg/ext-proc/handlers/server.go b/pkg/ext-proc/handlers/server.go new file mode 100644 index 00000000..e2aee316 --- /dev/null +++ b/pkg/ext-proc/handlers/server.go @@ -0,0 +1,98 @@ +package handlers + +import ( + "io" + + extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + klog "k8s.io/klog/v2" + + "ext-proc/backend" + "ext-proc/scheduling" +) + +func NewServer(pp PodProvider, scheduler Scheduler, targetPodHeader string) *Server { + return &Server{ + scheduler: scheduler, + podProvider: pp, + targetPodHeader: targetPodHeader, + } +} + +// Server implements the Envoy external processing server. +// https://www.envoyproxy.io/docs/envoy/latest/api-v3/service/ext_proc/v3/external_processor.proto +type Server struct { + scheduler Scheduler + podProvider PodProvider + // The key of the header to specify the target pod address. This value needs to match Envoy + // configuration. + targetPodHeader string +} + +type Scheduler interface { + Schedule(b *scheduling.LLMRequest) (targetPod *backend.Pod, err error) +} + +// PodProvider is an interface to provide set of pods in the backend and information such as metrics. +type PodProvider interface { + GetPodMetrics(pod backend.Pod) (*backend.PodMetrics, bool) + UpdatePodMetrics(pod backend.Pod, pm *backend.PodMetrics) +} + +func (s *Server) Process(srv extProcPb.ExternalProcessor_ProcessServer) error { + klog.V(2).Info("Processing") + ctx := srv.Context() + // Create request context to share states during life time of an HTTP request. + // See https://github.com/envoyproxy/envoy/issues/17540. + reqCtx := &RequestContext{} + + for { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + req, err := srv.Recv() + if err == io.EOF { + return nil + } + if err != nil { + return status.Errorf(codes.Unknown, "cannot receive stream request: %v", err) + } + + resp := &extProcPb.ProcessingResponse{} + switch v := req.Request.(type) { + case *extProcPb.ProcessingRequest_RequestHeaders: + resp = HandleRequestHeaders(reqCtx, req) + klog.V(2).Infof("Request context after HandleRequestHeaders: %v", reqCtx) + case *extProcPb.ProcessingRequest_RequestBody: + resp, err = s.HandleRequestBody(reqCtx, req) + klog.V(2).Infof("Request context after HandleRequestBody: %v", reqCtx) + case *extProcPb.ProcessingRequest_ResponseHeaders: + resp, err = s.HandleResponseHeaders(reqCtx, req) + klog.V(2).Infof("Request context after HandleResponseHeaders: %v", reqCtx) + default: + klog.Infof("Unknown Request type %+v", v) + return status.Error(codes.Unknown, "unknown request type") + } + + if err != nil { + klog.Errorf("failed to process request: %v", err) + return status.Errorf(codes.Unknown, "failed to handle request: %v", err) + } + + klog.V(2).Infof("response: %v", resp) + if err := srv.Send(resp); err != nil { + klog.Infof("send error %v", err) + return status.Errorf(codes.Unknown, "failed to send response back to Envoy: %v", err) + } + } +} + +// RequestContext stores context information during the life time of an HTTP request. +type RequestContext struct { + TargetPod *backend.Pod + Model string +} diff --git a/pkg/ext-proc/main.go b/pkg/ext-proc/main.go new file mode 100644 index 00000000..d71a2603 --- /dev/null +++ b/pkg/ext-proc/main.go @@ -0,0 +1,99 @@ +package main + +import ( + "context" + "flag" + "fmt" + "net" + "os" + "os/signal" + "strings" + "syscall" + "time" + + extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + healthPb "google.golang.org/grpc/health/grpc_health_v1" + "google.golang.org/grpc/status" + klog "k8s.io/klog/v2" + + "ext-proc/backend" + "ext-proc/handlers" + "ext-proc/scheduling" +) + +type extProcServer struct{} +type server struct{} + +var ( + port = flag.Int("port", 9002, "gRPC port") + targetPodHeader = flag.String("targetPodHeader", "target-pod", "the header key for the target pod address to instruct Envoy to send the request to. This must match Envoy configuration.") + podIPsFlag = flag.String("podIPs", "", "Comma-separated list of pod IPs") + + refreshPodsInterval = flag.Duration("refreshPodsInterval", 10*time.Second, "interval to refresh pods") + refreshMetricsInterval = flag.Duration("refreshMetricsInterval", 50*time.Millisecond, "interval to refresh metrics") +) + +type healthServer struct{} + +func (s *healthServer) Check(ctx context.Context, in *healthPb.HealthCheckRequest) (*healthPb.HealthCheckResponse, error) { + klog.Infof("Handling grpc Check request + %s", in.String()) + return &healthPb.HealthCheckResponse{Status: healthPb.HealthCheckResponse_SERVING}, nil +} + +func (s *healthServer) Watch(in *healthPb.HealthCheckRequest, srv healthPb.Health_WatchServer) error { + return status.Error(codes.Unimplemented, "Watch is not implemented") +} + +func main() { + klog.InitFlags(nil) + flag.Parse() + + // This is the list of addresses of backend pods. + // TODO (https://github.com/kubernetes-sigs/llm-instance-gateway/issues/12): Remove this once dynamic pod listing is implemented. + if *podIPsFlag == "" { + klog.Fatal("No pods or pod IPs provided. Use the -pods and -podIPs flags to specify comma-separated lists of pod addresses and pod IPs.") + } + podIPs := strings.Split(*podIPsFlag, ",") + klog.Infof("Pods: %v", podIPs) + pods := make(backend.PodSet) + for _, ip := range podIPs { + pod := backend.Pod{ + Namespace: "default", + Name: ip, + Address: ip, + } + pods[pod] = true + } + + klog.Infof("Listening on %q", fmt.Sprintf(":%d", *port)) + lis, err := net.Listen("tcp", fmt.Sprintf(":%d", *port)) + if err != nil { + klog.Fatalf("failed to listen: %v", err) + } + + s := grpc.NewServer() + + pp := backend.NewProvider(&backend.PodMetricsClientImpl{}, &backend.FakePodLister{Pods: pods}) + if err := pp.Init(*refreshPodsInterval, *refreshMetricsInterval); err != nil { + klog.Fatalf("failed to initialize: %v", err) + } + extProcPb.RegisterExternalProcessorServer(s, handlers.NewServer(pp, scheduling.NewScheduler(pp), *targetPodHeader)) + healthPb.RegisterHealthServer(s, &healthServer{}) + + klog.Infof("Starting gRPC server on port :%v", *port) + + // shutdown + var gracefulStop = make(chan os.Signal) + signal.Notify(gracefulStop, syscall.SIGTERM) + signal.Notify(gracefulStop, syscall.SIGINT) + go func() { + sig := <-gracefulStop + klog.Infof("caught sig: %+v", sig) + os.Exit(0) + }() + + s.Serve(lis) + +} diff --git a/pkg/ext-proc/scheduling/filter.go b/pkg/ext-proc/scheduling/filter.go new file mode 100644 index 00000000..11e223a8 --- /dev/null +++ b/pkg/ext-proc/scheduling/filter.go @@ -0,0 +1,130 @@ +package scheduling + +import ( + "fmt" + "math" + + klog "k8s.io/klog/v2" + + "ext-proc/backend" +) + +type Filter interface { + Name() string + Filter(b *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) +} + +// filter applies current filterFunc, and then recursively applies next filters depending success or +// failure of the current filterFunc. +// It can be used to construct a flow chart algorithm. +type filter struct { + name string + filter filterFunc + // nextOnSuccess filter will be applied after successfully applying the current filter. + // The filtered results will be passed to the next filter. + nextOnSuccess *filter + // nextOnFailure filter will be applied if current filter fails. + // The original input will be passed to the next filter. + nextOnFailure *filter + // nextOnSuccessOrFailure is a convenience field to configure the next filter regardless of the + // success or failure of the current filter. + // NOTE: When using nextOnSuccessOrFailure, both nextOnSuccess and nextOnFailure SHOULD be nil. + // However if that's not the case, nextOnSuccess and nextOnFailure will be used, instead of + // nextOnSuccessOrFailure, in the success and failure scenarios, respectively. + nextOnSuccessOrFailure *filter +} + +func (f *filter) Name() string { + if f == nil { + return "nil" + } + return f.name +} + +func (f *filter) Filter(b *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) { + if f == nil { + klog.V(2).Infof("Running nil filter, returning all input pods by default") + return pods, nil + } + klog.V(2).Infof("Running filter %q on request %v with %v pods", f.name, b, len(pods)) + + filtered, err := f.filter(b, pods) + + next := f.nextOnSuccessOrFailure + if err == nil { + klog.V(2).Infof("onSuccess %v -> %v, filtered: %v", f.name, next.Name(), len(filtered)) + if f.nextOnSuccess != nil { + next = f.nextOnSuccess + } + // On success, pass the filtered result to the next filter. + return next.Filter(b, filtered) + } + + klog.V(2).Infof("onFailure %v -> %v", f.name, next.Name()) + if f.nextOnFailure != nil { + next = f.nextOnFailure + } + // On failure, pass the initial set of pods to the next filter. + return next.Filter(b, pods) +} + +// filterFunc filters a set of input pods to a subset. +type filterFunc func(b *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) + +// toFilterFunc is a helper function to convert a per pod filter func to the FilterFunc. +func toFilterFunc(pp podPredicate) filterFunc { + return func(b *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) { + filtered := []*backend.PodMetrics{} + for _, pod := range pods { + pass := pp(b, pod) + if pass { + filtered = append(filtered, pod) + } + } + if len(filtered) == 0 { + return nil, fmt.Errorf("no pods left") + } + return filtered, nil + } +} + +func leastQueuingFilterFunc(b *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) { + min := math.MaxInt + filtered := []*backend.PodMetrics{} + for _, pod := range pods { + if pod.WaitingQueueSize < min { + min = pod.WaitingQueueSize + filtered = []*backend.PodMetrics{} + } + if pod.WaitingQueueSize == min { + filtered = append(filtered, pod) + } + } + return filtered, nil +} + +func leastKVCacheFilterFunc(b *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) { + min := math.MaxInt + filtered := []*backend.PodMetrics{} + margin := 5 + for _, pod := range pods { + cur := int(pod.KVCacheUsagePercent) / margin + if cur < min { + min = cur + filtered = []*backend.PodMetrics{} + } + if cur == min { + filtered = append(filtered, pod) + } + } + return filtered, nil +} + +// podPredicate is a filter function to check whether a pod is desired. +type podPredicate func(b *LLMRequest, pod *backend.PodMetrics) bool + +// loraAffinityPredicate return true if the pod have the requested LoRA adapter loaded. +func loraAffinityPredicate(b *LLMRequest, pod *backend.PodMetrics) bool { + _, ok := pod.CachedModels[b.ResolvedTargetModel] + return ok +} diff --git a/pkg/ext-proc/scheduling/scheduler.go b/pkg/ext-proc/scheduling/scheduler.go new file mode 100644 index 00000000..be7501ae --- /dev/null +++ b/pkg/ext-proc/scheduling/scheduler.go @@ -0,0 +1,54 @@ +// Package scheduling implements request scheduling algorithms. +package scheduling + +import ( + "math/rand" + + klog "k8s.io/klog/v2" + + "ext-proc/backend" +) + +var ( + defaultFilter = &filter{ + name: "least queuing", + filter: leastQueuingFilterFunc, + nextOnSuccessOrFailure: &filter{ + name: "lora affinity", + filter: toFilterFunc(loraAffinityPredicate), + nextOnSuccessOrFailure: &filter{ + name: "least KV cache percent", + filter: leastKVCacheFilterFunc, + }, + }, + } +) + +func NewScheduler(pmp PodMetricsProvider) *Scheduler { + return &Scheduler{ + podMetricsProvider: pmp, + filter: defaultFilter, + } +} + +type Scheduler struct { + podMetricsProvider PodMetricsProvider + filter Filter +} + +// PodMetricsProvider is an interface to provide set of pods in the backend and information such as +// metrics. +type PodMetricsProvider interface { + AllPodMetrics() []*backend.PodMetrics +} + +// Schedule finds the target pod based on metrics and the requested lora adapter. +func (s *Scheduler) Schedule(b *LLMRequest) (targetPod *backend.Pod, err error) { + klog.V(2).Infof("request: %v; metrics: %+v", b, s.podMetricsProvider.AllPodMetrics()) + pods, err := s.filter.Filter(b, s.podMetricsProvider.AllPodMetrics()) + if err != nil || len(pods) == 0 { + klog.Errorf("Failed to apply filter, this should never happen: %v", err) + } + i := rand.Intn(len(pods)) + return &pods[i].Pod, nil +} diff --git a/pkg/ext-proc/scheduling/types.go b/pkg/ext-proc/scheduling/types.go new file mode 100644 index 00000000..46578550 --- /dev/null +++ b/pkg/ext-proc/scheduling/types.go @@ -0,0 +1,10 @@ +package scheduling + +// LLMRequest is a structured representation of the fields we parse out of the LLMRequest body. +type LLMRequest struct { + Model string + // Target models is a map of target model name to weight. + TargetModels map[string]int + // Resolved target model is the final target model after traffic split. + ResolvedTargetModel string +}