diff --git a/pkg/ext-proc/Dockerfile b/pkg/ext-proc/Dockerfile
new file mode 100644
index 00000000..3a35c1cc
--- /dev/null
+++ b/pkg/ext-proc/Dockerfile
@@ -0,0 +1,19 @@
+## Multistage build
+FROM golang:1.22.5-alpine as build
+ENV CGO_ENABLED=0
+ENV GOOS=linux
+ENV GOARCH=amd64
+
+WORKDIR /src
+COPY . .
+RUN go mod download
+RUN go build -o /ext-proc
+FROM alpine:latest
+## Multistage deploy
+FROM gcr.io/distroless/base-debian10
+# Install bash
+
+WORKDIR /
+COPY --from=build /ext-proc /ext-proc
+
+ENTRYPOINT ["/ext-proc"]
diff --git a/pkg/ext-proc/backend/fake.go b/pkg/ext-proc/backend/fake.go
new file mode 100644
index 00000000..61d4b619
--- /dev/null
+++ b/pkg/ext-proc/backend/fake.go
@@ -0,0 +1,26 @@
+package backend
+
+import (
+	dto "github.com/prometheus/client_model/go"
+)
+
+type FakePodLister struct {
+	Err  error
+	Pods PodSet
+}
+
+type FakePodMetricsClient struct {
+	Err map[Pod]error
+	Res map[Pod]map[string]*dto.MetricFamily
+}
+
+func (f *FakePodMetricsClient) FetchMetrics(pod Pod) (map[string]*dto.MetricFamily, error) {
+	if err, ok := f.Err[pod]; ok {
+		return nil, err
+	}
+	return f.Res[pod], nil
+}
+
+func (fpl *FakePodLister) List() (PodSet, error) {
+	return fpl.Pods, fpl.Err
+}
diff --git a/pkg/ext-proc/backend/metrics.go b/pkg/ext-proc/backend/metrics.go
new file mode 100644
index 00000000..7d7b5470
--- /dev/null
+++ b/pkg/ext-proc/backend/metrics.go
@@ -0,0 +1,135 @@
+package backend
+
+import (
+	"fmt"
+	"strings"
+	"sync"
+	"time"
+
+	dto "github.com/prometheus/client_model/go"
+	"go.uber.org/multierr"
+	klog "k8s.io/klog/v2"
+)
+
+const (
+	ActiveLoRAAdaptersMetricName        = "vllm:info_active_adapters_info"
+	LoRAAdapterPendingRequestMetricName = "vllm:active_lora_adapters"
+	// TODO: Replace these with the num_tokens_running/waiting below once we add those to the fork.
+	RunningQueueSizeMetricName = "vllm:num_requests_running"
+	WaitingQueueSizeMetricName = "vllm:num_requests_waiting"
+	/* TODO: Uncomment this once the following are added to the fork.
+	RunningQueueSizeMetricName        = "vllm:num_tokens_running"
+	WaitingQueueSizeMetricName        = "vllm:num_tokens_waiting"
+	*/
+	KVCacheUsagePercentMetricName     = "vllm:gpu_cache_usage_perc"
+	KvCacheMaxTokenCapacityMetricName = "vllm:gpu_cache_max_token_capacity"
+)
+
+func (p *Provider) refreshMetricsOnce() error {
+	start := time.Now()
+	defer func() {
+		d := time.Now().Sub(start)
+		// TODO: add a metric instead of logging
+		klog.V(3).Infof("Refreshed metrics in %v", d)
+	}()
+	var wg sync.WaitGroup
+	var errs error
+	processOnePod := func(key, value any) bool {
+		pod := key.(Pod)
+		metrics := value.(*PodMetrics)
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			metricFamilies, err := p.pmc.FetchMetrics(pod)
+			if err != nil {
+				multierr.Append(errs, fmt.Errorf("failed to parse metrics from %s: %v", pod, err))
+				return
+			}
+			updated, err := promToPodMetrics(metricFamilies, metrics)
+			klog.V(3).Infof("Updated metrics for pod %s: %v", pod, updated.Metrics)
+			if err != nil {
+				multierr.Append(errs, fmt.Errorf("failed to get all pod metrics updated from prometheus: %v", err))
+			}
+			p.UpdatePodMetrics(pod, updated)
+		}()
+		return true
+	}
+	p.podMetrics.Range(processOnePod)
+	wg.Wait()
+	return errs
+}
+
+// promToPodMetrics updates internal pod metrics with scraped prometheus metrics.
+// A combined error is returned if errors occur in one or more metric processing.
+// it returns a new PodMetrics pointer which can be used to atomically update the pod metrics map.
+func promToPodMetrics(metricFamilies map[string]*dto.MetricFamily, existing *PodMetrics) (*PodMetrics, error) {
+	var errs error
+	updated := existing.Clone()
+	runningQueueSize, _, err := getLatestMetric(metricFamilies, RunningQueueSizeMetricName)
+	multierr.Append(errs, err)
+	if err != nil {
+		updated.RunningQueueSize = int(runningQueueSize.GetCounter().GetValue())
+	}
+	waitingQueueSize, _, err := getLatestMetric(metricFamilies, WaitingQueueSizeMetricName)
+	multierr.Append(errs, err)
+	if err != nil {
+		updated.WaitingQueueSize = int(waitingQueueSize.GetGauge().GetValue())
+	}
+	cachePercent, _, err := getLatestMetric(metricFamilies, KVCacheUsagePercentMetricName)
+	multierr.Append(errs, err)
+	if err != nil {
+		updated.KVCacheUsagePercent = cachePercent.GetGauge().GetValue()
+	}
+	/* TODO: uncomment once this is available in vllm.
+	kvCap, _, err := getGaugeLatestValue(metricFamilies, KvCacheMaxTokenCapacityMetricName)
+	multierr.Append(errs, err)
+	if err != nil {
+		updated.KvCacheMaxTokenCapacity = int(kvCap)
+	}
+	*/
+
+	// Update active loras
+	mf, ok := metricFamilies[ActiveLoRAAdaptersMetricName]
+	if ok {
+		// IMPORTANT: replace the map entries instead of appending to it.
+		updated.CachedModels = make(map[string]int)
+		for _, metric := range mf.GetMetric() {
+			for _, label := range metric.GetLabel() {
+				if label.GetName() == "active_adapters" {
+					if label.GetValue() != "" {
+						adapterList := strings.Split(label.GetValue(), ",")
+						for _, adapter := range adapterList {
+							updated.CachedModels[adapter] = 0
+						}
+					}
+				}
+			}
+		}
+	} else {
+		klog.Warningf("metric family %q not found", ActiveLoRAAdaptersMetricName)
+		multierr.Append(errs, fmt.Errorf("metric family %q not found", ActiveLoRAAdaptersMetricName))
+	}
+
+	return updated, errs
+}
+
+// getLatestMetric gets the latest metric of a family. This should be used to get the latest Gauge metric.
+func getLatestMetric(metricFamilies map[string]*dto.MetricFamily, metricName string) (*dto.Metric, time.Time, error) {
+	mf, ok := metricFamilies[metricName]
+	if !ok {
+		klog.Warningf("metric family %q not found", metricName)
+		return nil, time.Time{}, fmt.Errorf("metric family %q not found", metricName)
+	}
+	if len(mf.GetMetric()) == 0 {
+		return nil, time.Time{}, fmt.Errorf("no metrics available for %q", metricName)
+	}
+	var latestTs int64
+	var latest *dto.Metric
+	for _, m := range mf.GetMetric() {
+		if m.GetTimestampMs() > latestTs {
+			latestTs = m.GetTimestampMs()
+			latest = m
+		}
+	}
+	return latest, time.Unix(0, latestTs*1000), nil
+}
diff --git a/pkg/ext-proc/backend/pod_client.go b/pkg/ext-proc/backend/pod_client.go
new file mode 100644
index 00000000..ee36a2c6
--- /dev/null
+++ b/pkg/ext-proc/backend/pod_client.go
@@ -0,0 +1,34 @@
+package backend
+
+import (
+	"fmt"
+	"net/http"
+
+	dto "github.com/prometheus/client_model/go"
+	"github.com/prometheus/common/expfmt"
+	klog "k8s.io/klog/v2"
+)
+
+type PodMetricsClientImpl struct {
+}
+
+// FetchMetrics fetches metrics from a given pod.
+func (p *PodMetricsClientImpl) FetchMetrics(pod Pod) (map[string]*dto.MetricFamily, error) {
+	// Currently the metrics endpoint is hard-coded, which works with vLLM.
+	// TODO(https://github.com/kubernetes-sigs/llm-instance-gateway/issues/16): Consume this from LLMServerPool config.
+	url := fmt.Sprintf("http://%s/metrics", pod.Address)
+	resp, err := http.Get(url)
+	if err != nil {
+		klog.Errorf("failed to fetch metrics from %s: %v", pod, err)
+		return nil, fmt.Errorf("failed to fetch metrics from %s: %w", pod, err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		klog.Errorf("unexpected status code from %s: %v", pod, resp.StatusCode)
+		return nil, fmt.Errorf("unexpected status code from %s: %v", pod, resp.StatusCode)
+	}
+
+	parser := expfmt.TextParser{}
+	return parser.TextToMetricFamilies(resp.Body)
+}
diff --git a/pkg/ext-proc/backend/provider.go b/pkg/ext-proc/backend/provider.go
new file mode 100644
index 00000000..60fb627c
--- /dev/null
+++ b/pkg/ext-proc/backend/provider.go
@@ -0,0 +1,122 @@
+package backend
+
+import (
+	"fmt"
+	"sync"
+	"time"
+
+	dto "github.com/prometheus/client_model/go"
+	klog "k8s.io/klog/v2"
+)
+
+func NewProvider(pmc PodMetricsClient, pl PodLister) *Provider {
+	p := &Provider{
+		podMetrics: sync.Map{},
+		pmc:        pmc,
+		pl:         pl,
+	}
+	return p
+}
+
+// Provider provides backend pods and information such as metrics.
+type Provider struct {
+	// key: Pod, value: *PodMetrics
+	podMetrics sync.Map
+	pmc        PodMetricsClient
+	pl         PodLister
+}
+
+type PodMetricsClient interface {
+	FetchMetrics(pod Pod) (map[string]*dto.MetricFamily, error)
+}
+
+type PodLister interface {
+	List() (PodSet, error)
+}
+
+func (p *Provider) AllPodMetrics() []*PodMetrics {
+	res := []*PodMetrics{}
+	fn := func(k, v any) bool {
+		res = append(res, v.(*PodMetrics))
+		return true
+	}
+	p.podMetrics.Range(fn)
+	return res
+}
+
+func (p *Provider) UpdatePodMetrics(pod Pod, pm *PodMetrics) {
+	p.podMetrics.Store(pod, pm)
+}
+
+func (p *Provider) GetPodMetrics(pod Pod) (*PodMetrics, bool) {
+	val, ok := p.podMetrics.Load(pod)
+	if ok {
+		return val.(*PodMetrics), true
+	}
+	return nil, false
+}
+
+func (p *Provider) Init(refreshPodsInterval, refreshMetricsInterval time.Duration) error {
+	if err := p.refreshPodsOnce(); err != nil {
+		return fmt.Errorf("failed to init pods: %v", err)
+	}
+	if err := p.refreshMetricsOnce(); err != nil {
+		return fmt.Errorf("failed to init metrics: %v", err)
+	}
+
+	klog.V(2).Infof("Initialized pods and metrics: %+v", p.AllPodMetrics())
+
+	// periodically refresh pods
+	go func() {
+		for {
+			time.Sleep(refreshPodsInterval)
+			if err := p.refreshPodsOnce(); err != nil {
+				klog.V(1).Infof("Failed to refresh podslist pods: %v", err)
+			}
+		}
+	}()
+
+	// periodically refresh metrics
+	go func() {
+		for {
+			time.Sleep(refreshMetricsInterval)
+			if err := p.refreshMetricsOnce(); err != nil {
+				klog.V(1).Infof("Failed to refresh metrics: %v", err)
+			}
+		}
+	}()
+
+	return nil
+}
+
+// refreshPodsOnce lists pods and updates keys in the podMetrics map.
+// Note this function doesn't update the PodMetrics value, it's done separately.
+func (p *Provider) refreshPodsOnce() error {
+	pods, err := p.pl.List()
+	if err != nil {
+		return err
+	}
+	// merge new pods with cached ones.
+	// add new pod to the map
+	for pod := range pods {
+		if _, ok := p.podMetrics.Load(pod); !ok {
+			new := &PodMetrics{
+				Pod: pod,
+				Metrics: Metrics{
+					CachedModels: make(map[string]int),
+				},
+			}
+			p.podMetrics.Store(pod, new)
+		}
+	}
+	// remove pods that don't exist any more.
+	mergeFn := func(k, v any) bool {
+		pod := k.(Pod)
+		if _, ok := pods[pod]; !ok {
+			p.podMetrics.Delete(pod)
+		}
+		return true
+	}
+	p.podMetrics.Range(mergeFn)
+	return nil
+}
diff --git a/pkg/ext-proc/backend/types.go b/pkg/ext-proc/backend/types.go
new file mode 100644
index 00000000..7d5af51a
--- /dev/null
+++ b/pkg/ext-proc/backend/types.go
@@ -0,0 +1,52 @@
+// Package backend is a library to interact with backend model servers such as probing metrics.
+package backend
+
+import "fmt"
+
+type PodSet map[Pod]bool
+
+type Pod struct {
+	Namespace string
+	Name      string
+	Address   string
+}
+
+func (p Pod) String() string {
+	return p.Namespace + "." + p.Name
+}
+
+type Metrics struct {
+	// CachedModels is a set of models(including LoRA adapters) that are currently cached to GPU.
+	CachedModels            map[string]int
+	RunningQueueSize        int
+	WaitingQueueSize        int
+	KVCacheUsagePercent     float64
+	KvCacheMaxTokenCapacity int
+}
+
+type PodMetrics struct {
+	Pod
+	Metrics
+}
+
+func (pm *PodMetrics) String() string {
+	return fmt.Sprintf("Pod: %+v; Metrics: %+v", pm.Pod, pm.Metrics)
+}
+
+func (pm *PodMetrics) Clone() *PodMetrics {
+	cm := make(map[string]int, len(pm.CachedModels))
+	for k, v := range pm.CachedModels {
+		cm[k] = v
+	}
+	clone := &PodMetrics{
+		Pod: pm.Pod,
+		Metrics: Metrics{
+			CachedModels:            cm,
+			RunningQueueSize:        pm.RunningQueueSize,
+			WaitingQueueSize:        pm.WaitingQueueSize,
+			KVCacheUsagePercent:     pm.KVCacheUsagePercent,
+			KvCacheMaxTokenCapacity: pm.KvCacheMaxTokenCapacity,
+		},
+	}
+	return clone
+}
diff --git a/pkg/ext-proc/benchmark/benchmark.go b/pkg/ext-proc/benchmark/benchmark.go
new file mode 100644
index 00000000..fe617345
--- /dev/null
+++ b/pkg/ext-proc/benchmark/benchmark.go
@@ -0,0 +1,212 @@
+package main
+
+import (
+	"encoding/json"
+	"flag"
+	"fmt"
+	"net"
+	"os"
+	"time"
+
+	"github.com/bojand/ghz/printer"
+	"github.com/bojand/ghz/runner"
+	extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
+	"github.com/jhump/protoreflect/desc"
+	dto "github.com/prometheus/client_model/go"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/reflection"
+	"google.golang.org/protobuf/proto"
+	klog "k8s.io/klog/v2"
+
+	"ext-proc/backend"
+	"ext-proc/handlers"
+	"ext-proc/scheduling"
+)
+
+var (
+	svrAddr         = flag.String("server_address", "localhost:9002", "Address of the ext proc server")
+	totalRequests   = flag.Int("total_requests", 100000, "number of requests to be sent for load test")
+	targetPodHeader = flag.String("targetPodHeader", "target-pod", "the header key for the target pod address to instruct Envoy to send the request to. This must match Envoy configuration.")
+
+	// Flags when running a local ext proc server.
+	numFakePods            = flag.Int("num_fake_pods", 200, "number of fake pods when running a local ext proc server")
+	numModelsPerPod        = flag.Int("num_models_per_pod", 5, "number of fake models per pod when running a local ext proc server")
+	localServer            = flag.Bool("local_server", true, "whether to start a local ext proc server")
+	refreshPodsInterval    = flag.Duration("refreshPodsInterval", 10*time.Second, "interval to refresh pods")
+	refreshMetricsInterval = flag.Duration("refreshMetricsInterval", 50*time.Millisecond, "interval to refresh metrics")
+)
+
+const (
+	port = 9002
+)
+
+func main() {
+	klog.InitFlags(nil)
+	flag.Parse()
+
+	if *localServer {
+		go startExtProc()
+		time.Sleep(time.Second) // wait until server is up
+		klog.Info("Server started")
+	}
+
+	report, err := runner.Run(
+		"envoy.service.ext_proc.v3.ExternalProcessor.Process",
+		*svrAddr,
+		runner.WithInsecure(true),
+		runner.WithBinaryDataFunc(generateRequest),
+		runner.WithTotalRequests(uint(*totalRequests)),
+	)
+	if err != nil {
+		klog.Fatal(err)
+	}
+
+	printer := printer.ReportPrinter{
+		Out:    os.Stdout,
+		Report: report,
+	}
+
+	printer.Print("summary")
+}
+
+func generateRequest(mtd *desc.MethodDescriptor, callData *runner.CallData) []byte {
+	numModels := *numFakePods * (*numModelsPerPod)
+	j := map[string]interface{}{
+		"model":       modelName(int(callData.RequestNumber) % numModels),
+		"prompt":      "Write as if you were a critic: San Francisco",
+		"max_tokens":  100,
+		"temperature": 0,
+	}
+
+	llmReq, err := json.Marshal(j)
+	if err != nil {
+		klog.Fatal(err)
+	}
+	req := &extProcPb.ProcessingRequest{
+		Request: &extProcPb.ProcessingRequest_RequestBody{
+			RequestBody: &extProcPb.HttpBody{Body: llmReq},
+		},
+	}
+	data, err := proto.Marshal(req)
+	if err != nil {
+		klog.Fatal("marshaling error: ", err)
+	}
+	return data
+}
+
+// startExtProc starts an extProc server with fake pods.
+func startExtProc() {
+	pods, fm := fakePods()
+	pmc := &backend.FakePodMetricsClient{Res: fm}
+
+	lis, err := net.Listen("tcp", fmt.Sprintf(":%d", port))
+	if err != nil {
+		klog.Fatalf("failed to listen: %v", err)
+	}
+
+	s := grpc.NewServer()
+
+	pp := backend.NewProvider(pmc, &backend.FakePodLister{Pods: pods})
+	if err := pp.Init(*refreshPodsInterval, *refreshMetricsInterval); err != nil {
+		klog.Fatalf("failed to initialize: %v", err)
+	}
+	extProcPb.RegisterExternalProcessorServer(s, handlers.NewServer(pp, scheduling.NewScheduler(pp), *targetPodHeader))
+
+	klog.Infof("Starting gRPC server on port :%v", port)
+	reflection.Register(s)
+	s.Serve(lis)
+}
+
+func fakePods() (backend.PodSet, map[backend.Pod]map[string]*dto.MetricFamily) {
+	pods := make(backend.PodSet)
+	metrics := make(map[backend.Pod]map[string]*dto.MetricFamily, *numFakePods)
+	for i := 0; i < *numFakePods; i++ {
+		address := fmt.Sprintf("address-%v", i)
+		pod := backend.Pod{
+			Namespace: "default",
+			Name:      fmt.Sprintf("pod-%v", i),
+			Address:   address,
+		}
+		pods[pod] = true
+		metrics[pod] = fakeMetrics(i)
+	}
+
+	return pods, metrics
+}
+
+// fakeMetrics adds numModelsPerPod number of adapters to the pod metrics.
+func fakeMetrics(podNumber int) map[string]*dto.MetricFamily {
+	metrics := make(map[string]*dto.MetricFamily)
+	metrics["vllm:active_lora_adapters"] = &dto.MetricFamily{
+		Metric: []*dto.Metric{},
+	}
+	metrics["vllm:info_active_adapters_info"] = &dto.MetricFamily{
+		Metric: []*dto.Metric{
+			{
+				Label: []*dto.LabelPair{
+					{
+						Name:  ptrString("active_adapters"),
+						Value: ptrString(""),
+					},
+				},
+			},
+		},
+	}
+	for i := 0; i < *numModelsPerPod; i++ {
+		mn := modelName(podNumber*(*numModelsPerPod) + i)
+		one := &dto.Metric{
+			Label: []*dto.LabelPair{
+				{
+					Name:  ptrString("active_lora_adapters"),
+					Value: ptrString(mn),
+				},
+			},
+			Gauge: &dto.Gauge{Value: ptrFloat64(0)},
+		}
+		metrics["vllm:active_lora_adapters"].Metric = append(metrics["vllm:active_lora_adapters"].Metric, one)
+
+		original := metrics["vllm:info_active_adapters_info"].Metric[0].Label[0].Value
+		metrics["vllm:info_active_adapters_info"].Metric[0].Label[0].Value = ptrString(*original + "," + mn)
+	}
+	metrics[backend.RunningQueueSizeMetricName] = &dto.MetricFamily{
+		Metric: []*dto.Metric{
+			{
+				Gauge: &dto.Gauge{Value: ptrFloat64(0)},
+			},
+		},
+	}
+	metrics[backend.WaitingQueueSizeMetricName] = &dto.MetricFamily{
+		Metric: []*dto.Metric{
+			{
+				Gauge: &dto.Gauge{Value: ptrFloat64(0)},
+			},
+		},
+	}
+	metrics[backend.KVCacheUsagePercentMetricName] = &dto.MetricFamily{
+		Metric: []*dto.Metric{
+			{
+				Gauge: &dto.Gauge{Value: ptrFloat64(0)},
+			},
+		},
+	}
+	metrics[backend.KvCacheMaxTokenCapacityMetricName] = &dto.MetricFamily{
+		Metric: []*dto.Metric{
+			{
+				Gauge: &dto.Gauge{Value: ptrFloat64(0)},
+			},
+		},
+	}
+	return metrics
+}
+
+func modelName(i int) string {
+	return fmt.Sprintf("adapter-%v", i)
+}
+
+func ptrString(s string) *string {
+	return &s
+}
+
+func ptrFloat64(f float64) *float64 {
+	return &f
+}
diff --git a/pkg/ext-proc/go.mod b/pkg/ext-proc/go.mod
new file mode 100644
index 00000000..7a1a1f88
--- /dev/null
+++ b/pkg/ext-proc/go.mod
@@ -0,0 +1,56 @@
+module ext-proc
+
+go 1.21
+
+require (
+	github.com/bojand/ghz v0.120.0
+	github.com/envoyproxy/go-control-plane v0.13.0
+	github.com/jhump/protoreflect v1.15.1
+	github.com/prometheus/client_model v0.6.1
+	github.com/prometheus/common v0.55.0
+	go.uber.org/multierr v1.9.0
+	google.golang.org/grpc v1.67.0
+	google.golang.org/protobuf v1.34.2
+	k8s.io/klog/v2 v2.130.1
+)
+
+require (
+	cel.dev/expr v0.16.0 // indirect
+	cloud.google.com/go/compute/metadata v0.5.0 // indirect
+	github.com/BurntSushi/toml v1.1.0 // indirect
+	github.com/Masterminds/goutils v1.1.1 // indirect
+	github.com/Masterminds/semver/v3 v3.2.0 // indirect
+	github.com/Masterminds/sprig/v3 v3.2.3 // indirect
+	github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect
+	github.com/bufbuild/protocompile v0.4.0 // indirect
+	github.com/census-instrumentation/opencensus-proto v0.4.1 // indirect
+	github.com/cespare/xxhash/v2 v2.3.0 // indirect
+	github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20 // indirect
+	github.com/dustin/go-humanize v1.0.1 // indirect
+	github.com/envoyproxy/protoc-gen-validate v1.1.0 // indirect
+	github.com/go-logr/logr v1.4.1 // indirect
+	github.com/gogo/protobuf v1.3.2 // indirect
+	github.com/golang/protobuf v1.5.4 // indirect
+	github.com/google/uuid v1.6.0 // indirect
+	github.com/huandu/xstrings v1.3.3 // indirect
+	github.com/imdario/mergo v0.3.11 // indirect
+	github.com/jinzhu/configor v1.2.1 // indirect
+	github.com/kr/text v0.2.0 // indirect
+	github.com/mitchellh/copystructure v1.0.0 // indirect
+	github.com/mitchellh/reflectwalk v1.0.1 // indirect
+	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
+	github.com/pkg/errors v0.9.1 // indirect
+	github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
+	github.com/shopspring/decimal v1.2.0 // indirect
+	github.com/spf13/cast v1.4.1 // indirect
+	go.uber.org/atomic v1.7.0 // indirect
+	golang.org/x/crypto v0.26.0 // indirect
+	golang.org/x/net v0.28.0 // indirect
+	golang.org/x/oauth2 v0.22.0 // indirect
+	golang.org/x/sync v0.8.0 // indirect
+	golang.org/x/sys v0.24.0 // indirect
+	golang.org/x/text v0.17.0 // indirect
+	google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142 // indirect
+	gopkg.in/yaml.v2 v2.4.0 // indirect
+)
diff --git a/pkg/ext-proc/go.sum b/pkg/ext-proc/go.sum
new file mode 100644
index 00000000..3e06181a
--- /dev/null
+++ b/pkg/ext-proc/go.sum
@@ -0,0 +1,171 @@
+cel.dev/expr v0.16.0 h1:yloc84fytn4zmJX2GU3TkXGsaieaV7dQ057Qs4sIG2Y=
+cel.dev/expr v0.16.0/go.mod h1:TRSuuV7DlVCE/uwv5QbAiW/v8l5O8C4eEPHeu7gf7Sg=
+cloud.google.com/go/compute/metadata v0.5.0 h1:Zr0eK8JbFv6+Wi4ilXAR8FJ3wyNdpxHKJNPos6LTZOY=
+cloud.google.com/go/compute/metadata v0.5.0/go.mod h1:aHnloV2TPI38yx4s9+wAZhHykWvVCfu7hQbF+9CWoiY=
+github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
+github.com/BurntSushi/toml v1.1.0 h1:ksErzDEI1khOiGPgpwuI7x2ebx/uXQNw7xJpn9Eq1+I=
+github.com/BurntSushi/toml v1.1.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
+github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
+github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
+github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g=
+github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ=
+github.com/Masterminds/sprig/v3 v3.2.3 h1:eL2fZNezLomi0uOLqjQoN6BfsDD+fyLtgbJMAj9n6YA=
+github.com/Masterminds/sprig/v3 v3.2.3/go.mod h1:rXcFaZ2zZbLRJv/xSysmlgIM1u11eBaRMhvYXJNkGuM=
+github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafoB+tBA3gMyHYHrpOtNuDiK/uB5uXxq5wM=
+github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
+github.com/bojand/ghz v0.120.0 h1:6F4wsmZVwFg5UnD+/R+IABWk6sKE/0OKIBdUQUZnOdo=
+github.com/bojand/ghz v0.120.0/go.mod h1:HfECuBZj1v02XObGnRuoZgyB1PR24/25dIYiJIMjJnE=
+github.com/bufbuild/protocompile v0.4.0 h1:LbFKd2XowZvQ/kajzguUp2DC9UEIQhIq77fZZlaQsNA=
+github.com/bufbuild/protocompile v0.4.0/go.mod h1:3v93+mbWn/v3xzN+31nwkJfrEpAUwp+BagBSZWx+TP8=
+github.com/census-instrumentation/opencensus-proto v0.4.1 h1:iKLQ0xPNFxR/2hzXZMrBo8f1j86j5WHzznCCQxV/b8g=
+github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw=
+github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
+github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20 h1:N+3sFI5GUjRKBi+i0TxYVST9h4Ie192jJWpHvthBBgg=
+github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
+github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
+github.com/envoyproxy/go-control-plane v0.13.0 h1:HzkeUz1Knt+3bK+8LG1bxOO/jzWZmdxpwC51i202les=
+github.com/envoyproxy/go-control-plane v0.13.0/go.mod h1:GRaKG3dwvFoTg4nj7aXdZnvMg4d7nvT/wl9WgVXn3Q8=
+github.com/envoyproxy/protoc-gen-validate v1.1.0 h1:tntQDh69XqOCOZsDz0lVJQez/2L6Uu2PdjCQwWCJ3bM=
+github.com/envoyproxy/protoc-gen-validate v1.1.0/go.mod h1:sXRDRVmzEbkM7CVcM06s9shE/m23dg3wzjl0UWqJ2q4=
+github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ=
+github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
+github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
+github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
+github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
+github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
+github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4=
+github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
+github.com/imdario/mergo v0.3.11 h1:3tnifQM4i+fbajXKBHXWEH+KvNHqojZ778UH75j3bGA=
+github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
+github.com/jhump/protoreflect v1.15.1 h1:HUMERORf3I3ZdX05WaQ6MIpd/NJ434hTp5YiKgfCL6c=
+github.com/jhump/protoreflect v1.15.1/go.mod h1:jD/2GMKKE6OqX8qTjhADU1e6DShO+gavG9e0Q693nKo=
+github.com/jinzhu/configor v1.2.1 h1:OKk9dsR8i6HPOCZR8BcMtcEImAFjIhbJFZNyn5GCZko=
+github.com/jinzhu/configor v1.2.1/go.mod h1:nX89/MOmDba7ZX7GCyU/VIaQ2Ar2aizBl2d3JLF/rDc=
+github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
+github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
+github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/mitchellh/copystructure v1.0.0 h1:Laisrj+bAB6b/yJwB5Bt3ITZhGJdqmxquMKeZ+mmkFQ=
+github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw=
+github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
+github.com/mitchellh/reflectwalk v1.0.1 h1:FVzMWA5RllMAKIdUSC8mdWo3XtwoecrH79BY70sEEpE=
+github.com/mitchellh/reflectwalk v1.0.1/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo=
+github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
+github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
+github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc=
+github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8=
+github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
+github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
+github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ=
+github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
+github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
+github.com/spf13/cast v1.4.1 h1:s0hze+J0196ZfEMTs80N7UlFt0BDuQ7Q+JDnHiMWKdA=
+github.com/spf13/cast v1.4.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
+github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
+github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw=
+go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
+go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI=
+go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4=
+golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw=
+golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54=
+golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY=
+golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE=
+golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg=
+golang.org/x/oauth2 v0.22.0 h1:BzDx2FehcG7jJwgWLELCdmLuxk2i+x9UDpSiss2u0ZA=
+golang.org/x/oauth2 v0.22.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
+golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg=
+golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc=
+golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
+golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 h1:wKguEg1hsxI2/L3hUYrpo1RVi48K+uTyzKqprwLXsb8=
+google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142/go.mod h1:d6be+8HhtEtucleCbxpPW9PA9XwISACu8nvpPqF0BVo=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142 h1:e7S5W7MGGLaSu8j3YjdezkZ+m1/Nm0uRVRMEMGk26Xs=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU=
+google.golang.org/grpc v1.67.0 h1:IdH9y6PF5MPSdAntIcpjQ+tXO41pcQsfZV2RxtQgVcw=
+google.golang.org/grpc v1.67.0/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA=
+google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg=
+google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
+gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
+k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
diff --git a/pkg/ext-proc/handlers/request.go b/pkg/ext-proc/handlers/request.go
new file mode 100644
index 00000000..6fe06303
--- /dev/null
+++ b/pkg/ext-proc/handlers/request.go
@@ -0,0 +1,115 @@
+package handlers
+
+import (
+	"encoding/json"
+	"fmt"
+
+	configPb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
+	extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
+	klog "k8s.io/klog/v2"
+
+	"ext-proc/scheduling"
+)
+
+// HandleRequestBody handles body of the request to the backend server, such as parsing the "model"
+// parameter.
+// Envoy sends the request body to ext proc before sending the request to the backend server.
+func (s *Server) HandleRequestBody(reqCtx *RequestContext, req *extProcPb.ProcessingRequest) (*extProcPb.ProcessingResponse, error) {
+	klog.V(2).Infof("Handling request body")
+
+	// Unmarshal request body (must be JSON).
+	v := req.Request.(*extProcPb.ProcessingRequest_RequestBody)
+	var rb map[string]interface{}
+	if err := json.Unmarshal(v.RequestBody.Body, &rb); err != nil {
+		klog.Errorf("Error unmarshaling request body: %v", err)
+		return nil, fmt.Errorf("error unmarshaling request body: %v", err)
+	}
+	klog.V(2).Infof("Request body: %v", rb)
+
+	// Resolve target models.
+	model, ok := rb["model"].(string)
+	if !ok {
+		return nil, fmt.Errorf("model not found in request")
+	}
+	klog.V(2).Infof("Model requested: %v", model)
+	llmReq := &scheduling.LLMRequest{
+		Model: model,
+		// For now use the model as the target model.
+		// TODO: Once the API is approved, read the "LLMUseCase" configuration and apply traffic split.
+		TargetModels:        map[string]int{model: 100},
+		ResolvedTargetModel: model,
+	}
+
+	// Update target models in the body.
+	rb["model"] = llmReq.ResolvedTargetModel
+	updatedBody, err := json.Marshal(rb)
+	if err != nil {
+		klog.Errorf("Error marshaling request body: %v", err)
+		return nil, fmt.Errorf("error marshaling request body: %v", err)
+	}
+	klog.V(2).Infof("Updated body: %v", updatedBody)
+
+	targetPod, err := s.scheduler.Schedule(llmReq)
+	if err != nil {
+		return nil, fmt.Errorf("failed to find target pod: %v", err)
+	}
+	klog.V(2).Infof("Selected target model %v in target pod: %v\n", llmReq.ResolvedTargetModel, targetPod)
+
+	reqCtx.Model = llmReq.Model
+	reqCtx.TargetPod = targetPod
+
+	// Insert "target-pod" to instruct Envoy to route requests to the specified target pod.
+	headers := []*configPb.HeaderValueOption{
+		{
+			Header: &configPb.HeaderValue{
+				Key:      s.targetPodHeader,
+				RawValue: []byte(targetPod.Address),
+			},
+		},
+	}
+	// Print headers for debugging
+	for _, header := range headers {
+		klog.V(2).Infof("[request_body] Header Key: %s, Header Value: %s\n", header.Header.Key, header.Header.RawValue)
+	}
+
+	resp := &extProcPb.ProcessingResponse{
+		Response: &extProcPb.ProcessingResponse_RequestBody{
+			RequestBody: &extProcPb.BodyResponse{
+				Response: &extProcPb.CommonResponse{
+					HeaderMutation: &extProcPb.HeaderMutation{
+						SetHeaders: headers,
+					},
+					// TODO: Enable body mutation
+					// BodyMutation: &extProcPb.BodyMutation{
+					// 	Mutation: &extProcPb.BodyMutation_Body{
+					// 		Body: updatedBody,
+					// 	},
+					// },
+				},
+			},
+		},
+	}
+	return resp, nil
+}
+
+func HandleRequestHeaders(reqCtx *RequestContext, req *extProcPb.ProcessingRequest) *extProcPb.ProcessingResponse {
+	klog.V(2).Info("--- In RequestHeaders processing ...")
+	r := req.Request
+	h := r.(*extProcPb.ProcessingRequest_RequestHeaders)
+	klog.V(2).Infof("Headers: %+v\n", h)
+
+	resp := &extProcPb.ProcessingResponse{
+		Response: &extProcPb.ProcessingResponse_RequestHeaders{
+			RequestHeaders: &extProcPb.HeadersResponse{
+				Response: &extProcPb.CommonResponse{
+					// Set `clear_route_cache = true` to force Envoy to recompute the target cluster
+					// based on the new "target-pod" header.
+					// See https://www.envoyproxy.io/docs/envoy/latest/api-v3/service/ext_proc/v3/external_processor.proto#service-ext-proc-v3-commonresponse.
+					ClearRouteCache: true,
+				},
+			},
+		},
+	}
+
+	return resp
+}
diff --git a/pkg/ext-proc/handlers/response.go b/pkg/ext-proc/handlers/response.go
new file mode 100644
index 00000000..1719b45a
--- /dev/null
+++ b/pkg/ext-proc/handlers/response.go
@@ -0,0 +1,35 @@
+package handlers
+
+import (
+	configPb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
+	extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
+	klog "k8s.io/klog/v2"
+)
+
+// HandleResponseHeaders processes response headers from the backend model server.
+func (s *Server) HandleResponseHeaders(reqCtx *RequestContext, req *extProcPb.ProcessingRequest) (*extProcPb.ProcessingResponse, error) {
+	klog.V(2).Info("Processing ResponseHeaders")
+	h := req.Request.(*extProcPb.ProcessingRequest_ResponseHeaders)
+	klog.V(2).Infof("Headers before: %+v\n", h)
+
+	resp := &extProcPb.ProcessingResponse{
+		Response: &extProcPb.ProcessingResponse_ResponseHeaders{
+			ResponseHeaders: &extProcPb.HeadersResponse{
+				Response: &extProcPb.CommonResponse{
+					HeaderMutation: &extProcPb.HeaderMutation{
+						SetHeaders: []*configPb.HeaderValueOption{
+							{
+								Header: &configPb.HeaderValue{
+									// This is for debugging purpose only.
+									Key:      "x-went-into-resp-headers",
+									RawValue: []byte("true"),
+								},
+							},
+						},
+					},
+				},
+			},
+		},
+	}
+	return resp, nil
+}
diff --git a/pkg/ext-proc/handlers/server.go b/pkg/ext-proc/handlers/server.go
new file mode 100644
index 00000000..e2aee316
--- /dev/null
+++ b/pkg/ext-proc/handlers/server.go
@@ -0,0 +1,98 @@
+package handlers
+
+import (
+	"io"
+
+	extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
+	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/status"
+	klog "k8s.io/klog/v2"
+
+	"ext-proc/backend"
+	"ext-proc/scheduling"
+)
+
+func NewServer(pp PodProvider, scheduler Scheduler, targetPodHeader string) *Server {
+	return &Server{
+		scheduler:       scheduler,
+		podProvider:     pp,
+		targetPodHeader: targetPodHeader,
+	}
+}
+
+// Server implements the Envoy external processing server.
+// https://www.envoyproxy.io/docs/envoy/latest/api-v3/service/ext_proc/v3/external_processor.proto
+type Server struct {
+	scheduler   Scheduler
+	podProvider PodProvider
+	// The key of the header to specify the target pod address. This value needs to match Envoy
+	// configuration.
+	targetPodHeader string
+}
+
+type Scheduler interface {
+	Schedule(b *scheduling.LLMRequest) (targetPod *backend.Pod, err error)
+}
+
+// PodProvider is an interface to provide set of pods in the backend and information such as metrics.
+type PodProvider interface {
+	GetPodMetrics(pod backend.Pod) (*backend.PodMetrics, bool)
+	UpdatePodMetrics(pod backend.Pod, pm *backend.PodMetrics)
+}
+
+func (s *Server) Process(srv extProcPb.ExternalProcessor_ProcessServer) error {
+	klog.V(2).Info("Processing")
+	ctx := srv.Context()
+	// Create request context to share states during life time of an HTTP request.
+	// See https://github.com/envoyproxy/envoy/issues/17540.
+	reqCtx := &RequestContext{}
+
+	for {
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		default:
+		}
+
+		req, err := srv.Recv()
+		if err == io.EOF {
+			return nil
+		}
+		if err != nil {
+			return status.Errorf(codes.Unknown, "cannot receive stream request: %v", err)
+		}
+
+		resp := &extProcPb.ProcessingResponse{}
+		switch v := req.Request.(type) {
+		case *extProcPb.ProcessingRequest_RequestHeaders:
+			resp = HandleRequestHeaders(reqCtx, req)
+			klog.V(2).Infof("Request context after HandleRequestHeaders: %v", reqCtx)
+		case *extProcPb.ProcessingRequest_RequestBody:
+			resp, err = s.HandleRequestBody(reqCtx, req)
+			klog.V(2).Infof("Request context after HandleRequestBody: %v", reqCtx)
+		case *extProcPb.ProcessingRequest_ResponseHeaders:
+			resp, err = s.HandleResponseHeaders(reqCtx, req)
+			klog.V(2).Infof("Request context after HandleResponseHeaders: %v", reqCtx)
+		default:
+			klog.Infof("Unknown Request type %+v", v)
+			return status.Error(codes.Unknown, "unknown request type")
+		}
+
+		if err != nil {
+			klog.Errorf("failed to process request: %v", err)
+			return status.Errorf(codes.Unknown, "failed to handle request: %v", err)
+		}
+
+		klog.V(2).Infof("response: %v", resp)
+		if err := srv.Send(resp); err != nil {
+			klog.Infof("send error %v", err)
+			return status.Errorf(codes.Unknown, "failed to send response back to Envoy: %v", err)
+		}
+	}
+}
+
+// RequestContext stores context information during the life time of an HTTP request.
+type RequestContext struct {
+	TargetPod *backend.Pod
+	Model     string
+}
diff --git a/pkg/ext-proc/main.go b/pkg/ext-proc/main.go
new file mode 100644
index 00000000..d71a2603
--- /dev/null
+++ b/pkg/ext-proc/main.go
@@ -0,0 +1,99 @@
+package main
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"net"
+	"os"
+	"os/signal"
+	"strings"
+	"syscall"
+	"time"
+
+	extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/codes"
+	healthPb "google.golang.org/grpc/health/grpc_health_v1"
+	"google.golang.org/grpc/status"
+	klog "k8s.io/klog/v2"
+
+	"ext-proc/backend"
+	"ext-proc/handlers"
+	"ext-proc/scheduling"
+)
+
+type extProcServer struct{}
+type server struct{}
+
+var (
+	port            = flag.Int("port", 9002, "gRPC port")
+	targetPodHeader = flag.String("targetPodHeader", "target-pod", "the header key for the target pod address to instruct Envoy to send the request to. This must match Envoy configuration.")
+	podIPsFlag      = flag.String("podIPs", "", "Comma-separated list of pod IPs")
+
+	refreshPodsInterval    = flag.Duration("refreshPodsInterval", 10*time.Second, "interval to refresh pods")
+	refreshMetricsInterval = flag.Duration("refreshMetricsInterval", 50*time.Millisecond, "interval to refresh metrics")
+)
+
+type healthServer struct{}
+
+func (s *healthServer) Check(ctx context.Context, in *healthPb.HealthCheckRequest) (*healthPb.HealthCheckResponse, error) {
+	klog.Infof("Handling grpc Check request + %s", in.String())
+	return &healthPb.HealthCheckResponse{Status: healthPb.HealthCheckResponse_SERVING}, nil
+}
+
+func (s *healthServer) Watch(in *healthPb.HealthCheckRequest, srv healthPb.Health_WatchServer) error {
+	return status.Error(codes.Unimplemented, "Watch is not implemented")
+}
+
+func main() {
+	klog.InitFlags(nil)
+	flag.Parse()
+
+	// This is the list of addresses of backend pods.
+	// TODO (https://github.com/kubernetes-sigs/llm-instance-gateway/issues/12): Remove this once dynamic pod listing is implemented.
+	if *podIPsFlag == "" {
+		klog.Fatal("No pods or pod IPs provided. Use the -pods and -podIPs flags to specify comma-separated lists of pod addresses and pod IPs.")
+	}
+	podIPs := strings.Split(*podIPsFlag, ",")
+	klog.Infof("Pods: %v", podIPs)
+	pods := make(backend.PodSet)
+	for _, ip := range podIPs {
+		pod := backend.Pod{
+			Namespace: "default",
+			Name:      ip,
+			Address:   ip,
+		}
+		pods[pod] = true
+	}
+
+	klog.Infof("Listening on %q", fmt.Sprintf(":%d", *port))
+	lis, err := net.Listen("tcp", fmt.Sprintf(":%d", *port))
+	if err != nil {
+		klog.Fatalf("failed to listen: %v", err)
+	}
+
+	s := grpc.NewServer()
+
+	pp := backend.NewProvider(&backend.PodMetricsClientImpl{}, &backend.FakePodLister{Pods: pods})
+	if err := pp.Init(*refreshPodsInterval, *refreshMetricsInterval); err != nil {
+		klog.Fatalf("failed to initialize: %v", err)
+	}
+	extProcPb.RegisterExternalProcessorServer(s, handlers.NewServer(pp, scheduling.NewScheduler(pp), *targetPodHeader))
+	healthPb.RegisterHealthServer(s, &healthServer{})
+
+	klog.Infof("Starting gRPC server on port :%v", *port)
+
+	// shutdown
+	var gracefulStop = make(chan os.Signal)
+	signal.Notify(gracefulStop, syscall.SIGTERM)
+	signal.Notify(gracefulStop, syscall.SIGINT)
+	go func() {
+		sig := <-gracefulStop
+		klog.Infof("caught sig: %+v", sig)
+		os.Exit(0)
+	}()
+
+	s.Serve(lis)
+
+}
diff --git a/pkg/ext-proc/scheduling/filter.go b/pkg/ext-proc/scheduling/filter.go
new file mode 100644
index 00000000..11e223a8
--- /dev/null
+++ b/pkg/ext-proc/scheduling/filter.go
@@ -0,0 +1,130 @@
+package scheduling
+
+import (
+	"fmt"
+	"math"
+
+	klog "k8s.io/klog/v2"
+
+	"ext-proc/backend"
+)
+
+type Filter interface {
+	Name() string
+	Filter(b *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error)
+}
+
+// filter applies current filterFunc, and then recursively applies next filters depending success or
+// failure of the current filterFunc.
+// It can be used to construct a flow chart algorithm.
+type filter struct {
+	name   string
+	filter filterFunc
+	// nextOnSuccess filter will be applied after successfully applying the current filter.
+	// The filtered results will be passed to the next filter.
+	nextOnSuccess *filter
+	// nextOnFailure filter will be applied if current filter fails.
+	// The original input will be passed to the next filter.
+	nextOnFailure *filter
+	// nextOnSuccessOrFailure is a convenience field to configure the next filter regardless of the
+	// success or failure of the current filter.
+	// NOTE: When using nextOnSuccessOrFailure, both nextOnSuccess and nextOnFailure SHOULD be nil.
+	// However if that's not the case, nextOnSuccess and nextOnFailure will be used, instead of
+	// nextOnSuccessOrFailure,  in the success and failure scenarios, respectively.
+	nextOnSuccessOrFailure *filter
+}
+
+func (f *filter) Name() string {
+	if f == nil {
+		return "nil"
+	}
+	return f.name
+}
+
+func (f *filter) Filter(b *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) {
+	if f == nil {
+		klog.V(2).Infof("Running nil filter, returning all input pods by default")
+		return pods, nil
+	}
+	klog.V(2).Infof("Running filter %q on request %v with %v pods", f.name, b, len(pods))
+
+	filtered, err := f.filter(b, pods)
+
+	next := f.nextOnSuccessOrFailure
+	if err == nil {
+		klog.V(2).Infof("onSuccess %v -> %v, filtered: %v", f.name, next.Name(), len(filtered))
+		if f.nextOnSuccess != nil {
+			next = f.nextOnSuccess
+		}
+		// On success, pass the filtered result to the next filter.
+		return next.Filter(b, filtered)
+	}
+
+	klog.V(2).Infof("onFailure %v -> %v", f.name, next.Name())
+	if f.nextOnFailure != nil {
+		next = f.nextOnFailure
+	}
+	// On failure, pass the initial set of pods to the next filter.
+	return next.Filter(b, pods)
+}
+
+// filterFunc filters a set of input pods to a subset.
+type filterFunc func(b *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error)
+
+// toFilterFunc is a helper function to convert a per pod filter func to the FilterFunc.
+func toFilterFunc(pp podPredicate) filterFunc {
+	return func(b *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) {
+		filtered := []*backend.PodMetrics{}
+		for _, pod := range pods {
+			pass := pp(b, pod)
+			if pass {
+				filtered = append(filtered, pod)
+			}
+		}
+		if len(filtered) == 0 {
+			return nil, fmt.Errorf("no pods left")
+		}
+		return filtered, nil
+	}
+}
+
+func leastQueuingFilterFunc(b *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) {
+	min := math.MaxInt
+	filtered := []*backend.PodMetrics{}
+	for _, pod := range pods {
+		if pod.WaitingQueueSize < min {
+			min = pod.WaitingQueueSize
+			filtered = []*backend.PodMetrics{}
+		}
+		if pod.WaitingQueueSize == min {
+			filtered = append(filtered, pod)
+		}
+	}
+	return filtered, nil
+}
+
+func leastKVCacheFilterFunc(b *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) {
+	min := math.MaxInt
+	filtered := []*backend.PodMetrics{}
+	margin := 5
+	for _, pod := range pods {
+		cur := int(pod.KVCacheUsagePercent) / margin
+		if cur < min {
+			min = cur
+			filtered = []*backend.PodMetrics{}
+		}
+		if cur == min {
+			filtered = append(filtered, pod)
+		}
+	}
+	return filtered, nil
+}
+
+// podPredicate is a filter function to check whether a pod is desired.
+type podPredicate func(b *LLMRequest, pod *backend.PodMetrics) bool
+
+// loraAffinityPredicate return true if the pod have the requested LoRA adapter loaded.
+func loraAffinityPredicate(b *LLMRequest, pod *backend.PodMetrics) bool {
+	_, ok := pod.CachedModels[b.ResolvedTargetModel]
+	return ok
+}
diff --git a/pkg/ext-proc/scheduling/scheduler.go b/pkg/ext-proc/scheduling/scheduler.go
new file mode 100644
index 00000000..be7501ae
--- /dev/null
+++ b/pkg/ext-proc/scheduling/scheduler.go
@@ -0,0 +1,54 @@
+// Package scheduling implements request scheduling algorithms.
+package scheduling
+
+import (
+	"math/rand"
+
+	klog "k8s.io/klog/v2"
+
+	"ext-proc/backend"
+)
+
+var (
+	defaultFilter = &filter{
+		name:   "least queuing",
+		filter: leastQueuingFilterFunc,
+		nextOnSuccessOrFailure: &filter{
+			name:   "lora affinity",
+			filter: toFilterFunc(loraAffinityPredicate),
+			nextOnSuccessOrFailure: &filter{
+				name:   "least KV cache percent",
+				filter: leastKVCacheFilterFunc,
+			},
+		},
+	}
+)
+
+func NewScheduler(pmp PodMetricsProvider) *Scheduler {
+	return &Scheduler{
+		podMetricsProvider: pmp,
+		filter:             defaultFilter,
+	}
+}
+
+type Scheduler struct {
+	podMetricsProvider PodMetricsProvider
+	filter             Filter
+}
+
+// PodMetricsProvider is an interface to provide set of pods in the backend and information such as
+// metrics.
+type PodMetricsProvider interface {
+	AllPodMetrics() []*backend.PodMetrics
+}
+
+// Schedule finds the target pod based on metrics and the requested lora adapter.
+func (s *Scheduler) Schedule(b *LLMRequest) (targetPod *backend.Pod, err error) {
+	klog.V(2).Infof("request: %v; metrics: %+v", b, s.podMetricsProvider.AllPodMetrics())
+	pods, err := s.filter.Filter(b, s.podMetricsProvider.AllPodMetrics())
+	if err != nil || len(pods) == 0 {
+		klog.Errorf("Failed to apply filter, this should never happen: %v", err)
+	}
+	i := rand.Intn(len(pods))
+	return &pods[i].Pod, nil
+}
diff --git a/pkg/ext-proc/scheduling/types.go b/pkg/ext-proc/scheduling/types.go
new file mode 100644
index 00000000..46578550
--- /dev/null
+++ b/pkg/ext-proc/scheduling/types.go
@@ -0,0 +1,10 @@
+package scheduling
+
+// LLMRequest is a structured representation of the fields we parse out of the LLMRequest body.
+type LLMRequest struct {
+	Model string
+	// Target models is a map of target model name to weight.
+	TargetModels map[string]int
+	// Resolved target model is the final target model after traffic split.
+	ResolvedTargetModel string
+}