package backend

import (
	"context"
	"fmt"
	"time"

	"golang.org/x/time/rate"

	clientset "inference.networking.x-k8s.io/llm-instance-gateway/client-go/clientset/versioned"
	"inference.networking.x-k8s.io/llm-instance-gateway/client-go/clientset/versioned/scheme"
	informers "inference.networking.x-k8s.io/llm-instance-gateway/client-go/informers/externalversions/api/v1alpha1"
	listers "inference.networking.x-k8s.io/llm-instance-gateway/client-go/listers/api/v1alpha1"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
	typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/record"
	"k8s.io/client-go/util/workqueue"
	"k8s.io/klog/v2"
)

const (
	controllerNamePrefix = "instance-gateway-"
)

// LLMServerPoolController is the controller implementation for Instance Gateway resources.
type LLMServerPoolController struct {
	// kubeclientset is the standard kubernetes clientset
	kubeclientset kubernetes.Interface
	// clientset is the clientset for our own API group
	clientset clientset.Interface

	// llmServerPoolLister lists LLMServerPool resources from the informer cache.
	llmServerPoolLister listers.LLMServerPoolLister
	// llmServerPoolsSynced reports whether the LLMServerPool informer cache has synced.
	llmServerPoolsSynced cache.InformerSynced

	// workqueue is a rate limited work queue. This is used to queue work to be
	// processed instead of performing it as soon as a change happens. This
	// means we can ensure we only process a fixed amount of resources at a
	// time, and makes it easy to ensure we are never processing the same item
	// simultaneously in two different workers.
	workqueue workqueue.TypedRateLimitingInterface[cache.ObjectName]
	// controllerName is used as the event source component name.
	controllerName string
	// recorder records Kubernetes events for this controller.
	recorder record.EventRecorder
	// serverPoolName is the name of the single LLMServerPool this controller watches.
	serverPoolName string
	// datastore is the shared store that is kept up to date with the watched LLMServerPool.
	datastore *Datastore
}

// NewLLMServerPoolController builds a controller that watches the named
// LLMServerPool and mirrors it into the shared datastore.
func NewLLMServerPoolController(
	ctx context.Context,
	llmServerPoolName string,
	datastore *Datastore,
	kubeclientset kubernetes.Interface,
	llmServerPoolInformer informers.LLMServerPoolInformer) *LLMServerPoolController {

	logger := klog.FromContext(ctx)
	utilruntime.Must(scheme.AddToScheme(scheme.Scheme))
	logger.V(4).Info("Creating event broadcaster")

	eventBroadcaster := record.NewBroadcaster(record.WithContext(ctx))
	eventBroadcaster.StartStructuredLogging(0)
	eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: kubeclientset.CoreV1().Events("")})
	recorder := eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: controllerNamePrefix + llmServerPoolName})
	// Combine a per-item exponential backoff (5ms base, capped at 1000s) with an
	// overall token-bucket limit (50 qps, burst 300); the queue uses whichever
	// delay is longer.
	ratelimiter := workqueue.NewTypedMaxOfRateLimiter(
		workqueue.NewTypedItemExponentialFailureRateLimiter[cache.ObjectName](5*time.Millisecond, 1000*time.Second),
		&workqueue.TypedBucketRateLimiter[cache.ObjectName]{Limiter: rate.NewLimiter(rate.Limit(50), 300)},
	)

	controller := &LLMServerPoolController{
		controllerName:       controllerNamePrefix + llmServerPoolName,
		serverPoolName:       llmServerPoolName,
		datastore:            datastore,
		workqueue:            workqueue.NewTypedRateLimitingQueue(ratelimiter),
		llmServerPoolsSynced: llmServerPoolInformer.Informer().HasSynced,
		llmServerPoolLister:  llmServerPoolInformer.Lister(),
		recorder:             recorder,
	}

	llmServerPoolInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: controller.enqueueLLMServerPool,
		UpdateFunc: func(old, new interface{}) {
			controller.enqueueLLMServerPool(new)
		},
		DeleteFunc: controller.enqueueLLMServerPool,
	})

	return controller
}

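// The snippet below is a minimal wiring sketch, not part of this package's API.
// It assumes the generated shared informer factory lives under
// client-go/informers/externalversions with an Api().V1alpha1() group accessor,
// and that a zero-value Datastore is usable; adjust both to the real setup.
//
//	factory := externalversions.NewSharedInformerFactory(poolClientset, 30*time.Second)
//	controller := NewLLMServerPoolController(
//		ctx,
//		"my-server-pool", // hypothetical LLMServerPool name
//		&Datastore{},
//		kubeClientset,
//		factory.Api().V1alpha1().LLMServerPools(),
//	)
//	factory.Start(ctx.Done())
//	if err := controller.Run(ctx, 2); err != nil {
//		klog.Fatalf("Error running controller: %v", err)
//	}
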
// Run syncs the informer caches, starts the requested number of workers, and
// blocks until the context is cancelled, at which point it shuts down the
// workqueue and waits for workers to finish processing their current work
// items.
func (c *LLMServerPoolController) Run(ctx context.Context, workers int) error {
	defer utilruntime.HandleCrash()
	defer c.workqueue.ShutDown()
	logger := klog.FromContext(ctx)

	logger.Info("Starting LLMServerPool controller", "name", c.controllerName)

	// Wait for the caches to be synced before starting workers
	logger.Info("Waiting for informer caches to sync")

	if ok := cache.WaitForCacheSync(ctx.Done(), c.llmServerPoolsSynced); !ok {
		return fmt.Errorf("failed to wait for caches to sync")
	}

	logger.Info("Starting workers", "count", workers)
	// Launch workers to process LLMServerPool changes from the workqueue.
	for i := 0; i < workers; i++ {
		go wait.UntilWithContext(ctx, c.runWorker, time.Second)
	}

	logger.Info("Started workers")
	<-ctx.Done()
	logger.Info("Shutting down workers")

	return nil
}

// runWorker is a long-running function that will continually call the
// processNextWorkItem function in order to read and process a message on the
// workqueue.
func (c *LLMServerPoolController) runWorker(ctx context.Context) {
	for c.processNextWorkItem(ctx) {
	}
}

// processNextWorkItem will read a single work item off the workqueue and
// attempt to process it by calling updateDatastore.
func (c *LLMServerPoolController) processNextWorkItem(ctx context.Context) bool {
	logger := klog.FromContext(ctx)
	objRef, shutdown := c.workqueue.Get()

	if shutdown {
		return false
	}

	// We call Done at the end of this func so the workqueue knows we have
	// finished processing this item. We also must remember to call Forget
	// if we do not want this work item being re-queued. For example, we do
	// not call Forget if a transient error occurs; instead the item is
	// put back on the workqueue and attempted again after a back-off
	// period.
	defer c.workqueue.Done(objRef)

	// Do work and update the local datastore
	err := c.updateDatastore(objRef)
	if err == nil {
		// If no error occurs then we Forget this item so it does not
		// get queued again until another change happens.
		c.workqueue.Forget(objRef)
		logger.Info("Successfully synced", "objectName", objRef)
		return true
	}

	// There was a failure, so be sure to report it. This method allows for
	// pluggable error handling, which can be used for things like
	// cluster-monitoring.
	utilruntime.HandleErrorWithContext(ctx, err, "Error syncing; requeuing for later retry", "objectReference", objRef)
	// Since we failed, requeue the item to be worked on later. AddRateLimited
	// adds a backoff to avoid hot-looping on items that are probably still
	// failing, and protects the controller as a whole from spinning on errors
	// and starving other useful work.
	c.workqueue.AddRateLimited(objRef)
	return true
}

// updateDatastore fetches the watched LLMServerPool from the informer cache
// and stores it in the shared datastore when it has changed.
func (c *LLMServerPoolController) updateDatastore(objName cache.ObjectName) error {
	serverPool, err := c.llmServerPoolLister.LLMServerPools(objName.Namespace).Get(objName.Name)
	if err != nil {
		if errors.IsNotFound(err) {
			klog.Info("The parent LLMServerPool cannot be found; if it was deleted, this controller should be shut down shortly.")
			return nil
		}
		return err
	}

	// Only copy the object when its ResourceVersion has changed, to avoid
	// redundant writes to the datastore.
	if c.datastore.LLMServerPool == nil || serverPool.ObjectMeta.ResourceVersion != c.datastore.LLMServerPool.ObjectMeta.ResourceVersion {
		c.datastore.LLMServerPool = serverPool
	}

	return nil
}

// enqueueLLMServerPool converts an informer event object to a cache.ObjectName
// and, if it refers to the LLMServerPool this controller watches, adds it to
// the workqueue. It is used for add, update, and delete events alike.
func (c *LLMServerPoolController) enqueueLLMServerPool(obj interface{}) {
	if objectRef, err := cache.ObjectToName(obj); err != nil {
		utilruntime.HandleError(err)
		return
	} else if objectRef.Name == c.serverPoolName {
		// Only add the relevant LLMServerPool to the queue
		c.workqueue.Add(objectRef)
	}
}
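
// A purely hypothetical illustration of the filtering above (the names and
// objects below are made up, not part of this package): only events for the
// pool this controller was created for end up on the queue.
//
//	c := NewLLMServerPoolController(ctx, "pool-a", &Datastore{}, kubeClientset, poolInformer)
//	c.enqueueLLMServerPool(poolA) // name "pool-a": enqueued
//	c.enqueueLLMServerPool(poolB) // name "pool-b": ignored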