diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 00000000..a3aab7af
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,3 @@
+# More info: https://docs.docker.com/engine/reference/builder/#dockerignore-file
+# Ignore build and test binaries.
+bin/
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..ada68ff0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,27 @@
+# Binaries for programs and plugins
+*.exe
+*.exe~
+*.dll
+*.so
+*.dylib
+bin/*
+Dockerfile.cross
+
+# Test binary, built with `go test -c`
+*.test
+
+# Output of the go coverage tool, specifically when used with LiteIDE
+*.out
+
+# Go workspace file
+go.work
+
+# Kubernetes Generated files - skip generated files, except for vendored files
+!vendor/**/zz_generated.*
+
+# editor and IDE paraphernalia
+.idea
+.vscode
+*.swp
+*.swo
+*~
diff --git a/.golangci.yml b/.golangci.yml
new file mode 100644
index 00000000..aac8a13f
--- /dev/null
+++ b/.golangci.yml
@@ -0,0 +1,47 @@
+run:
+  timeout: 5m
+  allow-parallel-runners: true
+
+issues:
+  # don't skip warning about doc comments
+  # don't exclude the default set of lint
+  exclude-use-default: false
+  # restore some of the defaults
+  # (fill in the rest as needed)
+  exclude-rules:
+    - path: "api/*"
+      linters:
+        - lll
+    - path: "internal/*"
+      linters:
+        - dupl
+        - lll
+linters:
+  disable-all: true
+  enable:
+    - dupl
+    - errcheck
+    - exportloopref
+    - ginkgolinter
+    - goconst
+    - gocyclo
+    - gofmt
+    - goimports
+    - gosimple
+    - govet
+    - ineffassign
+    - lll
+    - misspell
+    - nakedret
+    - prealloc
+    - revive
+    - staticcheck
+    - typecheck
+    - unconvert
+    - unparam
+    - unused
+
+linters-settings:
+  revive:
+    rules:
+      - name: comment-spacings
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..4afbb6f0
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,214 @@
+# Image URL to use all building/pushing image targets
+IMG ?= controller:latest
+# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
+ENVTEST_K8S_VERSION = 1.31.0
+
+# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
+ifeq (,$(shell go env GOBIN))
+GOBIN=$(shell go env GOPATH)/bin
+else
+GOBIN=$(shell go env GOBIN)
+endif
+
+# CONTAINER_TOOL defines the container tool to be used for building images.
+# Be aware that the target commands are only tested with Docker which is
+# scaffolded by default. However, you might want to replace it to use other
+# tools. (i.e. podman)
+CONTAINER_TOOL ?= docker
+
+# Setting SHELL to bash allows bash commands to be executed by recipes.
+# Options are set to exit when a recipe line exits non-zero or a piped command fails.
+SHELL = /usr/bin/env bash -o pipefail
+.SHELLFLAGS = -ec
+
+.PHONY: all
+all: build
+
+##@ General
+
+# The help target prints out all targets with their descriptions organized
+# beneath their categories. The categories are represented by '##@' and the
+# target descriptions by '##'. The awk command is responsible for reading the
+# entire set of makefiles included in this invocation, looking for lines of the
+# file as xyz: ## something, and then pretty-format the target and help. Then,
+# if there's a line with ##@ something, that gets pretty-printed as a category.
+# More info on the usage of ANSI control characters for terminal formatting:
+# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters
+# More info on the awk command:
+# http://linuxcommand.org/lc3_adv_awk.php
+
+.PHONY: help
+help: ## Display this help.
+	@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n  make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf "  \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
+
+##@ Development
+
+.PHONY: manifests
+manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
+	$(CONTROLLER_GEN) rbac:roleName=manager-role crd webhook paths="./..." output:crd:artifacts:config=config/crd/bases
+
+.PHONY: generate
+generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
+	$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..."
+	./hack/update-codegen.sh
+
+PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))
+# Use same code-generator version as k8s.io/api
+CODEGEN_VERSION := $(shell go list -m -f '{{.Version}}' k8s.io/api)
+CODEGEN = $(shell pwd)/bin/code-generator
+CODEGEN_ROOT = $(shell go env GOMODCACHE)/k8s.io/code-generator@$(CODEGEN_VERSION)
+.PHONY: code-generator
+code-generator:
+	@GOBIN=$(PROJECT_DIR)/bin GO111MODULE=on go install k8s.io/code-generator/cmd/client-gen@$(CODEGEN_VERSION)
+	cp -f $(CODEGEN_ROOT)/generate-groups.sh $(PROJECT_DIR)/bin/
+	cp -f $(CODEGEN_ROOT)/generate-internal-groups.sh $(PROJECT_DIR)/bin/
+	cp -f $(CODEGEN_ROOT)/kube_codegen.sh $(PROJECT_DIR)/bin/
+
+
+.PHONY: fmt
+fmt: ## Run go fmt against code.
+	go fmt ./...
+
+.PHONY: vet
+vet: ## Run go vet against code.
+	go vet ./...
+
+.PHONY: test
+test: manifests generate fmt vet envtest ## Run tests.
+	KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $$(go list ./... | grep -v /e2e) -coverprofile cover.out
+
+# Utilize Kind or modify the e2e tests to load the image locally, enabling compatibility with other vendors.
+.PHONY: test-e2e  # Run the e2e tests against a Kind k8s instance that is spun up.
+test-e2e:
+	go test ./test/e2e/ -v -ginkgo.v
+
+.PHONY: lint
+lint: golangci-lint ## Run golangci-lint linter
+	$(GOLANGCI_LINT) run
+
+.PHONY: lint-fix
+lint-fix: golangci-lint ## Run golangci-lint linter and perform fixes
+	$(GOLANGCI_LINT) run --fix
+
+##@ Build
+
+.PHONY: build
+build: manifests generate fmt vet ## Build manager binary.
+	go build -o bin/manager cmd/main.go
+
+.PHONY: run
+run: manifests generate fmt vet ## Run a controller from your host.
+	go run ./cmd/main.go
+
+# If you wish to build the manager image targeting other platforms you can use the --platform flag.
+# (i.e. docker build --platform linux/arm64). However, you must enable docker buildKit for it.
+# More info: https://docs.docker.com/develop/develop-images/build_enhancements/
+.PHONY: docker-build
+docker-build: ## Build docker image with the manager.
+	$(CONTAINER_TOOL) build -t ${IMG} .
+
+.PHONY: docker-push
+docker-push: ## Push docker image with the manager.
+	$(CONTAINER_TOOL) push ${IMG}
+
+# PLATFORMS defines the target platforms for the manager image be built to provide support to multiple
+# architectures. (i.e. make docker-buildx IMG=myregistry/mypoperator:0.0.1). To use this option you need to:
+# - be able to use docker buildx. More info: https://docs.docker.com/build/buildx/
+# - have enabled BuildKit. More info: https://docs.docker.com/develop/develop-images/build_enhancements/
+# - be able to push the image to your registry (i.e. if you do not set a valid value via IMG=<myregistry/image:<tag>> then the export will fail)
+# To adequately provide solutions that are compatible with multiple platforms, you should consider using this option.
+PLATFORMS ?= linux/arm64,linux/amd64,linux/s390x,linux/ppc64le
+.PHONY: docker-buildx
+docker-buildx: ## Build and push docker image for the manager for cross-platform support
+	# copy existing Dockerfile and insert --platform=${BUILDPLATFORM} into Dockerfile.cross, and preserve the original Dockerfile
+	sed -e '1 s/\(^FROM\)/FROM --platform=\$$\{BUILDPLATFORM\}/; t' -e ' 1,// s//FROM --platform=\$$\{BUILDPLATFORM\}/' Dockerfile > Dockerfile.cross
+	- $(CONTAINER_TOOL) buildx create --name api-builder
+	$(CONTAINER_TOOL) buildx use api-builder
+	- $(CONTAINER_TOOL) buildx build --push --platform=$(PLATFORMS) --tag ${IMG} -f Dockerfile.cross .
+	- $(CONTAINER_TOOL) buildx rm api-builder
+	rm Dockerfile.cross
+
+.PHONY: build-installer
+build-installer: manifests generate kustomize ## Generate a consolidated YAML with CRDs and deployment.
+	mkdir -p dist
+	cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
+	$(KUSTOMIZE) build config/default > dist/install.yaml
+
+##@ Deployment
+
+ifndef ignore-not-found
+  ignore-not-found = false
+endif
+
+.PHONY: install
+install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config.
+	$(KUSTOMIZE) build config/crd | $(KUBECTL) apply -f -
+
+.PHONY: uninstall
+uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
+	$(KUSTOMIZE) build config/crd | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f -
+
+.PHONY: deploy
+deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.
+	cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
+	$(KUSTOMIZE) build config/default | $(KUBECTL) apply -f -
+
+.PHONY: undeploy
+undeploy: kustomize ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
+	$(KUSTOMIZE) build config/default | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f -
+
+##@ Dependencies
+
+## Location to install dependencies to
+LOCALBIN ?= $(shell pwd)/bin
+$(LOCALBIN):
+	mkdir -p $(LOCALBIN)
+
+## Tool Binaries
+KUBECTL ?= kubectl
+KUSTOMIZE ?= $(LOCALBIN)/kustomize
+CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen
+ENVTEST ?= $(LOCALBIN)/setup-envtest
+GOLANGCI_LINT = $(LOCALBIN)/golangci-lint
+
+## Tool Versions
+KUSTOMIZE_VERSION ?= v5.4.3
+CONTROLLER_TOOLS_VERSION ?= v0.16.1
+ENVTEST_VERSION ?= release-0.19
+GOLANGCI_LINT_VERSION ?= v1.59.1
+
+.PHONY: kustomize
+kustomize: $(KUSTOMIZE) ## Download kustomize locally if necessary.
+$(KUSTOMIZE): $(LOCALBIN)
+	$(call go-install-tool,$(KUSTOMIZE),sigs.k8s.io/kustomize/kustomize/v5,$(KUSTOMIZE_VERSION))
+
+.PHONY: controller-gen
+controller-gen: $(CONTROLLER_GEN) ## Download controller-gen locally if necessary.
+$(CONTROLLER_GEN): $(LOCALBIN)
+	$(call go-install-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen,$(CONTROLLER_TOOLS_VERSION))
+
+.PHONY: envtest
+envtest: $(ENVTEST) ## Download setup-envtest locally if necessary.
+$(ENVTEST): $(LOCALBIN)
+	$(call go-install-tool,$(ENVTEST),sigs.k8s.io/controller-runtime/tools/setup-envtest,$(ENVTEST_VERSION))
+
+.PHONY: golangci-lint
+golangci-lint: $(GOLANGCI_LINT) ## Download golangci-lint locally if necessary.
+$(GOLANGCI_LINT): $(LOCALBIN)
+	$(call go-install-tool,$(GOLANGCI_LINT),github.com/golangci/golangci-lint/cmd/golangci-lint,$(GOLANGCI_LINT_VERSION))
+
+# go-install-tool will 'go install' any package with custom target and name of binary, if it doesn't exist
+# $1 - target path with name of binary
+# $2 - package url which can be installed
+# $3 - specific version of package
+define go-install-tool
+@[ -f "$(1)-$(3)" ] || { \
+set -e; \
+package=$(2)@$(3) ;\
+echo "Downloading $${package}" ;\
+rm -f $(1) || true ;\
+GOBIN=$(LOCALBIN) go install $${package} ;\
+mv $(1) $(1)-$(3) ;\
+} ;\
+ln -sf $(1)-$(3) $(1)
+endef
diff --git a/PROJECT b/PROJECT
new file mode 100644
index 00000000..1d40dcf3
--- /dev/null
+++ b/PROJECT
@@ -0,0 +1,27 @@
+# Code generated by tool. DO NOT EDIT.
+# This file is used to track the info used to scaffold your project
+# and allow the plugins properly work.
+# More info: https://book.kubebuilder.io/reference/project-config.html
+domain: x-k8s.io
+layout:
+- go.kubebuilder.io/v4
+projectName: llm-instance-gateway
+repo: sigs.k8s.io/llm-instance-gateway
+resources:
+- api:
+    crdVersion: v1
+    namespaced: true
+  domain: x-k8s.io
+  group: inference
+  kind: LLMServerPool
+  path: sigs.k8s.io/llm-instance-gateway/api/v1alpha1
+  version: v1alpha1
+- api:
+    crdVersion: v1
+    namespaced: true
+  domain: x-k8s.io
+  group: inference
+  kind: LLMService
+  path: sigs.k8s.io/llm-instance-gateway/api/v1alpha1
+  version: v1alpha1
+version: "3"
diff --git a/README.md b/README.md
index 94e8dbd1..0dcd692c 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,24 @@ This project is currently in development.
 
 For more rapid testing, our PoC is in the `./examples/` dir.
 
+
+## Getting Started
+
+**Install the CRDs into the cluster:**
+
+```sh
+make install
+```
+
+**Delete the APIs(CRDs) from the cluster:**
+
+```sh
+make uninstall
+```
+
+**Deploying the ext-proc image**
+Refer to this [README](https://github.com/kubernetes-sigs/llm-instance-gateway/blob/main/pkg/README.md) on how to deploy the Ext-Proc image used to support Instance Gateway.
+
 ## Contributing
 
 Our community meeting is weekly at Th 10AM PDT; [zoom link here](https://zoom.us/j/9955436256?pwd=Z2FQWU1jeDZkVC9RRTN4TlZyZTBHZz09).
diff --git a/api/Dockerfile b/api/Dockerfile
new file mode 100644
index 00000000..a48973ee
--- /dev/null
+++ b/api/Dockerfile
@@ -0,0 +1,33 @@
+# Build the manager binary
+FROM golang:1.22 AS builder
+ARG TARGETOS
+ARG TARGETARCH
+
+WORKDIR /workspace
+# Copy the Go Modules manifests
+COPY go.mod go.mod
+COPY go.sum go.sum
+# cache deps before building and copying source so that we don't need to re-download as much
+# and so that source changes don't invalidate our downloaded layer
+RUN go mod download
+
+# Copy the go source
+COPY cmd/main.go cmd/main.go
+COPY api/ api/
+COPY internal/controller/ internal/controller/
+
+# Build
+# the GOARCH has not a default value to allow the binary be built according to the host where the command
+# was called. For example, if we call make docker-build in a local env which has the Apple Silicon M1 SO
+# the docker BUILDPLATFORM arg will be linux/arm64 when for Apple x86 it will be linux/amd64. Therefore,
+# by leaving it empty we can ensure that the container and binary shipped on it will have the same platform.
+RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager cmd/main.go
+
+# Use distroless as minimal base image to package the manager binary
+# Refer to https://github.com/GoogleContainerTools/distroless for more details
+FROM gcr.io/distroless/static:nonroot
+WORKDIR /
+COPY --from=builder /workspace/manager .
+USER 65532:65532
+
+ENTRYPOINT ["/manager"]
diff --git a/api/v1alpha1/groupversion_info.go b/api/v1alpha1/groupversion_info.go
new file mode 100644
index 00000000..1b8782a0
--- /dev/null
+++ b/api/v1alpha1/groupversion_info.go
@@ -0,0 +1,45 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package v1alpha1 contains API Schema definitions for the gateway v1alpha1 API group
+// +kubebuilder:object:generate=true
+// +groupName=gateway.inference.networking.x-k8s.io
+package v1alpha1
+
+import (
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"sigs.k8s.io/controller-runtime/pkg/scheme"
+)
+
+var (
+	// GroupVersion is group version used to register these objects
+	GroupVersion = schema.GroupVersion{Group: "inference.networking.x-k8s.io", Version: "v1alpha1"}
+
+	// SchemeGroupVersion is alias to GroupVersion for client-go libraries.
+	// It is required by pkg/client/informers/externalversions/...
+	SchemeGroupVersion = GroupVersion
+
+	// SchemeBuilder is used to add go types to the GroupVersionKind scheme
+	SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
+
+	// AddToScheme adds the types in this group-version to the given scheme.
+	AddToScheme = SchemeBuilder.AddToScheme
+)
+
+// Resource is required by pkg/client/listers/...
+func Resource(resource string) schema.GroupResource {
+	return GroupVersion.WithResource(resource).GroupResource()
+}
diff --git a/api/v1alpha1/llmserverpool_types.go b/api/v1alpha1/llmserverpool_types.go
new file mode 100644
index 00000000..a18cef58
--- /dev/null
+++ b/api/v1alpha1/llmserverpool_types.go
@@ -0,0 +1,66 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+import (
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// NOTE: json tags are required.  Any new fields you add must have json tags for the fields to be serialized.
+
+// LLMServerPoolSpec defines the desired state of LLMServerPool
+type LLMServerPoolSpec struct {
+
+	// ModelServerSelector uses label selection to watch model server pods
+	// that should be included in the LLMServerPool. ModelServers should not
+	// be with any other Service or LLMServerPool, that behavior is not supported
+	// and will result in sub-optimal utilization.
+	ModelServerSelector metav1.LabelSelector `json:"modelServerSelector,omitempty"`
+}
+
+// LLMServerPoolStatus defines the observed state of LLMServerPool
+type LLMServerPoolStatus struct {
+
+	// Conditions track the state of the LLMServerPool.
+	Conditions []metav1.Condition `json:"conditions,omitempty"`
+}
+
+// +kubebuilder:object:root=true
+// +kubebuilder:subresource:status
+// +genclient
+
+// LLMServerPool is the Schema for the llmserverpools API
+type LLMServerPool struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	Spec   LLMServerPoolSpec   `json:"spec,omitempty"`
+	Status LLMServerPoolStatus `json:"status,omitempty"`
+}
+
+// +kubebuilder:object:root=true
+
+// LLMServerPoolList contains a list of LLMServerPool
+type LLMServerPoolList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []LLMServerPool `json:"items"`
+}
+
+func init() {
+	SchemeBuilder.Register(&LLMServerPool{}, &LLMServerPoolList{})
+}
diff --git a/api/v1alpha1/llmservice_types.go b/api/v1alpha1/llmservice_types.go
new file mode 100644
index 00000000..663f2ab1
--- /dev/null
+++ b/api/v1alpha1/llmservice_types.go
@@ -0,0 +1,136 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+import (
+	"time"
+
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// NOTE: json tags are required.  Any new fields you add must have json tags for the fields to be serialized.
+
+// LLMService represents a set of LLM services that are multiplexed onto one
+// or more LLMServerPools. This resource is managed by the "LLM Service Owner"
+// persona. The Service Owner persona is: a team that trains, verifies, and
+// leverages a large language model from a model frontend, drives the lifecycle
+// and rollout of new versions of those models, and defines the specific
+// performance and latency goals for the model. These services are
+// expected to operate within a LLMServerPool sharing compute capacity with other
+// LLMServices, defined by the Inference Platform Admin. We allow a user who
+// has multiple LLMServices across multiple pools (with the same config) to
+// specify the configuration exactly once, and deploy to many pools
+// simultaneously. Enabling a simpler config and single source of truth
+// for a given user. LLMService names are unique for a given LLMServerPool,
+// if the name is reused, an error will be  shown on the status of a
+// LLMService that attempted to reuse. The oldest LLMService, based on
+// creation timestamp, will be selected to remain valid. In the event of a race
+// condition, one will be selected at random.
+type LLMServiceSpec struct {
+	// Model defines the distinct services.
+	// Model can be in 2 priority classes, Critical and Noncritical.
+	// Priority class is implicitly set to Critical by specifying an Objective.
+	// Otherwise the Model is considered Noncritical.
+	Models []Model `json:"models,omitempty"`
+	// PoolRef are references to the backend pools that the LLMService registers to.
+	PoolRef []corev1.ObjectReference `json:"poolRef,omitempty"`
+}
+
+// Model defines the policies for routing the traffic of a use case, this includes performance objectives
+// and traffic splitting between different versions of the model.
+type Model struct {
+	// The name of the model as the users set in the "model" parameter in the requests.
+	// The name should be unique among the services that reference the same backend pool.
+	// This is the parameter that will be used to match the request with. In the future, we may
+	// allow to match on other request parameters. The other approach to support matching on
+	// on other request parameters is to use a different ModelName per HTTPFilter.
+	// Names can be reserved without implementing an actual model in the pool.
+	// This can be done by specifying a target model and setting the weight to zero,
+	// an error will be returned specifying that no valid target model is found.
+	Name string `json:"name,omitempty"`
+	// Optional
+	// LLM Services with an objective have higher priority than services without.
+	// IMPORTANT: By specifying an objective, this places the LLMService in a higher priority class than LLMServices without a defined priority class.
+	// In the face of resource-scarcity. Higher priority requests will be preserved, and lower priority class requests will be rejected.
+	Objective *Objective `json:"objective,omitempty"`
+	// Optional.
+	// Allow multiple versions of a model for traffic splitting.
+	// If not specified, the target model name is defaulted to the modelName parameter.
+	// modelName is often in reference to a LoRA adapter.
+	TargetModels []TargetModel `json:"targetModels,omitempty"`
+}
+
+// TargetModel represents a deployed model or a LoRA adapter. The
+// Name field is expected to match the name of the LoRA adapter
+// (or base model) as it is registered within the model server. Inference
+// Gateway assumes that the model exists on the model server and is the
+// responsibility of the user to validate a correct match. Should a model fail
+// to exist at request time, the error is processed by the Instance Gateway,
+// and then emitted on the appropriate LLMService object.
+type TargetModel struct {
+	// The name of the adapter as expected by the ModelServer.
+	Name string `json:"name,omitempty"`
+	// Weight is used to determine the percentage of traffic that should be
+	// sent to this target model when multiple versions of the model are specified.
+	Weight int `json:"weight,omitempty"`
+}
+
+// Objective captures the latency SLO of a LLM service.
+// In MVP, meeting the SLO is on a best effort basis.
+// Future: Extend the API for different behaviors of meeting the SLO.
+// The gateway will perform best-effort load balancing, and work with other components (e.g., autoscaler) to meet the
+// objectives.
+type Objective struct {
+	// The AverageLatencyPerOutputToken is calculated as the e2e request latency divided by output token
+	// length. Note that this is different from what is known as TPOT (time per output token) which only
+	// takes decode time into account.
+	// The P95 is calculated over a fixed time window defined at the operator level.
+	DesiredAveragePerOutputTokenLatencyAtP95OverMultipleRequests *time.Duration `json:"desiredAveragePerOutputTokenLatencyAtP95OverMultipleRequests,omitempty"`
+}
+
+// LLMServiceStatus defines the observed state of LLMService
+type LLMServiceStatus struct {
+	// Conditions track the state of the LLMServerPool.
+	Conditions []metav1.Condition `json:"conditions,omitempty"`
+}
+
+// +kubebuilder:object:root=true
+// +kubebuilder:subresource:status
+// +genclient
+
+// LLMService is the Schema for the llmservices API
+type LLMService struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	Spec   LLMServiceSpec   `json:"spec,omitempty"`
+	Status LLMServiceStatus `json:"status,omitempty"`
+}
+
+// +kubebuilder:object:root=true
+
+// LLMServiceList contains a list of LLMService
+type LLMServiceList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []LLMService `json:"items"`
+}
+
+func init() {
+	SchemeBuilder.Register(&LLMService{}, &LLMServiceList{})
+}
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
new file mode 100644
index 00000000..b47b41f7
--- /dev/null
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -0,0 +1,293 @@
+//go:build !ignore_autogenerated
+
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Code generated by controller-gen. DO NOT EDIT.
+
+package v1alpha1
+
+import (
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/apis/meta/v1"
+	runtime "k8s.io/apimachinery/pkg/runtime"
+	timex "time"
+)
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *LLMServerPool) DeepCopyInto(out *LLMServerPool) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
+	in.Spec.DeepCopyInto(&out.Spec)
+	in.Status.DeepCopyInto(&out.Status)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LLMServerPool.
+func (in *LLMServerPool) DeepCopy() *LLMServerPool {
+	if in == nil {
+		return nil
+	}
+	out := new(LLMServerPool)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *LLMServerPool) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *LLMServerPoolList) DeepCopyInto(out *LLMServerPoolList) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ListMeta.DeepCopyInto(&out.ListMeta)
+	if in.Items != nil {
+		in, out := &in.Items, &out.Items
+		*out = make([]LLMServerPool, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LLMServerPoolList.
+func (in *LLMServerPoolList) DeepCopy() *LLMServerPoolList {
+	if in == nil {
+		return nil
+	}
+	out := new(LLMServerPoolList)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *LLMServerPoolList) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *LLMServerPoolSpec) DeepCopyInto(out *LLMServerPoolSpec) {
+	*out = *in
+	in.ModelServerSelector.DeepCopyInto(&out.ModelServerSelector)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LLMServerPoolSpec.
+func (in *LLMServerPoolSpec) DeepCopy() *LLMServerPoolSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(LLMServerPoolSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *LLMServerPoolStatus) DeepCopyInto(out *LLMServerPoolStatus) {
+	*out = *in
+	if in.Conditions != nil {
+		in, out := &in.Conditions, &out.Conditions
+		*out = make([]v1.Condition, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LLMServerPoolStatus.
+func (in *LLMServerPoolStatus) DeepCopy() *LLMServerPoolStatus {
+	if in == nil {
+		return nil
+	}
+	out := new(LLMServerPoolStatus)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *LLMService) DeepCopyInto(out *LLMService) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
+	in.Spec.DeepCopyInto(&out.Spec)
+	in.Status.DeepCopyInto(&out.Status)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LLMService.
+func (in *LLMService) DeepCopy() *LLMService {
+	if in == nil {
+		return nil
+	}
+	out := new(LLMService)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *LLMService) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *LLMServiceList) DeepCopyInto(out *LLMServiceList) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ListMeta.DeepCopyInto(&out.ListMeta)
+	if in.Items != nil {
+		in, out := &in.Items, &out.Items
+		*out = make([]LLMService, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LLMServiceList.
+func (in *LLMServiceList) DeepCopy() *LLMServiceList {
+	if in == nil {
+		return nil
+	}
+	out := new(LLMServiceList)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *LLMServiceList) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *LLMServiceSpec) DeepCopyInto(out *LLMServiceSpec) {
+	*out = *in
+	if in.Models != nil {
+		in, out := &in.Models, &out.Models
+		*out = make([]Model, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+	if in.PoolRef != nil {
+		in, out := &in.PoolRef, &out.PoolRef
+		*out = make([]corev1.ObjectReference, len(*in))
+		copy(*out, *in)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LLMServiceSpec.
+func (in *LLMServiceSpec) DeepCopy() *LLMServiceSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(LLMServiceSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *LLMServiceStatus) DeepCopyInto(out *LLMServiceStatus) {
+	*out = *in
+	if in.Conditions != nil {
+		in, out := &in.Conditions, &out.Conditions
+		*out = make([]v1.Condition, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LLMServiceStatus.
+func (in *LLMServiceStatus) DeepCopy() *LLMServiceStatus {
+	if in == nil {
+		return nil
+	}
+	out := new(LLMServiceStatus)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *Model) DeepCopyInto(out *Model) {
+	*out = *in
+	if in.Objective != nil {
+		in, out := &in.Objective, &out.Objective
+		*out = new(Objective)
+		(*in).DeepCopyInto(*out)
+	}
+	if in.TargetModels != nil {
+		in, out := &in.TargetModels, &out.TargetModels
+		*out = make([]TargetModel, len(*in))
+		copy(*out, *in)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Model.
+func (in *Model) DeepCopy() *Model {
+	if in == nil {
+		return nil
+	}
+	out := new(Model)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *Objective) DeepCopyInto(out *Objective) {
+	*out = *in
+	if in.DesiredAveragePerOutputTokenLatencyAtP95OverMultipleRequests != nil {
+		in, out := &in.DesiredAveragePerOutputTokenLatencyAtP95OverMultipleRequests, &out.DesiredAveragePerOutputTokenLatencyAtP95OverMultipleRequests
+		*out = new(timex.Duration)
+		**out = **in
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Objective.
+func (in *Objective) DeepCopy() *Objective {
+	if in == nil {
+		return nil
+	}
+	out := new(Objective)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *TargetModel) DeepCopyInto(out *TargetModel) {
+	*out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TargetModel.
+func (in *TargetModel) DeepCopy() *TargetModel {
+	if in == nil {
+		return nil
+	}
+	out := new(TargetModel)
+	in.DeepCopyInto(out)
+	return out
+}
diff --git a/client-go/applyconfiguration/api/v1alpha1/llmserverpool.go b/client-go/applyconfiguration/api/v1alpha1/llmserverpool.go
new file mode 100644
index 00000000..1ce7f90c
--- /dev/null
+++ b/client-go/applyconfiguration/api/v1alpha1/llmserverpool.go
@@ -0,0 +1,224 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1alpha1
+
+import (
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	types "k8s.io/apimachinery/pkg/types"
+	v1 "k8s.io/client-go/applyconfigurations/meta/v1"
+)
+
+// LLMServerPoolApplyConfiguration represents a declarative configuration of the LLMServerPool type for use
+// with apply.
+type LLMServerPoolApplyConfiguration struct {
+	v1.TypeMetaApplyConfiguration    `json:",inline"`
+	*v1.ObjectMetaApplyConfiguration `json:"metadata,omitempty"`
+	Spec                             *LLMServerPoolSpecApplyConfiguration   `json:"spec,omitempty"`
+	Status                           *LLMServerPoolStatusApplyConfiguration `json:"status,omitempty"`
+}
+
+// LLMServerPool constructs a declarative configuration of the LLMServerPool type for use with
+// apply.
+func LLMServerPool(name, namespace string) *LLMServerPoolApplyConfiguration {
+	b := &LLMServerPoolApplyConfiguration{}
+	b.WithName(name)
+	b.WithNamespace(namespace)
+	b.WithKind("LLMServerPool")
+	b.WithAPIVersion("api/v1alpha1")
+	return b
+}
+
+// WithKind sets the Kind field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Kind field is set to the value of the last call.
+func (b *LLMServerPoolApplyConfiguration) WithKind(value string) *LLMServerPoolApplyConfiguration {
+	b.TypeMetaApplyConfiguration.Kind = &value
+	return b
+}
+
+// WithAPIVersion sets the APIVersion field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the APIVersion field is set to the value of the last call.
+func (b *LLMServerPoolApplyConfiguration) WithAPIVersion(value string) *LLMServerPoolApplyConfiguration {
+	b.TypeMetaApplyConfiguration.APIVersion = &value
+	return b
+}
+
+// WithName sets the Name field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Name field is set to the value of the last call.
+func (b *LLMServerPoolApplyConfiguration) WithName(value string) *LLMServerPoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.Name = &value
+	return b
+}
+
+// WithGenerateName sets the GenerateName field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the GenerateName field is set to the value of the last call.
+func (b *LLMServerPoolApplyConfiguration) WithGenerateName(value string) *LLMServerPoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.GenerateName = &value
+	return b
+}
+
+// WithNamespace sets the Namespace field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Namespace field is set to the value of the last call.
+func (b *LLMServerPoolApplyConfiguration) WithNamespace(value string) *LLMServerPoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.Namespace = &value
+	return b
+}
+
+// WithUID sets the UID field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the UID field is set to the value of the last call.
+func (b *LLMServerPoolApplyConfiguration) WithUID(value types.UID) *LLMServerPoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.UID = &value
+	return b
+}
+
+// WithResourceVersion sets the ResourceVersion field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the ResourceVersion field is set to the value of the last call.
+func (b *LLMServerPoolApplyConfiguration) WithResourceVersion(value string) *LLMServerPoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.ResourceVersion = &value
+	return b
+}
+
+// WithGeneration sets the Generation field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Generation field is set to the value of the last call.
+func (b *LLMServerPoolApplyConfiguration) WithGeneration(value int64) *LLMServerPoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.Generation = &value
+	return b
+}
+
+// WithCreationTimestamp sets the CreationTimestamp field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the CreationTimestamp field is set to the value of the last call.
+func (b *LLMServerPoolApplyConfiguration) WithCreationTimestamp(value metav1.Time) *LLMServerPoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.CreationTimestamp = &value
+	return b
+}
+
+// WithDeletionTimestamp sets the DeletionTimestamp field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the DeletionTimestamp field is set to the value of the last call.
+func (b *LLMServerPoolApplyConfiguration) WithDeletionTimestamp(value metav1.Time) *LLMServerPoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.DeletionTimestamp = &value
+	return b
+}
+
+// WithDeletionGracePeriodSeconds sets the DeletionGracePeriodSeconds field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the DeletionGracePeriodSeconds field is set to the value of the last call.
+func (b *LLMServerPoolApplyConfiguration) WithDeletionGracePeriodSeconds(value int64) *LLMServerPoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.DeletionGracePeriodSeconds = &value
+	return b
+}
+
+// WithLabels puts the entries into the Labels field in the declarative configuration
+// and returns the receiver, so that objects can be build by chaining "With" function invocations.
+// If called multiple times, the entries provided by each call will be put on the Labels field,
+// overwriting an existing map entries in Labels field with the same key.
+func (b *LLMServerPoolApplyConfiguration) WithLabels(entries map[string]string) *LLMServerPoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	if b.ObjectMetaApplyConfiguration.Labels == nil && len(entries) > 0 {
+		b.ObjectMetaApplyConfiguration.Labels = make(map[string]string, len(entries))
+	}
+	for k, v := range entries {
+		b.ObjectMetaApplyConfiguration.Labels[k] = v
+	}
+	return b
+}
+
+// WithAnnotations puts the entries into the Annotations field in the declarative configuration
+// and returns the receiver, so that objects can be build by chaining "With" function invocations.
+// If called multiple times, the entries provided by each call will be put on the Annotations field,
+// overwriting an existing map entries in Annotations field with the same key.
+func (b *LLMServerPoolApplyConfiguration) WithAnnotations(entries map[string]string) *LLMServerPoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	if b.ObjectMetaApplyConfiguration.Annotations == nil && len(entries) > 0 {
+		b.ObjectMetaApplyConfiguration.Annotations = make(map[string]string, len(entries))
+	}
+	for k, v := range entries {
+		b.ObjectMetaApplyConfiguration.Annotations[k] = v
+	}
+	return b
+}
+
+// WithOwnerReferences adds the given value to the OwnerReferences field in the declarative configuration
+// and returns the receiver, so that objects can be build by chaining "With" function invocations.
+// If called multiple times, values provided by each call will be appended to the OwnerReferences field.
+func (b *LLMServerPoolApplyConfiguration) WithOwnerReferences(values ...*v1.OwnerReferenceApplyConfiguration) *LLMServerPoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	for i := range values {
+		if values[i] == nil {
+			panic("nil value passed to WithOwnerReferences")
+		}
+		b.ObjectMetaApplyConfiguration.OwnerReferences = append(b.ObjectMetaApplyConfiguration.OwnerReferences, *values[i])
+	}
+	return b
+}
+
+// WithFinalizers adds the given value to the Finalizers field in the declarative configuration
+// and returns the receiver, so that objects can be build by chaining "With" function invocations.
+// If called multiple times, values provided by each call will be appended to the Finalizers field.
+func (b *LLMServerPoolApplyConfiguration) WithFinalizers(values ...string) *LLMServerPoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	for i := range values {
+		b.ObjectMetaApplyConfiguration.Finalizers = append(b.ObjectMetaApplyConfiguration.Finalizers, values[i])
+	}
+	return b
+}
+
+func (b *LLMServerPoolApplyConfiguration) ensureObjectMetaApplyConfigurationExists() {
+	if b.ObjectMetaApplyConfiguration == nil {
+		b.ObjectMetaApplyConfiguration = &v1.ObjectMetaApplyConfiguration{}
+	}
+}
+
+// WithSpec sets the Spec field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Spec field is set to the value of the last call.
+func (b *LLMServerPoolApplyConfiguration) WithSpec(value *LLMServerPoolSpecApplyConfiguration) *LLMServerPoolApplyConfiguration {
+	b.Spec = value
+	return b
+}
+
+// WithStatus sets the Status field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Status field is set to the value of the last call.
+func (b *LLMServerPoolApplyConfiguration) WithStatus(value *LLMServerPoolStatusApplyConfiguration) *LLMServerPoolApplyConfiguration {
+	b.Status = value
+	return b
+}
+
+// GetName retrieves the value of the Name field in the declarative configuration.
+func (b *LLMServerPoolApplyConfiguration) GetName() *string {
+	b.ensureObjectMetaApplyConfigurationExists()
+	return b.ObjectMetaApplyConfiguration.Name
+}
diff --git a/client-go/applyconfiguration/api/v1alpha1/llmserverpoolspec.go b/client-go/applyconfiguration/api/v1alpha1/llmserverpoolspec.go
new file mode 100644
index 00000000..8a69cfde
--- /dev/null
+++ b/client-go/applyconfiguration/api/v1alpha1/llmserverpoolspec.go
@@ -0,0 +1,42 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1alpha1
+
+import (
+	v1 "k8s.io/client-go/applyconfigurations/meta/v1"
+)
+
+// LLMServerPoolSpecApplyConfiguration represents a declarative configuration of the LLMServerPoolSpec type for use
+// with apply.
+type LLMServerPoolSpecApplyConfiguration struct {
+	ModelServerSelector *v1.LabelSelectorApplyConfiguration `json:"modelServerSelector,omitempty"`
+}
+
+// LLMServerPoolSpecApplyConfiguration constructs a declarative configuration of the LLMServerPoolSpec type for use with
+// apply.
+func LLMServerPoolSpec() *LLMServerPoolSpecApplyConfiguration {
+	return &LLMServerPoolSpecApplyConfiguration{}
+}
+
+// WithModelServerSelector sets the ModelServerSelector field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the ModelServerSelector field is set to the value of the last call.
+func (b *LLMServerPoolSpecApplyConfiguration) WithModelServerSelector(value *v1.LabelSelectorApplyConfiguration) *LLMServerPoolSpecApplyConfiguration {
+	b.ModelServerSelector = value
+	return b
+}
diff --git a/client-go/applyconfiguration/api/v1alpha1/llmserverpoolstatus.go b/client-go/applyconfiguration/api/v1alpha1/llmserverpoolstatus.go
new file mode 100644
index 00000000..a0ac77b4
--- /dev/null
+++ b/client-go/applyconfiguration/api/v1alpha1/llmserverpoolstatus.go
@@ -0,0 +1,47 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1alpha1
+
+import (
+	v1 "k8s.io/client-go/applyconfigurations/meta/v1"
+)
+
+// LLMServerPoolStatusApplyConfiguration represents a declarative configuration of the LLMServerPoolStatus type for use
+// with apply.
+type LLMServerPoolStatusApplyConfiguration struct {
+	Conditions []v1.ConditionApplyConfiguration `json:"conditions,omitempty"`
+}
+
+// LLMServerPoolStatusApplyConfiguration constructs a declarative configuration of the LLMServerPoolStatus type for use with
+// apply.
+func LLMServerPoolStatus() *LLMServerPoolStatusApplyConfiguration {
+	return &LLMServerPoolStatusApplyConfiguration{}
+}
+
+// WithConditions adds the given value to the Conditions field in the declarative configuration
+// and returns the receiver, so that objects can be build by chaining "With" function invocations.
+// If called multiple times, values provided by each call will be appended to the Conditions field.
+func (b *LLMServerPoolStatusApplyConfiguration) WithConditions(values ...*v1.ConditionApplyConfiguration) *LLMServerPoolStatusApplyConfiguration {
+	for i := range values {
+		if values[i] == nil {
+			panic("nil value passed to WithConditions")
+		}
+		b.Conditions = append(b.Conditions, *values[i])
+	}
+	return b
+}
diff --git a/client-go/applyconfiguration/api/v1alpha1/llmservice.go b/client-go/applyconfiguration/api/v1alpha1/llmservice.go
new file mode 100644
index 00000000..f1132064
--- /dev/null
+++ b/client-go/applyconfiguration/api/v1alpha1/llmservice.go
@@ -0,0 +1,224 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1alpha1
+
+import (
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	types "k8s.io/apimachinery/pkg/types"
+	v1 "k8s.io/client-go/applyconfigurations/meta/v1"
+)
+
+// LLMServiceApplyConfiguration represents a declarative configuration of the LLMService type for use
+// with apply.
+type LLMServiceApplyConfiguration struct {
+	v1.TypeMetaApplyConfiguration    `json:",inline"`
+	*v1.ObjectMetaApplyConfiguration `json:"metadata,omitempty"`
+	Spec                             *LLMServiceSpecApplyConfiguration   `json:"spec,omitempty"`
+	Status                           *LLMServiceStatusApplyConfiguration `json:"status,omitempty"`
+}
+
+// LLMService constructs a declarative configuration of the LLMService type for use with
+// apply.
+func LLMService(name, namespace string) *LLMServiceApplyConfiguration {
+	b := &LLMServiceApplyConfiguration{}
+	b.WithName(name)
+	b.WithNamespace(namespace)
+	b.WithKind("LLMService")
+	b.WithAPIVersion("api/v1alpha1")
+	return b
+}
+
+// WithKind sets the Kind field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Kind field is set to the value of the last call.
+func (b *LLMServiceApplyConfiguration) WithKind(value string) *LLMServiceApplyConfiguration {
+	b.TypeMetaApplyConfiguration.Kind = &value
+	return b
+}
+
+// WithAPIVersion sets the APIVersion field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the APIVersion field is set to the value of the last call.
+func (b *LLMServiceApplyConfiguration) WithAPIVersion(value string) *LLMServiceApplyConfiguration {
+	b.TypeMetaApplyConfiguration.APIVersion = &value
+	return b
+}
+
+// WithName sets the Name field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Name field is set to the value of the last call.
+func (b *LLMServiceApplyConfiguration) WithName(value string) *LLMServiceApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.Name = &value
+	return b
+}
+
+// WithGenerateName sets the GenerateName field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the GenerateName field is set to the value of the last call.
+func (b *LLMServiceApplyConfiguration) WithGenerateName(value string) *LLMServiceApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.GenerateName = &value
+	return b
+}
+
+// WithNamespace sets the Namespace field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Namespace field is set to the value of the last call.
+func (b *LLMServiceApplyConfiguration) WithNamespace(value string) *LLMServiceApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.Namespace = &value
+	return b
+}
+
+// WithUID sets the UID field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the UID field is set to the value of the last call.
+func (b *LLMServiceApplyConfiguration) WithUID(value types.UID) *LLMServiceApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.UID = &value
+	return b
+}
+
+// WithResourceVersion sets the ResourceVersion field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the ResourceVersion field is set to the value of the last call.
+func (b *LLMServiceApplyConfiguration) WithResourceVersion(value string) *LLMServiceApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.ResourceVersion = &value
+	return b
+}
+
+// WithGeneration sets the Generation field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Generation field is set to the value of the last call.
+func (b *LLMServiceApplyConfiguration) WithGeneration(value int64) *LLMServiceApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.Generation = &value
+	return b
+}
+
+// WithCreationTimestamp sets the CreationTimestamp field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the CreationTimestamp field is set to the value of the last call.
+func (b *LLMServiceApplyConfiguration) WithCreationTimestamp(value metav1.Time) *LLMServiceApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.CreationTimestamp = &value
+	return b
+}
+
+// WithDeletionTimestamp sets the DeletionTimestamp field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the DeletionTimestamp field is set to the value of the last call.
+func (b *LLMServiceApplyConfiguration) WithDeletionTimestamp(value metav1.Time) *LLMServiceApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.DeletionTimestamp = &value
+	return b
+}
+
+// WithDeletionGracePeriodSeconds sets the DeletionGracePeriodSeconds field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the DeletionGracePeriodSeconds field is set to the value of the last call.
+func (b *LLMServiceApplyConfiguration) WithDeletionGracePeriodSeconds(value int64) *LLMServiceApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.DeletionGracePeriodSeconds = &value
+	return b
+}
+
+// WithLabels puts the entries into the Labels field in the declarative configuration
+// and returns the receiver, so that objects can be build by chaining "With" function invocations.
+// If called multiple times, the entries provided by each call will be put on the Labels field,
+// overwriting an existing map entries in Labels field with the same key.
+func (b *LLMServiceApplyConfiguration) WithLabels(entries map[string]string) *LLMServiceApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	if b.ObjectMetaApplyConfiguration.Labels == nil && len(entries) > 0 {
+		b.ObjectMetaApplyConfiguration.Labels = make(map[string]string, len(entries))
+	}
+	for k, v := range entries {
+		b.ObjectMetaApplyConfiguration.Labels[k] = v
+	}
+	return b
+}
+
+// WithAnnotations puts the entries into the Annotations field in the declarative configuration
+// and returns the receiver, so that objects can be build by chaining "With" function invocations.
+// If called multiple times, the entries provided by each call will be put on the Annotations field,
+// overwriting an existing map entries in Annotations field with the same key.
+func (b *LLMServiceApplyConfiguration) WithAnnotations(entries map[string]string) *LLMServiceApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	if b.ObjectMetaApplyConfiguration.Annotations == nil && len(entries) > 0 {
+		b.ObjectMetaApplyConfiguration.Annotations = make(map[string]string, len(entries))
+	}
+	for k, v := range entries {
+		b.ObjectMetaApplyConfiguration.Annotations[k] = v
+	}
+	return b
+}
+
+// WithOwnerReferences adds the given value to the OwnerReferences field in the declarative configuration
+// and returns the receiver, so that objects can be build by chaining "With" function invocations.
+// If called multiple times, values provided by each call will be appended to the OwnerReferences field.
+func (b *LLMServiceApplyConfiguration) WithOwnerReferences(values ...*v1.OwnerReferenceApplyConfiguration) *LLMServiceApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	for i := range values {
+		if values[i] == nil {
+			panic("nil value passed to WithOwnerReferences")
+		}
+		b.ObjectMetaApplyConfiguration.OwnerReferences = append(b.ObjectMetaApplyConfiguration.OwnerReferences, *values[i])
+	}
+	return b
+}
+
+// WithFinalizers adds the given value to the Finalizers field in the declarative configuration
+// and returns the receiver, so that objects can be build by chaining "With" function invocations.
+// If called multiple times, values provided by each call will be appended to the Finalizers field.
+func (b *LLMServiceApplyConfiguration) WithFinalizers(values ...string) *LLMServiceApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	for i := range values {
+		b.ObjectMetaApplyConfiguration.Finalizers = append(b.ObjectMetaApplyConfiguration.Finalizers, values[i])
+	}
+	return b
+}
+
+func (b *LLMServiceApplyConfiguration) ensureObjectMetaApplyConfigurationExists() {
+	if b.ObjectMetaApplyConfiguration == nil {
+		b.ObjectMetaApplyConfiguration = &v1.ObjectMetaApplyConfiguration{}
+	}
+}
+
+// WithSpec sets the Spec field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Spec field is set to the value of the last call.
+func (b *LLMServiceApplyConfiguration) WithSpec(value *LLMServiceSpecApplyConfiguration) *LLMServiceApplyConfiguration {
+	b.Spec = value
+	return b
+}
+
+// WithStatus sets the Status field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Status field is set to the value of the last call.
+func (b *LLMServiceApplyConfiguration) WithStatus(value *LLMServiceStatusApplyConfiguration) *LLMServiceApplyConfiguration {
+	b.Status = value
+	return b
+}
+
+// GetName retrieves the value of the Name field in the declarative configuration.
+func (b *LLMServiceApplyConfiguration) GetName() *string {
+	b.ensureObjectMetaApplyConfigurationExists()
+	return b.ObjectMetaApplyConfiguration.Name
+}
diff --git a/client-go/applyconfiguration/api/v1alpha1/llmservicespec.go b/client-go/applyconfiguration/api/v1alpha1/llmservicespec.go
new file mode 100644
index 00000000..6a057217
--- /dev/null
+++ b/client-go/applyconfiguration/api/v1alpha1/llmservicespec.go
@@ -0,0 +1,58 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1alpha1
+
+import (
+	v1 "k8s.io/api/core/v1"
+)
+
+// LLMServiceSpecApplyConfiguration represents a declarative configuration of the LLMServiceSpec type for use
+// with apply.
+type LLMServiceSpecApplyConfiguration struct {
+	Models  []ModelApplyConfiguration `json:"models,omitempty"`
+	PoolRef []v1.ObjectReference      `json:"poolRef,omitempty"`
+}
+
+// LLMServiceSpecApplyConfiguration constructs a declarative configuration of the LLMServiceSpec type for use with
+// apply.
+func LLMServiceSpec() *LLMServiceSpecApplyConfiguration {
+	return &LLMServiceSpecApplyConfiguration{}
+}
+
+// WithModels adds the given value to the Models field in the declarative configuration
+// and returns the receiver, so that objects can be build by chaining "With" function invocations.
+// If called multiple times, values provided by each call will be appended to the Models field.
+func (b *LLMServiceSpecApplyConfiguration) WithModels(values ...*ModelApplyConfiguration) *LLMServiceSpecApplyConfiguration {
+	for i := range values {
+		if values[i] == nil {
+			panic("nil value passed to WithModels")
+		}
+		b.Models = append(b.Models, *values[i])
+	}
+	return b
+}
+
+// WithPoolRef adds the given value to the PoolRef field in the declarative configuration
+// and returns the receiver, so that objects can be build by chaining "With" function invocations.
+// If called multiple times, values provided by each call will be appended to the PoolRef field.
+func (b *LLMServiceSpecApplyConfiguration) WithPoolRef(values ...v1.ObjectReference) *LLMServiceSpecApplyConfiguration {
+	for i := range values {
+		b.PoolRef = append(b.PoolRef, values[i])
+	}
+	return b
+}
diff --git a/client-go/applyconfiguration/api/v1alpha1/llmservicestatus.go b/client-go/applyconfiguration/api/v1alpha1/llmservicestatus.go
new file mode 100644
index 00000000..67976753
--- /dev/null
+++ b/client-go/applyconfiguration/api/v1alpha1/llmservicestatus.go
@@ -0,0 +1,47 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1alpha1
+
+import (
+	v1 "k8s.io/client-go/applyconfigurations/meta/v1"
+)
+
+// LLMServiceStatusApplyConfiguration represents a declarative configuration of the LLMServiceStatus type for use
+// with apply.
+type LLMServiceStatusApplyConfiguration struct {
+	Conditions []v1.ConditionApplyConfiguration `json:"conditions,omitempty"`
+}
+
+// LLMServiceStatusApplyConfiguration constructs a declarative configuration of the LLMServiceStatus type for use with
+// apply.
+func LLMServiceStatus() *LLMServiceStatusApplyConfiguration {
+	return &LLMServiceStatusApplyConfiguration{}
+}
+
+// WithConditions adds the given value to the Conditions field in the declarative configuration
+// and returns the receiver, so that objects can be build by chaining "With" function invocations.
+// If called multiple times, values provided by each call will be appended to the Conditions field.
+func (b *LLMServiceStatusApplyConfiguration) WithConditions(values ...*v1.ConditionApplyConfiguration) *LLMServiceStatusApplyConfiguration {
+	for i := range values {
+		if values[i] == nil {
+			panic("nil value passed to WithConditions")
+		}
+		b.Conditions = append(b.Conditions, *values[i])
+	}
+	return b
+}
diff --git a/client-go/applyconfiguration/api/v1alpha1/model.go b/client-go/applyconfiguration/api/v1alpha1/model.go
new file mode 100644
index 00000000..b4eb76cd
--- /dev/null
+++ b/client-go/applyconfiguration/api/v1alpha1/model.go
@@ -0,0 +1,61 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1alpha1
+
+// ModelApplyConfiguration represents a declarative configuration of the Model type for use
+// with apply.
+type ModelApplyConfiguration struct {
+	Name         *string                         `json:"name,omitempty"`
+	Objective    *ObjectiveApplyConfiguration    `json:"objective,omitempty"`
+	TargetModels []TargetModelApplyConfiguration `json:"targetModels,omitempty"`
+}
+
+// ModelApplyConfiguration constructs a declarative configuration of the Model type for use with
+// apply.
+func Model() *ModelApplyConfiguration {
+	return &ModelApplyConfiguration{}
+}
+
+// WithName sets the Name field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Name field is set to the value of the last call.
+func (b *ModelApplyConfiguration) WithName(value string) *ModelApplyConfiguration {
+	b.Name = &value
+	return b
+}
+
+// WithObjective sets the Objective field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Objective field is set to the value of the last call.
+func (b *ModelApplyConfiguration) WithObjective(value *ObjectiveApplyConfiguration) *ModelApplyConfiguration {
+	b.Objective = value
+	return b
+}
+
+// WithTargetModels adds the given value to the TargetModels field in the declarative configuration
+// and returns the receiver, so that objects can be build by chaining "With" function invocations.
+// If called multiple times, values provided by each call will be appended to the TargetModels field.
+func (b *ModelApplyConfiguration) WithTargetModels(values ...*TargetModelApplyConfiguration) *ModelApplyConfiguration {
+	for i := range values {
+		if values[i] == nil {
+			panic("nil value passed to WithTargetModels")
+		}
+		b.TargetModels = append(b.TargetModels, *values[i])
+	}
+	return b
+}
diff --git a/client-go/applyconfiguration/api/v1alpha1/objective.go b/client-go/applyconfiguration/api/v1alpha1/objective.go
new file mode 100644
index 00000000..a6c04a39
--- /dev/null
+++ b/client-go/applyconfiguration/api/v1alpha1/objective.go
@@ -0,0 +1,42 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1alpha1
+
+import (
+	time "time"
+)
+
+// ObjectiveApplyConfiguration represents a declarative configuration of the Objective type for use
+// with apply.
+type ObjectiveApplyConfiguration struct {
+	DesiredAveragePerOutputTokenLatencyAtP95OverMultipleRequests *time.Duration `json:"desiredAveragePerOutputTokenLatencyAtP95OverMultipleRequests,omitempty"`
+}
+
+// ObjectiveApplyConfiguration constructs a declarative configuration of the Objective type for use with
+// apply.
+func Objective() *ObjectiveApplyConfiguration {
+	return &ObjectiveApplyConfiguration{}
+}
+
+// WithDesiredAveragePerOutputTokenLatencyAtP95OverMultipleRequests sets the DesiredAveragePerOutputTokenLatencyAtP95OverMultipleRequests field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the DesiredAveragePerOutputTokenLatencyAtP95OverMultipleRequests field is set to the value of the last call.
+func (b *ObjectiveApplyConfiguration) WithDesiredAveragePerOutputTokenLatencyAtP95OverMultipleRequests(value time.Duration) *ObjectiveApplyConfiguration {
+	b.DesiredAveragePerOutputTokenLatencyAtP95OverMultipleRequests = &value
+	return b
+}
diff --git a/client-go/applyconfiguration/api/v1alpha1/targetmodel.go b/client-go/applyconfiguration/api/v1alpha1/targetmodel.go
new file mode 100644
index 00000000..557196bc
--- /dev/null
+++ b/client-go/applyconfiguration/api/v1alpha1/targetmodel.go
@@ -0,0 +1,47 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1alpha1
+
+// TargetModelApplyConfiguration represents a declarative configuration of the TargetModel type for use
+// with apply.
+type TargetModelApplyConfiguration struct {
+	Name   *string `json:"name,omitempty"`
+	Weight *int    `json:"weight,omitempty"`
+}
+
+// TargetModelApplyConfiguration constructs a declarative configuration of the TargetModel type for use with
+// apply.
+func TargetModel() *TargetModelApplyConfiguration {
+	return &TargetModelApplyConfiguration{}
+}
+
+// WithName sets the Name field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Name field is set to the value of the last call.
+func (b *TargetModelApplyConfiguration) WithName(value string) *TargetModelApplyConfiguration {
+	b.Name = &value
+	return b
+}
+
+// WithWeight sets the Weight field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Weight field is set to the value of the last call.
+func (b *TargetModelApplyConfiguration) WithWeight(value int) *TargetModelApplyConfiguration {
+	b.Weight = &value
+	return b
+}
diff --git a/client-go/applyconfiguration/internal/internal.go b/client-go/applyconfiguration/internal/internal.go
new file mode 100644
index 00000000..8f24c8ba
--- /dev/null
+++ b/client-go/applyconfiguration/internal/internal.go
@@ -0,0 +1,61 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package internal
+
+import (
+	fmt "fmt"
+	sync "sync"
+
+	typed "sigs.k8s.io/structured-merge-diff/v4/typed"
+)
+
+func Parser() *typed.Parser {
+	parserOnce.Do(func() {
+		var err error
+		parser, err = typed.NewParser(schemaYAML)
+		if err != nil {
+			panic(fmt.Sprintf("Failed to parse schema: %v", err))
+		}
+	})
+	return parser
+}
+
+var parserOnce sync.Once
+var parser *typed.Parser
+var schemaYAML = typed.YAMLObject(`types:
+- name: __untyped_atomic_
+  scalar: untyped
+  list:
+    elementType:
+      namedType: __untyped_atomic_
+    elementRelationship: atomic
+  map:
+    elementType:
+      namedType: __untyped_atomic_
+    elementRelationship: atomic
+- name: __untyped_deduced_
+  scalar: untyped
+  list:
+    elementType:
+      namedType: __untyped_atomic_
+    elementRelationship: atomic
+  map:
+    elementType:
+      namedType: __untyped_deduced_
+    elementRelationship: separable
+`)
diff --git a/client-go/applyconfiguration/utils.go b/client-go/applyconfiguration/utils.go
new file mode 100644
index 00000000..ab022626
--- /dev/null
+++ b/client-go/applyconfiguration/utils.go
@@ -0,0 +1,59 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package applyconfiguration
+
+import (
+	v1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/api/v1alpha1"
+	apiv1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/client-go/applyconfiguration/api/v1alpha1"
+	internal "inference.networking.x-k8s.io/llm-instance-gateway/client-go/applyconfiguration/internal"
+	runtime "k8s.io/apimachinery/pkg/runtime"
+	schema "k8s.io/apimachinery/pkg/runtime/schema"
+	testing "k8s.io/client-go/testing"
+)
+
+// ForKind returns an apply configuration type for the given GroupVersionKind, or nil if no
+// apply configuration type exists for the given GroupVersionKind.
+func ForKind(kind schema.GroupVersionKind) interface{} {
+	switch kind {
+	// Group=api, Version=v1alpha1
+	case v1alpha1.SchemeGroupVersion.WithKind("LLMServerPool"):
+		return &apiv1alpha1.LLMServerPoolApplyConfiguration{}
+	case v1alpha1.SchemeGroupVersion.WithKind("LLMServerPoolSpec"):
+		return &apiv1alpha1.LLMServerPoolSpecApplyConfiguration{}
+	case v1alpha1.SchemeGroupVersion.WithKind("LLMServerPoolStatus"):
+		return &apiv1alpha1.LLMServerPoolStatusApplyConfiguration{}
+	case v1alpha1.SchemeGroupVersion.WithKind("LLMService"):
+		return &apiv1alpha1.LLMServiceApplyConfiguration{}
+	case v1alpha1.SchemeGroupVersion.WithKind("LLMServiceSpec"):
+		return &apiv1alpha1.LLMServiceSpecApplyConfiguration{}
+	case v1alpha1.SchemeGroupVersion.WithKind("LLMServiceStatus"):
+		return &apiv1alpha1.LLMServiceStatusApplyConfiguration{}
+	case v1alpha1.SchemeGroupVersion.WithKind("Model"):
+		return &apiv1alpha1.ModelApplyConfiguration{}
+	case v1alpha1.SchemeGroupVersion.WithKind("Objective"):
+		return &apiv1alpha1.ObjectiveApplyConfiguration{}
+	case v1alpha1.SchemeGroupVersion.WithKind("TargetModel"):
+		return &apiv1alpha1.TargetModelApplyConfiguration{}
+
+	}
+	return nil
+}
+
+func NewTypeConverter(scheme *runtime.Scheme) *testing.TypeConverter {
+	return &testing.TypeConverter{Scheme: scheme, TypeResolver: internal.Parser()}
+}
diff --git a/client-go/clientset/versioned/clientset.go b/client-go/clientset/versioned/clientset.go
new file mode 100644
index 00000000..506bce0e
--- /dev/null
+++ b/client-go/clientset/versioned/clientset.go
@@ -0,0 +1,119 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+package versioned
+
+import (
+	fmt "fmt"
+	http "net/http"
+
+	apiv1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/client-go/clientset/versioned/typed/api/v1alpha1"
+	discovery "k8s.io/client-go/discovery"
+	rest "k8s.io/client-go/rest"
+	flowcontrol "k8s.io/client-go/util/flowcontrol"
+)
+
+type Interface interface {
+	Discovery() discovery.DiscoveryInterface
+	ApiV1alpha1() apiv1alpha1.ApiV1alpha1Interface
+}
+
+// Clientset contains the clients for groups.
+type Clientset struct {
+	*discovery.DiscoveryClient
+	apiV1alpha1 *apiv1alpha1.ApiV1alpha1Client
+}
+
+// ApiV1alpha1 retrieves the ApiV1alpha1Client
+func (c *Clientset) ApiV1alpha1() apiv1alpha1.ApiV1alpha1Interface {
+	return c.apiV1alpha1
+}
+
+// Discovery retrieves the DiscoveryClient
+func (c *Clientset) Discovery() discovery.DiscoveryInterface {
+	if c == nil {
+		return nil
+	}
+	return c.DiscoveryClient
+}
+
+// NewForConfig creates a new Clientset for the given config.
+// If config's RateLimiter is not set and QPS and Burst are acceptable,
+// NewForConfig will generate a rate-limiter in configShallowCopy.
+// NewForConfig is equivalent to NewForConfigAndClient(c, httpClient),
+// where httpClient was generated with rest.HTTPClientFor(c).
+func NewForConfig(c *rest.Config) (*Clientset, error) {
+	configShallowCopy := *c
+
+	if configShallowCopy.UserAgent == "" {
+		configShallowCopy.UserAgent = rest.DefaultKubernetesUserAgent()
+	}
+
+	// share the transport between all clients
+	httpClient, err := rest.HTTPClientFor(&configShallowCopy)
+	if err != nil {
+		return nil, err
+	}
+
+	return NewForConfigAndClient(&configShallowCopy, httpClient)
+}
+
+// NewForConfigAndClient creates a new Clientset for the given config and http client.
+// Note the http client provided takes precedence over the configured transport values.
+// If config's RateLimiter is not set and QPS and Burst are acceptable,
+// NewForConfigAndClient will generate a rate-limiter in configShallowCopy.
+func NewForConfigAndClient(c *rest.Config, httpClient *http.Client) (*Clientset, error) {
+	configShallowCopy := *c
+	if configShallowCopy.RateLimiter == nil && configShallowCopy.QPS > 0 {
+		if configShallowCopy.Burst <= 0 {
+			return nil, fmt.Errorf("burst is required to be greater than 0 when RateLimiter is not set and QPS is set to greater than 0")
+		}
+		configShallowCopy.RateLimiter = flowcontrol.NewTokenBucketRateLimiter(configShallowCopy.QPS, configShallowCopy.Burst)
+	}
+
+	var cs Clientset
+	var err error
+	cs.apiV1alpha1, err = apiv1alpha1.NewForConfigAndClient(&configShallowCopy, httpClient)
+	if err != nil {
+		return nil, err
+	}
+
+	cs.DiscoveryClient, err = discovery.NewDiscoveryClientForConfigAndClient(&configShallowCopy, httpClient)
+	if err != nil {
+		return nil, err
+	}
+	return &cs, nil
+}
+
+// NewForConfigOrDie creates a new Clientset for the given config and
+// panics if there is an error in the config.
+func NewForConfigOrDie(c *rest.Config) *Clientset {
+	cs, err := NewForConfig(c)
+	if err != nil {
+		panic(err)
+	}
+	return cs
+}
+
+// New creates a new Clientset for the given RESTClient.
+func New(c rest.Interface) *Clientset {
+	var cs Clientset
+	cs.apiV1alpha1 = apiv1alpha1.New(c)
+
+	cs.DiscoveryClient = discovery.NewDiscoveryClient(c)
+	return &cs
+}
diff --git a/client-go/clientset/versioned/fake/clientset_generated.go b/client-go/clientset/versioned/fake/clientset_generated.go
new file mode 100644
index 00000000..fd4ad406
--- /dev/null
+++ b/client-go/clientset/versioned/fake/clientset_generated.go
@@ -0,0 +1,121 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+package fake
+
+import (
+	applyconfiguration "inference.networking.x-k8s.io/llm-instance-gateway/client-go/applyconfiguration"
+	clientset "inference.networking.x-k8s.io/llm-instance-gateway/client-go/clientset/versioned"
+	apiv1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/client-go/clientset/versioned/typed/api/v1alpha1"
+	fakeapiv1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/client-go/clientset/versioned/typed/api/v1alpha1/fake"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/watch"
+	"k8s.io/client-go/discovery"
+	fakediscovery "k8s.io/client-go/discovery/fake"
+	"k8s.io/client-go/testing"
+)
+
+// NewSimpleClientset returns a clientset that will respond with the provided objects.
+// It's backed by a very simple object tracker that processes creates, updates and deletions as-is,
+// without applying any field management, validations and/or defaults. It shouldn't be considered a replacement
+// for a real clientset and is mostly useful in simple unit tests.
+//
+// DEPRECATED: NewClientset replaces this with support for field management, which significantly improves
+// server side apply testing. NewClientset is only available when apply configurations are generated (e.g.
+// via --with-applyconfig).
+func NewSimpleClientset(objects ...runtime.Object) *Clientset {
+	o := testing.NewObjectTracker(scheme, codecs.UniversalDecoder())
+	for _, obj := range objects {
+		if err := o.Add(obj); err != nil {
+			panic(err)
+		}
+	}
+
+	cs := &Clientset{tracker: o}
+	cs.discovery = &fakediscovery.FakeDiscovery{Fake: &cs.Fake}
+	cs.AddReactor("*", "*", testing.ObjectReaction(o))
+	cs.AddWatchReactor("*", func(action testing.Action) (handled bool, ret watch.Interface, err error) {
+		gvr := action.GetResource()
+		ns := action.GetNamespace()
+		watch, err := o.Watch(gvr, ns)
+		if err != nil {
+			return false, nil, err
+		}
+		return true, watch, nil
+	})
+
+	return cs
+}
+
+// Clientset implements clientset.Interface. Meant to be embedded into a
+// struct to get a default implementation. This makes faking out just the method
+// you want to test easier.
+type Clientset struct {
+	testing.Fake
+	discovery *fakediscovery.FakeDiscovery
+	tracker   testing.ObjectTracker
+}
+
+func (c *Clientset) Discovery() discovery.DiscoveryInterface {
+	return c.discovery
+}
+
+func (c *Clientset) Tracker() testing.ObjectTracker {
+	return c.tracker
+}
+
+// NewClientset returns a clientset that will respond with the provided objects.
+// It's backed by a very simple object tracker that processes creates, updates and deletions as-is,
+// without applying any validations and/or defaults. It shouldn't be considered a replacement
+// for a real clientset and is mostly useful in simple unit tests.
+func NewClientset(objects ...runtime.Object) *Clientset {
+	o := testing.NewFieldManagedObjectTracker(
+		scheme,
+		codecs.UniversalDecoder(),
+		applyconfiguration.NewTypeConverter(scheme),
+	)
+	for _, obj := range objects {
+		if err := o.Add(obj); err != nil {
+			panic(err)
+		}
+	}
+
+	cs := &Clientset{tracker: o}
+	cs.discovery = &fakediscovery.FakeDiscovery{Fake: &cs.Fake}
+	cs.AddReactor("*", "*", testing.ObjectReaction(o))
+	cs.AddWatchReactor("*", func(action testing.Action) (handled bool, ret watch.Interface, err error) {
+		gvr := action.GetResource()
+		ns := action.GetNamespace()
+		watch, err := o.Watch(gvr, ns)
+		if err != nil {
+			return false, nil, err
+		}
+		return true, watch, nil
+	})
+
+	return cs
+}
+
+var (
+	_ clientset.Interface = &Clientset{}
+	_ testing.FakeClient  = &Clientset{}
+)
+
+// ApiV1alpha1 retrieves the ApiV1alpha1Client
+func (c *Clientset) ApiV1alpha1() apiv1alpha1.ApiV1alpha1Interface {
+	return &fakeapiv1alpha1.FakeApiV1alpha1{Fake: &c.Fake}
+}
diff --git a/client-go/clientset/versioned/fake/doc.go b/client-go/clientset/versioned/fake/doc.go
new file mode 100644
index 00000000..57c8c1bc
--- /dev/null
+++ b/client-go/clientset/versioned/fake/doc.go
@@ -0,0 +1,19 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+// This package has the automatically generated fake clientset.
+package fake
diff --git a/client-go/clientset/versioned/fake/register.go b/client-go/clientset/versioned/fake/register.go
new file mode 100644
index 00000000..6c06233c
--- /dev/null
+++ b/client-go/clientset/versioned/fake/register.go
@@ -0,0 +1,55 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+package fake
+
+import (
+	apiv1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/api/v1alpha1"
+	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	runtime "k8s.io/apimachinery/pkg/runtime"
+	schema "k8s.io/apimachinery/pkg/runtime/schema"
+	serializer "k8s.io/apimachinery/pkg/runtime/serializer"
+	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
+)
+
+var scheme = runtime.NewScheme()
+var codecs = serializer.NewCodecFactory(scheme)
+
+var localSchemeBuilder = runtime.SchemeBuilder{
+	apiv1alpha1.AddToScheme,
+}
+
+// AddToScheme adds all types of this clientset into the given scheme. This allows composition
+// of clientsets, like in:
+//
+//	import (
+//	  "k8s.io/client-go/kubernetes"
+//	  clientsetscheme "k8s.io/client-go/kubernetes/scheme"
+//	  aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme"
+//	)
+//
+//	kclientset, _ := kubernetes.NewForConfig(c)
+//	_ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme)
+//
+// After this, RawExtensions in Kubernetes types will serialize kube-aggregator types
+// correctly.
+var AddToScheme = localSchemeBuilder.AddToScheme
+
+func init() {
+	v1.AddToGroupVersion(scheme, schema.GroupVersion{Version: "v1"})
+	utilruntime.Must(AddToScheme(scheme))
+}
diff --git a/client-go/clientset/versioned/scheme/doc.go b/client-go/clientset/versioned/scheme/doc.go
new file mode 100644
index 00000000..b00a2a62
--- /dev/null
+++ b/client-go/clientset/versioned/scheme/doc.go
@@ -0,0 +1,19 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+// This package contains the scheme of the automatically generated clientset.
+package scheme
diff --git a/client-go/clientset/versioned/scheme/register.go b/client-go/clientset/versioned/scheme/register.go
new file mode 100644
index 00000000..b2c6025d
--- /dev/null
+++ b/client-go/clientset/versioned/scheme/register.go
@@ -0,0 +1,55 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+package scheme
+
+import (
+	apiv1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/api/v1alpha1"
+	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	runtime "k8s.io/apimachinery/pkg/runtime"
+	schema "k8s.io/apimachinery/pkg/runtime/schema"
+	serializer "k8s.io/apimachinery/pkg/runtime/serializer"
+	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
+)
+
+var Scheme = runtime.NewScheme()
+var Codecs = serializer.NewCodecFactory(Scheme)
+var ParameterCodec = runtime.NewParameterCodec(Scheme)
+var localSchemeBuilder = runtime.SchemeBuilder{
+	apiv1alpha1.AddToScheme,
+}
+
+// AddToScheme adds all types of this clientset into the given scheme. This allows composition
+// of clientsets, like in:
+//
+//	import (
+//	  "k8s.io/client-go/kubernetes"
+//	  clientsetscheme "k8s.io/client-go/kubernetes/scheme"
+//	  aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme"
+//	)
+//
+//	kclientset, _ := kubernetes.NewForConfig(c)
+//	_ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme)
+//
+// After this, RawExtensions in Kubernetes types will serialize kube-aggregator types
+// correctly.
+var AddToScheme = localSchemeBuilder.AddToScheme
+
+func init() {
+	v1.AddToGroupVersion(Scheme, schema.GroupVersion{Version: "v1"})
+	utilruntime.Must(AddToScheme(Scheme))
+}
diff --git a/client-go/clientset/versioned/typed/api/v1alpha1/api_client.go b/client-go/clientset/versioned/typed/api/v1alpha1/api_client.go
new file mode 100644
index 00000000..e4ad9b19
--- /dev/null
+++ b/client-go/clientset/versioned/typed/api/v1alpha1/api_client.go
@@ -0,0 +1,111 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+package v1alpha1
+
+import (
+	http "net/http"
+
+	apiv1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/api/v1alpha1"
+	scheme "inference.networking.x-k8s.io/llm-instance-gateway/client-go/clientset/versioned/scheme"
+	rest "k8s.io/client-go/rest"
+)
+
+type ApiV1alpha1Interface interface {
+	RESTClient() rest.Interface
+	LLMServerPoolsGetter
+	LLMServicesGetter
+}
+
+// ApiV1alpha1Client is used to interact with features provided by the api group.
+type ApiV1alpha1Client struct {
+	restClient rest.Interface
+}
+
+func (c *ApiV1alpha1Client) LLMServerPools(namespace string) LLMServerPoolInterface {
+	return newLLMServerPools(c, namespace)
+}
+
+func (c *ApiV1alpha1Client) LLMServices(namespace string) LLMServiceInterface {
+	return newLLMServices(c, namespace)
+}
+
+// NewForConfig creates a new ApiV1alpha1Client for the given config.
+// NewForConfig is equivalent to NewForConfigAndClient(c, httpClient),
+// where httpClient was generated with rest.HTTPClientFor(c).
+func NewForConfig(c *rest.Config) (*ApiV1alpha1Client, error) {
+	config := *c
+	if err := setConfigDefaults(&config); err != nil {
+		return nil, err
+	}
+	httpClient, err := rest.HTTPClientFor(&config)
+	if err != nil {
+		return nil, err
+	}
+	return NewForConfigAndClient(&config, httpClient)
+}
+
+// NewForConfigAndClient creates a new ApiV1alpha1Client for the given config and http client.
+// Note the http client provided takes precedence over the configured transport values.
+func NewForConfigAndClient(c *rest.Config, h *http.Client) (*ApiV1alpha1Client, error) {
+	config := *c
+	if err := setConfigDefaults(&config); err != nil {
+		return nil, err
+	}
+	client, err := rest.RESTClientForConfigAndClient(&config, h)
+	if err != nil {
+		return nil, err
+	}
+	return &ApiV1alpha1Client{client}, nil
+}
+
+// NewForConfigOrDie creates a new ApiV1alpha1Client for the given config and
+// panics if there is an error in the config.
+func NewForConfigOrDie(c *rest.Config) *ApiV1alpha1Client {
+	client, err := NewForConfig(c)
+	if err != nil {
+		panic(err)
+	}
+	return client
+}
+
+// New creates a new ApiV1alpha1Client for the given RESTClient.
+func New(c rest.Interface) *ApiV1alpha1Client {
+	return &ApiV1alpha1Client{c}
+}
+
+func setConfigDefaults(config *rest.Config) error {
+	gv := apiv1alpha1.SchemeGroupVersion
+	config.GroupVersion = &gv
+	config.APIPath = "/apis"
+	config.NegotiatedSerializer = scheme.Codecs.WithoutConversion()
+
+	if config.UserAgent == "" {
+		config.UserAgent = rest.DefaultKubernetesUserAgent()
+	}
+
+	return nil
+}
+
+// RESTClient returns a RESTClient that is used to communicate
+// with API server by this client implementation.
+func (c *ApiV1alpha1Client) RESTClient() rest.Interface {
+	if c == nil {
+		return nil
+	}
+	return c.restClient
+}
diff --git a/client-go/clientset/versioned/typed/api/v1alpha1/doc.go b/client-go/clientset/versioned/typed/api/v1alpha1/doc.go
new file mode 100644
index 00000000..c0c36e9b
--- /dev/null
+++ b/client-go/clientset/versioned/typed/api/v1alpha1/doc.go
@@ -0,0 +1,19 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+// This package has the automatically generated typed clients.
+package v1alpha1
diff --git a/client-go/clientset/versioned/typed/api/v1alpha1/fake/doc.go b/client-go/clientset/versioned/typed/api/v1alpha1/fake/doc.go
new file mode 100644
index 00000000..3514f7d2
--- /dev/null
+++ b/client-go/clientset/versioned/typed/api/v1alpha1/fake/doc.go
@@ -0,0 +1,19 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+// Package fake has the automatically generated clients.
+package fake
diff --git a/client-go/clientset/versioned/typed/api/v1alpha1/fake/fake_api_client.go b/client-go/clientset/versioned/typed/api/v1alpha1/fake/fake_api_client.go
new file mode 100644
index 00000000..3dd733d8
--- /dev/null
+++ b/client-go/clientset/versioned/typed/api/v1alpha1/fake/fake_api_client.go
@@ -0,0 +1,43 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+package fake
+
+import (
+	v1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/client-go/clientset/versioned/typed/api/v1alpha1"
+	rest "k8s.io/client-go/rest"
+	testing "k8s.io/client-go/testing"
+)
+
+type FakeApiV1alpha1 struct {
+	*testing.Fake
+}
+
+func (c *FakeApiV1alpha1) LLMServerPools(namespace string) v1alpha1.LLMServerPoolInterface {
+	return &FakeLLMServerPools{c, namespace}
+}
+
+func (c *FakeApiV1alpha1) LLMServices(namespace string) v1alpha1.LLMServiceInterface {
+	return &FakeLLMServices{c, namespace}
+}
+
+// RESTClient returns a RESTClient that is used to communicate
+// with API server by this client implementation.
+func (c *FakeApiV1alpha1) RESTClient() rest.Interface {
+	var ret *rest.RESTClient
+	return ret
+}
diff --git a/client-go/clientset/versioned/typed/api/v1alpha1/fake/fake_llmserverpool.go b/client-go/clientset/versioned/typed/api/v1alpha1/fake/fake_llmserverpool.go
new file mode 100644
index 00000000..9ccf801c
--- /dev/null
+++ b/client-go/clientset/versioned/typed/api/v1alpha1/fake/fake_llmserverpool.go
@@ -0,0 +1,196 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+package fake
+
+import (
+	context "context"
+	json "encoding/json"
+	fmt "fmt"
+
+	v1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/api/v1alpha1"
+	apiv1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/client-go/applyconfiguration/api/v1alpha1"
+	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	labels "k8s.io/apimachinery/pkg/labels"
+	types "k8s.io/apimachinery/pkg/types"
+	watch "k8s.io/apimachinery/pkg/watch"
+	testing "k8s.io/client-go/testing"
+)
+
+// FakeLLMServerPools implements LLMServerPoolInterface
+type FakeLLMServerPools struct {
+	Fake *FakeApiV1alpha1
+	ns   string
+}
+
+var llmserverpoolsResource = v1alpha1.SchemeGroupVersion.WithResource("llmserverpools")
+
+var llmserverpoolsKind = v1alpha1.SchemeGroupVersion.WithKind("LLMServerPool")
+
+// Get takes name of the lLMServerPool, and returns the corresponding lLMServerPool object, and an error if there is any.
+func (c *FakeLLMServerPools) Get(ctx context.Context, name string, options v1.GetOptions) (result *v1alpha1.LLMServerPool, err error) {
+	emptyResult := &v1alpha1.LLMServerPool{}
+	obj, err := c.Fake.
+		Invokes(testing.NewGetActionWithOptions(llmserverpoolsResource, c.ns, name, options), emptyResult)
+
+	if obj == nil {
+		return emptyResult, err
+	}
+	return obj.(*v1alpha1.LLMServerPool), err
+}
+
+// List takes label and field selectors, and returns the list of LLMServerPools that match those selectors.
+func (c *FakeLLMServerPools) List(ctx context.Context, opts v1.ListOptions) (result *v1alpha1.LLMServerPoolList, err error) {
+	emptyResult := &v1alpha1.LLMServerPoolList{}
+	obj, err := c.Fake.
+		Invokes(testing.NewListActionWithOptions(llmserverpoolsResource, llmserverpoolsKind, c.ns, opts), emptyResult)
+
+	if obj == nil {
+		return emptyResult, err
+	}
+
+	label, _, _ := testing.ExtractFromListOptions(opts)
+	if label == nil {
+		label = labels.Everything()
+	}
+	list := &v1alpha1.LLMServerPoolList{ListMeta: obj.(*v1alpha1.LLMServerPoolList).ListMeta}
+	for _, item := range obj.(*v1alpha1.LLMServerPoolList).Items {
+		if label.Matches(labels.Set(item.Labels)) {
+			list.Items = append(list.Items, item)
+		}
+	}
+	return list, err
+}
+
+// Watch returns a watch.Interface that watches the requested lLMServerPools.
+func (c *FakeLLMServerPools) Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) {
+	return c.Fake.
+		InvokesWatch(testing.NewWatchActionWithOptions(llmserverpoolsResource, c.ns, opts))
+
+}
+
+// Create takes the representation of a lLMServerPool and creates it.  Returns the server's representation of the lLMServerPool, and an error, if there is any.
+func (c *FakeLLMServerPools) Create(ctx context.Context, lLMServerPool *v1alpha1.LLMServerPool, opts v1.CreateOptions) (result *v1alpha1.LLMServerPool, err error) {
+	emptyResult := &v1alpha1.LLMServerPool{}
+	obj, err := c.Fake.
+		Invokes(testing.NewCreateActionWithOptions(llmserverpoolsResource, c.ns, lLMServerPool, opts), emptyResult)
+
+	if obj == nil {
+		return emptyResult, err
+	}
+	return obj.(*v1alpha1.LLMServerPool), err
+}
+
+// Update takes the representation of a lLMServerPool and updates it. Returns the server's representation of the lLMServerPool, and an error, if there is any.
+func (c *FakeLLMServerPools) Update(ctx context.Context, lLMServerPool *v1alpha1.LLMServerPool, opts v1.UpdateOptions) (result *v1alpha1.LLMServerPool, err error) {
+	emptyResult := &v1alpha1.LLMServerPool{}
+	obj, err := c.Fake.
+		Invokes(testing.NewUpdateActionWithOptions(llmserverpoolsResource, c.ns, lLMServerPool, opts), emptyResult)
+
+	if obj == nil {
+		return emptyResult, err
+	}
+	return obj.(*v1alpha1.LLMServerPool), err
+}
+
+// UpdateStatus was generated because the type contains a Status member.
+// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus().
+func (c *FakeLLMServerPools) UpdateStatus(ctx context.Context, lLMServerPool *v1alpha1.LLMServerPool, opts v1.UpdateOptions) (result *v1alpha1.LLMServerPool, err error) {
+	emptyResult := &v1alpha1.LLMServerPool{}
+	obj, err := c.Fake.
+		Invokes(testing.NewUpdateSubresourceActionWithOptions(llmserverpoolsResource, "status", c.ns, lLMServerPool, opts), emptyResult)
+
+	if obj == nil {
+		return emptyResult, err
+	}
+	return obj.(*v1alpha1.LLMServerPool), err
+}
+
+// Delete takes name of the lLMServerPool and deletes it. Returns an error if one occurs.
+func (c *FakeLLMServerPools) Delete(ctx context.Context, name string, opts v1.DeleteOptions) error {
+	_, err := c.Fake.
+		Invokes(testing.NewDeleteActionWithOptions(llmserverpoolsResource, c.ns, name, opts), &v1alpha1.LLMServerPool{})
+
+	return err
+}
+
+// DeleteCollection deletes a collection of objects.
+func (c *FakeLLMServerPools) DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error {
+	action := testing.NewDeleteCollectionActionWithOptions(llmserverpoolsResource, c.ns, opts, listOpts)
+
+	_, err := c.Fake.Invokes(action, &v1alpha1.LLMServerPoolList{})
+	return err
+}
+
+// Patch applies the patch and returns the patched lLMServerPool.
+func (c *FakeLLMServerPools) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *v1alpha1.LLMServerPool, err error) {
+	emptyResult := &v1alpha1.LLMServerPool{}
+	obj, err := c.Fake.
+		Invokes(testing.NewPatchSubresourceActionWithOptions(llmserverpoolsResource, c.ns, name, pt, data, opts, subresources...), emptyResult)
+
+	if obj == nil {
+		return emptyResult, err
+	}
+	return obj.(*v1alpha1.LLMServerPool), err
+}
+
+// Apply takes the given apply declarative configuration, applies it and returns the applied lLMServerPool.
+func (c *FakeLLMServerPools) Apply(ctx context.Context, lLMServerPool *apiv1alpha1.LLMServerPoolApplyConfiguration, opts v1.ApplyOptions) (result *v1alpha1.LLMServerPool, err error) {
+	if lLMServerPool == nil {
+		return nil, fmt.Errorf("lLMServerPool provided to Apply must not be nil")
+	}
+	data, err := json.Marshal(lLMServerPool)
+	if err != nil {
+		return nil, err
+	}
+	name := lLMServerPool.Name
+	if name == nil {
+		return nil, fmt.Errorf("lLMServerPool.Name must be provided to Apply")
+	}
+	emptyResult := &v1alpha1.LLMServerPool{}
+	obj, err := c.Fake.
+		Invokes(testing.NewPatchSubresourceActionWithOptions(llmserverpoolsResource, c.ns, *name, types.ApplyPatchType, data, opts.ToPatchOptions()), emptyResult)
+
+	if obj == nil {
+		return emptyResult, err
+	}
+	return obj.(*v1alpha1.LLMServerPool), err
+}
+
+// ApplyStatus was generated because the type contains a Status member.
+// Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus().
+func (c *FakeLLMServerPools) ApplyStatus(ctx context.Context, lLMServerPool *apiv1alpha1.LLMServerPoolApplyConfiguration, opts v1.ApplyOptions) (result *v1alpha1.LLMServerPool, err error) {
+	if lLMServerPool == nil {
+		return nil, fmt.Errorf("lLMServerPool provided to Apply must not be nil")
+	}
+	data, err := json.Marshal(lLMServerPool)
+	if err != nil {
+		return nil, err
+	}
+	name := lLMServerPool.Name
+	if name == nil {
+		return nil, fmt.Errorf("lLMServerPool.Name must be provided to Apply")
+	}
+	emptyResult := &v1alpha1.LLMServerPool{}
+	obj, err := c.Fake.
+		Invokes(testing.NewPatchSubresourceActionWithOptions(llmserverpoolsResource, c.ns, *name, types.ApplyPatchType, data, opts.ToPatchOptions(), "status"), emptyResult)
+
+	if obj == nil {
+		return emptyResult, err
+	}
+	return obj.(*v1alpha1.LLMServerPool), err
+}
diff --git a/client-go/clientset/versioned/typed/api/v1alpha1/fake/fake_llmservice.go b/client-go/clientset/versioned/typed/api/v1alpha1/fake/fake_llmservice.go
new file mode 100644
index 00000000..9e86cd94
--- /dev/null
+++ b/client-go/clientset/versioned/typed/api/v1alpha1/fake/fake_llmservice.go
@@ -0,0 +1,196 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+package fake
+
+import (
+	context "context"
+	json "encoding/json"
+	fmt "fmt"
+
+	v1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/api/v1alpha1"
+	apiv1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/client-go/applyconfiguration/api/v1alpha1"
+	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	labels "k8s.io/apimachinery/pkg/labels"
+	types "k8s.io/apimachinery/pkg/types"
+	watch "k8s.io/apimachinery/pkg/watch"
+	testing "k8s.io/client-go/testing"
+)
+
+// FakeLLMServices implements LLMServiceInterface
+type FakeLLMServices struct {
+	Fake *FakeApiV1alpha1
+	ns   string
+}
+
+var llmservicesResource = v1alpha1.SchemeGroupVersion.WithResource("llmservices")
+
+var llmservicesKind = v1alpha1.SchemeGroupVersion.WithKind("LLMService")
+
+// Get takes name of the lLMService, and returns the corresponding lLMService object, and an error if there is any.
+func (c *FakeLLMServices) Get(ctx context.Context, name string, options v1.GetOptions) (result *v1alpha1.LLMService, err error) {
+	emptyResult := &v1alpha1.LLMService{}
+	obj, err := c.Fake.
+		Invokes(testing.NewGetActionWithOptions(llmservicesResource, c.ns, name, options), emptyResult)
+
+	if obj == nil {
+		return emptyResult, err
+	}
+	return obj.(*v1alpha1.LLMService), err
+}
+
+// List takes label and field selectors, and returns the list of LLMServices that match those selectors.
+func (c *FakeLLMServices) List(ctx context.Context, opts v1.ListOptions) (result *v1alpha1.LLMServiceList, err error) {
+	emptyResult := &v1alpha1.LLMServiceList{}
+	obj, err := c.Fake.
+		Invokes(testing.NewListActionWithOptions(llmservicesResource, llmservicesKind, c.ns, opts), emptyResult)
+
+	if obj == nil {
+		return emptyResult, err
+	}
+
+	label, _, _ := testing.ExtractFromListOptions(opts)
+	if label == nil {
+		label = labels.Everything()
+	}
+	list := &v1alpha1.LLMServiceList{ListMeta: obj.(*v1alpha1.LLMServiceList).ListMeta}
+	for _, item := range obj.(*v1alpha1.LLMServiceList).Items {
+		if label.Matches(labels.Set(item.Labels)) {
+			list.Items = append(list.Items, item)
+		}
+	}
+	return list, err
+}
+
+// Watch returns a watch.Interface that watches the requested lLMServices.
+func (c *FakeLLMServices) Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) {
+	return c.Fake.
+		InvokesWatch(testing.NewWatchActionWithOptions(llmservicesResource, c.ns, opts))
+
+}
+
+// Create takes the representation of a lLMService and creates it.  Returns the server's representation of the lLMService, and an error, if there is any.
+func (c *FakeLLMServices) Create(ctx context.Context, lLMService *v1alpha1.LLMService, opts v1.CreateOptions) (result *v1alpha1.LLMService, err error) {
+	emptyResult := &v1alpha1.LLMService{}
+	obj, err := c.Fake.
+		Invokes(testing.NewCreateActionWithOptions(llmservicesResource, c.ns, lLMService, opts), emptyResult)
+
+	if obj == nil {
+		return emptyResult, err
+	}
+	return obj.(*v1alpha1.LLMService), err
+}
+
+// Update takes the representation of a lLMService and updates it. Returns the server's representation of the lLMService, and an error, if there is any.
+func (c *FakeLLMServices) Update(ctx context.Context, lLMService *v1alpha1.LLMService, opts v1.UpdateOptions) (result *v1alpha1.LLMService, err error) {
+	emptyResult := &v1alpha1.LLMService{}
+	obj, err := c.Fake.
+		Invokes(testing.NewUpdateActionWithOptions(llmservicesResource, c.ns, lLMService, opts), emptyResult)
+
+	if obj == nil {
+		return emptyResult, err
+	}
+	return obj.(*v1alpha1.LLMService), err
+}
+
+// UpdateStatus was generated because the type contains a Status member.
+// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus().
+func (c *FakeLLMServices) UpdateStatus(ctx context.Context, lLMService *v1alpha1.LLMService, opts v1.UpdateOptions) (result *v1alpha1.LLMService, err error) {
+	emptyResult := &v1alpha1.LLMService{}
+	obj, err := c.Fake.
+		Invokes(testing.NewUpdateSubresourceActionWithOptions(llmservicesResource, "status", c.ns, lLMService, opts), emptyResult)
+
+	if obj == nil {
+		return emptyResult, err
+	}
+	return obj.(*v1alpha1.LLMService), err
+}
+
+// Delete takes name of the lLMService and deletes it. Returns an error if one occurs.
+func (c *FakeLLMServices) Delete(ctx context.Context, name string, opts v1.DeleteOptions) error {
+	_, err := c.Fake.
+		Invokes(testing.NewDeleteActionWithOptions(llmservicesResource, c.ns, name, opts), &v1alpha1.LLMService{})
+
+	return err
+}
+
+// DeleteCollection deletes a collection of objects.
+func (c *FakeLLMServices) DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error {
+	action := testing.NewDeleteCollectionActionWithOptions(llmservicesResource, c.ns, opts, listOpts)
+
+	_, err := c.Fake.Invokes(action, &v1alpha1.LLMServiceList{})
+	return err
+}
+
+// Patch applies the patch and returns the patched lLMService.
+func (c *FakeLLMServices) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *v1alpha1.LLMService, err error) {
+	emptyResult := &v1alpha1.LLMService{}
+	obj, err := c.Fake.
+		Invokes(testing.NewPatchSubresourceActionWithOptions(llmservicesResource, c.ns, name, pt, data, opts, subresources...), emptyResult)
+
+	if obj == nil {
+		return emptyResult, err
+	}
+	return obj.(*v1alpha1.LLMService), err
+}
+
+// Apply takes the given apply declarative configuration, applies it and returns the applied lLMService.
+func (c *FakeLLMServices) Apply(ctx context.Context, lLMService *apiv1alpha1.LLMServiceApplyConfiguration, opts v1.ApplyOptions) (result *v1alpha1.LLMService, err error) {
+	if lLMService == nil {
+		return nil, fmt.Errorf("lLMService provided to Apply must not be nil")
+	}
+	data, err := json.Marshal(lLMService)
+	if err != nil {
+		return nil, err
+	}
+	name := lLMService.Name
+	if name == nil {
+		return nil, fmt.Errorf("lLMService.Name must be provided to Apply")
+	}
+	emptyResult := &v1alpha1.LLMService{}
+	obj, err := c.Fake.
+		Invokes(testing.NewPatchSubresourceActionWithOptions(llmservicesResource, c.ns, *name, types.ApplyPatchType, data, opts.ToPatchOptions()), emptyResult)
+
+	if obj == nil {
+		return emptyResult, err
+	}
+	return obj.(*v1alpha1.LLMService), err
+}
+
+// ApplyStatus was generated because the type contains a Status member.
+// Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus().
+func (c *FakeLLMServices) ApplyStatus(ctx context.Context, lLMService *apiv1alpha1.LLMServiceApplyConfiguration, opts v1.ApplyOptions) (result *v1alpha1.LLMService, err error) {
+	if lLMService == nil {
+		return nil, fmt.Errorf("lLMService provided to Apply must not be nil")
+	}
+	data, err := json.Marshal(lLMService)
+	if err != nil {
+		return nil, err
+	}
+	name := lLMService.Name
+	if name == nil {
+		return nil, fmt.Errorf("lLMService.Name must be provided to Apply")
+	}
+	emptyResult := &v1alpha1.LLMService{}
+	obj, err := c.Fake.
+		Invokes(testing.NewPatchSubresourceActionWithOptions(llmservicesResource, c.ns, *name, types.ApplyPatchType, data, opts.ToPatchOptions(), "status"), emptyResult)
+
+	if obj == nil {
+		return emptyResult, err
+	}
+	return obj.(*v1alpha1.LLMService), err
+}
diff --git a/client-go/clientset/versioned/typed/api/v1alpha1/generated_expansion.go b/client-go/clientset/versioned/typed/api/v1alpha1/generated_expansion.go
new file mode 100644
index 00000000..671db271
--- /dev/null
+++ b/client-go/clientset/versioned/typed/api/v1alpha1/generated_expansion.go
@@ -0,0 +1,22 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+package v1alpha1
+
+type LLMServerPoolExpansion interface{}
+
+type LLMServiceExpansion interface{}
diff --git a/client-go/clientset/versioned/typed/api/v1alpha1/llmserverpool.go b/client-go/clientset/versioned/typed/api/v1alpha1/llmserverpool.go
new file mode 100644
index 00000000..2e863410
--- /dev/null
+++ b/client-go/clientset/versioned/typed/api/v1alpha1/llmserverpool.go
@@ -0,0 +1,72 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+package v1alpha1
+
+import (
+	context "context"
+
+	apiv1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/api/v1alpha1"
+	applyconfigurationapiv1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/client-go/applyconfiguration/api/v1alpha1"
+	scheme "inference.networking.x-k8s.io/llm-instance-gateway/client-go/clientset/versioned/scheme"
+	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	types "k8s.io/apimachinery/pkg/types"
+	watch "k8s.io/apimachinery/pkg/watch"
+	gentype "k8s.io/client-go/gentype"
+)
+
+// LLMServerPoolsGetter has a method to return a LLMServerPoolInterface.
+// A group's client should implement this interface.
+type LLMServerPoolsGetter interface {
+	LLMServerPools(namespace string) LLMServerPoolInterface
+}
+
+// LLMServerPoolInterface has methods to work with LLMServerPool resources.
+type LLMServerPoolInterface interface {
+	Create(ctx context.Context, lLMServerPool *apiv1alpha1.LLMServerPool, opts v1.CreateOptions) (*apiv1alpha1.LLMServerPool, error)
+	Update(ctx context.Context, lLMServerPool *apiv1alpha1.LLMServerPool, opts v1.UpdateOptions) (*apiv1alpha1.LLMServerPool, error)
+	// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus().
+	UpdateStatus(ctx context.Context, lLMServerPool *apiv1alpha1.LLMServerPool, opts v1.UpdateOptions) (*apiv1alpha1.LLMServerPool, error)
+	Delete(ctx context.Context, name string, opts v1.DeleteOptions) error
+	DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error
+	Get(ctx context.Context, name string, opts v1.GetOptions) (*apiv1alpha1.LLMServerPool, error)
+	List(ctx context.Context, opts v1.ListOptions) (*apiv1alpha1.LLMServerPoolList, error)
+	Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error)
+	Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *apiv1alpha1.LLMServerPool, err error)
+	Apply(ctx context.Context, lLMServerPool *applyconfigurationapiv1alpha1.LLMServerPoolApplyConfiguration, opts v1.ApplyOptions) (result *apiv1alpha1.LLMServerPool, err error)
+	// Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus().
+	ApplyStatus(ctx context.Context, lLMServerPool *applyconfigurationapiv1alpha1.LLMServerPoolApplyConfiguration, opts v1.ApplyOptions) (result *apiv1alpha1.LLMServerPool, err error)
+	LLMServerPoolExpansion
+}
+
+// lLMServerPools implements LLMServerPoolInterface
+type lLMServerPools struct {
+	*gentype.ClientWithListAndApply[*apiv1alpha1.LLMServerPool, *apiv1alpha1.LLMServerPoolList, *applyconfigurationapiv1alpha1.LLMServerPoolApplyConfiguration]
+}
+
+// newLLMServerPools returns a LLMServerPools
+func newLLMServerPools(c *ApiV1alpha1Client, namespace string) *lLMServerPools {
+	return &lLMServerPools{
+		gentype.NewClientWithListAndApply[*apiv1alpha1.LLMServerPool, *apiv1alpha1.LLMServerPoolList, *applyconfigurationapiv1alpha1.LLMServerPoolApplyConfiguration](
+			"llmserverpools",
+			c.RESTClient(),
+			scheme.ParameterCodec,
+			namespace,
+			func() *apiv1alpha1.LLMServerPool { return &apiv1alpha1.LLMServerPool{} },
+			func() *apiv1alpha1.LLMServerPoolList { return &apiv1alpha1.LLMServerPoolList{} }),
+	}
+}
diff --git a/client-go/clientset/versioned/typed/api/v1alpha1/llmservice.go b/client-go/clientset/versioned/typed/api/v1alpha1/llmservice.go
new file mode 100644
index 00000000..c7832baa
--- /dev/null
+++ b/client-go/clientset/versioned/typed/api/v1alpha1/llmservice.go
@@ -0,0 +1,72 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+package v1alpha1
+
+import (
+	context "context"
+
+	apiv1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/api/v1alpha1"
+	applyconfigurationapiv1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/client-go/applyconfiguration/api/v1alpha1"
+	scheme "inference.networking.x-k8s.io/llm-instance-gateway/client-go/clientset/versioned/scheme"
+	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	types "k8s.io/apimachinery/pkg/types"
+	watch "k8s.io/apimachinery/pkg/watch"
+	gentype "k8s.io/client-go/gentype"
+)
+
+// LLMServicesGetter has a method to return a LLMServiceInterface.
+// A group's client should implement this interface.
+type LLMServicesGetter interface {
+	LLMServices(namespace string) LLMServiceInterface
+}
+
+// LLMServiceInterface has methods to work with LLMService resources.
+type LLMServiceInterface interface {
+	Create(ctx context.Context, lLMService *apiv1alpha1.LLMService, opts v1.CreateOptions) (*apiv1alpha1.LLMService, error)
+	Update(ctx context.Context, lLMService *apiv1alpha1.LLMService, opts v1.UpdateOptions) (*apiv1alpha1.LLMService, error)
+	// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus().
+	UpdateStatus(ctx context.Context, lLMService *apiv1alpha1.LLMService, opts v1.UpdateOptions) (*apiv1alpha1.LLMService, error)
+	Delete(ctx context.Context, name string, opts v1.DeleteOptions) error
+	DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error
+	Get(ctx context.Context, name string, opts v1.GetOptions) (*apiv1alpha1.LLMService, error)
+	List(ctx context.Context, opts v1.ListOptions) (*apiv1alpha1.LLMServiceList, error)
+	Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error)
+	Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *apiv1alpha1.LLMService, err error)
+	Apply(ctx context.Context, lLMService *applyconfigurationapiv1alpha1.LLMServiceApplyConfiguration, opts v1.ApplyOptions) (result *apiv1alpha1.LLMService, err error)
+	// Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus().
+	ApplyStatus(ctx context.Context, lLMService *applyconfigurationapiv1alpha1.LLMServiceApplyConfiguration, opts v1.ApplyOptions) (result *apiv1alpha1.LLMService, err error)
+	LLMServiceExpansion
+}
+
+// lLMServices implements LLMServiceInterface
+type lLMServices struct {
+	*gentype.ClientWithListAndApply[*apiv1alpha1.LLMService, *apiv1alpha1.LLMServiceList, *applyconfigurationapiv1alpha1.LLMServiceApplyConfiguration]
+}
+
+// newLLMServices returns a LLMServices
+func newLLMServices(c *ApiV1alpha1Client, namespace string) *lLMServices {
+	return &lLMServices{
+		gentype.NewClientWithListAndApply[*apiv1alpha1.LLMService, *apiv1alpha1.LLMServiceList, *applyconfigurationapiv1alpha1.LLMServiceApplyConfiguration](
+			"llmservices",
+			c.RESTClient(),
+			scheme.ParameterCodec,
+			namespace,
+			func() *apiv1alpha1.LLMService { return &apiv1alpha1.LLMService{} },
+			func() *apiv1alpha1.LLMServiceList { return &apiv1alpha1.LLMServiceList{} }),
+	}
+}
diff --git a/client-go/informers/externalversions/api/interface.go b/client-go/informers/externalversions/api/interface.go
new file mode 100644
index 00000000..1a47e2c7
--- /dev/null
+++ b/client-go/informers/externalversions/api/interface.go
@@ -0,0 +1,45 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by informer-gen. DO NOT EDIT.
+
+package api
+
+import (
+	v1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/client-go/informers/externalversions/api/v1alpha1"
+	internalinterfaces "inference.networking.x-k8s.io/llm-instance-gateway/client-go/informers/externalversions/internalinterfaces"
+)
+
+// Interface provides access to each of this group's versions.
+type Interface interface {
+	// V1alpha1 provides access to shared informers for resources in V1alpha1.
+	V1alpha1() v1alpha1.Interface
+}
+
+type group struct {
+	factory          internalinterfaces.SharedInformerFactory
+	namespace        string
+	tweakListOptions internalinterfaces.TweakListOptionsFunc
+}
+
+// New returns a new Interface.
+func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) Interface {
+	return &group{factory: f, namespace: namespace, tweakListOptions: tweakListOptions}
+}
+
+// V1alpha1 returns a new v1alpha1.Interface.
+func (g *group) V1alpha1() v1alpha1.Interface {
+	return v1alpha1.New(g.factory, g.namespace, g.tweakListOptions)
+}
diff --git a/client-go/informers/externalversions/api/v1alpha1/interface.go b/client-go/informers/externalversions/api/v1alpha1/interface.go
new file mode 100644
index 00000000..70bb8e46
--- /dev/null
+++ b/client-go/informers/externalversions/api/v1alpha1/interface.go
@@ -0,0 +1,51 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by informer-gen. DO NOT EDIT.
+
+package v1alpha1
+
+import (
+	internalinterfaces "inference.networking.x-k8s.io/llm-instance-gateway/client-go/informers/externalversions/internalinterfaces"
+)
+
+// Interface provides access to all the informers in this group version.
+type Interface interface {
+	// LLMServerPools returns a LLMServerPoolInformer.
+	LLMServerPools() LLMServerPoolInformer
+	// LLMServices returns a LLMServiceInformer.
+	LLMServices() LLMServiceInformer
+}
+
+type version struct {
+	factory          internalinterfaces.SharedInformerFactory
+	namespace        string
+	tweakListOptions internalinterfaces.TweakListOptionsFunc
+}
+
+// New returns a new Interface.
+func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) Interface {
+	return &version{factory: f, namespace: namespace, tweakListOptions: tweakListOptions}
+}
+
+// LLMServerPools returns a LLMServerPoolInformer.
+func (v *version) LLMServerPools() LLMServerPoolInformer {
+	return &lLMServerPoolInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions}
+}
+
+// LLMServices returns a LLMServiceInformer.
+func (v *version) LLMServices() LLMServiceInformer {
+	return &lLMServiceInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions}
+}
diff --git a/client-go/informers/externalversions/api/v1alpha1/llmserverpool.go b/client-go/informers/externalversions/api/v1alpha1/llmserverpool.go
new file mode 100644
index 00000000..e08bb227
--- /dev/null
+++ b/client-go/informers/externalversions/api/v1alpha1/llmserverpool.go
@@ -0,0 +1,89 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by informer-gen. DO NOT EDIT.
+
+package v1alpha1
+
+import (
+	context "context"
+	time "time"
+
+	llminstancegatewayapiv1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/api/v1alpha1"
+	versioned "inference.networking.x-k8s.io/llm-instance-gateway/client-go/clientset/versioned"
+	internalinterfaces "inference.networking.x-k8s.io/llm-instance-gateway/client-go/informers/externalversions/internalinterfaces"
+	apiv1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/client-go/listers/api/v1alpha1"
+	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	runtime "k8s.io/apimachinery/pkg/runtime"
+	watch "k8s.io/apimachinery/pkg/watch"
+	cache "k8s.io/client-go/tools/cache"
+)
+
+// LLMServerPoolInformer provides access to a shared informer and lister for
+// LLMServerPools.
+type LLMServerPoolInformer interface {
+	Informer() cache.SharedIndexInformer
+	Lister() apiv1alpha1.LLMServerPoolLister
+}
+
+type lLMServerPoolInformer struct {
+	factory          internalinterfaces.SharedInformerFactory
+	tweakListOptions internalinterfaces.TweakListOptionsFunc
+	namespace        string
+}
+
+// NewLLMServerPoolInformer constructs a new informer for LLMServerPool type.
+// Always prefer using an informer factory to get a shared informer instead of getting an independent
+// one. This reduces memory footprint and number of connections to the server.
+func NewLLMServerPoolInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer {
+	return NewFilteredLLMServerPoolInformer(client, namespace, resyncPeriod, indexers, nil)
+}
+
+// NewFilteredLLMServerPoolInformer constructs a new informer for LLMServerPool type.
+// Always prefer using an informer factory to get a shared informer instead of getting an independent
+// one. This reduces memory footprint and number of connections to the server.
+func NewFilteredLLMServerPoolInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer {
+	return cache.NewSharedIndexInformer(
+		&cache.ListWatch{
+			ListFunc: func(options v1.ListOptions) (runtime.Object, error) {
+				if tweakListOptions != nil {
+					tweakListOptions(&options)
+				}
+				return client.ApiV1alpha1().LLMServerPools(namespace).List(context.TODO(), options)
+			},
+			WatchFunc: func(options v1.ListOptions) (watch.Interface, error) {
+				if tweakListOptions != nil {
+					tweakListOptions(&options)
+				}
+				return client.ApiV1alpha1().LLMServerPools(namespace).Watch(context.TODO(), options)
+			},
+		},
+		&llminstancegatewayapiv1alpha1.LLMServerPool{},
+		resyncPeriod,
+		indexers,
+	)
+}
+
+func (f *lLMServerPoolInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer {
+	return NewFilteredLLMServerPoolInformer(client, f.namespace, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions)
+}
+
+func (f *lLMServerPoolInformer) Informer() cache.SharedIndexInformer {
+	return f.factory.InformerFor(&llminstancegatewayapiv1alpha1.LLMServerPool{}, f.defaultInformer)
+}
+
+func (f *lLMServerPoolInformer) Lister() apiv1alpha1.LLMServerPoolLister {
+	return apiv1alpha1.NewLLMServerPoolLister(f.Informer().GetIndexer())
+}
diff --git a/client-go/informers/externalversions/api/v1alpha1/llmservice.go b/client-go/informers/externalversions/api/v1alpha1/llmservice.go
new file mode 100644
index 00000000..7b91175c
--- /dev/null
+++ b/client-go/informers/externalversions/api/v1alpha1/llmservice.go
@@ -0,0 +1,89 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by informer-gen. DO NOT EDIT.
+
+package v1alpha1
+
+import (
+	context "context"
+	time "time"
+
+	llminstancegatewayapiv1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/api/v1alpha1"
+	versioned "inference.networking.x-k8s.io/llm-instance-gateway/client-go/clientset/versioned"
+	internalinterfaces "inference.networking.x-k8s.io/llm-instance-gateway/client-go/informers/externalversions/internalinterfaces"
+	apiv1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/client-go/listers/api/v1alpha1"
+	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	runtime "k8s.io/apimachinery/pkg/runtime"
+	watch "k8s.io/apimachinery/pkg/watch"
+	cache "k8s.io/client-go/tools/cache"
+)
+
+// LLMServiceInformer provides access to a shared informer and lister for
+// LLMServices.
+type LLMServiceInformer interface {
+	Informer() cache.SharedIndexInformer
+	Lister() apiv1alpha1.LLMServiceLister
+}
+
+type lLMServiceInformer struct {
+	factory          internalinterfaces.SharedInformerFactory
+	tweakListOptions internalinterfaces.TweakListOptionsFunc
+	namespace        string
+}
+
+// NewLLMServiceInformer constructs a new informer for LLMService type.
+// Always prefer using an informer factory to get a shared informer instead of getting an independent
+// one. This reduces memory footprint and number of connections to the server.
+func NewLLMServiceInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer {
+	return NewFilteredLLMServiceInformer(client, namespace, resyncPeriod, indexers, nil)
+}
+
+// NewFilteredLLMServiceInformer constructs a new informer for LLMService type.
+// Always prefer using an informer factory to get a shared informer instead of getting an independent
+// one. This reduces memory footprint and number of connections to the server.
+func NewFilteredLLMServiceInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer {
+	return cache.NewSharedIndexInformer(
+		&cache.ListWatch{
+			ListFunc: func(options v1.ListOptions) (runtime.Object, error) {
+				if tweakListOptions != nil {
+					tweakListOptions(&options)
+				}
+				return client.ApiV1alpha1().LLMServices(namespace).List(context.TODO(), options)
+			},
+			WatchFunc: func(options v1.ListOptions) (watch.Interface, error) {
+				if tweakListOptions != nil {
+					tweakListOptions(&options)
+				}
+				return client.ApiV1alpha1().LLMServices(namespace).Watch(context.TODO(), options)
+			},
+		},
+		&llminstancegatewayapiv1alpha1.LLMService{},
+		resyncPeriod,
+		indexers,
+	)
+}
+
+func (f *lLMServiceInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer {
+	return NewFilteredLLMServiceInformer(client, f.namespace, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions)
+}
+
+func (f *lLMServiceInformer) Informer() cache.SharedIndexInformer {
+	return f.factory.InformerFor(&llminstancegatewayapiv1alpha1.LLMService{}, f.defaultInformer)
+}
+
+func (f *lLMServiceInformer) Lister() apiv1alpha1.LLMServiceLister {
+	return apiv1alpha1.NewLLMServiceLister(f.Informer().GetIndexer())
+}
diff --git a/client-go/informers/externalversions/factory.go b/client-go/informers/externalversions/factory.go
new file mode 100644
index 00000000..7c167d1f
--- /dev/null
+++ b/client-go/informers/externalversions/factory.go
@@ -0,0 +1,261 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by informer-gen. DO NOT EDIT.
+
+package externalversions
+
+import (
+	reflect "reflect"
+	sync "sync"
+	time "time"
+
+	versioned "inference.networking.x-k8s.io/llm-instance-gateway/client-go/clientset/versioned"
+	api "inference.networking.x-k8s.io/llm-instance-gateway/client-go/informers/externalversions/api"
+	internalinterfaces "inference.networking.x-k8s.io/llm-instance-gateway/client-go/informers/externalversions/internalinterfaces"
+	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	runtime "k8s.io/apimachinery/pkg/runtime"
+	schema "k8s.io/apimachinery/pkg/runtime/schema"
+	cache "k8s.io/client-go/tools/cache"
+)
+
+// SharedInformerOption defines the functional option type for SharedInformerFactory.
+type SharedInformerOption func(*sharedInformerFactory) *sharedInformerFactory
+
+type sharedInformerFactory struct {
+	client           versioned.Interface
+	namespace        string
+	tweakListOptions internalinterfaces.TweakListOptionsFunc
+	lock             sync.Mutex
+	defaultResync    time.Duration
+	customResync     map[reflect.Type]time.Duration
+	transform        cache.TransformFunc
+
+	informers map[reflect.Type]cache.SharedIndexInformer
+	// startedInformers is used for tracking which informers have been started.
+	// This allows Start() to be called multiple times safely.
+	startedInformers map[reflect.Type]bool
+	// wg tracks how many goroutines were started.
+	wg sync.WaitGroup
+	// shuttingDown is true when Shutdown has been called. It may still be running
+	// because it needs to wait for goroutines.
+	shuttingDown bool
+}
+
+// WithCustomResyncConfig sets a custom resync period for the specified informer types.
+func WithCustomResyncConfig(resyncConfig map[v1.Object]time.Duration) SharedInformerOption {
+	return func(factory *sharedInformerFactory) *sharedInformerFactory {
+		for k, v := range resyncConfig {
+			factory.customResync[reflect.TypeOf(k)] = v
+		}
+		return factory
+	}
+}
+
+// WithTweakListOptions sets a custom filter on all listers of the configured SharedInformerFactory.
+func WithTweakListOptions(tweakListOptions internalinterfaces.TweakListOptionsFunc) SharedInformerOption {
+	return func(factory *sharedInformerFactory) *sharedInformerFactory {
+		factory.tweakListOptions = tweakListOptions
+		return factory
+	}
+}
+
+// WithNamespace limits the SharedInformerFactory to the specified namespace.
+func WithNamespace(namespace string) SharedInformerOption {
+	return func(factory *sharedInformerFactory) *sharedInformerFactory {
+		factory.namespace = namespace
+		return factory
+	}
+}
+
+// WithTransform sets a transform on all informers.
+func WithTransform(transform cache.TransformFunc) SharedInformerOption {
+	return func(factory *sharedInformerFactory) *sharedInformerFactory {
+		factory.transform = transform
+		return factory
+	}
+}
+
+// NewSharedInformerFactory constructs a new instance of sharedInformerFactory for all namespaces.
+func NewSharedInformerFactory(client versioned.Interface, defaultResync time.Duration) SharedInformerFactory {
+	return NewSharedInformerFactoryWithOptions(client, defaultResync)
+}
+
+// NewFilteredSharedInformerFactory constructs a new instance of sharedInformerFactory.
+// Listers obtained via this SharedInformerFactory will be subject to the same filters
+// as specified here.
+// Deprecated: Please use NewSharedInformerFactoryWithOptions instead
+func NewFilteredSharedInformerFactory(client versioned.Interface, defaultResync time.Duration, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) SharedInformerFactory {
+	return NewSharedInformerFactoryWithOptions(client, defaultResync, WithNamespace(namespace), WithTweakListOptions(tweakListOptions))
+}
+
+// NewSharedInformerFactoryWithOptions constructs a new instance of a SharedInformerFactory with additional options.
+func NewSharedInformerFactoryWithOptions(client versioned.Interface, defaultResync time.Duration, options ...SharedInformerOption) SharedInformerFactory {
+	factory := &sharedInformerFactory{
+		client:           client,
+		namespace:        v1.NamespaceAll,
+		defaultResync:    defaultResync,
+		informers:        make(map[reflect.Type]cache.SharedIndexInformer),
+		startedInformers: make(map[reflect.Type]bool),
+		customResync:     make(map[reflect.Type]time.Duration),
+	}
+
+	// Apply all options
+	for _, opt := range options {
+		factory = opt(factory)
+	}
+
+	return factory
+}
+
+func (f *sharedInformerFactory) Start(stopCh <-chan struct{}) {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	if f.shuttingDown {
+		return
+	}
+
+	for informerType, informer := range f.informers {
+		if !f.startedInformers[informerType] {
+			f.wg.Add(1)
+			// We need a new variable in each loop iteration,
+			// otherwise the goroutine would use the loop variable
+			// and that keeps changing.
+			informer := informer
+			go func() {
+				defer f.wg.Done()
+				informer.Run(stopCh)
+			}()
+			f.startedInformers[informerType] = true
+		}
+	}
+}
+
+func (f *sharedInformerFactory) Shutdown() {
+	f.lock.Lock()
+	f.shuttingDown = true
+	f.lock.Unlock()
+
+	// Will return immediately if there is nothing to wait for.
+	f.wg.Wait()
+}
+
+func (f *sharedInformerFactory) WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool {
+	informers := func() map[reflect.Type]cache.SharedIndexInformer {
+		f.lock.Lock()
+		defer f.lock.Unlock()
+
+		informers := map[reflect.Type]cache.SharedIndexInformer{}
+		for informerType, informer := range f.informers {
+			if f.startedInformers[informerType] {
+				informers[informerType] = informer
+			}
+		}
+		return informers
+	}()
+
+	res := map[reflect.Type]bool{}
+	for informType, informer := range informers {
+		res[informType] = cache.WaitForCacheSync(stopCh, informer.HasSynced)
+	}
+	return res
+}
+
+// InformerFor returns the SharedIndexInformer for obj using an internal
+// client.
+func (f *sharedInformerFactory) InformerFor(obj runtime.Object, newFunc internalinterfaces.NewInformerFunc) cache.SharedIndexInformer {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	informerType := reflect.TypeOf(obj)
+	informer, exists := f.informers[informerType]
+	if exists {
+		return informer
+	}
+
+	resyncPeriod, exists := f.customResync[informerType]
+	if !exists {
+		resyncPeriod = f.defaultResync
+	}
+
+	informer = newFunc(f.client, resyncPeriod)
+	informer.SetTransform(f.transform)
+	f.informers[informerType] = informer
+
+	return informer
+}
+
+// SharedInformerFactory provides shared informers for resources in all known
+// API group versions.
+//
+// It is typically used like this:
+//
+//	ctx, cancel := context.Background()
+//	defer cancel()
+//	factory := NewSharedInformerFactory(client, resyncPeriod)
+//	defer factory.WaitForStop()    // Returns immediately if nothing was started.
+//	genericInformer := factory.ForResource(resource)
+//	typedInformer := factory.SomeAPIGroup().V1().SomeType()
+//	factory.Start(ctx.Done())          // Start processing these informers.
+//	synced := factory.WaitForCacheSync(ctx.Done())
+//	for v, ok := range synced {
+//	    if !ok {
+//	        fmt.Fprintf(os.Stderr, "caches failed to sync: %v", v)
+//	        return
+//	    }
+//	}
+//
+//	// Creating informers can also be created after Start, but then
+//	// Start must be called again:
+//	anotherGenericInformer := factory.ForResource(resource)
+//	factory.Start(ctx.Done())
+type SharedInformerFactory interface {
+	internalinterfaces.SharedInformerFactory
+
+	// Start initializes all requested informers. They are handled in goroutines
+	// which run until the stop channel gets closed.
+	// Warning: Start does not block. When run in a go-routine, it will race with a later WaitForCacheSync.
+	Start(stopCh <-chan struct{})
+
+	// Shutdown marks a factory as shutting down. At that point no new
+	// informers can be started anymore and Start will return without
+	// doing anything.
+	//
+	// In addition, Shutdown blocks until all goroutines have terminated. For that
+	// to happen, the close channel(s) that they were started with must be closed,
+	// either before Shutdown gets called or while it is waiting.
+	//
+	// Shutdown may be called multiple times, even concurrently. All such calls will
+	// block until all goroutines have terminated.
+	Shutdown()
+
+	// WaitForCacheSync blocks until all started informers' caches were synced
+	// or the stop channel gets closed.
+	WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool
+
+	// ForResource gives generic access to a shared informer of the matching type.
+	ForResource(resource schema.GroupVersionResource) (GenericInformer, error)
+
+	// InformerFor returns the SharedIndexInformer for obj using an internal
+	// client.
+	InformerFor(obj runtime.Object, newFunc internalinterfaces.NewInformerFunc) cache.SharedIndexInformer
+
+	Api() api.Interface
+}
+
+func (f *sharedInformerFactory) Api() api.Interface {
+	return api.New(f, f.namespace, f.tweakListOptions)
+}
diff --git a/client-go/informers/externalversions/generic.go b/client-go/informers/externalversions/generic.go
new file mode 100644
index 00000000..f3a43267
--- /dev/null
+++ b/client-go/informers/externalversions/generic.go
@@ -0,0 +1,63 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by informer-gen. DO NOT EDIT.
+
+package externalversions
+
+import (
+	fmt "fmt"
+
+	v1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/api/v1alpha1"
+	schema "k8s.io/apimachinery/pkg/runtime/schema"
+	cache "k8s.io/client-go/tools/cache"
+)
+
+// GenericInformer is type of SharedIndexInformer which will locate and delegate to other
+// sharedInformers based on type
+type GenericInformer interface {
+	Informer() cache.SharedIndexInformer
+	Lister() cache.GenericLister
+}
+
+type genericInformer struct {
+	informer cache.SharedIndexInformer
+	resource schema.GroupResource
+}
+
+// Informer returns the SharedIndexInformer.
+func (f *genericInformer) Informer() cache.SharedIndexInformer {
+	return f.informer
+}
+
+// Lister returns the GenericLister.
+func (f *genericInformer) Lister() cache.GenericLister {
+	return cache.NewGenericLister(f.Informer().GetIndexer(), f.resource)
+}
+
+// ForResource gives generic access to a shared informer of the matching type
+// TODO extend this to unknown resources with a client pool
+func (f *sharedInformerFactory) ForResource(resource schema.GroupVersionResource) (GenericInformer, error) {
+	switch resource {
+	// Group=api, Version=v1alpha1
+	case v1alpha1.SchemeGroupVersion.WithResource("llmserverpools"):
+		return &genericInformer{resource: resource.GroupResource(), informer: f.Api().V1alpha1().LLMServerPools().Informer()}, nil
+	case v1alpha1.SchemeGroupVersion.WithResource("llmservices"):
+		return &genericInformer{resource: resource.GroupResource(), informer: f.Api().V1alpha1().LLMServices().Informer()}, nil
+
+	}
+
+	return nil, fmt.Errorf("no informer found for %v", resource)
+}
diff --git a/client-go/informers/externalversions/internalinterfaces/factory_interfaces.go b/client-go/informers/externalversions/internalinterfaces/factory_interfaces.go
new file mode 100644
index 00000000..7fa481a4
--- /dev/null
+++ b/client-go/informers/externalversions/internalinterfaces/factory_interfaces.go
@@ -0,0 +1,39 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by informer-gen. DO NOT EDIT.
+
+package internalinterfaces
+
+import (
+	time "time"
+
+	versioned "inference.networking.x-k8s.io/llm-instance-gateway/client-go/clientset/versioned"
+	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	runtime "k8s.io/apimachinery/pkg/runtime"
+	cache "k8s.io/client-go/tools/cache"
+)
+
+// NewInformerFunc takes versioned.Interface and time.Duration to return a SharedIndexInformer.
+type NewInformerFunc func(versioned.Interface, time.Duration) cache.SharedIndexInformer
+
+// SharedInformerFactory a small interface to allow for adding an informer without an import cycle
+type SharedInformerFactory interface {
+	Start(stopCh <-chan struct{})
+	InformerFor(obj runtime.Object, newFunc NewInformerFunc) cache.SharedIndexInformer
+}
+
+// TweakListOptionsFunc is a function that transforms a v1.ListOptions.
+type TweakListOptionsFunc func(*v1.ListOptions)
diff --git a/client-go/listers/api/v1alpha1/expansion_generated.go b/client-go/listers/api/v1alpha1/expansion_generated.go
new file mode 100644
index 00000000..d764fe40
--- /dev/null
+++ b/client-go/listers/api/v1alpha1/expansion_generated.go
@@ -0,0 +1,34 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by lister-gen. DO NOT EDIT.
+
+package v1alpha1
+
+// LLMServerPoolListerExpansion allows custom methods to be added to
+// LLMServerPoolLister.
+type LLMServerPoolListerExpansion interface{}
+
+// LLMServerPoolNamespaceListerExpansion allows custom methods to be added to
+// LLMServerPoolNamespaceLister.
+type LLMServerPoolNamespaceListerExpansion interface{}
+
+// LLMServiceListerExpansion allows custom methods to be added to
+// LLMServiceLister.
+type LLMServiceListerExpansion interface{}
+
+// LLMServiceNamespaceListerExpansion allows custom methods to be added to
+// LLMServiceNamespaceLister.
+type LLMServiceNamespaceListerExpansion interface{}
diff --git a/client-go/listers/api/v1alpha1/llmserverpool.go b/client-go/listers/api/v1alpha1/llmserverpool.go
new file mode 100644
index 00000000..6cde0df4
--- /dev/null
+++ b/client-go/listers/api/v1alpha1/llmserverpool.go
@@ -0,0 +1,69 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by lister-gen. DO NOT EDIT.
+
+package v1alpha1
+
+import (
+	apiv1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/api/v1alpha1"
+	labels "k8s.io/apimachinery/pkg/labels"
+	listers "k8s.io/client-go/listers"
+	cache "k8s.io/client-go/tools/cache"
+)
+
+// LLMServerPoolLister helps list LLMServerPools.
+// All objects returned here must be treated as read-only.
+type LLMServerPoolLister interface {
+	// List lists all LLMServerPools in the indexer.
+	// Objects returned here must be treated as read-only.
+	List(selector labels.Selector) (ret []*apiv1alpha1.LLMServerPool, err error)
+	// LLMServerPools returns an object that can list and get LLMServerPools.
+	LLMServerPools(namespace string) LLMServerPoolNamespaceLister
+	LLMServerPoolListerExpansion
+}
+
+// lLMServerPoolLister implements the LLMServerPoolLister interface.
+type lLMServerPoolLister struct {
+	listers.ResourceIndexer[*apiv1alpha1.LLMServerPool]
+}
+
+// NewLLMServerPoolLister returns a new LLMServerPoolLister.
+func NewLLMServerPoolLister(indexer cache.Indexer) LLMServerPoolLister {
+	return &lLMServerPoolLister{listers.New[*apiv1alpha1.LLMServerPool](indexer, apiv1alpha1.Resource("llmserverpool"))}
+}
+
+// LLMServerPools returns an object that can list and get LLMServerPools.
+func (s *lLMServerPoolLister) LLMServerPools(namespace string) LLMServerPoolNamespaceLister {
+	return lLMServerPoolNamespaceLister{listers.NewNamespaced[*apiv1alpha1.LLMServerPool](s.ResourceIndexer, namespace)}
+}
+
+// LLMServerPoolNamespaceLister helps list and get LLMServerPools.
+// All objects returned here must be treated as read-only.
+type LLMServerPoolNamespaceLister interface {
+	// List lists all LLMServerPools in the indexer for a given namespace.
+	// Objects returned here must be treated as read-only.
+	List(selector labels.Selector) (ret []*apiv1alpha1.LLMServerPool, err error)
+	// Get retrieves the LLMServerPool from the indexer for a given namespace and name.
+	// Objects returned here must be treated as read-only.
+	Get(name string) (*apiv1alpha1.LLMServerPool, error)
+	LLMServerPoolNamespaceListerExpansion
+}
+
+// lLMServerPoolNamespaceLister implements the LLMServerPoolNamespaceLister
+// interface.
+type lLMServerPoolNamespaceLister struct {
+	listers.ResourceIndexer[*apiv1alpha1.LLMServerPool]
+}
diff --git a/client-go/listers/api/v1alpha1/llmservice.go b/client-go/listers/api/v1alpha1/llmservice.go
new file mode 100644
index 00000000..8dc76fcf
--- /dev/null
+++ b/client-go/listers/api/v1alpha1/llmservice.go
@@ -0,0 +1,69 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by lister-gen. DO NOT EDIT.
+
+package v1alpha1
+
+import (
+	apiv1alpha1 "inference.networking.x-k8s.io/llm-instance-gateway/api/v1alpha1"
+	labels "k8s.io/apimachinery/pkg/labels"
+	listers "k8s.io/client-go/listers"
+	cache "k8s.io/client-go/tools/cache"
+)
+
+// LLMServiceLister helps list LLMServices.
+// All objects returned here must be treated as read-only.
+type LLMServiceLister interface {
+	// List lists all LLMServices in the indexer.
+	// Objects returned here must be treated as read-only.
+	List(selector labels.Selector) (ret []*apiv1alpha1.LLMService, err error)
+	// LLMServices returns an object that can list and get LLMServices.
+	LLMServices(namespace string) LLMServiceNamespaceLister
+	LLMServiceListerExpansion
+}
+
+// lLMServiceLister implements the LLMServiceLister interface.
+type lLMServiceLister struct {
+	listers.ResourceIndexer[*apiv1alpha1.LLMService]
+}
+
+// NewLLMServiceLister returns a new LLMServiceLister.
+func NewLLMServiceLister(indexer cache.Indexer) LLMServiceLister {
+	return &lLMServiceLister{listers.New[*apiv1alpha1.LLMService](indexer, apiv1alpha1.Resource("llmservice"))}
+}
+
+// LLMServices returns an object that can list and get LLMServices.
+func (s *lLMServiceLister) LLMServices(namespace string) LLMServiceNamespaceLister {
+	return lLMServiceNamespaceLister{listers.NewNamespaced[*apiv1alpha1.LLMService](s.ResourceIndexer, namespace)}
+}
+
+// LLMServiceNamespaceLister helps list and get LLMServices.
+// All objects returned here must be treated as read-only.
+type LLMServiceNamespaceLister interface {
+	// List lists all LLMServices in the indexer for a given namespace.
+	// Objects returned here must be treated as read-only.
+	List(selector labels.Selector) (ret []*apiv1alpha1.LLMService, err error)
+	// Get retrieves the LLMService from the indexer for a given namespace and name.
+	// Objects returned here must be treated as read-only.
+	Get(name string) (*apiv1alpha1.LLMService, error)
+	LLMServiceNamespaceListerExpansion
+}
+
+// lLMServiceNamespaceLister implements the LLMServiceNamespaceLister
+// interface.
+type lLMServiceNamespaceLister struct {
+	listers.ResourceIndexer[*apiv1alpha1.LLMService]
+}
diff --git a/config/crd/bases/gateway.inference.k8s.io_llmserverpools.yaml b/config/crd/bases/gateway.inference.k8s.io_llmserverpools.yaml
new file mode 100644
index 00000000..56bb3ff0
--- /dev/null
+++ b/config/crd/bases/gateway.inference.k8s.io_llmserverpools.yaml
@@ -0,0 +1,158 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.16.1
+  name: llmserverpools.gateway.inference.networking.x-k8s.io
+spec:
+  group: gateway.inference.networking.x-k8s.io
+  names:
+    kind: LLMServerPool
+    listKind: LLMServerPoolList
+    plural: llmserverpools
+    singular: llmserverpool
+  scope: Namespaced
+  versions:
+  - name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: LLMServerPool is the Schema for the llmserverpools API
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: LLMServerPoolSpec defines the desired state of LLMServerPool
+            properties:
+              modelServerSelector:
+                description: |-
+                  ModelServerSelector uses label selection to watch model server pods
+                  that should be included in the LLMServerPool. ModelServers should not
+                  be with any other Service or LLMServerPool, that behavior is not supported
+                  and will result in sub-optimal utilization.
+                properties:
+                  matchExpressions:
+                    description: matchExpressions is a list of label selector requirements.
+                      The requirements are ANDed.
+                    items:
+                      description: |-
+                        A label selector requirement is a selector that contains values, a key, and an operator that
+                        relates the key and values.
+                      properties:
+                        key:
+                          description: key is the label key that the selector applies
+                            to.
+                          type: string
+                        operator:
+                          description: |-
+                            operator represents a key's relationship to a set of values.
+                            Valid operators are In, NotIn, Exists and DoesNotExist.
+                          type: string
+                        values:
+                          description: |-
+                            values is an array of string values. If the operator is In or NotIn,
+                            the values array must be non-empty. If the operator is Exists or DoesNotExist,
+                            the values array must be empty. This array is replaced during a strategic
+                            merge patch.
+                          items:
+                            type: string
+                          type: array
+                          x-kubernetes-list-type: atomic
+                      required:
+                      - key
+                      - operator
+                      type: object
+                    type: array
+                    x-kubernetes-list-type: atomic
+                  matchLabels:
+                    additionalProperties:
+                      type: string
+                    description: |-
+                      matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
+                      map is equivalent to an element of matchExpressions, whose key field is "key", the
+                      operator is "In", and the values array contains only "value". The requirements are ANDed.
+                    type: object
+                type: object
+                x-kubernetes-map-type: atomic
+            type: object
+          status:
+            description: LLMServerPoolStatus defines the observed state of LLMServerPool
+            properties:
+              conditions:
+                description: Conditions track the state of the LLMServerPool.
+                items:
+                  description: Condition contains details for one aspect of the current
+                    state of this API Resource.
+                  properties:
+                    lastTransitionTime:
+                      description: |-
+                        lastTransitionTime is the last time the condition transitioned from one status to another.
+                        This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
+                      format: date-time
+                      type: string
+                    message:
+                      description: |-
+                        message is a human readable message indicating details about the transition.
+                        This may be an empty string.
+                      maxLength: 32768
+                      type: string
+                    observedGeneration:
+                      description: |-
+                        observedGeneration represents the .metadata.generation that the condition was set based upon.
+                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                        with respect to the current state of the instance.
+                      format: int64
+                      minimum: 0
+                      type: integer
+                    reason:
+                      description: |-
+                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                        Producers of specific condition types may define expected values and meanings for this field,
+                        and whether the values are considered a guaranteed API.
+                        The value should be a CamelCase string.
+                        This field may not be empty.
+                      maxLength: 1024
+                      minLength: 1
+                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                      type: string
+                    status:
+                      description: status of the condition, one of True, False, Unknown.
+                      enum:
+                      - "True"
+                      - "False"
+                      - Unknown
+                      type: string
+                    type:
+                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
+                      maxLength: 316
+                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                      type: string
+                  required:
+                  - lastTransitionTime
+                  - message
+                  - reason
+                  - status
+                  - type
+                  type: object
+                type: array
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
diff --git a/config/crd/bases/gateway.inference.k8s.io_llmservices.yaml b/config/crd/bases/gateway.inference.k8s.io_llmservices.yaml
new file mode 100644
index 00000000..b9a1a80e
--- /dev/null
+++ b/config/crd/bases/gateway.inference.k8s.io_llmservices.yaml
@@ -0,0 +1,239 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.16.1
+  name: llmservices.gateway.inference.networking.x-k8s.io
+spec:
+  group: gateway.inference.networking.x-k8s.io
+  names:
+    kind: LLMService
+    listKind: LLMServiceList
+    plural: llmservices
+    singular: llmservice
+  scope: Namespaced
+  versions:
+  - name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: LLMService is the Schema for the llmservices API
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: |-
+              LLMService represents a set of LLM services that are multiplexed onto one
+              or more LLMServerPools. This resource is managed by the "LLM Service Owner"
+              persona. The Service Owner persona is: a team that trains, verifies, and
+              leverages a large language model from a model frontend, drives the lifecycle
+              and rollout of new versions of those models, and defines the specific
+              performance and latency goals for the model. These services are
+              expected to operate within a LLMServerPool sharing compute capacity with other
+              LLMServices, defined by the Inference Platform Admin. We allow a user who
+              has multiple LLMServices across multiple pools (with the same config) to
+              specify the configuration exactly once, and deploy to many pools
+              simultaneously. Enabling a simpler config and single source of truth
+              for a given user. LLMService names are unique for a given LLMServerPool,
+              if the name is reused, an error will be  shown on the status of a
+              LLMService that attempted to reuse. The oldest LLMService, based on
+              creation timestamp, will be selected to remain valid. In the event of a race
+              condition, one will be selected at random.
+            properties:
+              models:
+                description: |-
+                  Model defines the distinct services.
+                  Model can be in 2 priority classes, Critical and Noncritical.
+                  Priority class is implicitly set to Critical by specifying an Objective.
+                  Otherwise the Model is considered Noncritical.
+                items:
+                  description: |-
+                    Model defines the policies for routing the traffic of a use case, this includes performance objectives
+                    and traffic splitting between different versions of the model.
+                  properties:
+                    name:
+                      description: |-
+                        The name of the model as the users set in the "model" parameter in the requests.
+                        The name should be unique among the services that reference the same backend pool.
+                        This is the parameter that will be used to match the request with. In the future, we may
+                        allow to match on other request parameters. The other approach to support matching on
+                        on other request parameters is to use a different ModelName per HTTPFilter.
+                        Names can be reserved without implementing an actual model in the pool.
+                        This can be done by specifying a target model and setting the weight to zero,
+                        an error will be returned specifying that no valid target model is found.
+                      type: string
+                    objective:
+                      description: |-
+                        Optional
+                        LLM Services with an objective have higher priority than services without.
+                        IMPORTANT: By specifying an objective, this places the LLMService in a higher priority class than LLMServices without a defined priority class.
+                        In the face of resource-scarcity. Higher priority requests will be preserved, and lower priority class requests will be rejected.
+                      properties:
+                        desiredAveragePerOutputTokenLatencyAtP95OverMultipleRequests:
+                          description: |-
+                            The AverageLatencyPerOutputToken is calculated as the e2e request latency divided by output token
+                            length. Note that this is different from what is known as TPOT (time per output token) which only
+                            takes decode time into account.
+                            The P95 is calculated over a fixed time window defined at the operator level.
+                          format: int64
+                          type: integer
+                      type: object
+                    targetModels:
+                      description: |-
+                        Optional.
+                        Allow multiple versions of a model for traffic splitting.
+                        If not specified, the target model name is defaulted to the modelName parameter.
+                        modelName is often in reference to a LoRA adapter.
+                      items:
+                        description: |-
+                          TargetModel represents a deployed model or a LoRA adapter. The
+                          Name field is expected to match the name of the LoRA adapter
+                          (or base model) as it is registered within the model server. Inference
+                          Gateway assumes that the model exists on the model server and is the
+                          responsibility of the user to validate a correct match. Should a model fail
+                          to exist at request time, the error is processed by the Instance Gateway,
+                          and then emitted on the appropriate LLMService object.
+                        properties:
+                          name:
+                            description: The name of the adapter as expected by the
+                              ModelServer.
+                            type: string
+                          weight:
+                            description: |-
+                              Weight is used to determine the percentage of traffic that should be
+                              sent to this target model when multiple versions of the model are specified.
+                            type: integer
+                        type: object
+                      type: array
+                  type: object
+                type: array
+              poolRef:
+                description: PoolRef are references to the backend pools that the
+                  LLMService registers to.
+                items:
+                  description: ObjectReference contains enough information to let
+                    you inspect or modify the referred object.
+                  properties:
+                    apiVersion:
+                      description: API version of the referent.
+                      type: string
+                    fieldPath:
+                      description: |-
+                        If referring to a piece of an object instead of an entire object, this string
+                        should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
+                        For example, if the object reference is to a container within a pod, this would take on a value like:
+                        "spec.containers{name}" (where "name" refers to the name of the container that triggered
+                        the event) or if no container name is specified "spec.containers[2]" (container with
+                        index 2 in this pod). This syntax is chosen only to have some well-defined way of
+                        referencing a part of an object.
+                      type: string
+                    kind:
+                      description: |-
+                        Kind of the referent.
+                        More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+                      type: string
+                    name:
+                      description: |-
+                        Name of the referent.
+                        More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                      type: string
+                    namespace:
+                      description: |-
+                        Namespace of the referent.
+                        More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
+                      type: string
+                    resourceVersion:
+                      description: |-
+                        Specific resourceVersion to which this reference is made, if any.
+                        More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
+                      type: string
+                    uid:
+                      description: |-
+                        UID of the referent.
+                        More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
+                      type: string
+                  type: object
+                  x-kubernetes-map-type: atomic
+                type: array
+            type: object
+          status:
+            description: LLMServiceStatus defines the observed state of LLMService
+            properties:
+              conditions:
+                description: Conditions track the state of the LLMServerPool.
+                items:
+                  description: Condition contains details for one aspect of the current
+                    state of this API Resource.
+                  properties:
+                    lastTransitionTime:
+                      description: |-
+                        lastTransitionTime is the last time the condition transitioned from one status to another.
+                        This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
+                      format: date-time
+                      type: string
+                    message:
+                      description: |-
+                        message is a human readable message indicating details about the transition.
+                        This may be an empty string.
+                      maxLength: 32768
+                      type: string
+                    observedGeneration:
+                      description: |-
+                        observedGeneration represents the .metadata.generation that the condition was set based upon.
+                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                        with respect to the current state of the instance.
+                      format: int64
+                      minimum: 0
+                      type: integer
+                    reason:
+                      description: |-
+                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                        Producers of specific condition types may define expected values and meanings for this field,
+                        and whether the values are considered a guaranteed API.
+                        The value should be a CamelCase string.
+                        This field may not be empty.
+                      maxLength: 1024
+                      minLength: 1
+                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                      type: string
+                    status:
+                      description: status of the condition, one of True, False, Unknown.
+                      enum:
+                      - "True"
+                      - "False"
+                      - Unknown
+                      type: string
+                    type:
+                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
+                      maxLength: 316
+                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                      type: string
+                  required:
+                  - lastTransitionTime
+                  - message
+                  - reason
+                  - status
+                  - type
+                  type: object
+                type: array
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
diff --git a/config/crd/bases/gateway.inference.networking.x-k8s.io_llmserverpools.yaml b/config/crd/bases/gateway.inference.networking.x-k8s.io_llmserverpools.yaml
new file mode 100644
index 00000000..56bb3ff0
--- /dev/null
+++ b/config/crd/bases/gateway.inference.networking.x-k8s.io_llmserverpools.yaml
@@ -0,0 +1,158 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.16.1
+  name: llmserverpools.gateway.inference.networking.x-k8s.io
+spec:
+  group: gateway.inference.networking.x-k8s.io
+  names:
+    kind: LLMServerPool
+    listKind: LLMServerPoolList
+    plural: llmserverpools
+    singular: llmserverpool
+  scope: Namespaced
+  versions:
+  - name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: LLMServerPool is the Schema for the llmserverpools API
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: LLMServerPoolSpec defines the desired state of LLMServerPool
+            properties:
+              modelServerSelector:
+                description: |-
+                  ModelServerSelector uses label selection to watch model server pods
+                  that should be included in the LLMServerPool. ModelServers should not
+                  be with any other Service or LLMServerPool, that behavior is not supported
+                  and will result in sub-optimal utilization.
+                properties:
+                  matchExpressions:
+                    description: matchExpressions is a list of label selector requirements.
+                      The requirements are ANDed.
+                    items:
+                      description: |-
+                        A label selector requirement is a selector that contains values, a key, and an operator that
+                        relates the key and values.
+                      properties:
+                        key:
+                          description: key is the label key that the selector applies
+                            to.
+                          type: string
+                        operator:
+                          description: |-
+                            operator represents a key's relationship to a set of values.
+                            Valid operators are In, NotIn, Exists and DoesNotExist.
+                          type: string
+                        values:
+                          description: |-
+                            values is an array of string values. If the operator is In or NotIn,
+                            the values array must be non-empty. If the operator is Exists or DoesNotExist,
+                            the values array must be empty. This array is replaced during a strategic
+                            merge patch.
+                          items:
+                            type: string
+                          type: array
+                          x-kubernetes-list-type: atomic
+                      required:
+                      - key
+                      - operator
+                      type: object
+                    type: array
+                    x-kubernetes-list-type: atomic
+                  matchLabels:
+                    additionalProperties:
+                      type: string
+                    description: |-
+                      matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
+                      map is equivalent to an element of matchExpressions, whose key field is "key", the
+                      operator is "In", and the values array contains only "value". The requirements are ANDed.
+                    type: object
+                type: object
+                x-kubernetes-map-type: atomic
+            type: object
+          status:
+            description: LLMServerPoolStatus defines the observed state of LLMServerPool
+            properties:
+              conditions:
+                description: Conditions track the state of the LLMServerPool.
+                items:
+                  description: Condition contains details for one aspect of the current
+                    state of this API Resource.
+                  properties:
+                    lastTransitionTime:
+                      description: |-
+                        lastTransitionTime is the last time the condition transitioned from one status to another.
+                        This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
+                      format: date-time
+                      type: string
+                    message:
+                      description: |-
+                        message is a human readable message indicating details about the transition.
+                        This may be an empty string.
+                      maxLength: 32768
+                      type: string
+                    observedGeneration:
+                      description: |-
+                        observedGeneration represents the .metadata.generation that the condition was set based upon.
+                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                        with respect to the current state of the instance.
+                      format: int64
+                      minimum: 0
+                      type: integer
+                    reason:
+                      description: |-
+                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                        Producers of specific condition types may define expected values and meanings for this field,
+                        and whether the values are considered a guaranteed API.
+                        The value should be a CamelCase string.
+                        This field may not be empty.
+                      maxLength: 1024
+                      minLength: 1
+                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                      type: string
+                    status:
+                      description: status of the condition, one of True, False, Unknown.
+                      enum:
+                      - "True"
+                      - "False"
+                      - Unknown
+                      type: string
+                    type:
+                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
+                      maxLength: 316
+                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                      type: string
+                  required:
+                  - lastTransitionTime
+                  - message
+                  - reason
+                  - status
+                  - type
+                  type: object
+                type: array
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
diff --git a/config/crd/bases/gateway.inference.networking.x-k8s.io_llmservices.yaml b/config/crd/bases/gateway.inference.networking.x-k8s.io_llmservices.yaml
new file mode 100644
index 00000000..b9a1a80e
--- /dev/null
+++ b/config/crd/bases/gateway.inference.networking.x-k8s.io_llmservices.yaml
@@ -0,0 +1,239 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.16.1
+  name: llmservices.gateway.inference.networking.x-k8s.io
+spec:
+  group: gateway.inference.networking.x-k8s.io
+  names:
+    kind: LLMService
+    listKind: LLMServiceList
+    plural: llmservices
+    singular: llmservice
+  scope: Namespaced
+  versions:
+  - name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: LLMService is the Schema for the llmservices API
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: |-
+              LLMService represents a set of LLM services that are multiplexed onto one
+              or more LLMServerPools. This resource is managed by the "LLM Service Owner"
+              persona. The Service Owner persona is: a team that trains, verifies, and
+              leverages a large language model from a model frontend, drives the lifecycle
+              and rollout of new versions of those models, and defines the specific
+              performance and latency goals for the model. These services are
+              expected to operate within a LLMServerPool sharing compute capacity with other
+              LLMServices, defined by the Inference Platform Admin. We allow a user who
+              has multiple LLMServices across multiple pools (with the same config) to
+              specify the configuration exactly once, and deploy to many pools
+              simultaneously. Enabling a simpler config and single source of truth
+              for a given user. LLMService names are unique for a given LLMServerPool,
+              if the name is reused, an error will be  shown on the status of a
+              LLMService that attempted to reuse. The oldest LLMService, based on
+              creation timestamp, will be selected to remain valid. In the event of a race
+              condition, one will be selected at random.
+            properties:
+              models:
+                description: |-
+                  Model defines the distinct services.
+                  Model can be in 2 priority classes, Critical and Noncritical.
+                  Priority class is implicitly set to Critical by specifying an Objective.
+                  Otherwise the Model is considered Noncritical.
+                items:
+                  description: |-
+                    Model defines the policies for routing the traffic of a use case, this includes performance objectives
+                    and traffic splitting between different versions of the model.
+                  properties:
+                    name:
+                      description: |-
+                        The name of the model as the users set in the "model" parameter in the requests.
+                        The name should be unique among the services that reference the same backend pool.
+                        This is the parameter that will be used to match the request with. In the future, we may
+                        allow to match on other request parameters. The other approach to support matching on
+                        on other request parameters is to use a different ModelName per HTTPFilter.
+                        Names can be reserved without implementing an actual model in the pool.
+                        This can be done by specifying a target model and setting the weight to zero,
+                        an error will be returned specifying that no valid target model is found.
+                      type: string
+                    objective:
+                      description: |-
+                        Optional
+                        LLM Services with an objective have higher priority than services without.
+                        IMPORTANT: By specifying an objective, this places the LLMService in a higher priority class than LLMServices without a defined priority class.
+                        In the face of resource-scarcity. Higher priority requests will be preserved, and lower priority class requests will be rejected.
+                      properties:
+                        desiredAveragePerOutputTokenLatencyAtP95OverMultipleRequests:
+                          description: |-
+                            The AverageLatencyPerOutputToken is calculated as the e2e request latency divided by output token
+                            length. Note that this is different from what is known as TPOT (time per output token) which only
+                            takes decode time into account.
+                            The P95 is calculated over a fixed time window defined at the operator level.
+                          format: int64
+                          type: integer
+                      type: object
+                    targetModels:
+                      description: |-
+                        Optional.
+                        Allow multiple versions of a model for traffic splitting.
+                        If not specified, the target model name is defaulted to the modelName parameter.
+                        modelName is often in reference to a LoRA adapter.
+                      items:
+                        description: |-
+                          TargetModel represents a deployed model or a LoRA adapter. The
+                          Name field is expected to match the name of the LoRA adapter
+                          (or base model) as it is registered within the model server. Inference
+                          Gateway assumes that the model exists on the model server and is the
+                          responsibility of the user to validate a correct match. Should a model fail
+                          to exist at request time, the error is processed by the Instance Gateway,
+                          and then emitted on the appropriate LLMService object.
+                        properties:
+                          name:
+                            description: The name of the adapter as expected by the
+                              ModelServer.
+                            type: string
+                          weight:
+                            description: |-
+                              Weight is used to determine the percentage of traffic that should be
+                              sent to this target model when multiple versions of the model are specified.
+                            type: integer
+                        type: object
+                      type: array
+                  type: object
+                type: array
+              poolRef:
+                description: PoolRef are references to the backend pools that the
+                  LLMService registers to.
+                items:
+                  description: ObjectReference contains enough information to let
+                    you inspect or modify the referred object.
+                  properties:
+                    apiVersion:
+                      description: API version of the referent.
+                      type: string
+                    fieldPath:
+                      description: |-
+                        If referring to a piece of an object instead of an entire object, this string
+                        should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
+                        For example, if the object reference is to a container within a pod, this would take on a value like:
+                        "spec.containers{name}" (where "name" refers to the name of the container that triggered
+                        the event) or if no container name is specified "spec.containers[2]" (container with
+                        index 2 in this pod). This syntax is chosen only to have some well-defined way of
+                        referencing a part of an object.
+                      type: string
+                    kind:
+                      description: |-
+                        Kind of the referent.
+                        More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+                      type: string
+                    name:
+                      description: |-
+                        Name of the referent.
+                        More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                      type: string
+                    namespace:
+                      description: |-
+                        Namespace of the referent.
+                        More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
+                      type: string
+                    resourceVersion:
+                      description: |-
+                        Specific resourceVersion to which this reference is made, if any.
+                        More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
+                      type: string
+                    uid:
+                      description: |-
+                        UID of the referent.
+                        More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
+                      type: string
+                  type: object
+                  x-kubernetes-map-type: atomic
+                type: array
+            type: object
+          status:
+            description: LLMServiceStatus defines the observed state of LLMService
+            properties:
+              conditions:
+                description: Conditions track the state of the LLMServerPool.
+                items:
+                  description: Condition contains details for one aspect of the current
+                    state of this API Resource.
+                  properties:
+                    lastTransitionTime:
+                      description: |-
+                        lastTransitionTime is the last time the condition transitioned from one status to another.
+                        This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
+                      format: date-time
+                      type: string
+                    message:
+                      description: |-
+                        message is a human readable message indicating details about the transition.
+                        This may be an empty string.
+                      maxLength: 32768
+                      type: string
+                    observedGeneration:
+                      description: |-
+                        observedGeneration represents the .metadata.generation that the condition was set based upon.
+                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                        with respect to the current state of the instance.
+                      format: int64
+                      minimum: 0
+                      type: integer
+                    reason:
+                      description: |-
+                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                        Producers of specific condition types may define expected values and meanings for this field,
+                        and whether the values are considered a guaranteed API.
+                        The value should be a CamelCase string.
+                        This field may not be empty.
+                      maxLength: 1024
+                      minLength: 1
+                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                      type: string
+                    status:
+                      description: status of the condition, one of True, False, Unknown.
+                      enum:
+                      - "True"
+                      - "False"
+                      - Unknown
+                      type: string
+                    type:
+                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
+                      maxLength: 316
+                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                      type: string
+                  required:
+                  - lastTransitionTime
+                  - message
+                  - reason
+                  - status
+                  - type
+                  type: object
+                type: array
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml
new file mode 100644
index 00000000..2a0bcfcb
--- /dev/null
+++ b/config/crd/kustomization.yaml
@@ -0,0 +1,24 @@
+# This kustomization.yaml is not intended to be run by itself,
+# since it depends on service name and namespace that are out of this kustomize package.
+# It should be run by config/default
+resources:
+- bases/gateway.inference.networking.x-k8s.io_llmserverpools.yaml
+- bases/gateway.inference.networking.x-k8s.io_llmservices.yaml
+# +kubebuilder:scaffold:crdkustomizeresource
+
+patches:
+# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix.
+# patches here are for enabling the conversion webhook for each CRD
+# +kubebuilder:scaffold:crdkustomizewebhookpatch
+
+# [CERTMANAGER] To enable cert-manager, uncomment all the sections with [CERTMANAGER] prefix.
+# patches here are for enabling the CA injection for each CRD
+#- path: patches/cainjection_in_llmserverpools.yaml
+#- path: patches/cainjection_in_llmservices.yaml
+# +kubebuilder:scaffold:crdkustomizecainjectionpatch
+
+# [WEBHOOK] To enable webhook, uncomment the following section
+# the following config is for teaching kustomize how to do kustomization for CRDs.
+
+#configurations:
+#- kustomizeconfig.yaml
diff --git a/config/crd/kustomizeconfig.yaml b/config/crd/kustomizeconfig.yaml
new file mode 100644
index 00000000..ec5c150a
--- /dev/null
+++ b/config/crd/kustomizeconfig.yaml
@@ -0,0 +1,19 @@
+# This file is for teaching kustomize how to substitute name and namespace reference in CRD
+nameReference:
+- kind: Service
+  version: v1
+  fieldSpecs:
+  - kind: CustomResourceDefinition
+    version: v1
+    group: apiextensions.k8s.io
+    path: spec/conversion/webhook/clientConfig/service/name
+
+namespace:
+- kind: CustomResourceDefinition
+  version: v1
+  group: apiextensions.k8s.io
+  path: spec/conversion/webhook/clientConfig/service/namespace
+  create: false
+
+varReference:
+- path: metadata/annotations
diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml
new file mode 100644
index 00000000..1fd9939f
--- /dev/null
+++ b/config/default/kustomization.yaml
@@ -0,0 +1,151 @@
+# Adds namespace to all resources.
+namespace: api-system
+
+# Value of this field is prepended to the
+# names of all resources, e.g. a deployment named
+# "wordpress" becomes "alices-wordpress".
+# Note that it should also match with the prefix (text before '-') of the namespace
+# field above.
+namePrefix: api-
+
+# Labels to add to all resources and selectors.
+#labels:
+#- includeSelectors: true
+#  pairs:
+#    someName: someValue
+
+resources:
+- ../crd
+- ../rbac
+- ../manager
+# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in
+# crd/kustomization.yaml
+#- ../webhook
+# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required.
+#- ../certmanager
+# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'.
+#- ../prometheus
+# [METRICS] Expose the controller manager metrics service.
+- metrics_service.yaml
+# [NETWORK POLICY] Protect the /metrics endpoint and Webhook Server with NetworkPolicy.
+# Only Pod(s) running a namespace labeled with 'metrics: enabled' will be able to gather the metrics.
+# Only CR(s) which requires webhooks and are applied on namespaces labeled with 'webhooks: enabled' will
+# be able to communicate with the Webhook Server.
+#- ../network-policy
+
+# Uncomment the patches line if you enable Metrics, and/or are using webhooks and cert-manager
+patches:
+# [METRICS] The following patch will enable the metrics endpoint using HTTPS and the port :8443.
+# More info: https://book.kubebuilder.io/reference/metrics
+- path: manager_metrics_patch.yaml
+  target:
+    kind: Deployment
+
+# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in
+# crd/kustomization.yaml
+#- path: manager_webhook_patch.yaml
+
+# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'.
+# Uncomment 'CERTMANAGER' sections in crd/kustomization.yaml to enable the CA injection in the admission webhooks.
+# 'CERTMANAGER' needs to be enabled to use ca injection
+#- path: webhookcainjection_patch.yaml
+
+# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix.
+# Uncomment the following replacements to add the cert-manager CA injection annotations
+#replacements:
+#  - source: # Add cert-manager annotation to ValidatingWebhookConfiguration, MutatingWebhookConfiguration and CRDs
+#      kind: Certificate
+#      group: cert-manager.io
+#      version: v1
+#      name: serving-cert # this name should match the one in certificate.yaml
+#      fieldPath: .metadata.namespace # namespace of the certificate CR
+#    targets:
+#      - select:
+#          kind: ValidatingWebhookConfiguration
+#        fieldPaths:
+#          - .metadata.annotations.[cert-manager.io/inject-ca-from]
+#        options:
+#          delimiter: '/'
+#          index: 0
+#          create: true
+#      - select:
+#          kind: MutatingWebhookConfiguration
+#        fieldPaths:
+#          - .metadata.annotations.[cert-manager.io/inject-ca-from]
+#        options:
+#          delimiter: '/'
+#          index: 0
+#          create: true
+#      - select:
+#          kind: CustomResourceDefinition
+#        fieldPaths:
+#          - .metadata.annotations.[cert-manager.io/inject-ca-from]
+#        options:
+#          delimiter: '/'
+#          index: 0
+#          create: true
+#  - source:
+#      kind: Certificate
+#      group: cert-manager.io
+#      version: v1
+#      name: serving-cert # this name should match the one in certificate.yaml
+#      fieldPath: .metadata.name
+#    targets:
+#      - select:
+#          kind: ValidatingWebhookConfiguration
+#        fieldPaths:
+#          - .metadata.annotations.[cert-manager.io/inject-ca-from]
+#        options:
+#          delimiter: '/'
+#          index: 1
+#          create: true
+#      - select:
+#          kind: MutatingWebhookConfiguration
+#        fieldPaths:
+#          - .metadata.annotations.[cert-manager.io/inject-ca-from]
+#        options:
+#          delimiter: '/'
+#          index: 1
+#          create: true
+#      - select:
+#          kind: CustomResourceDefinition
+#        fieldPaths:
+#          - .metadata.annotations.[cert-manager.io/inject-ca-from]
+#        options:
+#          delimiter: '/'
+#          index: 1
+#          create: true
+#  - source: # Add cert-manager annotation to the webhook Service
+#      kind: Service
+#      version: v1
+#      name: webhook-service
+#      fieldPath: .metadata.name # namespace of the service
+#    targets:
+#      - select:
+#          kind: Certificate
+#          group: cert-manager.io
+#          version: v1
+#        fieldPaths:
+#          - .spec.dnsNames.0
+#          - .spec.dnsNames.1
+#        options:
+#          delimiter: '.'
+#          index: 0
+#          create: true
+#  - source:
+#      kind: Service
+#      version: v1
+#      name: webhook-service
+#      fieldPath: .metadata.namespace # namespace of the service
+#    targets:
+#      - select:
+#          kind: Certificate
+#          group: cert-manager.io
+#          version: v1
+#        fieldPaths:
+#          - .spec.dnsNames.0
+#          - .spec.dnsNames.1
+#        options:
+#          delimiter: '.'
+#          index: 1
+#          create: true
diff --git a/config/default/manager_metrics_patch.yaml b/config/default/manager_metrics_patch.yaml
new file mode 100644
index 00000000..2aaef653
--- /dev/null
+++ b/config/default/manager_metrics_patch.yaml
@@ -0,0 +1,4 @@
+# This patch adds the args to allow exposing the metrics endpoint using HTTPS
+- op: add
+  path: /spec/template/spec/containers/0/args/0
+  value: --metrics-bind-address=:8443
diff --git a/config/default/metrics_service.yaml b/config/default/metrics_service.yaml
new file mode 100644
index 00000000..140d4943
--- /dev/null
+++ b/config/default/metrics_service.yaml
@@ -0,0 +1,17 @@
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    control-plane: controller-manager
+    app.kubernetes.io/name: api
+    app.kubernetes.io/managed-by: kustomize
+  name: controller-manager-metrics-service
+  namespace: system
+spec:
+  ports:
+  - name: https
+    port: 8443
+    protocol: TCP
+    targetPort: 8443
+  selector:
+    control-plane: controller-manager
diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml
new file mode 100644
index 00000000..5c5f0b84
--- /dev/null
+++ b/config/manager/kustomization.yaml
@@ -0,0 +1,2 @@
+resources:
+- manager.yaml
diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml
new file mode 100644
index 00000000..384167d7
--- /dev/null
+++ b/config/manager/manager.yaml
@@ -0,0 +1,95 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  labels:
+    control-plane: controller-manager
+    app.kubernetes.io/name: api
+    app.kubernetes.io/managed-by: kustomize
+  name: system
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: controller-manager
+  namespace: system
+  labels:
+    control-plane: controller-manager
+    app.kubernetes.io/name: api
+    app.kubernetes.io/managed-by: kustomize
+spec:
+  selector:
+    matchLabels:
+      control-plane: controller-manager
+  replicas: 1
+  template:
+    metadata:
+      annotations:
+        kubectl.kubernetes.io/default-container: manager
+      labels:
+        control-plane: controller-manager
+    spec:
+      # TODO(user): Uncomment the following code to configure the nodeAffinity expression
+      # according to the platforms which are supported by your solution.
+      # It is considered best practice to support multiple architectures. You can
+      # build your manager image using the makefile target docker-buildx.
+      # affinity:
+      #   nodeAffinity:
+      #     requiredDuringSchedulingIgnoredDuringExecution:
+      #       nodeSelectorTerms:
+      #         - matchExpressions:
+      #           - key: kubernetes.io/arch
+      #             operator: In
+      #             values:
+      #               - amd64
+      #               - arm64
+      #               - ppc64le
+      #               - s390x
+      #           - key: kubernetes.io/os
+      #             operator: In
+      #             values:
+      #               - linux
+      securityContext:
+        runAsNonRoot: true
+        # TODO(user): For common cases that do not require escalating privileges
+        # it is recommended to ensure that all your Pods/Containers are restrictive.
+        # More info: https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted
+        # Please uncomment the following code if your project does NOT have to work on old Kubernetes
+        # versions < 1.19 or on vendors versions which do NOT support this field by default (i.e. Openshift < 4.11 ).
+        # seccompProfile:
+        #   type: RuntimeDefault
+      containers:
+      - command:
+        - /manager
+        args:
+          - --leader-elect
+          - --health-probe-bind-address=:8081
+        image: controller:latest
+        name: manager
+        securityContext:
+          allowPrivilegeEscalation: false
+          capabilities:
+            drop:
+            - "ALL"
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: 8081
+          initialDelaySeconds: 15
+          periodSeconds: 20
+        readinessProbe:
+          httpGet:
+            path: /readyz
+            port: 8081
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        # TODO(user): Configure the resources accordingly based on the project requirements.
+        # More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+        resources:
+          limits:
+            cpu: 500m
+            memory: 128Mi
+          requests:
+            cpu: 10m
+            memory: 64Mi
+      serviceAccountName: controller-manager
+      terminationGracePeriodSeconds: 10
diff --git a/config/network-policy/allow-metrics-traffic.yaml b/config/network-policy/allow-metrics-traffic.yaml
new file mode 100644
index 00000000..aae53668
--- /dev/null
+++ b/config/network-policy/allow-metrics-traffic.yaml
@@ -0,0 +1,26 @@
+# This NetworkPolicy allows ingress traffic
+# with Pods running on namespaces labeled with 'metrics: enabled'. Only Pods on those
+# namespaces are able to gathering data from the metrics endpoint.
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  labels:
+    app.kubernetes.io/name: api
+    app.kubernetes.io/managed-by: kustomize
+  name: allow-metrics-traffic
+  namespace: system
+spec:
+  podSelector:
+    matchLabels:
+      control-plane: controller-manager
+  policyTypes:
+    - Ingress
+  ingress:
+    # This allows ingress traffic from any namespace with the label metrics: enabled
+    - from:
+      - namespaceSelector:
+          matchLabels:
+            metrics: enabled  # Only from namespaces with this label
+      ports:
+        - port: 8443
+          protocol: TCP
diff --git a/config/network-policy/kustomization.yaml b/config/network-policy/kustomization.yaml
new file mode 100644
index 00000000..ec0fb5e5
--- /dev/null
+++ b/config/network-policy/kustomization.yaml
@@ -0,0 +1,2 @@
+resources:
+- allow-metrics-traffic.yaml
diff --git a/config/prometheus/kustomization.yaml b/config/prometheus/kustomization.yaml
new file mode 100644
index 00000000..ed137168
--- /dev/null
+++ b/config/prometheus/kustomization.yaml
@@ -0,0 +1,2 @@
+resources:
+- monitor.yaml
diff --git a/config/prometheus/monitor.yaml b/config/prometheus/monitor.yaml
new file mode 100644
index 00000000..aac24ef3
--- /dev/null
+++ b/config/prometheus/monitor.yaml
@@ -0,0 +1,30 @@
+# Prometheus Monitor Service (Metrics)
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  labels:
+    control-plane: controller-manager
+    app.kubernetes.io/name: api
+    app.kubernetes.io/managed-by: kustomize
+  name: controller-manager-metrics-monitor
+  namespace: system
+spec:
+  endpoints:
+    - path: /metrics
+      port: https # Ensure this is the name of the port that exposes HTTPS metrics
+      scheme: https
+      bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+      tlsConfig:
+        # TODO(user): The option insecureSkipVerify: true is not recommended for production since it disables
+        # certificate verification. This poses a significant security risk by making the system vulnerable to
+        # man-in-the-middle attacks, where an attacker could intercept and manipulate the communication between
+        # Prometheus and the monitored services. This could lead to unauthorized access to sensitive metrics data,
+        # compromising the integrity and confidentiality of the information.
+        # Please use the following options for secure configurations:
+        # caFile: /etc/metrics-certs/ca.crt
+        # certFile: /etc/metrics-certs/tls.crt
+        # keyFile: /etc/metrics-certs/tls.key
+        insecureSkipVerify: true
+  selector:
+    matchLabels:
+      control-plane: controller-manager
diff --git a/config/rbac/kustomization.yaml b/config/rbac/kustomization.yaml
new file mode 100644
index 00000000..8ae5a814
--- /dev/null
+++ b/config/rbac/kustomization.yaml
@@ -0,0 +1,29 @@
+resources:
+# All RBAC will be applied under this service account in
+# the deployment namespace. You may comment out this resource
+# if your manager will use a service account that exists at
+# runtime. Be sure to update RoleBinding and ClusterRoleBinding
+# subjects if changing service account names.
+- service_account.yaml
+- role.yaml
+- role_binding.yaml
+- leader_election_role.yaml
+- leader_election_role_binding.yaml
+# The following RBAC configurations are used to protect
+# the metrics endpoint with authn/authz. These configurations
+# ensure that only authorized users and service accounts
+# can access the metrics endpoint. Comment the following
+# permissions if you want to disable this protection.
+# More info: https://book.kubebuilder.io/reference/metrics.html
+- metrics_auth_role.yaml
+- metrics_auth_role_binding.yaml
+- metrics_reader_role.yaml
+# For each CRD, "Editor" and "Viewer" roles are scaffolded by
+# default, aiding admins in cluster management. Those roles are
+# not used by the Project itself. You can comment the following lines
+# if you do not want those helpers be installed with your Project.
+- llmservice_editor_role.yaml
+- llmservice_viewer_role.yaml
+- llmserverpool_editor_role.yaml
+- llmserverpool_viewer_role.yaml
+
diff --git a/config/rbac/leader_election_role.yaml b/config/rbac/leader_election_role.yaml
new file mode 100644
index 00000000..e2f8551b
--- /dev/null
+++ b/config/rbac/leader_election_role.yaml
@@ -0,0 +1,40 @@
+# permissions to do leader election.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  labels:
+    app.kubernetes.io/name: api
+    app.kubernetes.io/managed-by: kustomize
+  name: leader-election-role
+rules:
+- apiGroups:
+  - ""
+  resources:
+  - configmaps
+  verbs:
+  - get
+  - list
+  - watch
+  - create
+  - update
+  - patch
+  - delete
+- apiGroups:
+  - coordination.k8s.io
+  resources:
+  - leases
+  verbs:
+  - get
+  - list
+  - watch
+  - create
+  - update
+  - patch
+  - delete
+- apiGroups:
+  - ""
+  resources:
+  - events
+  verbs:
+  - create
+  - patch
diff --git a/config/rbac/leader_election_role_binding.yaml b/config/rbac/leader_election_role_binding.yaml
new file mode 100644
index 00000000..fb71a122
--- /dev/null
+++ b/config/rbac/leader_election_role_binding.yaml
@@ -0,0 +1,15 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  labels:
+    app.kubernetes.io/name: api
+    app.kubernetes.io/managed-by: kustomize
+  name: leader-election-rolebinding
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: leader-election-role
+subjects:
+- kind: ServiceAccount
+  name: controller-manager
+  namespace: system
diff --git a/config/rbac/llmserverpool_editor_role.yaml b/config/rbac/llmserverpool_editor_role.yaml
new file mode 100644
index 00000000..ef8ad5c1
--- /dev/null
+++ b/config/rbac/llmserverpool_editor_role.yaml
@@ -0,0 +1,27 @@
+# permissions for end users to edit llmserverpools.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/name: api
+    app.kubernetes.io/managed-by: kustomize
+  name: llmserverpool-editor-role
+rules:
+- apiGroups:
+  - gateway.inference.networking.x-k8s.io
+  resources:
+  - llmserverpools
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - gateway.inference.networking.x-k8s.io
+  resources:
+  - llmserverpools/status
+  verbs:
+  - get
diff --git a/config/rbac/llmserverpool_viewer_role.yaml b/config/rbac/llmserverpool_viewer_role.yaml
new file mode 100644
index 00000000..35a4ade8
--- /dev/null
+++ b/config/rbac/llmserverpool_viewer_role.yaml
@@ -0,0 +1,23 @@
+# permissions for end users to view llmserverpools.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/name: api
+    app.kubernetes.io/managed-by: kustomize
+  name: llmserverpool-viewer-role
+rules:
+- apiGroups:
+  - gateway.inference.networking.x-k8s.io
+  resources:
+  - llmserverpools
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - gateway.inference.networking.x-k8s.io
+  resources:
+  - llmserverpools/status
+  verbs:
+  - get
diff --git a/config/rbac/llmservice_editor_role.yaml b/config/rbac/llmservice_editor_role.yaml
new file mode 100644
index 00000000..09215188
--- /dev/null
+++ b/config/rbac/llmservice_editor_role.yaml
@@ -0,0 +1,27 @@
+# permissions for end users to edit llmservices.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/name: api
+    app.kubernetes.io/managed-by: kustomize
+  name: llmservice-editor-role
+rules:
+- apiGroups:
+  - gateway.inference.networking.x-k8s.io
+  resources:
+  - llmservices
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - gateway.inference.networking.x-k8s.io
+  resources:
+  - llmservices/status
+  verbs:
+  - get
diff --git a/config/rbac/llmservice_viewer_role.yaml b/config/rbac/llmservice_viewer_role.yaml
new file mode 100644
index 00000000..9c26d7c8
--- /dev/null
+++ b/config/rbac/llmservice_viewer_role.yaml
@@ -0,0 +1,23 @@
+# permissions for end users to view llmservices.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/name: api
+    app.kubernetes.io/managed-by: kustomize
+  name: llmservice-viewer-role
+rules:
+- apiGroups:
+  - gateway.inference.networking.x-k8s.io
+  resources:
+  - llmservices
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - gateway.inference.networking.x-k8s.io
+  resources:
+  - llmservices/status
+  verbs:
+  - get
diff --git a/config/rbac/metrics_auth_role.yaml b/config/rbac/metrics_auth_role.yaml
new file mode 100644
index 00000000..32d2e4ec
--- /dev/null
+++ b/config/rbac/metrics_auth_role.yaml
@@ -0,0 +1,17 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: metrics-auth-role
+rules:
+- apiGroups:
+  - authentication.k8s.io
+  resources:
+  - tokenreviews
+  verbs:
+  - create
+- apiGroups:
+  - authorization.k8s.io
+  resources:
+  - subjectaccessreviews
+  verbs:
+  - create
diff --git a/config/rbac/metrics_auth_role_binding.yaml b/config/rbac/metrics_auth_role_binding.yaml
new file mode 100644
index 00000000..e775d67f
--- /dev/null
+++ b/config/rbac/metrics_auth_role_binding.yaml
@@ -0,0 +1,12 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: metrics-auth-rolebinding
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: metrics-auth-role
+subjects:
+- kind: ServiceAccount
+  name: controller-manager
+  namespace: system
diff --git a/config/rbac/metrics_reader_role.yaml b/config/rbac/metrics_reader_role.yaml
new file mode 100644
index 00000000..51a75db4
--- /dev/null
+++ b/config/rbac/metrics_reader_role.yaml
@@ -0,0 +1,9 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: metrics-reader
+rules:
+- nonResourceURLs:
+  - "/metrics"
+  verbs:
+  - get
diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml
new file mode 100644
index 00000000..9d6247eb
--- /dev/null
+++ b/config/rbac/role.yaml
@@ -0,0 +1,11 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/name: api
+    app.kubernetes.io/managed-by: kustomize
+  name: manager-role
+rules:
+- apiGroups: [""]
+  resources: ["pods"]
+  verbs: ["get", "list", "watch"]
diff --git a/config/rbac/role_binding.yaml b/config/rbac/role_binding.yaml
new file mode 100644
index 00000000..c66b66bf
--- /dev/null
+++ b/config/rbac/role_binding.yaml
@@ -0,0 +1,15 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  labels:
+    app.kubernetes.io/name: api
+    app.kubernetes.io/managed-by: kustomize
+  name: manager-rolebinding
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: manager-role
+subjects:
+- kind: ServiceAccount
+  name: controller-manager
+  namespace: system
diff --git a/config/rbac/service_account.yaml b/config/rbac/service_account.yaml
new file mode 100644
index 00000000..9286120f
--- /dev/null
+++ b/config/rbac/service_account.yaml
@@ -0,0 +1,8 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  labels:
+    app.kubernetes.io/name: api
+    app.kubernetes.io/managed-by: kustomize
+  name: controller-manager
+  namespace: system
diff --git a/config/samples/gateway_v1alpha1_llmserverpool.yaml b/config/samples/gateway_v1alpha1_llmserverpool.yaml
new file mode 100644
index 00000000..36f0659d
--- /dev/null
+++ b/config/samples/gateway_v1alpha1_llmserverpool.yaml
@@ -0,0 +1,11 @@
+apiVersion: inference.networking.x-k8s.io/v1alpha1
+kind: LLMServerPool
+metadata:
+  labels:
+    app.kubernetes.io/name: api
+    app.kubernetes.io/managed-by: kustomize
+  name: llmserverpool-sample
+spec:
+  serviceRefs:
+  - gemini-jetstream-tpu-v5e-service
+  - gemini-vllm-a100-service
diff --git a/config/samples/gateway_v1alpha1_llmservice.yaml b/config/samples/gateway_v1alpha1_llmservice.yaml
new file mode 100644
index 00000000..5df0f9ac
--- /dev/null
+++ b/config/samples/gateway_v1alpha1_llmservice.yaml
@@ -0,0 +1,21 @@
+apiVersion: inference.networking.x-k8s.io/v1alpha1
+kind: LLMService
+metadata:
+  labels:
+    app.kubernetes.io/name: api
+    app.kubernetes.io/managed-by: kustomize
+  name: llmservice-sample
+spec:
+  LLMServices:
+  - modelName: sql-code-assist
+  - modelName: npc-bot
+    objective: 
+      desiredAveragePerOutputTokenLatencyAtP95OverMultipleRequests: 50ms
+    targetModels:
+      targetModelName: npc-bot-v1
+        weight: 50
+      targetModelName: npc-bot-v2
+        weight: 50 	
+  poolRefs:
+  - name: llama-2-pool
+  - name: gemini-pool
diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml
new file mode 100644
index 00000000..33b2d373
--- /dev/null
+++ b/config/samples/kustomization.yaml
@@ -0,0 +1,5 @@
+## Append samples of your project ##
+resources:
+- gateway_v1alpha1_llmserverpool.yaml
+- gateway_v1alpha1_llmservice.yaml
+# +kubebuilder:scaffold:manifestskustomizesamples
diff --git a/docs/proposals/002-api-proposal/proposal.md b/docs/proposals/002-api-proposal/proposal.md
index edf71f49..988ee310 100644
--- a/docs/proposals/002-api-proposal/proposal.md
+++ b/docs/proposals/002-api-proposal/proposal.md
@@ -216,11 +216,11 @@ type LLMServerPool struct {
 }
 
 type LLMServerPoolSpec struct {
-        // Select the distinct services to include in the backend pool. These
-        // services should be consumed by only the llmServerPool they are part
-        // of. Should this behavior be breached, routing behavior is not
-        // guaranteed.
-        ServiceRef []corev1.ObjectReference
+        // ModelServerSelector uses label selection to watch model server pods
+        // that should be included in the LLMServerPool. ModelServers should not
+        // be with any other Service or LLMServerPool, that behavior is not supported
+        // and will result in sub-optimal utilization.
+        ModelServerSelector map[string]string `json:"modelServerSelector,omitempty"`
 }
 ```
 
diff --git a/go.mod b/go.mod
new file mode 100644
index 00000000..e3b118e3
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,102 @@
+module inference.networking.x-k8s.io/llm-instance-gateway
+
+go 1.22.0
+
+require (
+	github.com/onsi/ginkgo/v2 v2.19.0
+	github.com/onsi/gomega v1.33.1
+	k8s.io/api v0.31.0
+	k8s.io/apimachinery v0.31.1
+	k8s.io/client-go v0.31.0
+	k8s.io/code-generator v0.31.1
+	sigs.k8s.io/controller-runtime v0.19.0
+	sigs.k8s.io/structured-merge-diff/v4 v4.4.1
+)
+
+require (
+	github.com/antlr4-go/antlr/v4 v4.13.0 // indirect
+	github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a // indirect
+	github.com/beorn7/perks v1.0.1 // indirect
+	github.com/blang/semver/v4 v4.0.0 // indirect
+	github.com/cenkalti/backoff/v4 v4.3.0 // indirect
+	github.com/cespare/xxhash/v2 v2.3.0 // indirect
+	github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
+	github.com/emicklei/go-restful/v3 v3.11.0 // indirect
+	github.com/evanphx/json-patch/v5 v5.9.0 // indirect
+	github.com/felixge/httpsnoop v1.0.4 // indirect
+	github.com/fsnotify/fsnotify v1.7.0 // indirect
+	github.com/fxamacker/cbor/v2 v2.7.0 // indirect
+	github.com/go-logr/logr v1.4.2 // indirect
+	github.com/go-logr/stdr v1.2.2 // indirect
+	github.com/go-logr/zapr v1.3.0 // indirect
+	github.com/go-openapi/jsonpointer v0.19.6 // indirect
+	github.com/go-openapi/jsonreference v0.20.2 // indirect
+	github.com/go-openapi/swag v0.22.4 // indirect
+	github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
+	github.com/gogo/protobuf v1.3.2 // indirect
+	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
+	github.com/golang/protobuf v1.5.4 // indirect
+	github.com/google/cel-go v0.20.1 // indirect
+	github.com/google/gnostic-models v0.6.8 // indirect
+	github.com/google/go-cmp v0.6.0 // indirect
+	github.com/google/gofuzz v1.2.0 // indirect
+	github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af // indirect
+	github.com/google/uuid v1.6.0 // indirect
+	github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect
+	github.com/imdario/mergo v0.3.6 // indirect
+	github.com/inconshreveable/mousetrap v1.1.0 // indirect
+	github.com/josharian/intern v1.0.0 // indirect
+	github.com/json-iterator/go v1.1.12 // indirect
+	github.com/mailru/easyjson v0.7.7 // indirect
+	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+	github.com/modern-go/reflect2 v1.0.2 // indirect
+	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
+	github.com/pkg/errors v0.9.1 // indirect
+	github.com/prometheus/client_golang v1.19.1 // indirect
+	github.com/prometheus/client_model v0.6.1 // indirect
+	github.com/prometheus/common v0.55.0 // indirect
+	github.com/prometheus/procfs v0.15.1 // indirect
+	github.com/spf13/cobra v1.8.1 // indirect
+	github.com/spf13/pflag v1.0.5 // indirect
+	github.com/stoewer/go-strcase v1.2.0 // indirect
+	github.com/x448/float16 v0.8.4 // indirect
+	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 // indirect
+	go.opentelemetry.io/otel v1.28.0 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0 // indirect
+	go.opentelemetry.io/otel/metric v1.28.0 // indirect
+	go.opentelemetry.io/otel/sdk v1.28.0 // indirect
+	go.opentelemetry.io/otel/trace v1.28.0 // indirect
+	go.opentelemetry.io/proto/otlp v1.3.1 // indirect
+	go.uber.org/multierr v1.11.0 // indirect
+	go.uber.org/zap v1.26.0 // indirect
+	golang.org/x/exp v0.0.0-20230515195305-f3d0a9c9a5cc // indirect
+	golang.org/x/mod v0.17.0 // indirect
+	golang.org/x/net v0.26.0 // indirect
+	golang.org/x/oauth2 v0.21.0 // indirect
+	golang.org/x/sync v0.7.0 // indirect
+	golang.org/x/sys v0.21.0 // indirect
+	golang.org/x/term v0.21.0 // indirect
+	golang.org/x/text v0.16.0 // indirect
+	golang.org/x/time v0.3.0 // indirect
+	golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect
+	gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
+	google.golang.org/genproto/googleapis/api v0.0.0-20240528184218-531527333157 // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 // indirect
+	google.golang.org/grpc v1.65.0 // indirect
+	google.golang.org/protobuf v1.34.2 // indirect
+	gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
+	gopkg.in/inf.v0 v0.9.1 // indirect
+	gopkg.in/yaml.v2 v2.4.0 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
+	k8s.io/apiextensions-apiserver v0.31.0 // indirect
+	k8s.io/apiserver v0.31.0 // indirect
+	k8s.io/component-base v0.31.0 // indirect
+	k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70 // indirect
+	k8s.io/klog/v2 v2.130.1 // indirect
+	k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
+	k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect
+	sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.30.3 // indirect
+	sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
+	sigs.k8s.io/yaml v1.4.0 // indirect
+)
diff --git a/go.sum b/go.sum
new file mode 100644
index 00000000..a24f482a
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,257 @@
+github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI=
+github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g=
+github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a h1:idn718Q4B6AGu/h5Sxe66HYVdqdGu2l9Iebqhi/AEoA=
+github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY=
+github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
+github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
+github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
+github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
+github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
+github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
+github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
+github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
+github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
+github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
+github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k=
+github.com/evanphx/json-patch v0.5.2/go.mod h1:ZWS5hhDbVDyob71nXKNL0+PWn6ToqBHMikGIFbs31qQ=
+github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg=
+github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ=
+github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
+github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
+github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
+github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
+github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
+github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
+github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
+github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
+github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
+github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
+github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ=
+github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg=
+github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE=
+github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs=
+github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE=
+github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k=
+github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
+github.com/go-openapi/swag v0.22.4 h1:QLMzNJnMGPRNDCbySlcj1x01tzU8/9LTTL9hZZZogBU=
+github.com/go-openapi/swag v0.22.4/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
+github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
+github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
+github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
+github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
+github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE=
+github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
+github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
+github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
+github.com/google/cel-go v0.20.1 h1:nDx9r8S3L4pE61eDdt8igGj8rf5kjYR3ILxWIpWNi84=
+github.com/google/cel-go v0.20.1/go.mod h1:kWcIzTsPX0zmQ+H3TirHstLLf9ep5QTsZBN9u4dOYLg=
+github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I=
+github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U=
+github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
+github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
+github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af h1:kmjWCqn2qkEml422C2Rrd27c3VGxi6a/6HNq8QmHRKM=
+github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k=
+github.com/imdario/mergo v0.3.6 h1:xTNEAn+kxVO7dTZGu0CegyqKZmoWFI0rF8UxjlB2d28=
+github.com/imdario/mergo v0.3.6/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
+github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
+github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
+github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
+github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
+github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
+github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
+github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
+github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
+github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
+github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
+github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
+github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
+github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
+github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA=
+github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To=
+github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk=
+github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0=
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
+github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE=
+github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho=
+github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
+github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
+github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc=
+github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8=
+github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
+github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
+github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
+github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
+github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
+github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
+github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
+github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
+github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
+github.com/stoewer/go-strcase v1.2.0 h1:Z2iHWqGXH00XYgqDmNgQbIBxf3wrNq0F3feEy0ainaU=
+github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
+github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
+github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
+github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
+github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
+github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
+github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 h1:4K4tsIXefpVJtvA/8srF4V4y0akAoPHkIslgAkjixJA=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0/go.mod h1:jjdQuTGVsXV4vSs+CJ2qYDeDPf9yIJV23qlIzBm73Vg=
+go.opentelemetry.io/otel v1.28.0 h1:/SqNcYk+idO0CxKEUOtKQClMK/MimZihKYMruSMViUo=
+go.opentelemetry.io/otel v1.28.0/go.mod h1:q68ijF8Fc8CnMHKyzqL6akLO46ePnjkgfIMIjUIX9z4=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0 h1:3Q/xZUyC1BBkualc9ROb4G8qkH90LXEIICcs5zv1OYY=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0/go.mod h1:s75jGIWA9OfCMzF0xr+ZgfrB5FEbbV7UuYo32ahUiFI=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0 h1:qFffATk0X+HD+f1Z8lswGiOQYKHRlzfmdJm0wEaVrFA=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0/go.mod h1:MOiCmryaYtc+V0Ei+Tx9o5S1ZjA7kzLucuVuyzBZloQ=
+go.opentelemetry.io/otel/metric v1.28.0 h1:f0HGvSl1KRAU1DLgLGFjrwVyismPlnuU6JD6bOeuA5Q=
+go.opentelemetry.io/otel/metric v1.28.0/go.mod h1:Fb1eVBFZmLVTMb6PPohq3TO9IIhUisDsbJoL/+uQW4s=
+go.opentelemetry.io/otel/sdk v1.28.0 h1:b9d7hIry8yZsgtbmM0DKyPWMMUMlK9NEKuIG4aBqWyE=
+go.opentelemetry.io/otel/sdk v1.28.0/go.mod h1:oYj7ClPUA7Iw3m+r7GeEjz0qckQRJK2B8zjcZEfu7Pg=
+go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+lkx9g=
+go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI=
+go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0=
+go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8=
+go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
+go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
+go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
+go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
+go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo=
+go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/exp v0.0.0-20230515195305-f3d0a9c9a5cc h1:mCRnTeVUjcrhlRmO0VK8a6k6Rrf6TF9htwo2pJVSjIU=
+golang.org/x/exp v0.0.0-20230515195305-f3d0a9c9a5cc/go.mod h1:V1LtkGg67GoY2N1AnLN78QLrzxkLyJw7RJb1gzOOz9w=
+golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA=
+golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
+golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ=
+golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE=
+golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs=
+golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
+golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
+golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA=
+golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
+golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
+golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4=
+golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
+golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg=
+golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw=
+gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY=
+google.golang.org/genproto/googleapis/api v0.0.0-20240528184218-531527333157 h1:7whR9kGa5LUwFtpLm2ArCEejtnxlGeLbAyjFY8sGNFw=
+google.golang.org/genproto/googleapis/api v0.0.0-20240528184218-531527333157/go.mod h1:99sLkeliLXfdj2J75X3Ho+rrVCaJze0uwN7zDDkjPVU=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 h1:BwIjyKYGsK9dMCBOorzRri8MQwmi7mT9rGHsCEinZkA=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094/go.mod h1:Ue6ibwXGpU+dqIcODieyLOcgj7z8+IcskoNIgZxtrFY=
+google.golang.org/grpc v1.65.0 h1:bs/cUb4lp1G5iImFFd3u5ixQzweKizoZJAwBNLR42lc=
+google.golang.org/grpc v1.65.0/go.mod h1:WgYC2ypjlB0EiQi6wdKixMqukr6lBc0Vo+oOgjrM5ZQ=
+google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg=
+google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
+gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4=
+gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
+gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
+gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
+gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+k8s.io/api v0.31.0 h1:b9LiSjR2ym/SzTOlfMHm1tr7/21aD7fSkqgD/CVJBCo=
+k8s.io/api v0.31.0/go.mod h1:0YiFF+JfFxMM6+1hQei8FY8M7s1Mth+z/q7eF1aJkTE=
+k8s.io/apiextensions-apiserver v0.31.0 h1:fZgCVhGwsclj3qCw1buVXCV6khjRzKC5eCFt24kyLSk=
+k8s.io/apiextensions-apiserver v0.31.0/go.mod h1:b9aMDEYaEe5sdK+1T0KU78ApR/5ZVp4i56VacZYEHxk=
+k8s.io/apimachinery v0.31.1 h1:mhcUBbj7KUjaVhyXILglcVjuS4nYXiwC+KKFBgIVy7U=
+k8s.io/apimachinery v0.31.1/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo=
+k8s.io/apiserver v0.31.0 h1:p+2dgJjy+bk+B1Csz+mc2wl5gHwvNkC9QJV+w55LVrY=
+k8s.io/apiserver v0.31.0/go.mod h1:KI9ox5Yu902iBnnyMmy7ajonhKnkeZYJhTZ/YI+WEMk=
+k8s.io/client-go v0.31.0 h1:QqEJzNjbN2Yv1H79SsS+SWnXkBgVu4Pj3CJQgbx0gI8=
+k8s.io/client-go v0.31.0/go.mod h1:Y9wvC76g4fLjmU0BA+rV+h2cncoadjvjjkkIGoTLcGU=
+k8s.io/code-generator v0.31.1 h1:GvkRZEP2g2UnB2QKT2Dgc/kYxIkDxCHENv2Q1itioVs=
+k8s.io/code-generator v0.31.1/go.mod h1:oL2ky46L48osNqqZAeOcWWy0S5BXj50vVdwOtTefqIs=
+k8s.io/component-base v0.31.0 h1:/KIzGM5EvPNQcYgwq5NwoQBaOlVFrghoVGr8lG6vNRs=
+k8s.io/component-base v0.31.0/go.mod h1:TYVuzI1QmN4L5ItVdMSXKvH7/DtvIuas5/mm8YT3rTo=
+k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70 h1:NGrVE502P0s0/1hudf8zjgwki1X/TByhmAoILTarmzo=
+k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70/go.mod h1:VH3AT8AaQOqiGjMF9p0/IM1Dj+82ZwjfxUP1IxaHE+8=
+k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
+k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
+k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag=
+k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98=
+k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A=
+k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
+sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.30.3 h1:2770sDpzrjjsAtVhSeUFseziht227YAWYHLGNM8QPwY=
+sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.30.3/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw=
+sigs.k8s.io/controller-runtime v0.19.0 h1:nWVM7aq+Il2ABxwiCizrVDSlmDcshi9llbaFbC0ji/Q=
+sigs.k8s.io/controller-runtime v0.19.0/go.mod h1:iRmWllt8IlaLjvTTDLhRBXIEtkCK6hwVBJJsYS9Ajf4=
+sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
+sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
+sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4=
+sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
+sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
+sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=
diff --git a/hack/boilerplate.go.txt b/hack/boilerplate.go.txt
new file mode 100644
index 00000000..ff72ff2a
--- /dev/null
+++ b/hack/boilerplate.go.txt
@@ -0,0 +1,15 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
\ No newline at end of file
diff --git a/hack/referencer.go b/hack/referencer.go
new file mode 100644
index 00000000..87fcc557
--- /dev/null
+++ b/hack/referencer.go
@@ -0,0 +1,21 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+	http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package internal
+
+import (
+	// Keep a reference to the code generators so they are not removed by `go mod tidy`
+	_ "k8s.io/code-generator"
+)
diff --git a/hack/update-codegen.sh b/hack/update-codegen.sh
new file mode 100755
index 00000000..6782c3c1
--- /dev/null
+++ b/hack/update-codegen.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+
+# Copyright 2023 The Kubernetes Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+SCRIPT_ROOT=$(dirname "${BASH_SOURCE[0]}")/..
+echo "$SCRIPT_ROOT script"
+CODEGEN_PKG=${CODEGEN_PKG:-$(cd "${SCRIPT_ROOT}"; ls -d -1 ./vendor/k8s.io/code-generator 2>/dev/null || echo ../code-generator)}
+echo $CODEGEN_PKG
+source "${CODEGEN_PKG}/kube_codegen.sh"
+THIS_PKG="inference.networking.x-k8s.io/llm-instance-gateway"
+
+
+kube::codegen::gen_helpers \
+    --boilerplate "${SCRIPT_ROOT}/hack/boilerplate.go.txt" \
+    "${SCRIPT_ROOT}"
+
+kube::codegen::gen_client \
+--with-watch \
+--with-applyconfig \
+--output-dir "${SCRIPT_ROOT}/client-go" \
+--output-pkg "${THIS_PKG}/client-go" \
+--boilerplate "${SCRIPT_ROOT}/hack/boilerplate.go.txt" \
+"${SCRIPT_ROOT}"
diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go
new file mode 100644
index 00000000..076a0573
--- /dev/null
+++ b/test/e2e/e2e_suite_test.go
@@ -0,0 +1,32 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2e
+
+import (
+	"fmt"
+	"testing"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+// Run e2e tests using the Ginkgo runner.
+func TestE2E(t *testing.T) {
+	RegisterFailHandler(Fail)
+	_, _ = fmt.Fprintf(GinkgoWriter, "Starting api suite\n")
+	RunSpecs(t, "e2e suite")
+}
diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
new file mode 100644
index 00000000..5af72529
--- /dev/null
+++ b/test/e2e/e2e_test.go
@@ -0,0 +1,122 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2e
+
+import (
+	"fmt"
+	"os/exec"
+	"time"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	"inference.networking.x-k8s.io/llm-instance-gateway/test/utils"
+)
+
+const namespace = "api-system"
+
+var _ = Describe("controller", Ordered, func() {
+	BeforeAll(func() {
+		By("installing prometheus operator")
+		Expect(utils.InstallPrometheusOperator()).To(Succeed())
+
+		By("installing the cert-manager")
+		Expect(utils.InstallCertManager()).To(Succeed())
+
+		By("creating manager namespace")
+		cmd := exec.Command("kubectl", "create", "ns", namespace)
+		_, _ = utils.Run(cmd)
+	})
+
+	AfterAll(func() {
+		By("uninstalling the Prometheus manager bundle")
+		utils.UninstallPrometheusOperator()
+
+		By("uninstalling the cert-manager bundle")
+		utils.UninstallCertManager()
+
+		By("removing manager namespace")
+		cmd := exec.Command("kubectl", "delete", "ns", namespace)
+		_, _ = utils.Run(cmd)
+	})
+
+	Context("Operator", func() {
+		It("should run successfully", func() {
+			var controllerPodName string
+			var err error
+
+			// projectimage stores the name of the image used in the example
+			var projectimage = "example.com/api:v0.0.1"
+
+			By("building the manager(Operator) image")
+			cmd := exec.Command("make", "docker-build", fmt.Sprintf("IMG=%s", projectimage))
+			_, err = utils.Run(cmd)
+			ExpectWithOffset(1, err).NotTo(HaveOccurred())
+
+			By("loading the the manager(Operator) image on Kind")
+			err = utils.LoadImageToKindClusterWithName(projectimage)
+			ExpectWithOffset(1, err).NotTo(HaveOccurred())
+
+			By("installing CRDs")
+			cmd = exec.Command("make", "install")
+			_, err = utils.Run(cmd)
+			ExpectWithOffset(1, err).NotTo(HaveOccurred())
+
+			By("deploying the controller-manager")
+			cmd = exec.Command("make", "deploy", fmt.Sprintf("IMG=%s", projectimage))
+			_, err = utils.Run(cmd)
+			ExpectWithOffset(1, err).NotTo(HaveOccurred())
+
+			By("validating that the controller-manager pod is running as expected")
+			verifyControllerUp := func() error {
+				// Get pod name
+
+				cmd = exec.Command("kubectl", "get",
+					"pods", "-l", "control-plane=controller-manager",
+					"-o", "go-template={{ range .items }}"+
+						"{{ if not .metadata.deletionTimestamp }}"+
+						"{{ .metadata.name }}"+
+						"{{ \"\\n\" }}{{ end }}{{ end }}",
+					"-n", namespace,
+				)
+
+				podOutput, err := utils.Run(cmd)
+				ExpectWithOffset(2, err).NotTo(HaveOccurred())
+				podNames := utils.GetNonEmptyLines(string(podOutput))
+				if len(podNames) != 1 {
+					return fmt.Errorf("expect 1 controller pods running, but got %d", len(podNames))
+				}
+				controllerPodName = podNames[0]
+				ExpectWithOffset(2, controllerPodName).Should(ContainSubstring("controller-manager"))
+
+				// Validate pod status
+				cmd = exec.Command("kubectl", "get",
+					"pods", controllerPodName, "-o", "jsonpath={.status.phase}",
+					"-n", namespace,
+				)
+				status, err := utils.Run(cmd)
+				ExpectWithOffset(2, err).NotTo(HaveOccurred())
+				if string(status) != "Running" {
+					return fmt.Errorf("controller pod in %s status", status)
+				}
+				return nil
+			}
+			EventuallyWithOffset(1, verifyControllerUp, time.Minute, time.Second).Should(Succeed())
+
+		})
+	})
+})
diff --git a/test/placeholder.md b/test/placeholder.md
deleted file mode 100644
index e69de29b..00000000
diff --git a/test/utils/utils.go b/test/utils/utils.go
new file mode 100644
index 00000000..6b96ab5d
--- /dev/null
+++ b/test/utils/utils.go
@@ -0,0 +1,140 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package utils
+
+import (
+	"fmt"
+	"os"
+	"os/exec"
+	"strings"
+
+	. "github.com/onsi/ginkgo/v2" //nolint:golint,revive
+)
+
+const (
+	prometheusOperatorVersion = "v0.72.0"
+	prometheusOperatorURL     = "https://github.com/prometheus-operator/prometheus-operator/" +
+		"releases/download/%s/bundle.yaml"
+
+	certmanagerVersion = "v1.14.4"
+	certmanagerURLTmpl = "https://github.com/jetstack/cert-manager/releases/download/%s/cert-manager.yaml"
+)
+
+func warnError(err error) {
+	_, _ = fmt.Fprintf(GinkgoWriter, "warning: %v\n", err)
+}
+
+// InstallPrometheusOperator installs the prometheus Operator to be used to export the enabled metrics.
+func InstallPrometheusOperator() error {
+	url := fmt.Sprintf(prometheusOperatorURL, prometheusOperatorVersion)
+	cmd := exec.Command("kubectl", "create", "-f", url)
+	_, err := Run(cmd)
+	return err
+}
+
+// Run executes the provided command within this context
+func Run(cmd *exec.Cmd) ([]byte, error) {
+	dir, _ := GetProjectDir()
+	cmd.Dir = dir
+
+	if err := os.Chdir(cmd.Dir); err != nil {
+		_, _ = fmt.Fprintf(GinkgoWriter, "chdir dir: %s\n", err)
+	}
+
+	cmd.Env = append(os.Environ(), "GO111MODULE=on")
+	command := strings.Join(cmd.Args, " ")
+	_, _ = fmt.Fprintf(GinkgoWriter, "running: %s\n", command)
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		return output, fmt.Errorf("%s failed with error: (%v) %s", command, err, string(output))
+	}
+
+	return output, nil
+}
+
+// UninstallPrometheusOperator uninstalls the prometheus
+func UninstallPrometheusOperator() {
+	url := fmt.Sprintf(prometheusOperatorURL, prometheusOperatorVersion)
+	cmd := exec.Command("kubectl", "delete", "-f", url)
+	if _, err := Run(cmd); err != nil {
+		warnError(err)
+	}
+}
+
+// UninstallCertManager uninstalls the cert manager
+func UninstallCertManager() {
+	url := fmt.Sprintf(certmanagerURLTmpl, certmanagerVersion)
+	cmd := exec.Command("kubectl", "delete", "-f", url)
+	if _, err := Run(cmd); err != nil {
+		warnError(err)
+	}
+}
+
+// InstallCertManager installs the cert manager bundle.
+func InstallCertManager() error {
+	url := fmt.Sprintf(certmanagerURLTmpl, certmanagerVersion)
+	cmd := exec.Command("kubectl", "apply", "-f", url)
+	if _, err := Run(cmd); err != nil {
+		return err
+	}
+	// Wait for cert-manager-webhook to be ready, which can take time if cert-manager
+	// was re-installed after uninstalling on a cluster.
+	cmd = exec.Command("kubectl", "wait", "deployment.apps/cert-manager-webhook",
+		"--for", "condition=Available",
+		"--namespace", "cert-manager",
+		"--timeout", "5m",
+	)
+
+	_, err := Run(cmd)
+	return err
+}
+
+// LoadImageToKindClusterWithName loads a local docker image to the kind cluster
+func LoadImageToKindClusterWithName(name string) error {
+	cluster := "kind"
+	if v, ok := os.LookupEnv("KIND_CLUSTER"); ok {
+		cluster = v
+	}
+	kindOptions := []string{"load", "docker-image", name, "--name", cluster}
+	cmd := exec.Command("kind", kindOptions...)
+	_, err := Run(cmd)
+	return err
+}
+
+// GetNonEmptyLines converts given command output string into individual objects
+// according to line breakers, and ignores the empty elements in it.
+func GetNonEmptyLines(output string) []string {
+	var res []string
+	elements := strings.Split(output, "\n")
+	for _, element := range elements {
+		if element != "" {
+			res = append(res, element)
+		}
+	}
+
+	return res
+}
+
+// GetProjectDir will return the directory where the project is
+func GetProjectDir() (string, error) {
+	wd, err := os.Getwd()
+	if err != nil {
+		return wd, err
+	}
+	wd = strings.Replace(wd, "/test/e2e", "", -1)
+	return wd, nil
+}