kubernetes-sigs · k8s-ci-robot · Mar 19, 2025 · Feb 27, 2025 · Feb 28, 2025 · Mar 18, 2025
diff --git a/config/charts/inferencepool/.helmignore b/config/charts/inferencepool/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/config/charts/inferencepool/Chart.yaml b/config/charts/inferencepool/Chart.yaml
@@ -0,0 +1,9 @@
+apiVersion: v2
+# Helm chart names are lowercase by convention and should match the chart
+# directory (config/charts/inferencepool).
+name: inferencepool
+description: A Helm chart for InferencePool
+
+type: application
+
+# Version of the chart itself.
+version: 0.1.0
+
+# Version of the application deployed by the chart; the labels helper renders
+# it into the app.kubernetes.io/version label.
+appVersion: "0.2.0"
diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md
@@ -0,0 +1,61 @@
+# Gateway Api Inference Extension
-# Gateway Api Inference Extension
+# InferencePool 
-# Gateway Api Inference Extension
+# InferencePool 
+
+A chart to deploy the inference extension and a InferencePool managed by the extension.
-A chart to deploy the inference extension and a InferencePool managed by the extension.
+A chart to deploy an InferencePool and a corresponding EndpointPicker (epp) deployment.
-A chart to deploy the inference extension and a InferencePool managed by the extension.
+A chart to deploy an InferencePool and a corresponding EndpointPicker (epp) deployment.
+
+## Install
+
+Suppose now a vllm service with label `app: vllm-llama2-7b` and served on port `8000` is deployed in `default` namespace in the cluster.
+
+To deploy the inference extension, you can run the following command:
+
+```txt
+$ helm install my-release . -n default \
+    --set inferencePool.targetPortNumber=8000 \
+    --set inferencePool.selector.app=vllm-llama2-7b
+```
-Suppose now a vllm service with label `app: vllm-llama2-7b` and served on port `8000` is deployed in `default` namespace in the cluster.
-
-To deploy the inference extension, you can run the following command:
-
-```txt
-$ helm install my-release . -n default \
-    --set inferencePool.targetPortNumber=8000 \
-    --set inferencePool.selector.app=vllm-llama2-7b
-```
+To install an InferencePool named `pool-1` that selects endpoints with the label `app: vllm-llama2-7b` listening on port `8000`, run the following command:
+
+```txt
+$ helm install my-release ./config/charts/inferencepool \
+    --set inferencePool.name=pool-1 \
+    --set inferencePool.selector.app=vllm-llama2-7b \
+    --set inferencePool.targetPortNumber=8000
+```
-Suppose now a vllm service with label `app: vllm-llama2-7b` and served on port `8000` is deployed in `default` namespace in the cluster.
-
-To deploy the inference extension, you can run the following command:
-
-```txt
-$ helm install my-release . -n default \
-    --set inferencePool.targetPortNumber=8000 \
-    --set inferencePool.selector.app=vllm-llama2-7b
-```
+To install an InferencePool named `pool-1` that selects endpoints with the label `app: vllm-llama2-7b` listening on port `8000`, run the following command:
+
+```txt
+$ helm install my-release ./config/charts/inferencepool \
+    --set inferencePool.name=pool-1 \
+    --set inferencePool.selector.app=vllm-llama2-7b \
+    --set inferencePool.targetPortNumber=8000
+```
+
+Or you can change the `values.yaml` to:
+
+```yaml
+inferencePool:
+  name: pool-1
+  targetPortNumber: 8000
+  selector:
+    app: vllm-llama2-7b
+```
+
+where `inferencePool.targetPortNumber` is the port that the vllm backends are served on and `inferencePool.selector` is the label selector that matches the vllm backends. Then run:
+
+```txt
+$ helm install my-release .
+```
+
+## Uninstall
+
+Run the following command to uninstall the chart:
+
+```txt
+$ helm uninstall my-release
+```
+
+## Configuration
+
+The following table lists the configurable parameters of the chart.
+
+| **Parameter Name**                          | **Description**                                                                                                   |
+|---------------------------------------------|-------------------------------------------------------------------------------------------------------------------|
+| `inferenceExtension.replicas`               | Number of replicas for the inference extension service. Defaults to `1`.                                           |
+| `inferenceExtension.image.name`             | Name of the container image used for the inference extension.                                                    |
+| `inferenceExtension.image.hub`              | Registry URL where the inference extension image is hosted.                                                     |
+| `inferenceExtension.image.tag`              | Image tag of the inference extension.                                                                             |
+| `inferenceExtension.image.pullPolicy`       | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`. |
+| `inferenceExtension.extProcPort`            | Port where the inference extension service is served for external processing. Defaults to `9002`.                  |
+| `inferencePool.name`                        | Name for the InferencePool, and inference extension will be named as `${inferencePool.name}-epp`.                |
+| `inferencePool.targetPortNumber`            | Target port number for the vllm backends, will be used to scrape metrics by the inference extension.             |
+| `inferencePool.selector`                     | Label selector to match vllm backends managed by the inference pool.                                             |
+
+## Notes
+
+This chart will only deploy the inference extension and InferencePool, before install the chart, please make sure that the inference extension CRDs have already been installed in the cluster. And You need to apply traffic policies to route traffic to the inference extension from the gateway after the inference extension is deployed.
+
+For more details, please refer to the [website](https://gateway-api-inference-extension.sigs.k8s.io/guides/).
-This chart will only deploy the inference extension and InferencePool, before install the chart, please make sure that the inference extension CRDs have already been installed in the cluster. And You need to apply traffic policies to route traffic to the inference extension from the gateway after the inference extension is deployed.
-
-For more details, please refer to the [website](https://gateway-api-inference-extension.sigs.k8s.io/guides/).
+This chart only deploys an InferencePool and its corresponding EndpointPicker extension. Before installing the chart, make sure that the inference extension CRDs are installed in the cluster. For more details, refer to the [getting started guide](https://gateway-api-inference-extension.sigs.k8s.io/guides/).
-This chart will only deploy the inference extension and InferencePool, before install the chart, please make sure that the inference extension CRDs have already been installed in the cluster. And You need to apply traffic policies to route traffic to the inference extension from the gateway after the inference extension is deployed.
-
-For more details, please refer to the [website](https://gateway-api-inference-extension.sigs.k8s.io/guides/).
+This chart only deploys an InferencePool and its corresponding EndpointPicker extension. Before installing the chart, make sure that the inference extension CRDs are installed in the cluster. For more details, refer to the [getting started guide](https://gateway-api-inference-extension.sigs.k8s.io/guides/).
diff --git a/config/charts/inferencepool/templates/NOTES.txt b/config/charts/inferencepool/templates/NOTES.txt
@@ -0,0 +1 @@
+Gateway api inference extension deployed.
-Gateway api inference extension deployed.
+InferencePool deployed.
-Gateway api inference extension deployed.
+InferencePool deployed.
diff --git a/config/charts/inferencepool/templates/_helpers.tpl b/config/charts/inferencepool/templates/_helpers.tpl
@@ -0,0 +1,24 @@
+{{/*
+Common labels: the extension name and, when the chart declares an appVersion,
+the application version (always rendered as a quoted string).
+*/}}
+{{- define "gateway-api-inference-extension.labels" -}}
+app.kubernetes.io/name: {{ include "gateway-api-inference-extension.name" . }}
+{{- with .Chart.AppVersion }}
+app.kubernetes.io/version: {{ quote . }}
+{{- end }}
+{{- end }}
+
+{{/*
+Inference extension name: the pool name (or "default-pool" when unset),
+lower-cased, trimmed and cut to 40 characters, followed by an "-epp" suffix.
+*/}}
+{{- define "gateway-api-inference-extension.name" -}}
+{{- $poolName := default "default-pool" .Values.inferencePool.name -}}
+{{- printf "%s-epp" ($poolName | lower | trim | trunc 40) -}}
+{{- end -}}
+
+{{/*
+Selector labels: a single "app" label whose value is the inference extension
+name produced by the name helper.
+*/}}
+{{- define "gateway-api-inference-extension.selectorLabels" -}}
+{{- $eppName := include "gateway-api-inference-extension.name" . -}}
+app: {{ $eppName }}
+{{- end -}}
diff --git a/config/charts/inferencepool/templates/inferencepool.yaml b/config/charts/inferencepool/templates/inferencepool.yaml
@@ -0,0 +1,89 @@
+# InferencePool for the pods matched by the configured selector. extensionRef
+# uses the same helper-generated name as the endpoint picker Service.
+apiVersion: inference.networking.x-k8s.io/v1alpha2
+kind: InferencePool
+metadata:
+  # Quoted so numeric- or boolean-looking names still render as YAML strings.
+  name: {{ .Values.inferencePool.name | quote }}
+  namespace: {{ .Release.Namespace | quote }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+spec:
+  targetPortNumber: {{ .Values.inferencePool.targetPortNumber }}
+  selector:
+    {{- range $key, $value := .Values.inferencePool.selector }}
+    {{ $key | quote }}: {{ $value | quote }}
+    {{- end }}
+  extensionRef:
+    name: {{ include "gateway-api-inference-extension.name" . }}
+---
+# Endpoint picker (epp) Deployment. Its name, pod labels and service account
+# all come from the chart's name helper.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "gateway-api-inference-extension.name" . }}
+  namespace: {{ .Release.Namespace | quote }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+spec:
+  # The default function treats 0 as empty, so an explicit "replicas: 0" would
+  # silently become 1; fall back to 1 only when the value is really missing.
+  {{- $replicas := .Values.inferenceExtension.replicas }}
+  replicas: {{ ternary 1 $replicas (kindIs "invalid" $replicas) }}
+  selector:
+    matchLabels:
+      {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      labels:
+        {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}
+    spec:
+      serviceAccountName: {{ include "gateway-api-inference-extension.name" . }}
+      containers:
+      - name: epp
+        image: "{{ .Values.inferenceExtension.image.hub }}/{{ .Values.inferenceExtension.image.name }}:{{ .Values.inferenceExtension.image.tag }}"
+        imagePullPolicy: {{ .Values.inferenceExtension.image.pullPolicy | default "Always" }}
+        args:
+        # Container args must be strings; the templated values are quoted so
+        # that numeric- or boolean-looking names are not re-typed by YAML.
+        - -poolName
+        - {{ .Values.inferencePool.name | quote }}
+        - -poolNamespace
+        - {{ .Release.Namespace | quote }}
+        - -v
+        - "3"
+        # The three port flags below match the containerPort entries.
+        - -grpcPort
+        - "9002"
+        - -grpcHealthPort
+        - "9003"
+        - -metricsPort
+        - "9090"
+        ports:
+        - name: grpc
+          containerPort: 9002
+        - name: grpc-health
+          containerPort: 9003
+        - name: metrics
+          containerPort: 9090
+        # Both probes use the gRPC health port (9003).
+        livenessProbe:
+          grpc:
+            port: 9003
+            service: inference-extension
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        readinessProbe:
+          grpc:
+            port: 9003
+            service: inference-extension
+          initialDelaySeconds: 5
+          periodSeconds: 10
+---
+# ClusterIP Service in front of the endpoint picker pods.
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "gateway-api-inference-extension.name" . }}
+  namespace: {{ .Release.Namespace | quote }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+spec:
+  selector:
+    {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 4 }}
+  ports:
+    # targetPort refers to the container's named ports. Without it targetPort
+    # defaults to the Service port, so overriding extProcPort or metricsPort
+    # would forward to a pod port the container does not listen on.
+    - name: grpc-ext-proc
+      protocol: TCP
+      port: {{ .Values.inferenceExtension.extProcPort | default 9002 }}
+      targetPort: grpc
+    - name: http-metrics
+      protocol: TCP
+      port: {{ .Values.inferenceExtension.metricsPort | default 9090 }}
+      targetPort: metrics
+  type: ClusterIP
diff --git a/config/charts/inferencepool/templates/rbac.yaml b/config/charts/inferencepool/templates/rbac.yaml
@@ -0,0 +1,45 @@
+# Cluster-wide permissions for the endpoint picker's service account.
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: {{ include "gateway-api-inference-extension.name" . }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+rules:
+# Each resource must be its own list entry; a single comma-joined string names
+# a resource that does not exist and grants nothing.
+- apiGroups: ["inference.networking.x-k8s.io"]
+  resources: ["inferencemodels", "inferencepools"]
+  verbs: ["get", "watch", "list"]
+- apiGroups: [""]
+  resources: ["pods"]
+  verbs: ["get", "watch", "list"]
+# NOTE(review): the two create rules below look like delegated authn/authz
+# checks (TokenReview / SubjectAccessReview); confirm which endpoint needs them.
+- apiGroups:
+  - authentication.k8s.io
+  resources:
+  - tokenreviews
+  verbs:
+  - create
+- apiGroups:
+  - authorization.k8s.io
+  resources:
+  - subjectaccessreviews
+  verbs:
+  - create
+---
+# Binds the ClusterRole of the same name to the endpoint picker's service
+# account in the release namespace.
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: {{ include "gateway-api-inference-extension.name" . }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+subjects:
+- kind: ServiceAccount
+  name: {{ include "gateway-api-inference-extension.name" . }}
+  namespace: {{ .Release.Namespace | quote }}
+roleRef:
+  # Stated explicitly rather than relying on API-server defaulting.
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: {{ include "gateway-api-inference-extension.name" . }}
+---
+# Service account named by the chart's name helper, created in the release
+# namespace.
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+  name: {{ include "gateway-api-inference-extension.name" . }}
+  namespace: {{ .Release.Namespace }}
diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml
@@ -0,0 +1,14 @@
+# Settings for the endpoint picker (epp) Deployment and Service.
+inferenceExtension:
+  # Number of endpoint picker replicas.
+  replicas: 1
+  image:
+    # The container image is rendered as <hub>/<name>:<tag>.
+    name: epp
+    hub: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension
+    tag: main
+    pullPolicy: Always
+  # Service port for the ext-proc gRPC endpoint.
+  extProcPort: 9002
+  # Service port for the metrics endpoint. The Service template reads this
+  # value, so it is declared here with the template's fallback of 9090.
+  metricsPort: 9090
+
+# Settings for the InferencePool resource.
+inferencePool:
+  # Pool name; the endpoint picker resources are named <name>-epp.
+  name: pool-1
+  # Rendered into the InferencePool's spec.targetPortNumber.
+  targetPortNumber: 8000
+  # Label selector rendered into the InferencePool's spec.selector.
+  selector:
+    app: vllm-llama2-7b