Skip to content

Commit 84cd0dc

Browse files
committed
llmservice reconciler implementation
1 parent a55f870 commit 84cd0dc

File tree

11 files changed

+348
-120
lines changed

11 files changed

+348
-120
lines changed

api/v1alpha1/llmserverpool_types.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ type LLMServerPoolSpec struct {
3737
// TargetPort is the port number that the model servers within the pool expect
3838
// to receive traffic from.
3939
// This maps to the TargetPort in: https://pkg.go.dev/k8s.io/api/core/v1#ServicePort
40-
TargetPort int32
40+
TargetPort int32 `json:"targetPort,omitempty"`
4141
}
4242

4343
// LLMServerPoolStatus defines the observed state of LLMServerPool

client-go/applyconfiguration/api/v1alpha1/llmserverpoolspec.go

+9
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/gateway.inference.networking.x-k8s.io_llmserverpools.yaml

+13-44
Original file line numberDiff line numberDiff line change
@@ -40,55 +40,24 @@ spec:
4040
description: LLMServerPoolSpec defines the desired state of LLMServerPool
4141
properties:
4242
modelServerSelector:
43+
additionalProperties:
44+
type: string
4345
description: |-
44-
ModelServerSelector uses label selection to watch model server pods
46+
ModelServerSelector uses a map of labels to watch model server pods
4547
that should be included in the LLMServerPool. ModelServers should not
4648
be associated with any other Service or LLMServerPool; that behavior is not supported
4749
and will result in sub-optimal utilization.
48-
properties:
49-
matchExpressions:
50-
description: matchExpressions is a list of label selector requirements.
51-
The requirements are ANDed.
52-
items:
53-
description: |-
54-
A label selector requirement is a selector that contains values, a key, and an operator that
55-
relates the key and values.
56-
properties:
57-
key:
58-
description: key is the label key that the selector applies
59-
to.
60-
type: string
61-
operator:
62-
description: |-
63-
operator represents a key's relationship to a set of values.
64-
Valid operators are In, NotIn, Exists and DoesNotExist.
65-
type: string
66-
values:
67-
description: |-
68-
values is an array of string values. If the operator is In or NotIn,
69-
the values array must be non-empty. If the operator is Exists or DoesNotExist,
70-
the values array must be empty. This array is replaced during a strategic
71-
merge patch.
72-
items:
73-
type: string
74-
type: array
75-
x-kubernetes-list-type: atomic
76-
required:
77-
- key
78-
- operator
79-
type: object
80-
type: array
81-
x-kubernetes-list-type: atomic
82-
matchLabels:
83-
additionalProperties:
84-
type: string
85-
description: |-
86-
matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
87-
map is equivalent to an element of matchExpressions, whose key field is "key", the
88-
operator is "In", and the values array contains only "value". The requirements are ANDed.
89-
type: object
50+
Due to this selector being translated to a service a simple map is used instead
51+
of: https://pkg.go.dev/k8s.io/apimachinery/pkg/apis/meta/v1#LabelSelector
52+
This avoids foot-gun errors that could occur if https://pkg.go.dev/k8s.io/apimachinery/pkg/apis/meta/v1#LabelSelectorAsMap were used.
9053
type: object
91-
x-kubernetes-map-type: atomic
54+
targetPort:
55+
description: |-
56+
TargetPort is the port number that the model servers within the pool expect
57+
to receive traffic from.
58+
This maps to the TargetPort in: https://pkg.go.dev/k8s.io/api/core/v1#ServicePort
59+
format: int32
60+
type: integer
9261
type: object
9362
status:
9463
description: LLMServerPoolStatus defines the observed state of LLMServerPool

examples/poc/README.md

-71
This file was deleted.
-911 KB
Binary file not shown.
+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
apiVersion: inference.networking.x-k8s.io/v1alpha1
2+
kind: LLMService
3+
metadata:
4+
labels:
5+
app.kubernetes.io/name: api
6+
app.kubernetes.io/managed-by: kustomize
7+
name: llmservice-sample
8+
spec:
9+
models:
10+
- name: sql-code-assist
11+
- name: npc-bot
12+
objective:
13+
desiredAveragePerOutputTokenLatencyAtP95OverMultipleRequests: 50
14+
targetModels:
15+
- name: npc-bot-v1
16+
weight: 50
17+
- name: npc-bot-v2
18+
weight: 50
19+
poolRef:
20+
- kind: LLMServerPool
21+
name: test-pool
22+
- name: gemini-pool
23+
kind: LLMServerPool

pkg/ext-proc/backend/datastore.go

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
// The datastore is a local cache of relevant data for the given LLMServerPool (currently all pulled from k8s-api)
1111
type K8sDatastore struct {
1212
// LLMServerPool is the pool this datastore caches data for.
LLMServerPool *v1alpha1.LLMServerPool
13+
// LLMServices caches the LLMService objects that reference this pool.
// The LLMService reconciler stores entries keyed by the service's name,
// with the *v1alpha1.LLMService as the value.
LLMServices *sync.Map
1314
// Pods caches pod data for the pool.
// NOTE(review): key and value types are not visible in this hunk; confirm against the pod-handling code.
Pods *sync.Map
1415
}
1516

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
package backend
2+
3+
import (
4+
"context"
5+
"strings"
6+
7+
"inference.networking.x-k8s.io/llm-instance-gateway/api/v1alpha1"
8+
"k8s.io/apimachinery/pkg/runtime"
9+
"k8s.io/client-go/tools/record"
10+
"k8s.io/klog/v2"
11+
ctrl "sigs.k8s.io/controller-runtime"
12+
"sigs.k8s.io/controller-runtime/pkg/client"
13+
)
14+
15+
type LLMServiceReconciler struct {
16+
client.Client
17+
Scheme *runtime.Scheme
18+
Record record.EventRecorder
19+
Datastore *K8sDatastore
20+
ServerPoolName string
21+
Namespace string
22+
}
23+
24+
func (c *LLMServiceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
25+
if req.Namespace != c.Namespace {
26+
return ctrl.Result{}, nil
27+
}
28+
klog.V(1).Info("reconciling LLMService", req.NamespacedName)
29+
30+
service := &v1alpha1.LLMService{}
31+
if err := c.Get(ctx, req.NamespacedName, service); err != nil {
32+
klog.Error(err, "unable to get LLMServerPool")
33+
return ctrl.Result{}, err
34+
}
35+
36+
c.updateDatastore(service)
37+
return ctrl.Result{}, nil
38+
}
39+
40+
func (c *LLMServiceReconciler) SetupWithManager(mgr ctrl.Manager) error {
41+
return ctrl.NewControllerManagedBy(mgr).
42+
For(&v1alpha1.LLMService{}).
43+
Complete(c)
44+
}
45+
46+
func (c *LLMServiceReconciler) updateDatastore(service *v1alpha1.LLMService) {
47+
for _, ref := range service.Spec.PoolRef {
48+
if strings.Contains(strings.ToLower(ref.Kind), strings.ToLower("LLMServerPool")) && ref.Name == c.ServerPoolName {
49+
klog.V(2).Infof("Adding/Updating service: %v", service.Name)
50+
c.Datastore.LLMServices.Store(service.Name, service)
51+
return
52+
}
53+
}
54+
klog.V(2).Infof("Removing/Not adding service: %v", service.Name)
55+
// If we get here. The service is not relevant to this pool, remove.
56+
c.Datastore.LLMServices.Delete(service.Name)
57+
}

0 commit comments

Comments
 (0)