Commit 1e5b222

Big dawg api change
1 parent ca47aa2 commit 1e5b222

65 files changed: +1635 -1539 lines changed


Diff for: PROJECT

+2 -2

@@ -13,15 +13,15 @@ resources:
   namespaced: true
   domain: x-k8s.io
   group: inference
-  kind: LLMServerPool
+  kind: InferencePool
   path: sigs.k8s.io/llm-instance-gateway/api/v1alpha1
   version: v1alpha1
 - api:
     crdVersion: v1
     namespaced: true
   domain: x-k8s.io
   group: inference
-  kind: LLMService
+  kind: InferenceModel
   path: sigs.k8s.io/llm-instance-gateway/api/v1alpha1
   version: v1alpha1
 version: "3"

Diff for: api/v1alpha1/inferencemodel_types.go

+121
@@ -0,0 +1,121 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+import (
+    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
+
+// InferenceModelSpec represents a set of Models/Adapters that are multiplexed onto one
+// or more backend pools. This resource is managed by the "Inference Workload Owner"
+// persona. The Inference Workload Owner persona is: a team that trains, verifies, and
+// leverages a large language model from a model frontend, drives the lifecycle
+// and rollout of new versions of those models, and defines the specific
+// performance and latency goals for the model. These workloads are
+// expected to operate within an InferencePool sharing compute capacity with other
+// InferenceModels, defined by the Inference Platform Admin. We allow a user who
+// has multiple InferenceModels across multiple pools (with the same config) to
+// specify the configuration exactly once and deploy to many pools
+// simultaneously, enabling a simpler config and a single source of truth
+// for a given user. InferenceModel names are unique for a given InferencePool;
+// if the name is reused, an error will be shown on the status of the
+// InferenceModel that attempted to reuse it. The oldest InferenceModel, based on
+// creation timestamp, will be selected to remain valid. In the event of a race
+// condition, one will be selected at random.
+type InferenceModelSpec struct {
+    // The name of the model as users set it in the "model" parameter of their requests.
+    // The name should be unique among the workloads that reference the same backend pool.
+    // This is the parameter that will be used to match the request with. In the future, we may
+    // allow matching on other request parameters. The other approach to support matching
+    // on other request parameters is to use a different ModelName per HTTPFilter.
+    // Names can be reserved without implementing an actual model in the pool.
+    // This can be done by specifying a target model and setting the weight to zero;
+    // an error will be returned specifying that no valid target model is found.
+    ModelName string `json:"modelName,omitempty"`
+    // Optional
+    // Defines how important it is to serve the model compared to other models referencing the same pool.
+    Criticality *Criticality `json:"criticality,omitempty"`
+    // Optional.
+    // Allow multiple versions of a model for traffic splitting.
+    // If not specified, the target model name is defaulted to the modelName parameter.
+    // modelName is often in reference to a LoRA adapter.
+    TargetModels []TargetModel `json:"targetModels,omitempty"`
+    // Reference to the InferencePool that the model registers to. It must exist in the same namespace.
+    PoolRef string `json:"poolRef,omitempty"`
+}
+
+// Defines how important it is to serve the model compared to other models.
+type Criticality string
+
+const (
+    // Most important. Requests to this band will be shed last.
+    Critical Criticality = "Critical"
+    // More important than Sheddable, less important than Critical.
+    // Requests in this band will be shed before critical traffic.
+    Default Criticality = "Default"
+    // Least important. Requests to this band will be shed before all other bands.
+    Sheddable Criticality = "Sheddable"
+)
+
+// TargetModel represents a deployed model or a LoRA adapter. The
+// Name field is expected to match the name of the LoRA adapter
+// (or base model) as it is registered within the model server. Inference
+// Gateway assumes that the model exists on the model server; it is the
+// responsibility of the user to validate a correct match. Should a model fail
+// to exist at request time, the error is processed by the Instance Gateway
+// and then emitted on the appropriate InferenceModel object.
+type TargetModel struct {
+    // The name of the adapter as expected by the ModelServer.
+    Name string `json:"name,omitempty"`
+    // Weight is used to determine the percentage of traffic that should be
+    // sent to this target model when multiple versions of the model are specified.
+    Weight int `json:"weight,omitempty"`
+}
+
+// InferenceModelStatus defines the observed state of InferenceModel
+type InferenceModelStatus struct {
+    // Conditions track the state of the InferenceModel.
+    Conditions []metav1.Condition `json:"conditions,omitempty"`
+}
+
+// +kubebuilder:object:root=true
+// +kubebuilder:subresource:status
+// +genclient
+
+// InferenceModel is the Schema for the InferenceModels API
+type InferenceModel struct {
+    metav1.TypeMeta   `json:",inline"`
+    metav1.ObjectMeta `json:"metadata,omitempty"`
+
+    Spec   InferenceModelSpec   `json:"spec,omitempty"`
+    Status InferenceModelStatus `json:"status,omitempty"`
+}
+
+// +kubebuilder:object:root=true
+
+// InferenceModelList contains a list of InferenceModel
+type InferenceModelList struct {
+    metav1.TypeMeta `json:",inline"`
+    metav1.ListMeta `json:"metadata,omitempty"`
+    Items           []InferenceModel `json:"items"`
+}
+
+func init() {
+    SchemeBuilder.Register(&InferenceModel{}, &InferenceModelList{})
+}
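
To make the new API concrete, here is a minimal, hypothetical InferenceModel manifest sketched from the json tags above. The resource name, pool name, and adapter names are invented for illustration, and the apiVersion is inferred from the group and domain in PROJECT:

apiVersion: inference.x-k8s.io/v1alpha1
kind: InferenceModel
metadata:
  name: chat-assistant
spec:
  modelName: chat-assistant        # value clients send in the "model" request parameter
  criticality: Critical            # one of Critical | Default | Sheddable
  poolRef: my-pool                 # InferencePool in the same namespace (hypothetical name)
  targetModels:
    - name: chat-assistant-v1      # LoRA adapter as registered in the model server
      weight: 90
    - name: chat-assistant-v2      # a weight of 0 would only reserve the name
      weight: 10

With these weights, roughly 90% of requests matching modelName would be routed to chat-assistant-v1 and 10% to chat-assistant-v2; omitting targetModels entirely would default the target to the modelName itself.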

Diff for: api/v1alpha1/llmserverpool_types.go renamed to api/v1alpha1/inferencepool_types.go

+15 -15

@@ -22,12 +22,12 @@ import (
 
 // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
 
-// LLMServerPoolSpec defines the desired state of LLMServerPool
-type LLMServerPoolSpec struct {
+// InferencePoolSpec defines the desired state of InferencePool
+type InferencePoolSpec struct {
 
     // ModelServerSelector uses a map of label to watch model server pods
-    // that should be included in the LLMServerPool. ModelServers should not
-    // be with any other Service or LLMServerPool, that behavior is not supported
+    // that should be included in the InferencePool. ModelServers should not
+    // be with any other Service or InferencePool, that behavior is not supported
     // and will result in sub-optimal utilization.
     // Due to this selector being translated to a service a simple map is used instead
     // of: https://pkg.go.dev/k8s.io/apimachinery/pkg/apis/meta/v1#LabelSelector
@@ -40,35 +40,35 @@ type LLMServerPoolSpec struct {
     TargetPort int32 `json:"targetPort,omitempty"`
 }
 
-// LLMServerPoolStatus defines the observed state of LLMServerPool
-type LLMServerPoolStatus struct {
+// InferencePoolStatus defines the observed state of InferencePool
+type InferencePoolStatus struct {
 
-    // Conditions track the state of the LLMServerPool.
+    // Conditions track the state of the InferencePool.
     Conditions []metav1.Condition `json:"conditions,omitempty"`
 }
 
 // +kubebuilder:object:root=true
 // +kubebuilder:subresource:status
 // +genclient
 
-// LLMServerPool is the Schema for the llmserverpools API
-type LLMServerPool struct {
+// InferencePool is the Schema for the InferencePools API
+type InferencePool struct {
     metav1.TypeMeta   `json:",inline"`
     metav1.ObjectMeta `json:"metadata,omitempty"`
 
-    Spec   LLMServerPoolSpec   `json:"spec,omitempty"`
-    Status LLMServerPoolStatus `json:"status,omitempty"`
+    Spec   InferencePoolSpec   `json:"spec,omitempty"`
+    Status InferencePoolStatus `json:"status,omitempty"`
 }
 
 // +kubebuilder:object:root=true
 
-// LLMServerPoolList contains a list of LLMServerPool
-type LLMServerPoolList struct {
+// InferencePoolList contains a list of InferencePool
+type InferencePoolList struct {
     metav1.TypeMeta `json:",inline"`
     metav1.ListMeta `json:"metadata,omitempty"`
-    Items           []LLMServerPool `json:"items"`
+    Items           []InferencePool `json:"items"`
 }
 
 func init() {
-    SchemeBuilder.Register(&LLMServerPool{}, &LLMServerPoolList{})
+    SchemeBuilder.Register(&InferencePool{}, &InferencePoolList{})
 }
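
For completeness, a hypothetical InferencePool manifest under the renamed API might look as follows. The name, selector labels, and port value are made up, and the json key for ModelServerSelector does not appear in this hunk, so modelServerSelector is an assumption:

apiVersion: inference.x-k8s.io/v1alpha1
kind: InferencePool
metadata:
  name: my-pool
spec:
  modelServerSelector:    # assumed key; a plain label map rather than a metav1.LabelSelector
    app: model-server
  targetPort: 8000        # port the selected model server pods serve on (example value)

An InferenceModel's poolRef, as in the earlier sketch, would reference this pool by name within the same namespace.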

Diff for: api/v1alpha1/llmservice_types.go

-136
This file was deleted.
