Commit a0cdf8f

Updates APIs Based on Kubernetes API Conventions (#143)

* Updates APIs Based on Kubernetes API Conventions

  Signed-off-by: Daneyon Hansen <[email protected]>

* Reverts targetPort to targetPortNumber

  Signed-off-by: Daneyon Hansen <[email protected]>

---------

Signed-off-by: Daneyon Hansen <[email protected]>

1 parent 34862ab · commit a0cdf8f

7 files changed (+136 -115 lines)

api/v1alpha1/inferencemodel_types.go (+61 -45)
@@ -20,12 +20,32 @@ import (
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
-// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
+// InferenceModel is the Schema for the InferenceModels API.
+//
+// +kubebuilder:object:root=true
+// +kubebuilder:subresource:status
+// +genclient
+type InferenceModel struct {
+    metav1.TypeMeta   `json:",inline"`
+    metav1.ObjectMeta `json:"metadata,omitempty"`
+
+    Spec   InferenceModelSpec   `json:"spec,omitempty"`
+    Status InferenceModelStatus `json:"status,omitempty"`
+}
+
+// InferenceModelList contains a list of InferenceModel.
+//
+// +kubebuilder:object:root=true
+type InferenceModelList struct {
+    metav1.TypeMeta `json:",inline"`
+    metav1.ListMeta `json:"metadata,omitempty"`
+    Items           []InferenceModel `json:"items"`
+}
 
-// InferenceModelSpec represents a specific model use case. This resource is
+// InferenceModelSpec represents the desired state of a specific model use case. This resource is
 // managed by the "Inference Workload Owner" persona.
 //
-// The Inference Workload Owner persona is: a team that trains, verifies, and
+// The Inference Workload Owner persona is someone that trains, verifies, and
 // leverages a large language model from a model frontend, drives the lifecycle
 // and rollout of new versions of those models, and defines the specific
 // performance and latency goals for the model. These workloads are
@@ -38,7 +58,7 @@ import (
 // creation timestamp, will be selected to remain valid. In the event of a race
 // condition, one will be selected at random.
 type InferenceModelSpec struct {
-    // The name of the model as the users set in the "model" parameter in the requests.
+    // ModelName is the name of the model as the users set in the "model" parameter in the requests.
     // The name should be unique among the workloads that reference the same backend pool.
     // This is the parameter that will be used to match the request with. In the future, we may
     // allow to match on other request parameters. The other approach to support matching
@@ -47,22 +67,25 @@ type InferenceModelSpec struct {
     // This can be done by specifying a target model and setting the weight to zero,
     // an error will be returned specifying that no valid target model is found.
     //
-    // +optional
     // +kubebuilder:validation:MaxLength=253
-    ModelName string `json:"modelName,omitempty"`
-    // Defines how important it is to serve the model compared to other models referencing the same pool.
+    // +kubebuilder:validation:Required
+    ModelName string `json:"modelName"`
+
+    // Criticality defines how important it is to serve the model compared to other models referencing the same pool.
     //
     // +optional
     // +kubebuilder:default="Default"
     Criticality *Criticality `json:"criticality,omitempty"`
-    // Allow multiple versions of a model for traffic splitting.
+
+    // TargetModels allow multiple versions of a model for traffic splitting.
     // If not specified, the target model name is defaulted to the modelName parameter.
     // modelName is often in reference to a LoRA adapter.
     //
     // +optional
     // +kubebuilder:validation:MaxItems=10
     TargetModels []TargetModel `json:"targetModels,omitempty"`
-    // Reference to the inference pool, the pool must exist in the same namespace.
+
+    // PoolRef is a reference to the inference pool, the pool must exist in the same namespace.
     //
     // +kubebuilder:validation:Required
     PoolRef PoolObjectReference `json:"poolRef"`
@@ -93,39 +116,54 @@ type PoolObjectReference struct {
     // +kubebuilder:validation:MinLength=1
     // +kubebuilder:validation:MaxLength=253
     // +kubebuilder:validation:Required
-    Name string `json:"name,omitempty"`
+    Name string `json:"name"`
 }
 
-// Defines how important it is to serve the model compared to other models.
+// Criticality defines how important it is to serve the model compared to other models.
 // +kubebuilder:validation:Enum=Critical;Default;Sheddable
 type Criticality string
 
 const (
-    // Most important. Requests to this band will be shed last.
+    // Critical defines the highest level of criticality. Requests to this band will be shed last.
     Critical Criticality = "Critical"
-    // More important than Sheddable, less important than Critical.
-    // Requests in this band will be shed before critical traffic.
-    // +kubebuilder:default=Default
+
+    // Default defines the default criticality level and is more important than Sheddable but less
+    // important than Critical. Requests in this band will be shed before critical traffic.
     Default Criticality = "Default"
-    // Least important. Requests to this band will be shed before all other bands.
+
+    // Sheddable defines the lowest level of criticality. Requests to this band will be shed before
+    // all other bands.
    Sheddable Criticality = "Sheddable"
 )
 
 // TargetModel represents a deployed model or a LoRA adapter. The
 // Name field is expected to match the name of the LoRA adapter
 // (or base model) as it is registered within the model server. Inference
-// Gateway assumes that the model exists on the model server and is the
+// Gateway assumes that the model exists on the model server and it's the
 // responsibility of the user to validate a correct match. Should a model fail
-// to exist at request time, the error is processed by the Instance Gateway,
-// and then emitted on the appropriate InferenceModel object.
+// to exist at request time, the error is processed by the Inference Gateway
+// and emitted on the appropriate InferenceModel object.
 type TargetModel struct {
-    // The name of the adapter as expected by the ModelServer.
+    // Name is the name of the adapter as expected by the ModelServer.
     //
-    // +optional
     // +kubebuilder:validation:MaxLength=253
-    Name string `json:"name,omitempty"`
+    // +kubebuilder:validation:Required
+    Name string `json:"name"`
+
     // Weight is used to determine the proportion of traffic that should be
-    // sent to this target model when multiple versions of the model are specified.
+    // sent to this model when multiple target models are specified.
+    //
+    // Weight defines the proportion of requests forwarded to the specified
+    // model. This is computed as weight/(sum of all weights in this
+    // TargetModels list). For non-zero values, there may be some epsilon from
+    // the exact proportion defined here depending on the precision an
+    // implementation supports. Weight is not a percentage and the sum of
+    // weights does not need to equal 100.
+    //
+    // If only one model is specified and it has a weight greater than 0, 100%
+    // of the traffic is forwarded to that model. If weight is set to 0, no
+    // traffic should be forwarded for this model. If unspecified, weight
+    // defaults to 1.
     //
     // +optional
     // +kubebuilder:default=1
@@ -140,28 +178,6 @@ type InferenceModelStatus struct {
     Conditions []metav1.Condition `json:"conditions,omitempty"`
 }
 
-// +kubebuilder:object:root=true
-// +kubebuilder:subresource:status
-// +genclient
-
-// InferenceModel is the Schema for the InferenceModels API
-type InferenceModel struct {
-    metav1.TypeMeta   `json:",inline"`
-    metav1.ObjectMeta `json:"metadata,omitempty"`
-
-    Spec   InferenceModelSpec   `json:"spec,omitempty"`
-    Status InferenceModelStatus `json:"status,omitempty"`
-}
-
-// +kubebuilder:object:root=true
-
-// InferenceModelList contains a list of InferenceModel
-type InferenceModelList struct {
-    metav1.TypeMeta `json:",inline"`
-    metav1.ListMeta `json:"metadata,omitempty"`
-    Items           []InferenceModel `json:"items"`
-}
-
 func init() {
     SchemeBuilder.Register(&InferenceModel{}, &InferenceModelList{})
 }
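
As an aside, the weight/(sum of all weights) rule documented in the new TargetModel comments can be sketched in a few lines of Go. This is an illustrative example, not code from this commit; the proportions helper and the model names are hypothetical.

package main

import "fmt"

type TargetModel struct {
    Name   string
    Weight int32 // defaults to 1 when unspecified
}

// proportions computes the share of traffic each target model should
// receive under the documented weight/(sum of weights) rule.
func proportions(models []TargetModel) map[string]float64 {
    var sum int32
    for _, m := range models {
        sum += m.Weight
    }
    out := make(map[string]float64, len(models))
    for _, m := range models {
        if sum == 0 {
            out[m.Name] = 0 // all weights zero: no traffic forwarded
            continue
        }
        out[m.Name] = float64(m.Weight) / float64(sum)
    }
    return out
}

func main() {
    // Weights are not percentages; only their ratio matters.
    split := proportions([]TargetModel{
        {Name: "llama-base", Weight: 3},
        {Name: "llama-lora-canary", Weight: 1},
    })
    fmt.Println(split) // map[llama-base:0.75 llama-lora-canary:0.25]
}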

api/v1alpha1/inferencepool_types.go (+27 -32)
@@ -20,29 +20,47 @@ import (
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
-// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
+// InferencePool is the Schema for the InferencePools API.
+//
+// +kubebuilder:object:root=true
+// +kubebuilder:subresource:status
+// +genclient
+type InferencePool struct {
+    metav1.TypeMeta   `json:",inline"`
+    metav1.ObjectMeta `json:"metadata,omitempty"`
+
+    Spec   InferencePoolSpec   `json:"spec,omitempty"`
+    Status InferencePoolStatus `json:"status,omitempty"`
+}
+
+// InferencePoolList contains a list of InferencePool.
+//
+// +kubebuilder:object:root=true
+type InferencePoolList struct {
+    metav1.TypeMeta `json:",inline"`
+    metav1.ListMeta `json:"metadata,omitempty"`
+    Items           []InferencePool `json:"items"`
+}
 
 // InferencePoolSpec defines the desired state of InferencePool
 type InferencePoolSpec struct {
-
-    // Selector uses a map of label to watch model server pods
+    // Selector defines a map of label to watch model server pods
     // that should be included in the InferencePool. ModelServers should not
     // be with any other Service or InferencePool, that behavior is not supported
     // and will result in sub-optimal utilization.
     // In some cases, implementations may translate this to a Service selector, so this matches the simple
     // map used for Service selectors instead of the full Kubernetes LabelSelector type.
     //
     // +kubebuilder:validation:Required
-    Selector map[LabelKey]LabelValue `json:"selector,omitempty"`
+    Selector map[LabelKey]LabelValue `json:"selector"`
 
-    // TargetPortNumber is the port number that the model servers within the pool expect
-    // to receive traffic from.
-    // This maps to the TargetPort in: https://pkg.go.dev/k8s.io/api/core/v1#ServicePort
+    // TargetPortNumber defines the port number to access the selected model servers.
+    // The number must be in the range 1 to 65535.
     //
-    // +kubebuilder:validation:Minimum=0
+    // +kubebuilder:validation:Minimum=1
     // +kubebuilder:validation:Maximum=65535
     // +kubebuilder:validation:Required
-    TargetPortNumber int32 `json:"targetPortNumber,omitempty"`
+    TargetPortNumber int32 `json:"targetPortNumber"`
 }
 
 // Originally copied from: https://github.com/kubernetes-sigs/gateway-api/blob/99a3934c6bc1ce0874f3a4c5f20cafd8977ffcb4/apis/v1/shared_types.go#L694-L731
@@ -87,33 +105,10 @@ type LabelValue string
 
 // InferencePoolStatus defines the observed state of InferencePool
 type InferencePoolStatus struct {
-
     // Conditions track the state of the InferencePool.
     Conditions []metav1.Condition `json:"conditions,omitempty"`
 }
 
-// +kubebuilder:object:root=true
-// +kubebuilder:subresource:status
-// +genclient
-
-// InferencePool is the Schema for the Inferencepools API
-type InferencePool struct {
-    metav1.TypeMeta   `json:",inline"`
-    metav1.ObjectMeta `json:"metadata,omitempty"`
-
-    Spec   InferencePoolSpec   `json:"spec,omitempty"`
-    Status InferencePoolStatus `json:"status,omitempty"`
-}
-
-// +kubebuilder:object:root=true
-
-// InferencePoolList contains a list of InferencePool
-type InferencePoolList struct {
-    metav1.TypeMeta `json:",inline"`
-    metav1.ListMeta `json:"metadata,omitempty"`
-    Items           []InferencePool `json:"items"`
-}
-
 func init() {
     SchemeBuilder.Register(&InferencePool{}, &InferencePoolList{})
 }
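
For context, here is a minimal sketch of how the tightened InferencePoolSpec might be populated from Go. The module import path, pool name, and labels are illustrative assumptions, not taken from this commit.

package main

import (
    "fmt"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

    // Assumed import path for this repository's v1alpha1 types.
    "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
)

func main() {
    pool := v1alpha1.InferencePool{
        ObjectMeta: metav1.ObjectMeta{Name: "vllm-pool", Namespace: "default"}, // made-up names
        Spec: v1alpha1.InferencePoolSpec{
            // Selector is a simple label map, mirroring Service selectors.
            Selector: map[v1alpha1.LabelKey]v1alpha1.LabelValue{"app": "vllm"},
            // After this commit the port must be in the range 1-65535
            // (Minimum raised from 0 to 1) and the field is required.
            TargetPortNumber: 8000,
        },
    }
    fmt.Printf("%s selects app=%s on port %d\n",
        pool.Name, pool.Spec.Selector["app"], pool.Spec.TargetPortNumber)
}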

config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml (+30 -14)
@@ -17,7 +17,7 @@ spec:
   - name: v1alpha1
     schema:
       openAPIV3Schema:
-        description: InferenceModel is the Schema for the InferenceModels API
+        description: InferenceModel is the Schema for the InferenceModels API.
         properties:
           apiVersion:
             description: |-
@@ -38,10 +38,10 @@ spec:
             type: object
           spec:
             description: |-
-              InferenceModelSpec represents a specific model use case. This resource is
+              InferenceModelSpec represents the desired state of a specific model use case. This resource is
               managed by the "Inference Workload Owner" persona.
 
-              The Inference Workload Owner persona is: a team that trains, verifies, and
+              The Inference Workload Owner persona is someone that trains, verifies, and
               leverages a large language model from a model frontend, drives the lifecycle
               and rollout of new versions of those models, and defines the specific
               performance and latency goals for the model. These workloads are
@@ -56,16 +56,16 @@ spec:
             properties:
               criticality:
                 default: Default
-                description: Defines how important it is to serve the model compared
-                  to other models referencing the same pool.
+                description: Criticality defines how important it is to serve the
+                  model compared to other models referencing the same pool.
                 enum:
                 - Critical
                 - Default
                 - Sheddable
                 type: string
              modelName:
                description: |-
-                  The name of the model as the users set in the "model" parameter in the requests.
+                  ModelName is the name of the model as the users set in the "model" parameter in the requests.
                  The name should be unique among the workloads that reference the same backend pool.
                  This is the parameter that will be used to match the request with. In the future, we may
                  allow to match on other request parameters. The other approach to support matching
@@ -76,8 +76,8 @@ spec:
                maxLength: 253
                type: string
              poolRef:
-                description: Reference to the inference pool, the pool must exist
-                  in the same namespace.
+                description: PoolRef is a reference to the inference pool, the pool
+                  must exist in the same namespace.
                properties:
                  group:
                    default: inference.networking.x-k8s.io
@@ -102,36 +102,52 @@ spec:
                type: object
              targetModels:
                description: |-
-                  Allow multiple versions of a model for traffic splitting.
+                  TargetModels allow multiple versions of a model for traffic splitting.
                  If not specified, the target model name is defaulted to the modelName parameter.
                  modelName is often in reference to a LoRA adapter.
                items:
                  description: |-
                    TargetModel represents a deployed model or a LoRA adapter. The
                    Name field is expected to match the name of the LoRA adapter
                    (or base model) as it is registered within the model server. Inference
-                    Gateway assumes that the model exists on the model server and is the
+                    Gateway assumes that the model exists on the model server and it's the
                    responsibility of the user to validate a correct match. Should a model fail
-                    to exist at request time, the error is processed by the Instance Gateway,
-                    and then emitted on the appropriate InferenceModel object.
+                    to exist at request time, the error is processed by the Inference Gateway
+                    and emitted on the appropriate InferenceModel object.
                  properties:
                    name:
-                      description: The name of the adapter as expected by the ModelServer.
+                      description: Name is the name of the adapter as expected by
+                        the ModelServer.
                      maxLength: 253
                      type: string
                    weight:
                      default: 1
                      description: |-
                        Weight is used to determine the proportion of traffic that should be
-                        sent to this target model when multiple versions of the model are specified.
+                        sent to this model when multiple target models are specified.
+
+                        Weight defines the proportion of requests forwarded to the specified
+                        model. This is computed as weight/(sum of all weights in this
+                        TargetModels list). For non-zero values, there may be some epsilon from
+                        the exact proportion defined here depending on the precision an
+                        implementation supports. Weight is not a percentage and the sum of
+                        weights does not need to equal 100.
+
+                        If only one model is specified and it has a weight greater than 0, 100%
+                        of the traffic is forwarded to that model. If weight is set to 0, no
+                        traffic should be forwarded for this model. If unspecified, weight
+                        defaults to 1.
                      format: int32
                      maximum: 1000000
                      minimum: 0
                      type: integer
+                  required:
+                  - name
                  type: object
                maxItems: 10
                type: array
            required:
+            - modelName
            - poolRef
            type: object
          status:
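
To see the effect of the new required entries, here is a hedged sketch of an InferenceModel that satisfies the updated schema: modelName, poolRef.name, and each targetModels[].name must now be set. The resource names and the module import path are illustrative assumptions, not taken from this commit.

package main

import (
    "fmt"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

    // Assumed import path for this repository's v1alpha1 types.
    "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
)

func main() {
    model := v1alpha1.InferenceModel{
        ObjectMeta: metav1.ObjectMeta{Name: "chat", Namespace: "default"}, // made-up names
        Spec: v1alpha1.InferenceModelSpec{
            // Required: matches the "model" parameter in client requests.
            ModelName: "llama-chat",
            // Required: references a pool in the same namespace.
            PoolRef: v1alpha1.PoolObjectReference{Name: "vllm-pool"},
            TargetModels: []v1alpha1.TargetModel{
                // name is now required; weight is left unset and defaults to 1.
                {Name: "llama-chat-lora-v2"},
            },
        },
    }
    fmt.Println(model.Spec.ModelName, "->", model.Spec.PoolRef.Name)
}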
