@@ -7,20 +7,16 @@ import (
7
7
"net"
8
8
"net/http"
9
9
"strconv"
10
- "time"
11
10
12
- extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
13
11
"github.com/prometheus/client_golang/prometheus/promhttp"
14
12
"google.golang.org/grpc"
15
13
healthPb "google.golang.org/grpc/health/grpc_health_v1"
16
14
"inference.networking.x-k8s.io/gateway-api-inference-extension/api/v1alpha1"
17
15
"inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend"
18
16
"inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend/vllm"
19
- "inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/handlers"
20
17
"inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/metrics"
21
- "inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/scheduling "
18
+ runserver "inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/server "
22
19
"k8s.io/apimachinery/pkg/runtime"
23
- "k8s.io/apimachinery/pkg/types"
24
20
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
25
21
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
26
22
"k8s.io/client-go/rest"
@@ -37,7 +33,7 @@ const (
37
33
var (
38
34
grpcPort = flag .Int (
39
35
"grpcPort" ,
40
- 9002 ,
36
+ runserver . DefaultGrpcPort ,
41
37
"The gRPC port used for communicating with Envoy proxy" )
42
38
grpcHealthPort = flag .Int (
43
39
"grpcHealthPort" ,
@@ -47,31 +43,31 @@ var (
47
43
"metricsPort" , 9090 , "The metrics port" )
48
44
targetPodHeader = flag .String (
49
45
"targetPodHeader" ,
50
- "target-pod" ,
46
+ runserver . DefaultTargetPodHeader ,
51
47
"Header key used by Envoy to route to the appropriate pod. This must match Envoy configuration." )
52
48
poolName = flag .String (
53
49
"poolName" ,
54
- "" ,
50
+ runserver . DefaultPoolName ,
55
51
"Name of the InferencePool this Endpoint Picker is associated with." )
56
52
poolNamespace = flag .String (
57
53
"poolNamespace" ,
58
- "default" ,
54
+ runserver . DefaultPoolNamespace ,
59
55
"Namespace of the InferencePool this Endpoint Picker is associated with." )
60
56
serviceName = flag .String (
61
57
"serviceName" ,
62
- "" ,
58
+ runserver . DefaultServiceName ,
63
59
"Name of the Service that will be used to read EndpointSlices from" )
64
60
zone = flag .String (
65
61
"zone" ,
66
- "" ,
62
+ runserver . DefaultZone ,
67
63
"The zone that this instance is created in. Will be passed to the corresponding endpointSlice. " )
68
64
refreshPodsInterval = flag .Duration (
69
65
"refreshPodsInterval" ,
70
- 10 * time . Second ,
66
+ runserver . DefaultRefreshPodsInterval ,
71
67
"interval to refresh pods" )
72
68
refreshMetricsInterval = flag .Duration (
73
69
"refreshMetricsInterval" ,
74
- 50 * time . Millisecond ,
70
+ runserver . DefaultRefreshMetricsInterval ,
75
71
"interval to refresh metrics" )
76
72
77
73
scheme = runtime .NewScheme ()
@@ -103,71 +99,34 @@ func main() {
103
99
})
104
100
klog .Info (flags )
105
101
106
- // Create a new manager to manage controllers
107
- mgr , err := ctrl .NewManager (ctrl .GetConfigOrDie (), ctrl.Options {Scheme : scheme })
108
- if err != nil {
109
- klog .Fatalf ("Failed to create controller manager: %v" , err )
110
- }
111
-
112
- // Create the data store used to cache watched resources
113
102
datastore := backend .NewK8sDataStore ()
114
103
115
- // Create the controllers and register them with the manager
116
- if err := (& backend.InferencePoolReconciler {
117
- Datastore : datastore ,
118
- Scheme : mgr .GetScheme (),
119
- Client : mgr .GetClient (),
120
- PoolNamespacedName : types.NamespacedName {
121
- Name : * poolName ,
122
- Namespace : * poolNamespace ,
123
- },
124
- Record : mgr .GetEventRecorderFor ("InferencePool" ),
125
- }).SetupWithManager (mgr ); err != nil {
126
- klog .Fatalf ("Failed setting up InferencePoolReconciler: %v" , err )
127
- }
128
-
129
- if err := (& backend.InferenceModelReconciler {
130
- Datastore : datastore ,
131
- Scheme : mgr .GetScheme (),
132
- Client : mgr .GetClient (),
133
- PoolNamespacedName : types.NamespacedName {
134
- Name : * poolName ,
135
- Namespace : * poolNamespace ,
136
- },
137
- Record : mgr .GetEventRecorderFor ("InferenceModel" ),
138
- }).SetupWithManager (mgr ); err != nil {
139
- klog .Fatalf ("Failed setting up InferenceModelReconciler: %v" , err )
140
- }
141
-
142
- if err := (& backend.EndpointSliceReconciler {
143
- Datastore : datastore ,
144
- Scheme : mgr .GetScheme (),
145
- Client : mgr .GetClient (),
146
- Record : mgr .GetEventRecorderFor ("endpointslice" ),
147
- ServiceName : * serviceName ,
148
- Zone : * zone ,
149
- }).SetupWithManager (mgr ); err != nil {
150
- klog .Fatalf ("Failed setting up EndpointSliceReconciler: %v" , err )
104
+ serverRunner := & runserver.ExtProcServerRunner {
105
+ GrpcPort : * grpcPort ,
106
+ TargetPodHeader : * targetPodHeader ,
107
+ PoolName : * poolName ,
108
+ PoolNamespace : * poolNamespace ,
109
+ ServiceName : * serviceName ,
110
+ Zone : * zone ,
111
+ RefreshPodsInterval : * refreshPodsInterval ,
112
+ RefreshMetricsInterval : * refreshMetricsInterval ,
113
+ Scheme : scheme ,
114
+ Config : ctrl .GetConfigOrDie (),
115
+ Datastore : datastore ,
151
116
}
117
+ serverRunner .Setup ()
152
118
153
119
// Start health and ext-proc servers in goroutines
154
120
healthSvr := startHealthServer (datastore , * grpcHealthPort )
155
- extProcSvr := startExternalProcessorServer (
121
+ extProcSvr := serverRunner . Start (
156
122
datastore ,
157
- * grpcPort ,
158
- * refreshPodsInterval ,
159
- * refreshMetricsInterval ,
160
- * targetPodHeader ,
123
+ & vllm.PodMetricsClientImpl {},
161
124
)
162
125
// Start metrics handler
163
126
metricsSvr := startMetricsHandler (* metricsPort , cfg )
164
127
165
- // Start the controller manager. Blocking and will return when shutdown is complete.
166
- klog .Infof ("Starting controller manager" )
167
- if err := mgr .Start (ctrl .SetupSignalHandler ()); err != nil {
168
- klog .Fatalf ("Error starting controller manager: %v" , err )
169
- }
170
- klog .Info ("Controller manager shutting down" )
128
+ // Start manager, blocking
129
+ serverRunner .StartManager ()
171
130
172
131
// Gracefully shutdown servers
173
132
if healthSvr != nil {
@@ -209,43 +168,6 @@ func startHealthServer(ds *backend.K8sDatastore, port int) *grpc.Server {
209
168
return svr
210
169
}
211
170
212
- // startExternalProcessorServer creates a gRPC server, then in a background goroutine listens on the given TCP port, initializes the backend provider, registers the Envoy ext_proc handlers and serves; it returns the *grpc.Server immediately. Listen, provider-init and serve failures terminate the process via klog.Fatalf.
213
- func startExternalProcessorServer (
214
- datastore * backend.K8sDatastore ,
215
- port int ,
216
- refreshPodsInterval , refreshMetricsInterval time.Duration ,
217
- targetPodHeader string ,
218
- ) * grpc.Server {
219
- svr := grpc .NewServer ()
220
-
221
- go func () {
222
- lis , err := net .Listen ("tcp" , fmt .Sprintf (":%d" , port ))
223
- if err != nil {
224
- klog .Fatalf ("Ext-proc server failed to listen: %v" , err )
225
- }
226
- klog .Infof ("Ext-proc server listening on port: %d" , port )
227
-
228
- // Initialize the backend provider from the vLLM pod metrics client and the shared datastore; Init receives the pod and metrics refresh intervals.
229
- pp := backend .NewProvider (& vllm.PodMetricsClientImpl {}, datastore )
230
- if err := pp .Init (refreshPodsInterval , refreshMetricsInterval ); err != nil {
231
- klog .Fatalf ("Failed to initialize backend provider: %v" , err )
232
- }
233
-
234
- // Register the ext_proc handler server, built from the provider, a scheduler created from that provider, the target pod header and the datastore.
235
- extProcPb .RegisterExternalProcessorServer (
236
- svr ,
237
- handlers .NewServer (pp , scheduling .NewScheduler (pp ), targetPodHeader , datastore ),
238
- )
239
-
240
- // Serve blocks until the server stops; grpc.ErrServerStopped is not treated as a failure, any other error is fatal.
241
- if err := svr .Serve (lis ); err != nil && err != grpc .ErrServerStopped {
242
- klog .Fatalf ("Ext-proc server failed: %v" , err )
243
- }
244
- klog .Info ("Ext-proc server shutting down" )
245
- }()
246
- return svr
247
- }
248
-
249
171
func startMetricsHandler (port int , cfg * rest.Config ) * http.Server {
250
172
metrics .Register ()
251
173
0 commit comments