Skip to content

Commit ea4a531

Browse files
rebase fork with main
1 parent c8bde9a commit ea4a531

File tree

3 files changed

+32
-89
lines changed

3 files changed

+32
-89
lines changed

pkg/ext-proc/main.go

+2-89
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,14 @@ import (
88
"net/http"
99
"strconv"
1010

11-
extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
1211
"github.com/prometheus/client_golang/prometheus/promhttp"
1312
"google.golang.org/grpc"
1413
healthPb "google.golang.org/grpc/health/grpc_health_v1"
1514
"inference.networking.x-k8s.io/gateway-api-inference-extension/api/v1alpha1"
1615
"inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend"
1716
"inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend/vllm"
18-
"inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/handlers"
1917
"inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/metrics"
20-
"inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/scheduling"
18+
runserver "inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/server"
2119
"k8s.io/apimachinery/pkg/runtime"
2220
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
2321
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
@@ -122,10 +120,7 @@ func main() {
122120
healthSvr := startHealthServer(datastore, *grpcHealthPort)
123121
extProcSvr := serverRunner.Start(
124122
datastore,
125-
*grpcPort,
126-
*refreshPodsInterval,
127-
*refreshMetricsInterval,
128-
*targetPodHeader,
123+
&vllm.PodMetricsClientImpl{},
129124
)
130125
// Start metrics handler
131126
metricsSvr := startMetricsHandler(*metricsPort, cfg)
@@ -173,86 +168,6 @@ func startHealthServer(ds *backend.K8sDatastore, port int) *grpc.Server {
173168
return svr
174169
}
175170

176-
// startExternalProcessorServer starts the Envoy external processor server in a goroutine.
177-
func startExternalProcessorServer(
178-
datastore *backend.K8sDatastore,
179-
port int,
180-
refreshPodsInterval, refreshMetricsInterval time.Duration,
181-
targetPodHeader string,
182-
) *grpc.Server {
183-
svr := grpc.NewServer()
184-
185-
go func() {
186-
lis, err := net.Listen("tcp", fmt.Sprintf(":%d", port))
187-
if err != nil {
188-
klog.Fatalf("Ext-proc server failed to listen: %v", err)
189-
}
190-
klog.Infof("Ext-proc server listening on port: %d", port)
191-
192-
// Initialize backend provider
193-
pp := backend.NewProvider(&vllm.PodMetricsClientImpl{}, datastore)
194-
if err := pp.Init(refreshPodsInterval, refreshMetricsInterval); err != nil {
195-
klog.Fatalf("Failed to initialize backend provider: %v", err)
196-
}
197-
198-
// Register ext_proc handlers
199-
extProcPb.RegisterExternalProcessorServer(
200-
svr,
201-
handlers.NewServer(pp, scheduling.NewScheduler(pp), targetPodHeader, datastore),
202-
)
203-
204-
// Serve blocks and returns only once shutdown is complete.
205-
if err := svr.Serve(lis); err != nil && err != grpc.ErrServerStopped {
206-
klog.Fatalf("Ext-proc server failed: %v", err)
207-
}
208-
klog.Info("Ext-proc server shutting down")
209-
}()
210-
return svr
211-
}
212-
213-
func startMetricsHandler(port int, cfg *rest.Config) *http.Server {
214-
metrics.Register()
215-
216-
var svr *http.Server
217-
go func() {
218-
klog.Info("Starting metrics HTTP handler ...")
219-
220-
mux := http.NewServeMux()
221-
mux.Handle(defaultMetricsEndpoint, metricsHandlerWithAuthenticationAndAuthorization(cfg))
222-
223-
svr = &http.Server{
224-
Addr: net.JoinHostPort("", strconv.Itoa(port)),
225-
Handler: mux,
226-
}
227-
if err := svr.ListenAndServe(); err != http.ErrServerClosed {
228-
klog.Fatalf("failed to start metrics HTTP handler: %v", err)
229-
}
230-
}()
231-
return svr
232-
}
233-
234-
func metricsHandlerWithAuthenticationAndAuthorization(cfg *rest.Config) http.Handler {
235-
h := promhttp.HandlerFor(
236-
legacyregistry.DefaultGatherer,
237-
promhttp.HandlerOpts{},
238-
)
239-
httpClient, err := rest.HTTPClientFor(cfg)
240-
if err != nil {
241-
klog.Fatalf("failed to create http client for metrics auth: %v", err)
242-
}
243-
244-
filter, err := filters.WithAuthenticationAndAuthorization(cfg, httpClient)
245-
if err != nil {
246-
klog.Fatalf("failed to create metrics filter for auth: %v", err)
247-
}
248-
metricsLogger := klog.LoggerWithValues(klog.NewKlogr(), "path", defaultMetricsEndpoint)
249-
metricsAuthHandler, err := filter(metricsLogger, h)
250-
if err != nil {
251-
klog.Fatalf("failed to create metrics auth handler: %v", err)
252-
}
253-
return metricsAuthHandler
254-
}
255-
256171
func startMetricsHandler(port int, cfg *rest.Config) *http.Server {
257172
metrics.Register()
258173

@@ -296,8 +211,6 @@ func metricsHandlerWithAuthenticationAndAuthorization(cfg *rest.Config) http.Han
296211
return metricsAuthHandler
297212
}
298213

299-
=======
300-
>>>>>>> ad32d85 (Add updated hermetic test with k8s client API, these pull from example object yamls.)
301214
func validateFlags() error {
302215
if *poolName == "" {
303216
return fmt.Errorf("required %q flag not set", "poolName")

pkg/ext-proc/test/artifacts/inferencepool-with-model-hermetic.yaml

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
apiVersion: inference.networking.x-k8s.io/v1alpha1
2+
kind: InferencePool
3+
metadata:
4+
labels:
5+
name: vllm-llama2-7b-pool
6+
spec:
7+
targetPortNumber: 8000
8+
selector:
9+
app: vllm-llama2-7b-pool
10+
---
11+
apiVersion: inference.networking.x-k8s.io/v1alpha1
12+
kind: InferenceModel
13+
metadata:
14+
labels:
15+
app.kubernetes.io/name: api
16+
app.kubernetes.io/managed-by: kustomize
17+
name: inferencemodel-sample
18+
namespace: default
19+
spec:
20+
modelName: sql-lora
21+
criticality: Critical
22+
poolRef:
23+
# This is the default value:
24+
group: inference.networking.x-k8s.io
25+
# This is the default value:
26+
kind: InferencePool
27+
name: vllm-llama2-7b-pool
28+
targetModels:
29+
- name: sql-lora-1fdg2
30+
weight: 100

0 commit comments

Comments
 (0)