@@ -65,8 +65,8 @@ type StreamingServer struct {
65
65
func (s * StreamingServer ) Process (srv extProcPb.ExternalProcessor_ProcessServer ) error {
66
66
ctx := srv .Context ()
67
67
logger := log .FromContext (ctx )
68
- loggerVerbose := logger .V (logutil .VERBOSE )
69
- loggerVerbose .Info ("Processing" )
68
+ loggerTrace := logger .V (logutil .TRACE )
69
+ loggerTrace .Info ("Processing" )
70
70
71
71
// Create request context to share states during life time of an HTTP request.
72
72
// See https://github.com/envoyproxy/envoy/issues/17540.
@@ -103,21 +103,21 @@ func (s *StreamingServer) Process(srv extProcPb.ExternalProcessor_ProcessServer)
103
103
if recvErr != nil {
104
104
// This error occurs very frequently, though it doesn't seem to have any impact.
105
105
// TODO Figure out if we can remove this noise.
106
- loggerVerbose .Error (err , "Cannot receive stream request" )
106
+ logger . V ( logutil . DEFAULT ) .Error (err , "Cannot receive stream request" )
107
107
return status .Errorf (codes .Unknown , "cannot receive stream request: %v" , err )
108
108
}
109
109
110
110
switch v := req .Request .(type ) {
111
111
case * extProcPb.ProcessingRequest_RequestHeaders :
112
112
err = s .HandleRequestHeaders (ctx , reqCtx , v )
113
113
case * extProcPb.ProcessingRequest_RequestBody :
114
- loggerVerbose .Info ("Incoming body chunk" , "body" , string ( v . RequestBody . Body ) , "EoS" , v .RequestBody .EndOfStream )
114
+ loggerTrace .Info ("Incoming body chunk" , "EoS" , v .RequestBody .EndOfStream )
115
115
// In the stream case, we can receive multiple request bodies.
116
116
body = append (body , v .RequestBody .Body ... )
117
117
118
118
// Message is buffered, we can read and decode.
119
119
if v .RequestBody .EndOfStream {
120
- loggerVerbose .Info ("decoding" )
120
+ loggerTrace .Info ("decoding" )
121
121
err = json .Unmarshal (body , & requestBody )
122
122
if err != nil {
123
123
logger .V (logutil .DEFAULT ).Error (err , "Error unmarshaling request body" )
@@ -133,22 +133,19 @@ func (s *StreamingServer) Process(srv extProcPb.ExternalProcessor_ProcessServer)
133
133
metrics .RecordRequestCounter (reqCtx .Model , reqCtx .ResolvedTargetModel )
134
134
metrics .RecordRequestSizes (reqCtx .Model , reqCtx .ResolvedTargetModel , reqCtx .RequestSize )
135
135
}
136
- loggerVerbose .Info ("Request context after HandleRequestBody" , "context" , reqCtx )
137
136
}
138
137
case * extProcPb.ProcessingRequest_RequestTrailers :
139
138
// This is currently unused.
140
139
case * extProcPb.ProcessingRequest_ResponseHeaders :
141
- loggerVerbose .Info ("got response headers" , "headers" , v .ResponseHeaders .Headers .GetHeaders ())
142
140
for _ , header := range v .ResponseHeaders .Headers .GetHeaders () {
143
141
value := string (header .RawValue )
144
142
145
- logger . V ( logutil . TRACE ) .Info ("header" , "key" , header .Key , "value" , value )
143
+ loggerTrace .Info ("header" , "key" , header .Key , "value" , value )
146
144
if header .Key == "status" && value != "200" {
147
145
reqCtx .ResponseStatusCode = errutil .ModelServerError
148
146
} else if header .Key == "content-type" && strings .Contains (value , "text/event-stream" ) {
149
147
reqCtx .modelServerStreaming = true
150
- loggerVerbose .Info ("model server is streaming response" )
151
- logger .Error (nil , "made it here" )
148
+ loggerTrace .Info ("model server is streaming response" )
152
149
}
153
150
}
154
151
reqCtx .RequestState = ResponseRecieved
@@ -179,7 +176,7 @@ func (s *StreamingServer) Process(srv extProcPb.ExternalProcessor_ProcessServer)
179
176
responseText := string (v .ResponseBody .Body )
180
177
s .HandleResponseBodyModelStreaming (ctx , reqCtx , responseText )
181
178
if v .ResponseBody .EndOfStream {
182
- loggerVerbose .Info ("streaming is completed" )
179
+ loggerTrace .Info ("stream completed" )
183
180
184
181
reqCtx .ResponseCompleteTimestamp = time .Now ()
185
182
metrics .RecordRequestLatencies (ctx , reqCtx .Model , reqCtx .ResolvedTargetModel , reqCtx .RequestReceivedTimestamp , reqCtx .ResponseCompleteTimestamp )
@@ -207,6 +204,7 @@ func (s *StreamingServer) Process(srv extProcPb.ExternalProcessor_ProcessServer)
207
204
208
205
// Message is buffered, we can read and decode.
209
206
if v .ResponseBody .EndOfStream {
207
+ loggerTrace .Info ("stream completed" )
210
208
// Don't send a 500 on a response error. Just let the message passthrough and log our error for debugging purposes.
211
209
// We assume the body is valid JSON, err messages are not guaranteed to be json, and so capturing and sending a 500 obfuscates the response message.
212
210
// using the standard 'err' var will send an immediate error response back to the caller.
@@ -226,7 +224,6 @@ func (s *StreamingServer) Process(srv extProcPb.ExternalProcessor_ProcessServer)
226
224
metrics .RecordInputTokens (reqCtx .Model , reqCtx .ResolvedTargetModel , reqCtx .Usage .PromptTokens )
227
225
metrics .RecordOutputTokens (reqCtx .Model , reqCtx .ResolvedTargetModel , reqCtx .Usage .CompletionTokens )
228
226
}
229
- loggerVerbose .Info ("Request context after HandleResponseBody" , "context" , reqCtx )
230
227
}
231
228
}
232
229
case * extProcPb.ProcessingRequest_ResponseTrailers :
@@ -246,27 +243,28 @@ func (s *StreamingServer) Process(srv extProcPb.ExternalProcessor_ProcessServer)
246
243
}
247
244
return nil
248
245
}
249
- loggerVerbose .Info ("checking" , "request state" , reqCtx .RequestState )
250
- if err := reqCtx .updateStateAndSendIfNeeded (srv , loggerVerbose ); err != nil {
246
+ loggerTrace .Info ("checking" , "request state" , reqCtx .RequestState )
247
+ if err := reqCtx .updateStateAndSendIfNeeded (srv , logger ); err != nil {
251
248
return err
252
249
}
253
250
}
254
251
}
255
252
256
253
// updateStateAndSendIfNeeded checks state and can send mutiple responses in a single pass, but only if ordered properly.
257
254
// Order of requests matter in FULL_DUPLEX_STREAMING. For both request and response, the order of response sent back MUST be: Header->Body->Trailer, with trailer being optional.
258
- func (r * RequestContext ) updateStateAndSendIfNeeded (srv extProcPb.ExternalProcessor_ProcessServer , loggerVerbose logr.Logger ) error {
255
+ func (r * RequestContext ) updateStateAndSendIfNeeded (srv extProcPb.ExternalProcessor_ProcessServer , logger logr.Logger ) error {
256
+ loggerTrace := logger .V (logutil .TRACE )
259
257
// No switch statement as we could send multiple responses in one pass.
260
258
if r .RequestState == RequestReceived && r .reqHeaderResp != nil {
261
- loggerVerbose .Info ("Request header response" , "obj" , r .reqHeaderResp )
259
+ loggerTrace .Info ("Sending request header response" , "obj" , r .reqHeaderResp )
262
260
if err := srv .Send (r .reqHeaderResp ); err != nil {
263
- loggerVerbose .Error (err , "error sending response" )
261
+ logger . V ( logutil . DEFAULT ) .Error (err , "error sending response" )
264
262
return status .Errorf (codes .Unknown , "failed to send response back to Envoy: %v" , err )
265
263
}
266
264
r .RequestState = HeaderRequestResponseComplete
267
265
}
268
266
if r .RequestState == HeaderRequestResponseComplete && r .reqBodyResp != nil {
269
- loggerVerbose .Info ("Request body response" , "obj" , r . reqBodyResp )
267
+ loggerTrace .Info ("Sending request body response" )
270
268
if err := srv .Send (r .reqBodyResp ); err != nil {
271
269
return status .Errorf (codes .Unknown , "failed to send response back to Envoy: %v" , err )
272
270
}
@@ -281,14 +279,14 @@ func (r *RequestContext) updateStateAndSendIfNeeded(srv extProcPb.ExternalProces
281
279
}
282
280
}
283
281
if r .RequestState == ResponseRecieved && r .respHeaderResp != nil {
284
- loggerVerbose .Info ("Response header response" , "obj" , r .respHeaderResp )
282
+ loggerTrace .Info ("Sending response header response" , "obj" , r .respHeaderResp )
285
283
if err := srv .Send (r .respHeaderResp ); err != nil {
286
284
return status .Errorf (codes .Unknown , "failed to send response back to Envoy: %v" , err )
287
285
}
288
286
r .RequestState = HeaderResponseResponseComplete
289
287
}
290
288
if r .RequestState == HeaderResponseResponseComplete && r .respBodyResp != nil {
291
- loggerVerbose .Info ("Response body response" , "obj" , r . respBodyResp )
289
+ loggerTrace .Info ("Sending response body response" )
292
290
if err := srv .Send (r .respBodyResp ); err != nil {
293
291
return status .Errorf (codes .Unknown , "failed to send response back to Envoy: %v" , err )
294
292
}
@@ -298,7 +296,7 @@ func (r *RequestContext) updateStateAndSendIfNeeded(srv extProcPb.ExternalProces
298
296
r .RequestState = BodyResponseResponsesComplete
299
297
}
300
298
// Dump the response so a new stream message can begin
301
- r .reqBodyResp = nil
299
+ r .respBodyResp = nil
302
300
}
303
301
if r .RequestState == BodyResponseResponsesComplete && r .respTrailerResp != nil {
304
302
// Trailers in requests are not guaranteed
@@ -318,15 +316,13 @@ func (s *StreamingServer) HandleRequestBody(
318
316
) (* RequestContext , error ) {
319
317
var requestBodyBytes []byte
320
318
logger := log .FromContext (ctx )
321
- loggerVerbose := logger .V (logutil .VERBOSE )
322
- loggerVerbose .Info ("Handling request body" )
323
319
324
320
// Resolve target models.
325
321
model , ok := requestBodyMap ["model" ].(string )
326
322
if ! ok {
327
323
return reqCtx , errutil.Error {Code : errutil .BadRequest , Msg : "model not found in request" }
328
324
}
329
- loggerVerbose . Info ( "Model requested" , "model" , model )
325
+
330
326
modelName := model
331
327
332
328
// NOTE: The nil checking for the modelObject means that we DO allow passthrough currently.
@@ -347,7 +343,7 @@ func (s *StreamingServer) HandleRequestBody(
347
343
ResolvedTargetModel : modelName ,
348
344
Critical : datastore .IsCritical (modelObj ),
349
345
}
350
- loggerVerbose . Info ("LLM request assembled" , "request " , llmReq )
346
+ logger . V ( logutil . DEBUG ). Info ("LLM request assembled" , "model " , llmReq . Model , "targetModel" , llmReq . ResolvedTargetModel , "critical" , llmReq . Critical )
351
347
352
348
var err error
353
349
// Update target models in the body.
@@ -360,7 +356,6 @@ func (s *StreamingServer) HandleRequestBody(
360
356
logger .V (logutil .DEFAULT ).Error (err , "Error marshaling request body" )
361
357
return reqCtx , errutil.Error {Code : errutil .Internal , Msg : fmt .Sprintf ("error marshaling request body: %v" , err )}
362
358
}
363
- loggerVerbose .Info ("Updated request body marshalled" , "body" , string (requestBodyBytes ))
364
359
365
360
target , err := s .scheduler .Schedule (ctx , llmReq )
366
361
if err != nil {
@@ -377,15 +372,16 @@ func (s *StreamingServer) HandleRequestBody(
377
372
endpoint := targetPod .Address + ":" + strconv .Itoa (int (pool .Spec .TargetPortNumber ))
378
373
379
374
logger .V (logutil .DEFAULT ).Info ("Request handled" ,
380
- "model" , llmReq .Model , "targetModel" , llmReq .ResolvedTargetModel , "endpoint" , targetPod )
375
+ "model" , llmReq .Model , "targetModel" , llmReq .ResolvedTargetModel , "endpoint" , targetPod , "endpoint metrics" ,
376
+ fmt .Sprintf ("%+v" , target ))
381
377
382
378
reqCtx .Model = llmReq .Model
383
379
reqCtx .ResolvedTargetModel = llmReq .ResolvedTargetModel
384
380
reqCtx .RequestSize = len (requestBodyBytes )
385
381
reqCtx .TargetPod = targetPod .NamespacedName .String ()
386
382
reqCtx .TargetEndpoint = endpoint
387
383
388
- s .populateRequestHeaderResponse (ctx , reqCtx , endpoint , len (requestBodyBytes ))
384
+ s .populateRequestHeaderResponse (reqCtx , endpoint , len (requestBodyBytes ))
389
385
390
386
reqCtx .reqBodyResp = & extProcPb.ProcessingResponse {
391
387
// The Endpoint Picker supports two approaches to communicating the target endpoint, as a request header
@@ -416,8 +412,6 @@ func (s *StreamingServer) HandleResponseBody(
416
412
response map [string ]interface {},
417
413
) (* RequestContext , error ) {
418
414
logger := log .FromContext (ctx )
419
- loggerVerbose := logger .V (logutil .VERBOSE )
420
- loggerVerbose .Info ("Processing HandleResponseBody" )
421
415
responseBytes , err := json .Marshal (response )
422
416
if err != nil {
423
417
logger .V (logutil .DEFAULT ).Error (err , "error marshalling responseBody" )
@@ -431,7 +425,7 @@ func (s *StreamingServer) HandleResponseBody(
431
425
TotalTokens : int (usg ["total_tokens" ].(float64 )),
432
426
}
433
427
reqCtx .Usage = usage
434
- loggerVerbose .Info ("Response generated" , "usage" , reqCtx .Usage )
428
+ logger . V ( logutil . VERBOSE ) .Info ("Response generated" , "usage" , reqCtx .Usage )
435
429
}
436
430
reqCtx .ResponseSize = len (responseBytes )
437
431
// ResponseComplete is to indicate the response is complete. In non-streaming
@@ -469,12 +463,8 @@ func (s *StreamingServer) HandleResponseBodyModelStreaming(
469
463
reqCtx * RequestContext ,
470
464
responseText string ,
471
465
) {
472
- logger := log .FromContext (ctx )
473
- loggerVerbose := logger .V (logutil .VERBOSE )
474
- loggerVerbose .Info ("Processing HandleResponseBody" )
475
-
476
466
if strings .Contains (responseText , streamingEndMsg ) {
477
- resp := ParseRespForUsage (ctx , responseText , loggerVerbose )
467
+ resp := ParseRespForUsage (ctx , responseText )
478
468
metrics .RecordInputTokens (reqCtx .Model , reqCtx .ResolvedTargetModel , resp .Usage .PromptTokens )
479
469
metrics .RecordOutputTokens (reqCtx .Model , reqCtx .ResolvedTargetModel , resp .Usage .CompletionTokens )
480
470
}
@@ -495,13 +485,12 @@ func (s *StreamingServer) HandleRequestHeaders(ctx context.Context, reqCtx *Requ
495
485
return err
496
486
}
497
487
endpoint := pod .Address + ":" + strconv .Itoa (int (pool .Spec .TargetPortNumber ))
498
- s .populateRequestHeaderResponse (ctx , reqCtx , endpoint , 0 )
488
+ s .populateRequestHeaderResponse (reqCtx , endpoint , 0 )
499
489
}
500
490
return nil
501
491
}
502
492
503
- func (s * StreamingServer ) populateRequestHeaderResponse (ctx context.Context , reqCtx * RequestContext , endpoint string , requestBodyLength int ) {
504
- logger := log .FromContext (ctx )
493
+ func (s * StreamingServer ) populateRequestHeaderResponse (reqCtx * RequestContext , endpoint string , requestBodyLength int ) {
505
494
headers := []* configPb.HeaderValueOption {
506
495
{
507
496
Header : & configPb.HeaderValue {
@@ -520,10 +509,6 @@ func (s *StreamingServer) populateRequestHeaderResponse(ctx context.Context, req
520
509
},
521
510
})
522
511
}
523
- // Print headers for debugging
524
- for _ , header := range headers {
525
- logger .V (logutil .DEBUG ).Info ("Request body header" , "key" , header .Header .Key , "value" , header .Header .RawValue )
526
- }
527
512
528
513
targetEndpointValue := & structpb.Struct {
529
514
Fields : map [string ]* structpb.Value {
0 commit comments