@@ -3,6 +3,8 @@ package handlers
3
3
import (
4
4
"encoding/json"
5
5
"fmt"
6
+ "regexp"
7
+ "strings"
6
8
7
9
configPb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
8
10
extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
@@ -16,6 +18,10 @@ func (s *Server) HandleResponseHeaders(reqCtx *RequestContext, req *extProcPb.Pr
16
18
h := req .Request .(* extProcPb.ProcessingRequest_ResponseHeaders )
17
19
klog .V (logutil .VERBOSE ).Infof ("Headers before: %+v\n " , h )
18
20
21
+ if h .ResponseHeaders .EndOfStream {
22
+ reqCtx .StreamingCompleted = true
23
+ klog .V (logutil .VERBOSE ).Info ("Response is completed" )
24
+ }
19
25
resp := & extProcPb.ProcessingResponse {
20
26
Response : & extProcPb.ProcessingResponse_ResponseHeaders {
21
27
ResponseHeaders : & extProcPb.HeadersResponse {
@@ -66,22 +72,57 @@ func (s *Server) HandleResponseHeaders(reqCtx *RequestContext, req *extProcPb.Pr
66
72
}
67
73
}*/
68
74
func (s * Server ) HandleResponseBody (reqCtx * RequestContext , req * extProcPb.ProcessingRequest ) (* extProcPb.ProcessingResponse , error ) {
69
- klog .V (logutil .VERBOSE ).Info ("Processing HandleResponseBody" )
70
75
body := req .Request .(* extProcPb.ProcessingRequest_ResponseBody )
71
76
72
- res := Response {}
73
- if err := json .Unmarshal (body .ResponseBody .Body , & res ); err != nil {
74
- return nil , fmt .Errorf ("unmarshaling response body: %v" , err )
77
+ if reqCtx .Streaming {
78
+ responseText := string (reqCtx .prevResponse )
79
+ if strings .Contains (responseText , "[DONE]" ) {
80
+ lastResponse := Response {}
81
+
82
+ // Example message:
83
+ // data: {"id":"cmpl-d6392493-b56c-4d81-9f11-995a0dc93c5d","object":"text_completion","created":1739400043,"model":"tweet-summary-0","choices":[],"usage":{"prompt_tokens":7,"total_tokens":17,"completion_tokens":10}}
84
+ //
85
+ // data: [DONE]
86
+ // We need to strip the `data:` prefix and the trailing `data: [DONE]` message.
87
+
88
+ msgInStr := string (reqCtx .prevResponse )
89
+ // msgInStr = msgInStr[6:]
90
+ re := regexp .MustCompile (`\{.*(?:\{.*\}|[^\{]*)\}` ) // match for JSON object
91
+ match := re .FindString (msgInStr )
92
+
93
+ byteSlice := []byte (match )
94
+ if err := json .Unmarshal (byteSlice , & lastResponse ); err != nil {
95
+ return nil , fmt .Errorf ("unmarshaling response body: %v" , err )
96
+ }
97
+ klog .V (logutil .VERBOSE ).Infof ("[DONE] previous response is: %+v" , lastResponse )
98
+
99
+ reqCtx .Response = lastResponse
100
+ }
101
+
102
+ // This should be placed before checking [DONE] message because [DONE] message is produced
103
+ // after usage context.
104
+ reqCtx .prevResponse = body .ResponseBody .Body
105
+
106
+ if reqCtx .StreamingCompleted || body .ResponseBody .EndOfStream {
107
+ klog .V (logutil .VERBOSE ).Info ("Streaming is completed" )
108
+ reqCtx .ResponseComplete = true
109
+ } else {
110
+ reqCtx .ResponseSize += len (body .ResponseBody .Body )
111
+ }
112
+
113
+ } else {
114
+ klog .V (logutil .VERBOSE ).Info ("Processing HandleResponseBody" )
115
+
116
+ res := Response {}
117
+ if err := json .Unmarshal (body .ResponseBody .Body , & res ); err != nil {
118
+ return nil , fmt .Errorf ("unmarshaling response body: %v" , err )
119
+ }
120
+ reqCtx .Response = res
121
+ reqCtx .ResponseSize = len (body .ResponseBody .Body )
122
+ reqCtx .ResponseComplete = true
123
+
124
+ klog .V (logutil .VERBOSE ).Infof ("Response: %+v" , res )
75
125
}
76
- reqCtx .Response = res
77
- reqCtx .ResponseSize = len (body .ResponseBody .Body )
78
- // ResponseComplete is to indicate the response is complete. In non-streaming
79
- // case, it will be set to be true once the response is processed; in
80
- // streaming case, it will be set to be true once the last chunk is processed.
81
- // TODO(https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/178)
82
- // will add the processing for streaming case.
83
- reqCtx .ResponseComplete = true
84
- klog .V (logutil .VERBOSE ).Infof ("Response: %+v" , res )
85
126
86
127
resp := & extProcPb.ProcessingResponse {
87
128
Response : & extProcPb.ProcessingResponse_ResponseBody {
0 commit comments