
Commit 2a52471

VertexAI handle streaming requests

- WIP using shared context manager
- Properly implement uninstrument
- Shared code with a contextmanager
- tmp

1 parent: 8b2558f

14 files changed: +1573 -10 lines

instrumentation-genai/opentelemetry-instrumentation-vertexai/src/opentelemetry/instrumentation/vertexai/__init__.py (+10)

@@ -82,6 +82,11 @@ def _methods_to_wrap(
             client_class.generate_content.__name__,  # pyright: ignore[reportUnknownMemberType]
             method_wrappers.generate_content,
         )
+        yield (
+            client_class,
+            client_class.stream_generate_content.__name__,  # pyright: ignore[reportUnknownMemberType]
+            method_wrappers.stream_generate_content,
+        )

     for client_class in (
         async_client.PredictionServiceAsyncClient,
@@ -92,6 +97,11 @@ def _methods_to_wrap(
             client_class.generate_content.__name__,  # pyright: ignore[reportUnknownMemberType]
             method_wrappers.agenerate_content,
         )
+        yield (
+            client_class,
+            client_class.stream_generate_content.__name__,  # pyright: ignore[reportUnknownMemberType]
+            method_wrappers.astream_generate_content,
+        )


 class VertexAIInstrumentor(BaseInstrumentor):
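For context, a minimal sketch of how the (client_class, method_name, wrapper) tuples yielded by _methods_to_wrap are typically consumed with wrapt, including the uninstrument path the commit message mentions. The helper names here (_instrument_all, _uninstrument_all) are illustrative, not this package's exact code:

from wrapt import wrap_function_wrapper

from opentelemetry.instrumentation.utils import unwrap


def _instrument_all(methods_to_wrap):
    # Each yielded (client_class, method_name, wrapper) tuple is patched in
    # place; the wrapper receives (wrapped, instance, args, kwargs).
    for client_class, method_name, wrapper in methods_to_wrap:
        wrap_function_wrapper(client_class, method_name, wrapper)


def _uninstrument_all(methods_to_wrap):
    # unwrap restores the original attribute, undoing the patch above.
    for client_class, method_name, _wrapper in methods_to_wrap:
        unwrap(client_class, method_name)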

instrumentation-genai/opentelemetry-instrumentation-vertexai/src/opentelemetry/instrumentation/vertexai/events.py (+2 -1)

@@ -161,10 +161,11 @@ def choice_event(
     https://github.com/open-telemetry/semantic-conventions/blob/v1.28.0/docs/gen-ai/gen-ai-events.md#event-gen_aichoice
     """
     body: dict[str, AnyValue] = {
-        "finish_reason": finish_reason,
         "index": index,
         "message": _asdict_filter_nulls(message),
     }
+    if finish_reason:
+        body["finish_reason"] = finish_reason

     tool_calls_list = [
         _asdict_filter_nulls(tool_call) for tool_call in tool_calls
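The effect on the emitted gen_ai.choice event body: an empty finish_reason (as _map_finish_reason now returns for FINISH_REASON_UNSPECIFIED, e.g. on an intermediate streaming chunk) is omitted rather than serialized. Illustrative bodies, not captured output:

# Intermediate streaming chunk: no finish reason, so the key is absent.
chunk_body = {"index": 0, "message": {"role": "model"}}

# Final chunk: the mapped finish reason is still recorded.
final_body = {"index": 0, "message": {"role": "model"}, "finish_reason": "stop"}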

instrumentation-genai/opentelemetry-instrumentation-vertexai/src/opentelemetry/instrumentation/vertexai/patch.py (+64 -4)

@@ -18,8 +18,10 @@
 from typing import (
     TYPE_CHECKING,
     Any,
+    AsyncIterable,
     Awaitable,
     Callable,
+    Iterable,
     MutableSequence,
 )

@@ -36,11 +38,17 @@
 from opentelemetry.trace import SpanKind, Tracer

 if TYPE_CHECKING:
-    from google.cloud.aiplatform_v1.services.prediction_service import client
+    from google.cloud.aiplatform_v1.services.prediction_service import (
+        async_client,
+        client,
+    )
     from google.cloud.aiplatform_v1.types import (
         content,
         prediction_service,
     )
+    from google.cloud.aiplatform_v1beta1.services.prediction_service import (
+        async_client as async_client_v1beta1,
+    )
     from google.cloud.aiplatform_v1beta1.services.prediction_service import (
         client as client_v1beta1,
     )

@@ -101,7 +109,9 @@ def __init__(
     def _with_instrumentation(
         self,
         instance: client.PredictionServiceClient
-        | client_v1beta1.PredictionServiceClient,
+        | client_v1beta1.PredictionServiceClient
+        | async_client.PredictionServiceAsyncClient
+        | async_client_v1beta1.PredictionServiceAsyncClient,
         args: Any,
         kwargs: Any,
     ):

@@ -178,8 +188,8 @@ async def agenerate_content(
                 | prediction_service_v1beta1.GenerateContentResponse
             ],
         ],
-        instance: client.PredictionServiceClient
-        | client_v1beta1.PredictionServiceClient,
+        instance: async_client.PredictionServiceAsyncClient
+        | async_client_v1beta1.PredictionServiceAsyncClient,
         args: Any,
         kwargs: Any,
     ) -> (

@@ -192,3 +202,53 @@ async def agenerate_content(
             response = await wrapped(*args, **kwargs)
             handle_response(response)
             return response
+
+    def stream_generate_content(
+        self,
+        wrapped: Callable[
+            ...,
+            Iterable[prediction_service.GenerateContentResponse]
+            | Iterable[prediction_service_v1beta1.GenerateContentResponse],
+        ],
+        instance: client.PredictionServiceClient
+        | client_v1beta1.PredictionServiceClient,
+        args: Any,
+        kwargs: Any,
+    ) -> Iterable[
+        prediction_service.GenerateContentResponse
+        | prediction_service_v1beta1.GenerateContentResponse,
+    ]:
+        with self._with_instrumentation(
+            instance, args, kwargs
+        ) as handle_response:
+            for response in wrapped(*args, **kwargs):
+                handle_response(response)
+                yield response
+
+    async def astream_generate_content(
+        self,
+        wrapped: Callable[
+            ...,
+            Awaitable[
+                AsyncIterable[prediction_service.GenerateContentResponse]
+            ]
+            | Awaitable[
+                AsyncIterable[
+                    prediction_service_v1beta1.GenerateContentResponse
+                ]
+            ],
+        ],
+        instance: async_client.PredictionServiceAsyncClient
+        | async_client_v1beta1.PredictionServiceAsyncClient,
+        args: Any,
+        kwargs: Any,
+    ) -> AsyncIterable[
+        prediction_service.GenerateContentResponse
+        | prediction_service_v1beta1.GenerateContentResponse,
+    ]:
+        with self._with_instrumentation(
+            instance, args, kwargs
+        ) as handle_response:
+            async for response in await wrapped(*args, **kwargs):
+                handle_response(response)
+                yield response
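The streaming wrappers above rely on _with_instrumentation being a context manager that yields a per-response callback. A simplified, self-contained sketch of that shape; the span name, attribute, and helper names are illustrative, not the package's exact code:

from contextlib import contextmanager

from opentelemetry import trace

tracer = trace.get_tracer(__name__)


@contextmanager
def _with_instrumentation_sketch(span_name: str):
    # The span opens before the request and closes when the with-block exits.
    with tracer.start_as_current_span(span_name) as span:
        def handle_response(response):
            # The real wrapper records choice events and token usage here;
            # this stand-in just marks that a chunk was seen.
            span.set_attribute("illustrative.saw_chunk", True)

        yield handle_response


def fake_stream():
    # Stand-in for wrapped(*args, **kwargs) returning an iterable of chunks.
    yield from ("chunk-0", "chunk-1", "chunk-2")


def wrapped_stream():
    with _with_instrumentation_sketch("stream_generate_content") as handle:
        for chunk in fake_stream():
            handle(chunk)
            yield chunk


for chunk in wrapped_stream():
    print(chunk)

Because wrapped_stream is a generator, the with-block (and therefore the span) stays open until the caller exhausts or closes the stream, so the span covers the entire streamed response rather than just the initial request.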

instrumentation-genai/opentelemetry-instrumentation-vertexai/src/opentelemetry/instrumentation/vertexai/utils.py (+3 -4)

@@ -330,10 +330,9 @@ def _map_finish_reason(
     | content_v1beta1.Candidate.FinishReason,
 ) -> FinishReason | str:
     EnumType = type(finish_reason)  # pylint: disable=invalid-name
-    if (
-        finish_reason is EnumType.FINISH_REASON_UNSPECIFIED
-        or finish_reason is EnumType.OTHER
-    ):
+    if finish_reason is EnumType.FINISH_REASON_UNSPECIFIED:
+        return ""
+    if finish_reason is EnumType.OTHER:
         return "error"
     if finish_reason is EnumType.STOP:
         return "stop"
New file (+120 lines): a VCR test cassette; the file path is not shown in this view.

@@ -0,0 +1,120 @@
+interactions:
+- request:
+    body: |-
+      {
+        "contents": [
+          {
+            "role": "user",
+            "parts": [
+              {
+                "text": "Get weather details in New Delhi and San Francisco?"
+              }
+            ]
+          }
+        ],
+        "tools": [
+          {
+            "functionDeclarations": [
+              {
+                "name": "get_current_weather",
+                "description": "Get the current weather in a given location",
+                "parameters": {
+                  "type": 6,
+                  "properties": {
+                    "location": {
+                      "type": 1,
+                      "description": "The location for which to get the weather. It can be a city name, a city name and state, or a zip code. Examples: 'San Francisco', 'San Francisco, CA', '95616', etc."
+                    }
+                  },
+                  "propertyOrdering": [
+                    "location"
+                  ]
+                }
+              }
+            ]
+          }
+        ]
+      }
+    headers:
+      Accept:
+      - '*/*'
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '824'
+      Content-Type:
+      - application/json
+      User-Agent:
+      - python-requests/2.32.3
+    method: POST
+    uri: https://us-central1-aiplatform.googleapis.com/v1/projects/fake-project/locations/us-central1/publishers/google/models/gemini-1.5-flash-002:streamGenerateContent?%24alt=json%3Benum-encoding%3Dint
+  response:
+    body:
+      string: |-
+        [
+          {
+            "candidates": [
+              {
+                "content": {
+                  "role": "model",
+                  "parts": [
+                    {
+                      "functionCall": {
+                        "name": "get_current_weather",
+                        "args": {
+                          "location": "New Delhi"
+                        }
+                      }
+                    },
+                    {
+                      "functionCall": {
+                        "name": "get_current_weather",
+                        "args": {
+                          "location": "San Francisco"
+                        }
+                      }
+                    }
+                  ]
+                },
+                "finishReason": 1
+              }
+            ],
+            "usageMetadata": {
+              "promptTokenCount": 72,
+              "candidatesTokenCount": 16,
+              "totalTokenCount": 88,
+              "promptTokensDetails": [
+                {
+                  "modality": 1,
+                  "tokenCount": 72
+                }
+              ],
+              "candidatesTokensDetails": [
+                {
+                  "modality": 1,
+                  "tokenCount": 16
+                }
+              ]
+            },
+            "modelVersion": "gemini-1.5-flash-002",
+            "createTime": "2025-03-05T04:44:12.226326Z",
+            "responseId": "nNbHZ5boDZeTmecP49qwuQU"
+          }
+        ]
+      headers:
+        Content-Type:
+        - application/json; charset=UTF-8
+        Transfer-Encoding:
+        - chunked
+        Vary:
+        - Origin
+        - X-Origin
+        - Referer
+        content-length:
+        - '985'
+      status:
+        code: 200
+        message: OK
+version: 1
New file (+120 lines): a second VCR test cassette for the same streaming request; it differs from the first only in createTime, responseId, and content-length. The file path is not shown in this view.

@@ -0,0 +1,120 @@
+interactions:
+- request:
+    body: |-
+      {
+        "contents": [
+          {
+            "role": "user",
+            "parts": [
+              {
+                "text": "Get weather details in New Delhi and San Francisco?"
+              }
+            ]
+          }
+        ],
+        "tools": [
+          {
+            "functionDeclarations": [
+              {
+                "name": "get_current_weather",
+                "description": "Get the current weather in a given location",
+                "parameters": {
+                  "type": 6,
+                  "properties": {
+                    "location": {
+                      "type": 1,
+                      "description": "The location for which to get the weather. It can be a city name, a city name and state, or a zip code. Examples: 'San Francisco', 'San Francisco, CA', '95616', etc."
+                    }
+                  },
+                  "propertyOrdering": [
+                    "location"
+                  ]
+                }
+              }
+            ]
+          }
+        ]
+      }
+    headers:
+      Accept:
+      - '*/*'
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '824'
+      Content-Type:
+      - application/json
+      User-Agent:
+      - python-requests/2.32.3
+    method: POST
+    uri: https://us-central1-aiplatform.googleapis.com/v1/projects/fake-project/locations/us-central1/publishers/google/models/gemini-1.5-flash-002:streamGenerateContent?%24alt=json%3Benum-encoding%3Dint
+  response:
+    body:
+      string: |-
+        [
+          {
+            "candidates": [
+              {
+                "content": {
+                  "role": "model",
+                  "parts": [
+                    {
+                      "functionCall": {
+                        "name": "get_current_weather",
+                        "args": {
+                          "location": "New Delhi"
+                        }
+                      }
+                    },
+                    {
+                      "functionCall": {
+                        "name": "get_current_weather",
+                        "args": {
+                          "location": "San Francisco"
+                        }
+                      }
+                    }
+                  ]
+                },
+                "finishReason": 1
+              }
+            ],
+            "usageMetadata": {
+              "promptTokenCount": 72,
+              "candidatesTokenCount": 16,
+              "totalTokenCount": 88,
+              "promptTokensDetails": [
+                {
+                  "modality": 1,
+                  "tokenCount": 72
+                }
+              ],
+              "candidatesTokensDetails": [
+                {
+                  "modality": 1,
+                  "tokenCount": 16
+                }
+              ]
+            },
+            "modelVersion": "gemini-1.5-flash-002",
+            "createTime": "2025-03-05T04:46:18.094334Z",
+            "responseId": "GtfHZ_7gBe2Om9IPrJa3MQ"
+          }
+        ]
+      headers:
+        Content-Type:
+        - application/json; charset=UTF-8
+        Transfer-Encoding:
+        - chunked
+        Vary:
+        - Origin
+        - X-Origin
+        - Referer
+        content-length:
+        - '984'
+      status:
+        code: 200
+        message: OK
+version: 1
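These cassettes record a streamGenerateContent call returning parallel tool calls (note "finishReason": 1, i.e. STOP under the enum-encoding=int query parameter). A hedged sketch of how such a cassette is typically replayed with vcrpy; the cassette directory, file name, and test name are assumptions, since the real paths are not visible in this view:

import vcr

# Hypothetical cassette location; the actual file names are not shown above.
my_vcr = vcr.VCR(cassette_library_dir="tests/cassettes")


def test_stream_generate_content_tool_call():
    with my_vcr.use_cassette("stream_tool_call.yaml"):
        # Inside this block, vcrpy intercepts the HTTPS request to the
        # streamGenerateContent endpoint and replays the recorded response
        # instead of hitting the real Vertex AI API.
        ...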
