open-telemetry
diff --git a/‎instrumentation-genai/opentelemetry-instrumentation-vertexai/src/opentelemetry/instrumentation/vertexai/__init__.py
+35-18 b/‎instrumentation-genai/opentelemetry-instrumentation-vertexai/src/opentelemetry/instrumentation/vertexai/__init__.py
+35-18
diff --git a/‎instrumentation-genai/opentelemetry-instrumentation-vertexai/src/opentelemetry/instrumentation/vertexai/events.py
+2-1 b/‎instrumentation-genai/opentelemetry-instrumentation-vertexai/src/opentelemetry/instrumentation/vertexai/events.py
+2-1
diff --git a/‎instrumentation-genai/opentelemetry-instrumentation-vertexai/src/opentelemetry/instrumentation/vertexai/patch.py
+77-27 b/‎instrumentation-genai/opentelemetry-instrumentation-vertexai/src/opentelemetry/instrumentation/vertexai/patch.py
+77-27
diff --git a/‎instrumentation-genai/opentelemetry-instrumentation-vertexai/src/opentelemetry/instrumentation/vertexai/utils.py
+3-4 b/‎instrumentation-genai/opentelemetry-instrumentation-vertexai/src/opentelemetry/instrumentation/vertexai/utils.py
+3-4
diff --git a/‎instrumentation-genai/opentelemetry-instrumentation-vertexai/tests/cassettes/test_stream_function_call_choice.yaml
+120 b/‎instrumentation-genai/opentelemetry-instrumentation-vertexai/tests/cassettes/test_stream_function_call_choice.yaml
+120
@@ -39,42 +39,55 @@
 ---
 """
 
+from __future__ import annotations
+
 from typing import Any, Collection
 
 from wrapt import (
-    wrap_function_wrapper,  # type: ignore[reportUnknownVariableType]
+    wrap_function_wrapper,  # pyright: ignore[reportUnknownVariableType]
 )
 
 from opentelemetry._events import get_event_logger
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from opentelemetry.instrumentation.utils import unwrap
 from opentelemetry.instrumentation.vertexai.package import _instruments
-from opentelemetry.instrumentation.vertexai.patch import (
-    generate_content_create,
-)
+from opentelemetry.instrumentation.vertexai.patch import MethodWrappers
 from opentelemetry.instrumentation.vertexai.utils import is_content_enabled
 from opentelemetry.semconv.schemas import Schemas
 from opentelemetry.trace import get_tracer
 
 
-def _client_classes():
+def _methods_to_wrap(
+    method_wrappers: MethodWrappers,
+):
+    """Yield a ``(client_class, method_name, wrapper)`` tuple for each method to patch.
+
+    For both the v1 and v1beta1 ``PredictionServiceClient`` classes, one tuple is
+    yielded for ``generate_content`` and one for ``stream_generate_content``, each
+    paired with the matching wrapper taken from ``method_wrappers``.
+    """
     # This import is very slow, do it lazily in case instrument() is not called
-
     # pylint: disable=import-outside-toplevel
-    from google.cloud.aiplatform_v1.services.prediction_service import (
-        client,
-    )
+    from google.cloud.aiplatform_v1.services.prediction_service import client
     from google.cloud.aiplatform_v1beta1.services.prediction_service import (
         client as client_v1beta1,
     )
 
-    return (
+    for client_class in (
         client.PredictionServiceClient,
         client_v1beta1.PredictionServiceClient,
-    )
+    ):
+        yield (
+            client_class,
+            client_class.generate_content.__name__,  # pyright: ignore[reportUnknownMemberType]
+            method_wrappers.generate_content,
+        )
+        yield (
+            client_class,
+            client_class.stream_generate_content.__name__,  # pyright: ignore[reportUnknownMemberType]
+            method_wrappers.stream_generate_content,
+        )
 
 
 class VertexAIInstrumentor(BaseInstrumentor):
+    def __init__(self) -> None:
+        """Set up the record of wrapped methods that ``_uninstrument`` consumes.
+
+        ``BaseInstrumentor`` hands out one shared instance per subclass, so this
+        ``__init__`` runs again every time ``VertexAIInstrumentor()`` is called.
+        An existing list is therefore kept rather than replaced; otherwise
+        ``VertexAIInstrumentor().uninstrument()`` would forget what an earlier
+        ``VertexAIInstrumentor().instrument()`` had wrapped and leave the
+        client methods patched.
+        """
+        super().__init__()
+        self._methods_to_unwrap: list[tuple[Any, str]] = getattr(
+            self, "_methods_to_unwrap", []
+        )
+
     def instrumentation_dependencies(self) -> Collection[str]:
+        """Return the ``_instruments`` collection imported from the ``package`` module."""
         return _instruments
 
@@ -95,15 +108,19 @@ def _instrument(self, **kwargs: Any):
             event_logger_provider=event_logger_provider,
         )
 
-        for client_class in _client_classes():
+        method_wrappers = MethodWrappers(
+            tracer, event_logger, is_content_enabled()
+        )
+        for client_class, method_name, wrapper in _methods_to_wrap(
+            method_wrappers
+        ):
             wrap_function_wrapper(
                 client_class,
-                name="generate_content",
-                wrapper=generate_content_create(
-                    tracer, event_logger, is_content_enabled()
-                ),
+                name=method_name,
+                wrapper=wrapper,
             )
+            self._methods_to_unwrap.append((client_class, method_name))
 
     def _uninstrument(self, **kwargs: Any) -> None:
-        for client_class in _client_classes():
-            unwrap(client_class, "generate_content")
+        """Unwrap every method recorded in ``_methods_to_unwrap``, then drop the records."""
+        for client_class, method_name in self._methods_to_unwrap:
+            unwrap(client_class, method_name)
+        # Forget what was unwrapped so that a later instrument()/uninstrument()
+        # cycle on this instance does not accumulate duplicate entries.
+        self._methods_to_unwrap.clear()
@@ -161,10 +161,11 @@ def choice_event(
     https://github.com/open-telemetry/semantic-conventions/blob/v1.28.0/docs/gen-ai/gen-ai-events.md#event-gen_aichoice
     """
     body: dict[str, AnyValue] = {
-        "finish_reason": finish_reason,
         "index": index,
         "message": _asdict_filter_nulls(message),
     }
+    if finish_reason:
+        body["finish_reason"] = finish_reason
 
     tool_calls_list = [
         _asdict_filter_nulls(tool_call) for tool_call in tool_calls
 
@@ -14,10 +14,12 @@
 
 from __future__ import annotations
 
+from contextlib import contextmanager
 from typing import (
     TYPE_CHECKING,
     Any,
     Callable,
+    Iterable,
     MutableSequence,
 )
 
@@ -87,17 +89,17 @@ def _extract_params(
     )
 
 
-def generate_content_create(
-    tracer: Tracer, event_logger: EventLogger, capture_content: bool
-):
-    """Wrap the `generate_content` method of the `GenerativeModel` class to trace it."""
+class MethodWrappers:
+    def __init__(
+        self, tracer: Tracer, event_logger: EventLogger, capture_content: bool
+    ) -> None:
+        """Store the telemetry handles shared by the wrapper methods.
+
+        Args:
+            tracer: Used to start the span around each wrapped call.
+            event_logger: Used to emit the request and response events.
+            capture_content: Forwarded as ``capture_content`` to
+                ``request_to_events`` and ``response_to_events``.
+        """
+        self.tracer = tracer
+        self.event_logger = event_logger
+        self.capture_content = capture_content
 
-    def traced_method(
-        wrapped: Callable[
-            ...,
-            prediction_service.GenerateContentResponse
-            | prediction_service_v1beta1.GenerateContentResponse,
-        ],
+    @contextmanager
+    def _with_instrumentation(
+        self,
         instance: client.PredictionServiceClient
         | client_v1beta1.PredictionServiceClient,
         args: Any,
@@ -111,32 +113,80 @@ def traced_method(
         }
 
         span_name = get_span_name(span_attributes)
-        with tracer.start_as_current_span(
+
+        with self.tracer.start_as_current_span(
             name=span_name,
             kind=SpanKind.CLIENT,
             attributes=span_attributes,
         ) as span:
             for event in request_to_events(
-                params=params, capture_content=capture_content
+                params=params, capture_content=self.capture_content
             ):
-                event_logger.emit(event)
+                self.event_logger.emit(event)
 
             # TODO: set error.type attribute
             # https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/gen-ai-spans.md
-            response = wrapped(*args, **kwargs)
-            # TODO: handle streaming
-            # if is_streaming(kwargs):
-            #     return StreamWrapper(
-            #         result, span, event_logger, capture_content
-            #     )
-
-            if span.is_recording():
-                span.set_attributes(get_genai_response_attributes(response))
-            for event in response_to_events(
-                response=response, capture_content=capture_content
-            ):
-                event_logger.emit(event)
 
+            def handle_response(
+                response: prediction_service.GenerateContentResponse
+                | prediction_service_v1beta1.GenerateContentResponse,
+            ) -> None:
+                """Record one response on the enclosing span and emit its events."""
+                if span.is_recording():
+                    # When streaming, this is called multiple times so attributes would be
+                    # overwritten. In practice, it looks like the API only returns the
+                    # interesting attributes on the last streamed response. However, I couldn't
+                    # find documentation for this and setting attributes shouldn't be too expensive.
+                    span.set_attributes(
+                        get_genai_response_attributes(response)
+                    )
+
+                for event in response_to_events(
+                    response=response, capture_content=self.capture_content
+                ):
+                    self.event_logger.emit(event)
+
+            yield handle_response
+
+    def generate_content(
+        self,
+        wrapped: Callable[
+            ...,
+            prediction_service.GenerateContentResponse
+            | prediction_service_v1beta1.GenerateContentResponse,
+        ],
+        instance: client.PredictionServiceClient
+        | client_v1beta1.PredictionServiceClient,
+        args: Any,
+        kwargs: Any,
+    ) -> (
+        prediction_service.GenerateContentResponse
+        | prediction_service_v1beta1.GenerateContentResponse
+    ):
+        """Wrapper for ``generate_content``: trace the call and record its response.
+
+        Takes the ``(wrapped, instance, args, kwargs)`` arguments supplied when
+        installed through ``wrap_function_wrapper``. The wrapped call runs inside
+        the span opened by ``_with_instrumentation``; its response is passed to
+        ``handle_response`` and then returned unchanged.
+        """
+        with self._with_instrumentation(
+            instance, args, kwargs
+        ) as handle_response:
+            response = wrapped(*args, **kwargs)
+            handle_response(response)
             return response
 
-    return traced_method
+    def stream_generate_content(
+        self,
+        wrapped: Callable[
+            ...,
+            Iterable[prediction_service.GenerateContentResponse]
+            | Iterable[prediction_service_v1beta1.GenerateContentResponse],
+        ],
+        instance: client.PredictionServiceClient
+        | client_v1beta1.PredictionServiceClient,
+        args: Any,
+        kwargs: Any,
+    ) -> Iterable[
+        prediction_service.GenerateContentResponse
+        | prediction_service_v1beta1.GenerateContentResponse,
+    ]:
+        """Wrapper for ``stream_generate_content``: trace the call and record each chunk.
+
+        This is a generator, so the span is only started and ``wrapped`` only
+        called once the caller begins iterating. Every streamed response is
+        passed to ``handle_response`` and then yielded unchanged; the span stays
+        open until the generator finishes or is closed.
+        """
+        # NOTE(review): _with_instrumentation uses start_as_current_span, so the span
+        # appears to remain the current span while this generator is suspended at
+        # ``yield`` -- confirm that caller code running between chunks is unaffected.
+        with self._with_instrumentation(
+            instance, args, kwargs
+        ) as handle_response:
+            for response in wrapped(*args, **kwargs):
+                handle_response(response)
+                yield response
@@ -330,10 +330,9 @@ def _map_finish_reason(
     | content_v1beta1.Candidate.FinishReason,
 ) -> FinishReason | str:
     EnumType = type(finish_reason)  # pylint: disable=invalid-name
-    if (
-        finish_reason is EnumType.FINISH_REASON_UNSPECIFIED
-        or finish_reason is EnumType.OTHER
-    ):
+    if finish_reason is EnumType.FINISH_REASON_UNSPECIFIED:
+        return ""
+    if finish_reason is EnumType.OTHER:
         return "error"
     if finish_reason is EnumType.STOP:
         return "stop"
 
@@ -0,0 +1,120 @@
+interactions:
+- request:
+    body: |-
+      {
+        "contents": [
+          {
+            "role": "user",
+            "parts": [
+              {
+                "text": "Get weather details in New Delhi and San Francisco?"
+              }
+            ]
+          }
+        ],
+        "tools": [
+          {
+            "functionDeclarations": [
+              {
+                "name": "get_current_weather",
+                "description": "Get the current weather in a given location",
+                "parameters": {
+                  "type": 6,
+                  "properties": {
+                    "location": {
+                      "type": 1,
+                      "description": "The location for which to get the weather. It can be a city name, a city name and state, or a zip code. Examples: 'San Francisco', 'San Francisco, CA', '95616', etc."
+                    }
+                  },
+                  "propertyOrdering": [
+                    "location"
+                  ]
+                }
+              }
+            ]
+          }
+        ]
+      }
+    headers:
+      Accept:
+      - '*/*'
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '824'
+      Content-Type:
+      - application/json
+      User-Agent:
+      - python-requests/2.32.3
+    method: POST
+    uri: https://us-central1-aiplatform.googleapis.com/v1/projects/fake-project/locations/us-central1/publishers/google/models/gemini-1.5-flash-002:streamGenerateContent?%24alt=json%3Benum-encoding%3Dint
+  response:
+    body:
+      string: |-
+        [
+          {
+            "candidates": [
+              {
+                "content": {
+                  "role": "model",
+                  "parts": [
+                    {
+                      "functionCall": {
+                        "name": "get_current_weather",
+                        "args": {
+                          "location": "New Delhi"
+                        }
+                      }
+                    },
+                    {
+                      "functionCall": {
+                        "name": "get_current_weather",
+                        "args": {
+                          "location": "San Francisco"
+                        }
+                      }
+                    }
+                  ]
+                },
+                "finishReason": 1
+              }
+            ],
+            "usageMetadata": {
+              "promptTokenCount": 72,
+              "candidatesTokenCount": 16,
+              "totalTokenCount": 88,
+              "promptTokensDetails": [
+                {
+                  "modality": 1,
+                  "tokenCount": 72
+                }
+              ],
+              "candidatesTokensDetails": [
+                {
+                  "modality": 1,
+                  "tokenCount": 16
+                }
+              ]
+            },
+            "modelVersion": "gemini-1.5-flash-002",
+            "createTime": "2025-03-05T04:44:12.226326Z",
+            "responseId": "nNbHZ5boDZeTmecP49qwuQU"
+          }
+        ]
+    headers:
+      Content-Type:
+      - application/json; charset=UTF-8
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      - X-Origin
+      - Referer
+      content-length:
+      - '985'
+    status:
+      code: 200
+      message: OK
+version: 1