Add metrics to the Python OpenAI instrumentation #3180

Merged (18 commits) on Jan 15, 2025. The diff below shows changes from 9 of the commits.
CHANGELOG.md
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add example to `opentelemetry-instrumentation-openai-v2`
  ([#3006](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3006))
- Support for `AsyncOpenAI/AsyncCompletions` ([#2984](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2984))
- Add metrics to the Python OpenAI instrumentation ([#3180](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3180))

## Version 2.0b0 (2024-11-08)

README.rst
@@ -7,7 +7,8 @@ OpenTelemetry OpenAI Instrumentation
   :target: https://pypi.org/project/opentelemetry-instrumentation-openai-v2/

This library allows tracing LLM requests and logging of messages made by the
-`OpenAI Python API library <https://pypi.org/project/openai/>`_.
+`OpenAI Python API library <https://pypi.org/project/openai/>`_. It also captures
+the duration of the operations and the number of tokens used as metrics.
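
As a minimal sketch of enabling this (not part of the diff; the full metrics
setup is shown in the example added by this PR)::

    from opentelemetry.instrumentation.openai_v2 import OpenAIInstrumentor

    OpenAIInstrumentor().instrument()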


Installation
.env (new file, metrics example)
@@ -0,0 +1,14 @@
# Update this with your real OpenAI API key
OPENAI_API_KEY=sk-YOUR_API_KEY

# Uncomment to use Ollama instead of OpenAI
# OPENAI_BASE_URL=http://localhost:11434/v1
# OPENAI_API_KEY=unused
# CHAT_MODEL=qwen2.5:0.5b

# Uncomment and change to your OTLP endpoint
# OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317
# OTEL_EXPORTER_OTLP_PROTOCOL=grpc

OTEL_SERVICE_NAME=opentelemetry-python-openai
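
For reference, the `dotenv run` command used in the example README loads this
file into the process environment before launching the script. The plain-Python
equivalent would be a sketch like this (assuming the python-dotenv package):

    from dotenv import load_dotenv

    load_dotenv()  # reads .env from the current directory into os.environ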

README.rst (new file, metrics example)
@@ -0,0 +1,38 @@
OpenTelemetry OpenAI Instrumentation Example
============================================

This example shows how to instrument OpenAI calls for metrics when configuring the OpenTelemetry SDK and instrumentation manually.

When `main.py <main.py>`_ is run, it exports metrics to an OTLP-compatible endpoint. Metrics include details such as token usage and operation duration, with specific bucket boundaries for each metric.

The bucket boundaries are defined as follows:

- For `gen_ai.client.token.usage`: [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]
- For `gen_ai.client.operation.duration`: [0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92]

These are documented in the `OpenTelemetry GenAI Metrics documentation <https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/>`_.
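
Both series are geometric: the token-usage boundaries are powers of 4, and the
duration boundaries are 0.01 times powers of 2. As a quick sketch (not part of
this example), they could be generated like so::

    token_buckets = [4**i for i in range(14)]  # 1 .. 67108864
    duration_buckets = [round(0.01 * 2**i, 2) for i in range(14)]  # 0.01 .. 81.92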

Setup
-----

Minimally, update the `.env <.env>`_ file with your "OPENAI_API_KEY". An OTLP-compatible endpoint should be listening for metrics on http://localhost:4317. If not, update "OTEL_EXPORTER_OTLP_ENDPOINT" as well.

Next, set up a virtual environment like this:

::

    python3 -m venv .venv
    source .venv/bin/activate
    pip install "python-dotenv[cli]"
    pip install -r requirements.txt

Run
---

Run the example like this:

::

    dotenv run -- python main.py

You should see metrics being exported to your configured observability tool, with the specified bucket boundaries for token usage and operation duration.
main.py (new file, metrics example)
@@ -0,0 +1,92 @@
import os

from openai import OpenAI

from opentelemetry import metrics
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import (
    OTLPMetricExporter,
)
from opentelemetry.instrumentation.openai_v2 import OpenAIInstrumentor
from opentelemetry.sdk.metrics import Histogram, MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.sdk.metrics.view import (
    ExplicitBucketHistogramAggregation,
    View,
)

# configure metrics
metric_exporter = OTLPMetricExporter()
metric_reader = PeriodicExportingMetricReader(metric_exporter)

TokenUsageHistogramView = View(
    instrument_type=Histogram,
    instrument_name="gen_ai.client.token.usage",
    aggregation=ExplicitBucketHistogramAggregation(
        boundaries=[
            1,
            4,
            16,
            64,
            256,
            1024,
            4096,
            16384,
            65536,
            262144,
            1048576,
            4194304,
            16777216,
            67108864,
        ]
    ),
)

DurationHistogramView = View(
    instrument_type=Histogram,
    instrument_name="gen_ai.client.operation.duration",
    aggregation=ExplicitBucketHistogramAggregation(
        boundaries=[
            0.01,
            0.02,
            0.04,
            0.08,
            0.16,
            0.32,
            0.64,
            1.28,
            2.56,
            5.12,
            10.24,
            20.48,
            40.96,
            81.92,
        ]
    ),
)

meter_provider = MeterProvider(
    metric_readers=[metric_reader],
    views=[TokenUsageHistogramView, DurationHistogramView],
)
metrics.set_meter_provider(meter_provider)

# instrument OpenAI
OpenAIInstrumentor().instrument(meter_provider=meter_provider)


def main():
    client = OpenAI()
    chat_completion = client.chat.completions.create(
        model=os.getenv("CHAT_MODEL", "gpt-4o-mini"),
        messages=[
            {
                "role": "user",
                "content": "Write a short poem on OpenTelemetry.",
            },
        ],
    )
    print(chat_completion.choices[0].message.content)


if __name__ == "__main__":
    main()
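
If no OTLP endpoint is listening, a quick way to eyeball the output is to swap
the OTLP exporter for the SDK's console exporter; a sketch, not part of this
PR (the rest of main.py stays the same):

    # Sketch: print metric data to stdout instead of shipping it over OTLP.
    from opentelemetry.sdk.metrics.export import (
        ConsoleMetricExporter,
        PeriodicExportingMetricReader,
    )

    metric_reader = PeriodicExportingMetricReader(ConsoleMetricExporter())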
requirements.txt (new file, metrics example)
@@ -0,0 +1,5 @@
openai~=1.57.3

opentelemetry-sdk~=1.29.0
opentelemetry-exporter-otlp-proto-grpc~=1.29.0
opentelemetry-instrumentation-openai-v2~=2.0b0
opentelemetry/instrumentation/openai_v2/__init__.py
@@ -49,13 +49,18 @@
from opentelemetry.instrumentation.openai_v2.package import _instruments
from opentelemetry.instrumentation.openai_v2.utils import is_content_enabled
from opentelemetry.instrumentation.utils import unwrap
+from opentelemetry.metrics import get_meter
from opentelemetry.semconv.schemas import Schemas
from opentelemetry.trace import get_tracer

+from .meters import Meters
from .patch import async_chat_completions_create, chat_completions_create


class OpenAIInstrumentor(BaseInstrumentor):
+    def __init__(self):
+        self._meter = None

    def instrumentation_dependencies(self) -> Collection[str]:
        return _instruments

@@ -75,20 +80,29 @@ def _instrument(self, **kwargs):
            schema_url=Schemas.V1_28_0.value,
            event_logger_provider=event_logger_provider,
        )
+        meter_provider = kwargs.get("meter_provider")
+        self._meter = get_meter(
+            __name__,
+            "",
+            meter_provider,
+            schema_url=Schemas.V1_28_0.value,
+        )
+
+        meters = Meters(self._meter)
+
        wrap_function_wrapper(
            module="openai.resources.chat.completions",
            name="Completions.create",
            wrapper=chat_completions_create(
-                tracer, event_logger, is_content_enabled()
+                tracer, event_logger, meters, is_content_enabled()
            ),
        )

        wrap_function_wrapper(
            module="openai.resources.chat.completions",
            name="AsyncCompletions.create",
            wrapper=async_chat_completions_create(
-                tracer, event_logger, is_content_enabled()
+                tracer, event_logger, meters, is_content_enabled()
            ),
        )

opentelemetry/instrumentation/openai_v2/meters.py (new file)
@@ -0,0 +1,11 @@
from opentelemetry.semconv._incubating.metrics import gen_ai_metrics


class Meters:
    def __init__(self, meter):
        self.operation_duration_histogram = (
            gen_ai_metrics.create_gen_ai_client_operation_duration(meter)
        )
        self.token_usage_histogram = (
            gen_ai_metrics.create_gen_ai_client_token_usage(meter)
        )
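
For orientation, the two semconv helpers used above are thin wrappers over
`meter.create_histogram`; roughly (a sketch of the generated semconv code, not
part of this PR):

    # Rough shape of the generated helper in gen_ai_metrics.
    def create_gen_ai_client_operation_duration(meter):
        # Histogram named per the GenAI semantic conventions, measured in seconds.
        return meter.create_histogram(
            name="gen_ai.client.operation.duration",
            description="GenAI operation duration",
            unit="s",
        )

The bucket boundaries are not set here; they come from views configured on the
`MeterProvider` (as in the example's main.py) or from the SDK defaults.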
opentelemetry/instrumentation/openai_v2/patch.py
@@ -13,6 +13,7 @@
# limitations under the License.


+from timeit import default_timer
from typing import Optional

from openai import Stream
@@ -23,6 +24,7 @@
)
from opentelemetry.trace import Span, SpanKind, Tracer

+from .meters import Meters
from .utils import (
    choice_to_event,
    get_llm_request_attributes,
@@ -34,7 +36,10 @@


def chat_completions_create(
-    tracer: Tracer, event_logger: EventLogger, capture_content: bool
+    tracer: Tracer,
+    event_logger: EventLogger,
+    meters: Meters,
+    capture_content: bool,
):
    """Wrap the `create` method of the `ChatCompletion` class to trace it."""

@@ -54,6 +59,8 @@ def traced_method(wrapped, instance, args, kwargs):
                    message_to_event(message, capture_content)
                )

+            start = default_timer()
+            result = None
            try:
                result = wrapped(*args, **kwargs)
                if is_streaming(kwargs):
@@ -71,12 +78,23 @@
            except Exception as error:
                handle_span_exception(span, error)
                raise
+            finally:
+                duration = max((default_timer() - start), 0)
+                _record_metrics(
+                    meters,
+                    duration,
+                    result,
+                    span_attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL],
+                )

    return traced_method


def async_chat_completions_create(
-    tracer: Tracer, event_logger: EventLogger, capture_content: bool
+    tracer: Tracer,
+    event_logger: EventLogger,
+    meters: Meters,
+    capture_content: bool,
):
    """Wrap the `create` method of the `AsyncChatCompletion` class to trace it."""

@@ -96,6 +114,8 @@ async def traced_method(wrapped, instance, args, kwargs):
                    message_to_event(message, capture_content)
                )

+            start = default_timer()
+            result = None
            try:
                result = await wrapped(*args, **kwargs)
                if is_streaming(kwargs):
@@ -113,10 +133,55 @@
            except Exception as error:
                handle_span_exception(span, error)
                raise
+            finally:
+                duration = max((default_timer() - start), 0)
+                _record_metrics(
+                    meters,
+                    duration,
+                    result,
+                    span_attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL],
+                )

    return traced_method


+def _record_metrics(
+    meters: Meters, duration: float, result, request_model: str
+):
+    common_attributes = {
+        GenAIAttributes.GEN_AI_OPERATION_NAME: GenAIAttributes.GenAiOperationNameValues.CHAT.value,
+        GenAIAttributes.GEN_AI_SYSTEM: GenAIAttributes.GenAiSystemValues.OPENAI.value,
+        GenAIAttributes.GEN_AI_REQUEST_MODEL: request_model,
+    }
+
+    if result and getattr(result, "model", None):
+        common_attributes[GenAIAttributes.GEN_AI_RESPONSE_MODEL] = result.model
+
+    meters.operation_duration_histogram.record(
+        duration,
+        attributes=common_attributes,
+    )
+
+    if result and getattr(result, "usage", None):
+        input_attributes = {
+            **common_attributes,
+            GenAIAttributes.GEN_AI_TOKEN_TYPE: GenAIAttributes.GenAiTokenTypeValues.INPUT.value,
+        }
+        meters.token_usage_histogram.record(
+            result.usage.prompt_tokens,
+            attributes=input_attributes,
+        )
+
+        completion_attributes = {
+            **common_attributes,
+            GenAIAttributes.GEN_AI_TOKEN_TYPE: GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value,
+        }
+        meters.token_usage_histogram.record(
+            result.usage.completion_tokens,
+            attributes=completion_attributes,
+        )


def _set_response_attributes(
    span, result, event_logger: EventLogger, capture_content: bool
):