@@ -31,9 +31,9 @@
     ConverseStreamWrapper,
     InvokeModelWithResponseStreamWrapper,
     _Choice,
+    estimate_token_count,
     genai_capture_message_content,
     message_to_event,
-    estimate_token_count,
 )
 from opentelemetry.instrumentation.botocore.extensions.types import (
     _AttributeMapT,
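(Aside: the only change in this import hunk is moving `estimate_token_count` into sorted position. For context, a minimal sketch of what such an estimator does, assuming a simple characters-per-token heuristic rather than the package's exact implementation:

```python
import math

def estimate_token_count(message_body: str) -> int:
    # Rough heuristic sketch (an assumption, not the library's exact code):
    # treat roughly 6 characters of text as one token.
    return math.ceil(len(message_body) / 6)
```

It is used throughout this file to fill GEN_AI_USAGE_INPUT_TOKENS and GEN_AI_USAGE_OUTPUT_TOKENS for providers whose request/response bodies do not report token counts.)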
@@ -106,6 +106,7 @@

 _MODEL_ID_KEY: str = "modelId"

+
 class _BedrockRuntimeExtension(_AwsSdkExtension):
     """
     This class is an extension for <a
@@ -255,7 +256,9 @@ def _extract_titan_attributes(self, attributes, request_body):
             attributes, GEN_AI_REQUEST_MAX_TOKENS, config.get("maxTokenCount")
         )
         self._set_if_not_none(
-            attributes, GEN_AI_REQUEST_STOP_SEQUENCES, config.get("stopSequences")
+            attributes,
+            GEN_AI_REQUEST_STOP_SEQUENCES,
+            config.get("stopSequences"),
         )

     def _extract_nova_attributes(self, attributes, request_body):
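(Every reflowed call in these hunks goes through `_set_if_not_none`, whose self-less signature appears in a later hunk header. A minimal sketch of the guard that signature implies; the `@staticmethod` decorator is an assumption:

```python
@staticmethod
def _set_if_not_none(attributes, key, value):
    # Record the attribute only when the request supplied a value, so
    # optional parameters never show up on the span as None.
    if value is not None:
        attributes[key] = value
```
)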
@@ -270,21 +273,29 @@ def _extract_nova_attributes(self, attributes, request_body):
             attributes, GEN_AI_REQUEST_MAX_TOKENS, config.get("max_new_tokens")
         )
         self._set_if_not_none(
-            attributes, GEN_AI_REQUEST_STOP_SEQUENCES, config.get("stopSequences")
+            attributes,
+            GEN_AI_REQUEST_STOP_SEQUENCES,
+            config.get("stopSequences"),
         )

     def _extract_claude_attributes(self, attributes, request_body):
         self._set_if_not_none(
-            attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens")
+            attributes,
+            GEN_AI_REQUEST_MAX_TOKENS,
+            request_body.get("max_tokens"),
         )
         self._set_if_not_none(
-            attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature")
+            attributes,
+            GEN_AI_REQUEST_TEMPERATURE,
+            request_body.get("temperature"),
         )
         self._set_if_not_none(
             attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p")
         )
         self._set_if_not_none(
-            attributes, GEN_AI_REQUEST_STOP_SEQUENCES, request_body.get("stop_sequences")
+            attributes,
+            GEN_AI_REQUEST_STOP_SEQUENCES,
+            request_body.get("stop_sequences"),
         )

     def _extract_command_r_attributes(self, attributes, request_body):
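(For context, the Claude extractor above reads these keys straight from the InvokeModel request body; an illustrative body, with invented values, would be:

```python
# Illustrative Anthropic Claude request body; the extractor copies each
# key that is present onto the span as a gen_ai.request.* attribute.
request_body = {
    "max_tokens": 512,                 # -> gen_ai.request.max_tokens
    "temperature": 0.7,                # -> gen_ai.request.temperature
    "top_p": 0.9,                      # -> gen_ai.request.top_p
    "stop_sequences": ["\n\nHuman:"],  # -> gen_ai.request.stop_sequences
}
```
)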
@@ -293,16 +304,22 @@ def _extract_command_r_attributes(self, attributes, request_body):
             attributes, GEN_AI_USAGE_INPUT_TOKENS, estimate_token_count(prompt)
         )
         self._set_if_not_none(
-            attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens")
+            attributes,
+            GEN_AI_REQUEST_MAX_TOKENS,
+            request_body.get("max_tokens"),
         )
         self._set_if_not_none(
-            attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature")
+            attributes,
+            GEN_AI_REQUEST_TEMPERATURE,
+            request_body.get("temperature"),
         )
         self._set_if_not_none(
             attributes, GEN_AI_REQUEST_TOP_P, request_body.get("p")
         )
         self._set_if_not_none(
-            attributes, GEN_AI_REQUEST_STOP_SEQUENCES, request_body.get("stop_sequences")
+            attributes,
+            GEN_AI_REQUEST_STOP_SEQUENCES,
+            request_body.get("stop_sequences"),
         )

     def _extract_command_attributes(self, attributes, request_body):
@@ -311,24 +328,34 @@ def _extract_command_attributes(self, attributes, request_body):
             attributes, GEN_AI_USAGE_INPUT_TOKENS, estimate_token_count(prompt)
         )
         self._set_if_not_none(
-            attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens")
+            attributes,
+            GEN_AI_REQUEST_MAX_TOKENS,
+            request_body.get("max_tokens"),
         )
         self._set_if_not_none(
-            attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature")
+            attributes,
+            GEN_AI_REQUEST_TEMPERATURE,
+            request_body.get("temperature"),
         )
         self._set_if_not_none(
             attributes, GEN_AI_REQUEST_TOP_P, request_body.get("p")
         )
         self._set_if_not_none(
-            attributes, GEN_AI_REQUEST_STOP_SEQUENCES, request_body.get("stop_sequences")
+            attributes,
+            GEN_AI_REQUEST_STOP_SEQUENCES,
+            request_body.get("stop_sequences"),
         )

     def _extract_llama_attributes(self, attributes, request_body):
         self._set_if_not_none(
-            attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_gen_len")
+            attributes,
+            GEN_AI_REQUEST_MAX_TOKENS,
+            request_body.get("max_gen_len"),
         )
         self._set_if_not_none(
-            attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature")
+            attributes,
+            GEN_AI_REQUEST_TEMPERATURE,
+            request_body.get("temperature"),
         )
         self._set_if_not_none(
             attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p")
@@ -339,13 +366,19 @@ def _extract_mistral_attributes(self, attributes, request_body):
         prompt = request_body.get("prompt")
         if prompt:
             self._set_if_not_none(
-                attributes, GEN_AI_USAGE_INPUT_TOKENS, estimate_token_count(prompt)
+                attributes,
+                GEN_AI_USAGE_INPUT_TOKENS,
+                estimate_token_count(prompt),
             )
         self._set_if_not_none(
-            attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens")
+            attributes,
+            GEN_AI_REQUEST_MAX_TOKENS,
+            request_body.get("max_tokens"),
         )
         self._set_if_not_none(
-            attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature")
+            attributes,
+            GEN_AI_REQUEST_TEMPERATURE,
+            request_body.get("temperature"),
         )
         self._set_if_not_none(
             attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p")
@@ -361,7 +394,6 @@ def _set_if_not_none(attributes, key, value):

     def _get_request_messages(self):
         """Extracts and normalize system and user / assistant messages"""
-        input_text = None
         if system := self._call_context.params.get("system", []):
             system_messages = [{"role": "system", "content": system}]
         else:
@@ -390,20 +422,23 @@ def _get_request_messages(self):
390
422
messages = self ._get_messages_from_input_text (
391
423
decoded_body , "message"
392
424
)
393
- elif "cohere.command" in model_id or "meta.llama" in model_id or "mistral.mistral" in model_id :
425
+ elif (
426
+ "cohere.command" in model_id
427
+ or "meta.llama" in model_id
428
+ or "mistral.mistral" in model_id
429
+ ):
394
430
messages = self ._get_messages_from_input_text (
395
431
decoded_body , "prompt"
396
432
)
397
433
398
434
return system_messages + messages
399
435
436
+ # pylint: disable=no-self-use
400
437
def _get_messages_from_input_text (
401
438
self , decoded_body : dict [str , Any ], input_name : str
402
439
):
403
440
if input_text := decoded_body .get (input_name ):
404
- return [
405
- {"role" : "user" , "content" : [{"text" : input_text }]}
406
- ]
441
+ return [{"role" : "user" , "content" : [{"text" : input_text }]}]
407
442
return []
408
443
409
444
def before_service_call (
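(The reformatted `elif` routes Cohere Command, Meta Llama, and Mistral bodies through the same `"prompt"` lookup. A hypothetical call, with `ext` standing in for an extension instance, shows the normalized shape:

```python
# Hypothetical usage: a prompt-style body is normalized into the
# role/content message structure shared by all providers.
decoded_body = {"prompt": "What is OpenTelemetry?", "temperature": 0.5}
messages = ext._get_messages_from_input_text(decoded_body, "prompt")
assert messages == [
    {"role": "user", "content": [{"text": "What is OpenTelemetry?"}]}
]
```
)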
@@ -843,11 +878,13 @@ def _handle_cohere_command_r_response(
     ):
         if "text" in response_body:
             span.set_attribute(
-                GEN_AI_USAGE_OUTPUT_TOKENS, estimate_token_count(response_body["text"])
+                GEN_AI_USAGE_OUTPUT_TOKENS,
+                estimate_token_count(response_body["text"]),
             )
         if "finish_reason" in response_body:
             span.set_attribute(
-                GEN_AI_RESPONSE_FINISH_REASONS, [response_body["finish_reason"]]
+                GEN_AI_RESPONSE_FINISH_REASONS,
+                [response_body["finish_reason"]],
             )

         event_logger = instrumentor_context.event_logger
@@ -867,11 +904,13 @@ def _handle_cohere_command_response(
             generations = response_body["generations"][0]
             if "text" in generations:
                 span.set_attribute(
-                    GEN_AI_USAGE_OUTPUT_TOKENS, estimate_token_count(generations["text"])
+                    GEN_AI_USAGE_OUTPUT_TOKENS,
+                    estimate_token_count(generations["text"]),
                 )
             if "finish_reason" in generations:
                 span.set_attribute(
-                    GEN_AI_RESPONSE_FINISH_REASONS, [generations["finish_reason"]]
+                    GEN_AI_RESPONSE_FINISH_REASONS,
+                    [generations["finish_reason"]],
                 )

         event_logger = instrumentor_context.event_logger
@@ -893,17 +932,16 @@ def _handle_meta_llama_response(
             )
         if "generation_token_count" in response_body:
             span.set_attribute(
-                GEN_AI_USAGE_OUTPUT_TOKENS, response_body["generation_token_count"],
+                GEN_AI_USAGE_OUTPUT_TOKENS,
+                response_body["generation_token_count"],
             )
         if "stop_reason" in response_body:
             span.set_attribute(
                 GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stop_reason"]]
             )

         event_logger = instrumentor_context.event_logger
-        choice = _Choice.from_invoke_meta_llama(
-            response_body, capture_content
-        )
+        choice = _Choice.from_invoke_meta_llama(response_body, capture_content)
         event_logger.emit(choice.to_choice_event())

     def _handle_mistral_ai_response(
@@ -916,9 +954,14 @@ def _handle_mistral_ai_response(
         if "outputs" in response_body:
             outputs = response_body["outputs"][0]
             if "text" in outputs:
-                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, estimate_token_count(outputs["text"]))
+                span.set_attribute(
+                    GEN_AI_USAGE_OUTPUT_TOKENS,
+                    estimate_token_count(outputs["text"]),
+                )
             if "stop_reason" in outputs:
-                span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [outputs["stop_reason"]])
+                span.set_attribute(
+                    GEN_AI_RESPONSE_FINISH_REASONS, [outputs["stop_reason"]]
+                )

         event_logger = instrumentor_context.event_logger
         choice = _Choice.from_invoke_mistral_mistral(
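(The Mistral handler above reads `outputs[0]`; an illustrative response body, with invented values, matching the keys it consumes:

```python
# Illustrative Mistral response body: "text" feeds the estimated
# output-token count, "stop_reason" the finish-reasons attribute.
response_body = {
    "outputs": [
        {
            "text": "OpenTelemetry is an observability framework.",
            "stop_reason": "stop",
        }
    ]
}
```
)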