Skip to content

Commit cb82483

Browse files
authored
Make use of SPANDATA consistent (#4373)
The AI integrations sometimes used plain strings when setting `SPANDATA` attributes. Changed them to always use the `SPANDATA` constants instead.
1 parent ca5ba89 commit cb82483

File tree

10 files changed

+147
-82
lines changed

10 files changed

+147
-82
lines changed

sentry_sdk/ai/monitoring.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import inspect
22
from functools import wraps
33

4+
from sentry_sdk.consts import SPANDATA
45
import sentry_sdk.utils
56
from sentry_sdk import start_span
67
from sentry_sdk.tracing import Span
@@ -39,7 +40,7 @@ def sync_wrapped(*args, **kwargs):
3940
for k, v in kwargs.pop("sentry_data", {}).items():
4041
span.set_data(k, v)
4142
if curr_pipeline:
42-
span.set_data("ai.pipeline.name", curr_pipeline)
43+
span.set_data(SPANDATA.AI_PIPELINE_NAME, curr_pipeline)
4344
return f(*args, **kwargs)
4445
else:
4546
_ai_pipeline_name.set(description)
@@ -68,7 +69,7 @@ async def async_wrapped(*args, **kwargs):
6869
for k, v in kwargs.pop("sentry_data", {}).items():
6970
span.set_data(k, v)
7071
if curr_pipeline:
71-
span.set_data("ai.pipeline.name", curr_pipeline)
72+
span.set_data(SPANDATA.AI_PIPELINE_NAME, curr_pipeline)
7273
return await f(*args, **kwargs)
7374
else:
7475
_ai_pipeline_name.set(description)
@@ -100,7 +101,7 @@ def record_token_usage(
100101
# type: (Span, Optional[int], Optional[int], Optional[int]) -> None
101102
ai_pipeline_name = get_ai_pipeline_name()
102103
if ai_pipeline_name:
103-
span.set_data("ai.pipeline.name", ai_pipeline_name)
104+
span.set_data(SPANDATA.AI_PIPELINE_NAME, ai_pipeline_name)
104105
if prompt_tokens is not None:
105106
span.set_measurement("ai_prompt_tokens_used", value=prompt_tokens)
106107
if completion_tokens is not None:

sentry_sdk/consts.py

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ class SPANDATA:
187187
For an AI model call, the format of the response
188188
"""
189189

190-
AI_LOGIT_BIAS = "ai.response_format"
190+
AI_LOGIT_BIAS = "ai.logit_bias"
191191
"""
192192
For an AI model call, the logit bias
193193
"""
@@ -204,7 +204,6 @@ class SPANDATA:
204204
Minimize pre-processing done to the prompt sent to the LLM.
205205
Example: true
206206
"""
207-
208207
AI_RESPONSES = "ai.responses"
209208
"""
210209
The responses to an AI model call. Always as a list.
@@ -217,6 +216,66 @@ class SPANDATA:
217216
Example: 123.45
218217
"""
219218

219+
AI_CITATIONS = "ai.citations"
220+
"""
221+
References or sources cited by the AI model in its response.
222+
Example: ["Smith et al. 2020", "Jones 2019"]
223+
"""
224+
225+
AI_DOCUMENTS = "ai.documents"
226+
"""
227+
Documents or content chunks used as context for the AI model.
228+
Example: ["doc1.txt", "doc2.pdf"]
229+
"""
230+
231+
AI_SEARCH_QUERIES = "ai.search_queries"
232+
"""
233+
Queries used to search for relevant context or documents.
234+
Example: ["climate change effects", "renewable energy"]
235+
"""
236+
237+
AI_SEARCH_RESULTS = "ai.search_results"
238+
"""
239+
Results returned from search queries for context.
240+
Example: ["Result 1", "Result 2"]
241+
"""
242+
243+
AI_GENERATION_ID = "ai.generation_id"
244+
"""
245+
Unique identifier for the completion.
246+
Example: "gen_123abc"
247+
"""
248+
249+
AI_SEARCH_REQUIRED = "ai.is_search_required"
250+
"""
251+
Boolean indicating if the model needs to perform a search.
252+
Example: true
253+
"""
254+
255+
AI_FINISH_REASON = "ai.finish_reason"
256+
"""
257+
The reason why the model stopped generating.
258+
Example: "length"
259+
"""
260+
261+
AI_PIPELINE_NAME = "ai.pipeline.name"
262+
"""
263+
Name of the AI pipeline or chain being executed.
264+
Example: "qa-pipeline"
265+
"""
266+
267+
AI_TEXTS = "ai.texts"
268+
"""
269+
Raw text inputs provided to the model.
270+
Example: ["What is machine learning?"]
271+
"""
272+
273+
AI_WARNINGS = "ai.warnings"
274+
"""
275+
Warning messages generated during model execution.
276+
Example: ["Token limit exceeded"]
277+
"""
278+
220279
DB_NAME = "db.name"
221280
"""
222281
The name of the database being accessed. For commands that switch the database, this should be set to the target database (even if the command fails).

sentry_sdk/integrations/cohere.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -52,17 +52,17 @@
5252
}
5353

5454
COLLECTED_CHAT_RESP_ATTRS = {
55-
"generation_id": "ai.generation_id",
56-
"is_search_required": "ai.is_search_required",
57-
"finish_reason": "ai.finish_reason",
55+
"generation_id": SPANDATA.AI_GENERATION_ID,
56+
"is_search_required": SPANDATA.AI_SEARCH_REQUIRED,
57+
"finish_reason": SPANDATA.AI_FINISH_REASON,
5858
}
5959

6060
COLLECTED_PII_CHAT_RESP_ATTRS = {
61-
"citations": "ai.citations",
62-
"documents": "ai.documents",
63-
"search_queries": "ai.search_queries",
64-
"search_results": "ai.search_results",
65-
"tool_calls": "ai.tool_calls",
61+
"citations": SPANDATA.AI_CITATIONS,
62+
"documents": SPANDATA.AI_DOCUMENTS,
63+
"search_queries": SPANDATA.AI_SEARCH_QUERIES,
64+
"search_results": SPANDATA.AI_SEARCH_RESULTS,
65+
"tool_calls": SPANDATA.AI_TOOL_CALLS,
6666
}
6767

6868

@@ -127,7 +127,7 @@ def collect_chat_response_fields(span, res, include_pii):
127127
)
128128

129129
if hasattr(res.meta, "warnings"):
130-
set_data_normalized(span, "ai.warnings", res.meta.warnings)
130+
set_data_normalized(span, SPANDATA.AI_WARNINGS, res.meta.warnings)
131131

132132
@wraps(f)
133133
def new_chat(*args, **kwargs):
@@ -238,7 +238,7 @@ def new_embed(*args, **kwargs):
238238
should_send_default_pii() and integration.include_prompts
239239
):
240240
if isinstance(kwargs["texts"], str):
241-
set_data_normalized(span, "ai.texts", [kwargs["texts"]])
241+
set_data_normalized(span, SPANDATA.AI_TEXTS, [kwargs["texts"]])
242242
elif (
243243
isinstance(kwargs["texts"], list)
244244
and len(kwargs["texts"]) > 0

sentry_sdk/integrations/huggingface_hub.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def new_text_generation(*args, **kwargs):
9797
if should_send_default_pii() and integration.include_prompts:
9898
set_data_normalized(
9999
span,
100-
"ai.responses",
100+
SPANDATA.AI_RESPONSES,
101101
[res],
102102
)
103103
span.__exit__(None, None, None)
@@ -107,7 +107,7 @@ def new_text_generation(*args, **kwargs):
107107
if should_send_default_pii() and integration.include_prompts:
108108
set_data_normalized(
109109
span,
110-
"ai.responses",
110+
SPANDATA.AI_RESPONSES,
111111
[res.generated_text],
112112
)
113113
if res.details is not None and res.details.generated_tokens > 0:

sentry_sdk/integrations/openai.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ def _new_chat_completion_common(f, *args, **kwargs):
155155
if should_send_default_pii() and integration.include_prompts:
156156
set_data_normalized(
157157
span,
158-
"ai.responses",
158+
SPANDATA.AI_RESPONSES,
159159
list(map(lambda x: x.message, res.choices)),
160160
)
161161
_calculate_chat_completion_usage(
@@ -329,15 +329,15 @@ def _new_embeddings_create_common(f, *args, **kwargs):
329329
should_send_default_pii() and integration.include_prompts
330330
):
331331
if isinstance(kwargs["input"], str):
332-
set_data_normalized(span, "ai.input_messages", [kwargs["input"]])
332+
set_data_normalized(span, SPANDATA.AI_INPUT_MESSAGES, [kwargs["input"]])
333333
elif (
334334
isinstance(kwargs["input"], list)
335335
and len(kwargs["input"]) > 0
336336
and isinstance(kwargs["input"][0], str)
337337
):
338-
set_data_normalized(span, "ai.input_messages", kwargs["input"])
338+
set_data_normalized(span, SPANDATA.AI_INPUT_MESSAGES, kwargs["input"])
339339
if "model" in kwargs:
340-
set_data_normalized(span, "ai.model_id", kwargs["model"])
340+
set_data_normalized(span, SPANDATA.AI_MODEL_ID, kwargs["model"])
341341

342342
response = yield f, args, kwargs
343343

tests/integrations/anthropic/test_anthropic.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ def test_nonstreaming_create_message(
128128
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 10
129129
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 20
130130
assert span["measurements"]["ai_total_tokens_used"]["value"] == 30
131-
assert span["data"]["ai.streaming"] is False
131+
assert span["data"][SPANDATA.AI_STREAMING] is False
132132

133133

134134
@pytest.mark.asyncio
@@ -196,7 +196,7 @@ async def test_nonstreaming_create_message_async(
196196
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 10
197197
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 20
198198
assert span["measurements"]["ai_total_tokens_used"]["value"] == 30
199-
assert span["data"]["ai.streaming"] is False
199+
assert span["data"][SPANDATA.AI_STREAMING] is False
200200

201201

202202
@pytest.mark.parametrize(
@@ -296,7 +296,7 @@ def test_streaming_create_message(
296296
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 10
297297
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 30
298298
assert span["measurements"]["ai_total_tokens_used"]["value"] == 40
299-
assert span["data"]["ai.streaming"] is True
299+
assert span["data"][SPANDATA.AI_STREAMING] is True
300300

301301

302302
@pytest.mark.asyncio
@@ -399,7 +399,7 @@ async def test_streaming_create_message_async(
399399
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 10
400400
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 30
401401
assert span["measurements"]["ai_total_tokens_used"]["value"] == 40
402-
assert span["data"]["ai.streaming"] is True
402+
assert span["data"][SPANDATA.AI_STREAMING] is True
403403

404404

405405
@pytest.mark.skipif(
@@ -528,7 +528,7 @@ def test_streaming_create_message_with_input_json_delta(
528528
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 366
529529
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 51
530530
assert span["measurements"]["ai_total_tokens_used"]["value"] == 417
531-
assert span["data"]["ai.streaming"] is True
531+
assert span["data"][SPANDATA.AI_STREAMING] is True
532532

533533

534534
@pytest.mark.asyncio
@@ -665,7 +665,7 @@ async def test_streaming_create_message_with_input_json_delta_async(
665665
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 366
666666
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 51
667667
assert span["measurements"]["ai_total_tokens_used"]["value"] == 417
668-
assert span["data"]["ai.streaming"] is True
668+
assert span["data"][SPANDATA.AI_STREAMING] is True
669669

670670

671671
def test_exception_message_create(sentry_init, capture_events):
@@ -810,7 +810,7 @@ def test_add_ai_data_to_span_with_input_json_delta(sentry_init):
810810
assert span._data.get(SPANDATA.AI_RESPONSES) == [
811811
{"type": "text", "text": "{'test': 'data','more': 'json'}"}
812812
]
813-
assert span._data.get("ai.streaming") is True
813+
assert span._data.get(SPANDATA.AI_STREAMING) is True
814814
assert span._measurements.get("ai_prompt_tokens_used")["value"] == 10
815815
assert span._measurements.get("ai_completion_tokens_used")["value"] == 20
816816
assert span._measurements.get("ai_total_tokens_used")["value"] == 30

tests/integrations/cohere/test_cohere.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from cohere import Client, ChatMessage
66

77
from sentry_sdk import start_transaction
8+
from sentry_sdk.consts import SPANDATA
89
from sentry_sdk.integrations.cohere import CohereIntegration
910

1011
from unittest import mock # python 3.3 and above
@@ -53,15 +54,15 @@ def test_nonstreaming_chat(
5354
assert tx["type"] == "transaction"
5455
span = tx["spans"][0]
5556
assert span["op"] == "ai.chat_completions.create.cohere"
56-
assert span["data"]["ai.model_id"] == "some-model"
57+
assert span["data"][SPANDATA.AI_MODEL_ID] == "some-model"
5758

5859
if send_default_pii and include_prompts:
59-
assert "some context" in span["data"]["ai.input_messages"][0]["content"]
60-
assert "hello" in span["data"]["ai.input_messages"][1]["content"]
61-
assert "the model response" in span["data"]["ai.responses"]
60+
assert "some context" in span["data"][SPANDATA.AI_INPUT_MESSAGES][0]["content"]
61+
assert "hello" in span["data"][SPANDATA.AI_INPUT_MESSAGES][1]["content"]
62+
assert "the model response" in span["data"][SPANDATA.AI_RESPONSES]
6263
else:
63-
assert "ai.input_messages" not in span["data"]
64-
assert "ai.responses" not in span["data"]
64+
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
65+
assert SPANDATA.AI_RESPONSES not in span["data"]
6566

6667
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 10
6768
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 20
@@ -124,15 +125,15 @@ def test_streaming_chat(sentry_init, capture_events, send_default_pii, include_p
124125
assert tx["type"] == "transaction"
125126
span = tx["spans"][0]
126127
assert span["op"] == "ai.chat_completions.create.cohere"
127-
assert span["data"]["ai.model_id"] == "some-model"
128+
assert span["data"][SPANDATA.AI_MODEL_ID] == "some-model"
128129

129130
if send_default_pii and include_prompts:
130-
assert "some context" in span["data"]["ai.input_messages"][0]["content"]
131-
assert "hello" in span["data"]["ai.input_messages"][1]["content"]
132-
assert "the model response" in span["data"]["ai.responses"]
131+
assert "some context" in span["data"][SPANDATA.AI_INPUT_MESSAGES][0]["content"]
132+
assert "hello" in span["data"][SPANDATA.AI_INPUT_MESSAGES][1]["content"]
133+
assert "the model response" in span["data"][SPANDATA.AI_RESPONSES]
133134
else:
134-
assert "ai.input_messages" not in span["data"]
135-
assert "ai.responses" not in span["data"]
135+
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
136+
assert SPANDATA.AI_RESPONSES not in span["data"]
136137

137138
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 10
138139
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 20
@@ -194,9 +195,9 @@ def test_embed(sentry_init, capture_events, send_default_pii, include_prompts):
194195
span = tx["spans"][0]
195196
assert span["op"] == "ai.embeddings.create.cohere"
196197
if send_default_pii and include_prompts:
197-
assert "hello" in span["data"]["ai.input_messages"]
198+
assert "hello" in span["data"][SPANDATA.AI_INPUT_MESSAGES]
198199
else:
199-
assert "ai.input_messages" not in span["data"]
200+
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
200201

201202
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 10
202203
assert span["measurements"]["ai_total_tokens_used"]["value"] == 10

tests/integrations/huggingface_hub/test_huggingface_hub.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from huggingface_hub.errors import OverloadedError
99

1010
from sentry_sdk import start_transaction
11+
from sentry_sdk.consts import SPANDATA
1112
from sentry_sdk.integrations.huggingface_hub import HuggingfaceHubIntegration
1213

1314

@@ -67,11 +68,11 @@ def test_nonstreaming_chat_completion(
6768
assert span["op"] == "ai.chat_completions.create.huggingface_hub"
6869

6970
if send_default_pii and include_prompts:
70-
assert "hello" in span["data"]["ai.input_messages"]
71-
assert "the model response" in span["data"]["ai.responses"]
71+
assert "hello" in span["data"][SPANDATA.AI_INPUT_MESSAGES]
72+
assert "the model response" in span["data"][SPANDATA.AI_RESPONSES]
7273
else:
73-
assert "ai.input_messages" not in span["data"]
74-
assert "ai.responses" not in span["data"]
74+
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
75+
assert SPANDATA.AI_RESPONSES not in span["data"]
7576

7677
if details_arg:
7778
assert span["measurements"]["ai_total_tokens_used"]["value"] == 10
@@ -126,11 +127,11 @@ def test_streaming_chat_completion(
126127
assert span["op"] == "ai.chat_completions.create.huggingface_hub"
127128

128129
if send_default_pii and include_prompts:
129-
assert "hello" in span["data"]["ai.input_messages"]
130-
assert "the model response" in span["data"]["ai.responses"]
130+
assert "hello" in span["data"][SPANDATA.AI_INPUT_MESSAGES]
131+
assert "the model response" in span["data"][SPANDATA.AI_RESPONSES]
131132
else:
132-
assert "ai.input_messages" not in span["data"]
133-
assert "ai.responses" not in span["data"]
133+
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
134+
assert SPANDATA.AI_RESPONSES not in span["data"]
134135

135136
if details_arg:
136137
assert span["measurements"]["ai_total_tokens_used"]["value"] == 10

0 commit comments

Comments
 (0)