Skip to content

feat(spans): track and report spans that were dropped #4005

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Feb 4, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 81 additions & 3 deletions sentry_sdk/_types.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,88 @@
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, TypeVar, Union


# Re-exported for compat, since code out there in the wild might use this variable.
MYPY = TYPE_CHECKING


SENSITIVE_DATA_SUBSTITUTE = "[Filtered]"


class AnnotatedValue:
    """
    Meta information for a data field in the event payload.
    This is to tell Relay that we have tampered with the field's value.
    See:
    https://github.com/getsentry/relay/blob/be12cd49a0f06ea932ed9b9f93a655de5d6ad6d1/relay-general/src/types/meta.rs#L407-L423

    ``value`` is the (possibly replaced) payload value and ``metadata`` is the
    dict of annotations describing what was done to it.

    Instances compare equal when both ``value`` and ``metadata`` are equal.
    """

    __slots__ = ("value", "metadata")

    def __init__(self, value, metadata):
        # type: (Optional[Any], Dict[str, Any]) -> None
        self.value = value
        self.metadata = metadata

    def __eq__(self, other):
        # type: (Any) -> bool
        if not isinstance(other, AnnotatedValue):
            return False

        return self.value == other.value and self.metadata == other.metadata

    def __repr__(self):
        # type: () -> str
        # Show both fields so annotated values are readable in logs and in
        # assertion failures instead of appearing as bare object addresses.
        return "%s(value=%r, metadata=%r)" % (
            type(self).__name__,
            self.value,
            self.metadata,
        )

    @classmethod
    def removed_because_raw_data(cls):
        # type: () -> AnnotatedValue
        """The value was removed because it could not be parsed. This is done for request body values that are not json nor a form."""
        # Build through ``cls`` so subclasses get instances of their own type.
        return cls(
            value="",
            metadata={
                "rem": [  # Remark
                    [
                        "!raw",  # Unparsable raw data
                        "x",  # The field's original value was removed
                    ]
                ]
            },
        )

    @classmethod
    def removed_because_over_size_limit(cls):
        # type: () -> AnnotatedValue
        """The actual value was removed because the size of the field exceeded the configured maximum size (specified with the max_request_body_size sdk option)"""
        return cls(
            value="",
            metadata={
                "rem": [  # Remark
                    [
                        "!config",  # Because of configured maximum size
                        "x",  # The field's original value was removed
                    ]
                ]
            },
        )

    @classmethod
    def substituted_because_contains_sensitive_data(cls):
        # type: () -> AnnotatedValue
        """The actual value was removed because it contained sensitive information."""
        return cls(
            value=SENSITIVE_DATA_SUBSTITUTE,
            metadata={
                "rem": [  # Remark
                    [
                        "!config",  # Because of SDK configuration (in this case the config is the hard coded removal of certain django cookies)
                        "s",  # The field's original value was substituted
                    ]
                ]
            },
        )


T = TypeVar("T")
Annotated = Union[AnnotatedValue, T]


if TYPE_CHECKING:
from collections.abc import Container, MutableMapping, Sequence

@@ -19,7 +97,6 @@
from typing import Optional
from typing import Tuple
from typing import Type
from typing import Union
from typing_extensions import Literal, TypedDict

class SDKInfo(TypedDict):
@@ -101,7 +178,7 @@ class SDKInfo(TypedDict):
"request": dict[str, object],
"sdk": Mapping[str, object],
"server_name": str,
"spans": list[dict[str, object]],
"spans": Annotated[list[dict[str, object]]],
"stacktrace": dict[
str, object
], # We access this key in the code, but I am unsure whether we ever set it
@@ -118,6 +195,7 @@ class SDKInfo(TypedDict):
"transaction_info": Mapping[str, Any], # TODO: We can expand on this type
"type": Literal["check_in", "transaction"],
"user": dict[str, object],
"_dropped_spans": int,
"_metrics_summary": dict[str, object],
},
total=False,
26 changes: 18 additions & 8 deletions sentry_sdk/client.py
Original file line number Diff line number Diff line change
@@ -5,11 +5,12 @@
from collections.abc import Mapping
from datetime import datetime, timezone
from importlib import import_module
from typing import cast, overload
from typing import TYPE_CHECKING, List, Dict, cast, overload
import warnings

from sentry_sdk._compat import PY37, check_uwsgi_thread_support
from sentry_sdk.utils import (
AnnotatedValue,
ContextVar,
capture_internal_exceptions,
current_stacktrace,
@@ -45,12 +46,9 @@
from sentry_sdk.monitor import Monitor
from sentry_sdk.spotlight import setup_spotlight

from typing import TYPE_CHECKING

if TYPE_CHECKING:
from typing import Any
from typing import Callable
from typing import Dict
from typing import Optional
from typing import Sequence
from typing import Type
@@ -483,12 +481,14 @@ def _prepare_event(
):
# type: (...) -> Optional[Event]

previous_total_spans = None # type: Optional[int]

if event.get("timestamp") is None:
event["timestamp"] = datetime.now(timezone.utc)

if scope is not None:
is_transaction = event.get("type") == "transaction"
spans_before = len(event.get("spans", []))
spans_before = len(cast(List[Dict[str, object]], event.get("spans", [])))
event_ = scope.apply_to_event(event, hint, self.options)

# one of the event/error processors returned None
@@ -507,13 +507,18 @@ def _prepare_event(
return None

event = event_

spans_delta = spans_before - len(event.get("spans", []))
spans_delta = spans_before - len(
cast(List[Dict[str, object]], event.get("spans", []))
)
if is_transaction and spans_delta > 0 and self.transport is not None:
self.transport.record_lost_event(
"event_processor", data_category="span", quantity=spans_delta
)

dropped_spans = event.pop("_dropped_spans", 0) + spans_delta # type: int
if dropped_spans > 0:
previous_total_spans = spans_before + dropped_spans

if (
self.options["attach_stacktrace"]
and "exception" not in event
@@ -561,6 +566,11 @@ def _prepare_event(
if event_scrubber:
event_scrubber.scrub_event(event)

if previous_total_spans is not None:
event["spans"] = AnnotatedValue(
event.get("spans", []), {"len": previous_total_spans}
)

# Postprocess the event here so that annotated types do
# generally not surface in before_send
if event is not None:
@@ -598,7 +608,7 @@ def _prepare_event(
and event.get("type") == "transaction"
):
new_event = None
spans_before = len(event.get("spans", []))
spans_before = len(cast(List[Dict[str, object]], event.get("spans", [])))
with capture_internal_exceptions():
new_event = before_send_transaction(event, hint or {})
if new_event is None:
5 changes: 2 additions & 3 deletions sentry_sdk/scrubber.py
Original file line number Diff line number Diff line change
@@ -4,11 +4,10 @@
iter_event_frames,
)

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, cast, List, Dict

if TYPE_CHECKING:
from sentry_sdk._types import Event
from typing import List
from typing import Optional


@@ -161,7 +160,7 @@ def scrub_spans(self, event):
# type: (Event) -> None
with capture_internal_exceptions():
if "spans" in event:
for span in event["spans"]:
for span in cast(List[Dict[str, object]], event["spans"]):
if "data" in span:
self.scrub_dict(span["data"])

10 changes: 9 additions & 1 deletion sentry_sdk/tracing.py
Original file line number Diff line number Diff line change
@@ -193,7 +193,7 @@ def get_span_status_from_http_code(http_status_code):
class _SpanRecorder:
"""Limits the number of spans recorded in a transaction."""

__slots__ = ("maxlen", "spans")
__slots__ = ("maxlen", "spans", "dropped_spans")

def __init__(self, maxlen):
# type: (int) -> None
@@ -204,11 +204,13 @@ def __init__(self, maxlen):
# limits: either transaction+spans or only child spans.
self.maxlen = maxlen - 1
self.spans = [] # type: List[Span]
self.dropped_spans = 0 # type: int

def add(self, span):
    # type: (Span) -> None
    """Store ``span``, or count it as dropped when the recorder is over capacity."""
    has_room = len(self.spans) <= self.maxlen
    if has_room:
        self.spans.append(span)
        return

    # Over capacity: detach the span from this recorder and tally the drop.
    span._span_recorder = None
    self.dropped_spans += 1

@@ -972,6 +974,9 @@ def finish(
if span.timestamp is not None
]

len_diff = len(self._span_recorder.spans) - len(finished_spans)
dropped_spans = len_diff + self._span_recorder.dropped_spans

# we do this to break the circular reference of transaction -> span
# recorder -> span -> containing transaction (which is where we started)
# before either the spans or the transaction goes out of scope and has
@@ -996,6 +1001,9 @@ def finish(
"spans": finished_spans,
} # type: Event

if dropped_spans > 0:
event["_dropped_spans"] = dropped_spans

if self._profile is not None and self._profile.valid():
event["profile"] = self._profile
self._profile = None
8 changes: 4 additions & 4 deletions sentry_sdk/transport.py
Original file line number Diff line number Diff line change
@@ -24,15 +24,13 @@
from sentry_sdk.worker import BackgroundWorker
from sentry_sdk.envelope import Envelope, Item, PayloadRef

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, cast, List, Dict

if TYPE_CHECKING:
from typing import Any
from typing import Callable
from typing import Dict
from typing import DefaultDict
from typing import Iterable
from typing import List
from typing import Mapping
from typing import Optional
from typing import Self
@@ -280,7 +278,9 @@ def record_lost_event(
event = item.get_transaction_event() or {}

# +1 for the transaction itself
span_count = len(event.get("spans") or []) + 1
span_count = (
len(cast(List[Dict[str, object]], event.get("spans") or [])) + 1
)
self.record_lost_event(reason, "span", quantity=span_count)

elif data_category == "attachment":
81 changes: 1 addition & 80 deletions sentry_sdk/utils.py
Original file line number Diff line number Diff line change
@@ -32,6 +32,7 @@
DEFAULT_MAX_VALUE_LENGTH,
EndpointType,
)
from sentry_sdk._types import Annotated, AnnotatedValue, SENSITIVE_DATA_SUBSTITUTE

from typing import TYPE_CHECKING

@@ -73,8 +74,6 @@

BASE64_ALPHABET = re.compile(r"^[a-zA-Z0-9/+=]*$")

SENSITIVE_DATA_SUBSTITUTE = "[Filtered]"

FALSY_ENV_VALUES = frozenset(("false", "f", "n", "no", "off", "0"))
TRUTHY_ENV_VALUES = frozenset(("true", "t", "y", "yes", "on", "1"))

@@ -404,84 +403,6 @@ def to_header(self):
return "Sentry " + ", ".join("%s=%s" % (key, value) for key, value in rv)


class AnnotatedValue:
    """
    Meta information for a data field in the event payload.
    This is to tell Relay that we have tampered with the field's value.
    See:
    https://github.com/getsentry/relay/blob/be12cd49a0f06ea932ed9b9f93a655de5d6ad6d1/relay-general/src/types/meta.rs#L407-L423

    ``value`` is the (possibly replaced) payload value and ``metadata`` is the
    dict of annotations describing what was done to it.

    Instances compare equal when both ``value`` and ``metadata`` are equal.
    """

    __slots__ = ("value", "metadata")

    def __init__(self, value, metadata):
        # type: (Optional[Any], Dict[str, Any]) -> None
        self.value = value
        self.metadata = metadata

    def __eq__(self, other):
        # type: (Any) -> bool
        if not isinstance(other, AnnotatedValue):
            return False

        return self.value == other.value and self.metadata == other.metadata

    def __repr__(self):
        # type: () -> str
        # Show both fields so annotated values are readable in logs and in
        # assertion failures instead of appearing as bare object addresses.
        return "%s(value=%r, metadata=%r)" % (
            type(self).__name__,
            self.value,
            self.metadata,
        )

    @classmethod
    def removed_because_raw_data(cls):
        # type: () -> AnnotatedValue
        """The value was removed because it could not be parsed. This is done for request body values that are not json nor a form."""
        # Build through ``cls`` so subclasses get instances of their own type.
        return cls(
            value="",
            metadata={
                "rem": [  # Remark
                    [
                        "!raw",  # Unparsable raw data
                        "x",  # The field's original value was removed
                    ]
                ]
            },
        )

    @classmethod
    def removed_because_over_size_limit(cls):
        # type: () -> AnnotatedValue
        """The actual value was removed because the size of the field exceeded the configured maximum size (specified with the max_request_body_size sdk option)"""
        return cls(
            value="",
            metadata={
                "rem": [  # Remark
                    [
                        "!config",  # Because of configured maximum size
                        "x",  # The field's original value was removed
                    ]
                ]
            },
        )

    @classmethod
    def substituted_because_contains_sensitive_data(cls):
        # type: () -> AnnotatedValue
        """The actual value was removed because it contained sensitive information."""
        return cls(
            value=SENSITIVE_DATA_SUBSTITUTE,
            metadata={
                "rem": [  # Remark
                    [
                        "!config",  # Because of SDK configuration (in this case the config is the hard coded removal of certain django cookies)
                        "s",  # The field's original value was substituted
                    ]
                ]
            },
        )


if TYPE_CHECKING:
from typing import TypeVar

T = TypeVar("T")
Annotated = Union[AnnotatedValue, T]


def get_type_name(cls):
# type: (Optional[type]) -> Optional[str]
return getattr(cls, "__qualname__", None) or getattr(cls, "__name__", None)
Loading