Skip to content

Commit 0d23b72

Browse files
feat(tracing): Backfill missing sample_rand on PropagationContext (#4038)
Whenever the `PropagationContext` continues an incoming trace (i.e. whenever the `trace_id` is set, rather than being randomly generated as for a new trace), check if the `sample_rand` is present and valid in the incoming DSC. If the `sample_rand` is missing, generate it deterministically based on the `trace_id` and backfill it into the DSC on the `PropagationContext`. When generating the backfilled `sample_rand`, we ensure the generated value is consistent with the incoming trace's sampling decision and sample rate, if both of these are present. Otherwise, we generate a new value in the range [0, 1). Additionally, we propagate the `sample_rand` to transactions generated with `continue_trace` (allowing the `sample_rand` to be propagated on outgoing traces), and also allow `sample_rand` to be used for making sampling decisions. Ref #3998 --------- Co-authored-by: Ivana Kellyer <[email protected]>
1 parent 5d26201 commit 0d23b72

16 files changed

+474
-87
lines changed

Diff for: sentry_sdk/scope.py

+13
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
logger,
4444
)
4545

46+
import typing
4647
from typing import TYPE_CHECKING
4748

4849
if TYPE_CHECKING:
@@ -1146,8 +1147,20 @@ def continue_trace(
11461147
"""
11471148
self.generate_propagation_context(environ_or_headers)
11481149

1150+
# When we generate the propagation context, the sample_rand value is set
1151+
# if missing or invalid (we use the original value if it's valid).
1152+
# We want the transaction to use the same sample_rand value. Due to duplicated
1153+
# propagation logic in the transaction, we pass it in to avoid recomputing it
1154+
# in the transaction.
1155+
# TYPE SAFETY: self.generate_propagation_context() ensures that self._propagation_context
1156+
# is not None.
1157+
sample_rand = typing.cast(
1158+
PropagationContext, self._propagation_context
1159+
)._sample_rand()
1160+
11491161
transaction = Transaction.continue_from_headers(
11501162
normalize_incoming_data(environ_or_headers),
1163+
_sample_rand=sample_rand,
11511164
op=op,
11521165
origin=origin,
11531166
name=name,

Diff for: sentry_sdk/tracing.py

+19-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import uuid
2-
import random
32
import warnings
43
from datetime import datetime, timedelta, timezone
54
from enum import Enum
@@ -477,6 +476,8 @@ def continue_from_environ(
477476
def continue_from_headers(
478477
cls,
479478
headers, # type: Mapping[str, str]
479+
*,
480+
_sample_rand=None, # type: Optional[str]
480481
**kwargs, # type: Any
481482
):
482483
# type: (...) -> Transaction
@@ -485,6 +486,8 @@ def continue_from_headers(
485486
the ``sentry-trace`` and ``baggage`` headers).
486487
487488
:param headers: The dictionary with the HTTP headers to pull information from.
489+
:param _sample_rand: If provided, we override the sample_rand value from the
490+
incoming headers with this value. (internal use only)
488491
"""
489492
# TODO move this to the Transaction class
490493
if cls is Span:
@@ -495,7 +498,9 @@ def continue_from_headers(
495498

496499
# TODO-neel move away from this kwargs stuff, it's confusing and opaque
497500
# make more explicit
498-
baggage = Baggage.from_incoming_header(headers.get(BAGGAGE_HEADER_NAME))
501+
baggage = Baggage.from_incoming_header(
502+
headers.get(BAGGAGE_HEADER_NAME), _sample_rand=_sample_rand
503+
)
499504
kwargs.update({BAGGAGE_HEADER_NAME: baggage})
500505

501506
sentrytrace_kwargs = extract_sentrytrace_data(
@@ -779,6 +784,7 @@ class Transaction(Span):
779784
"_profile",
780785
"_continuous_profile",
781786
"_baggage",
787+
"_sample_rand",
782788
)
783789

784790
def __init__( # type: ignore[misc]
@@ -803,6 +809,14 @@ def __init__( # type: ignore[misc]
803809
self._continuous_profile = None # type: Optional[ContinuousProfile]
804810
self._baggage = baggage
805811

812+
baggage_sample_rand = (
813+
None if self._baggage is None else self._baggage._sample_rand()
814+
)
815+
if baggage_sample_rand is not None:
816+
self._sample_rand = baggage_sample_rand
817+
else:
818+
self._sample_rand = _generate_sample_rand(self.trace_id)
819+
806820
def __repr__(self):
807821
# type: () -> str
808822
return (
@@ -1173,10 +1187,10 @@ def _set_initial_sampling_decision(self, sampling_context):
11731187
self.sampled = False
11741188
return
11751189

1176-
# Now we roll the dice. random.random is inclusive of 0, but not of 1,
1190+
# Now we roll the dice. self._sample_rand is inclusive of 0, but not of 1,
11771191
# so strict < is safe here. In case sample_rate is a boolean, cast it
11781192
# to a float (True becomes 1.0 and False becomes 0.0)
1179-
self.sampled = random.random() < self.sample_rate
1193+
self.sampled = self._sample_rand < self.sample_rate
11801194

11811195
if self.sampled:
11821196
logger.debug(
@@ -1333,6 +1347,7 @@ async def my_async_function():
13331347
Baggage,
13341348
EnvironHeaders,
13351349
extract_sentrytrace_data,
1350+
_generate_sample_rand,
13361351
has_tracing_enabled,
13371352
maybe_create_breadcrumbs_from_span,
13381353
)

Diff for: sentry_sdk/tracing_utils.py

+139-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
import sys
66
from collections.abc import Mapping
77
from datetime import timedelta
8+
from decimal import ROUND_DOWN, Decimal
89
from functools import wraps
10+
from random import Random
911
from urllib.parse import quote, unquote
1012
import uuid
1113

@@ -19,6 +21,7 @@
1921
match_regex_list,
2022
qualname_from_function,
2123
to_string,
24+
try_convert,
2225
is_sentry_url,
2326
_is_external_source,
2427
_is_in_project_root,
@@ -45,6 +48,7 @@
4548
"[ \t]*$" # whitespace
4649
)
4750

51+
4852
# This is a normal base64 regex, modified to reflect that fact that we strip the
4953
# trailing = or == off
5054
base64_stripped = (
@@ -418,13 +422,17 @@ def from_incoming_data(cls, incoming_data):
418422
propagation_context = PropagationContext()
419423
propagation_context.update(sentrytrace_data)
420424

425+
if propagation_context is not None:
426+
propagation_context._fill_sample_rand()
427+
421428
return propagation_context
422429

423430
@property
424431
def trace_id(self):
425432
# type: () -> str
426433
"""The trace id of the Sentry trace."""
427434
if not self._trace_id:
435+
# New trace, don't fill in sample_rand
428436
self._trace_id = uuid.uuid4().hex
429437

430438
return self._trace_id
@@ -469,6 +477,68 @@ def __repr__(self):
469477
self.dynamic_sampling_context,
470478
)
471479

480+
def _fill_sample_rand(self):
    # type: () -> None
    """
    Ensure that there is a valid sample_rand value in the dynamic_sampling_context.

    If there is a valid sample_rand value in the dynamic_sampling_context, we keep it.
    A value is valid if it parses as a finite Decimal in the range [0, 1).
    Otherwise, we generate a sample_rand value according to the following:

      - If we have a parent_sampled value and a sample_rate in the DSC, we compute
        a sample_rand value randomly in the range:
          - [0, sample_rate) if parent_sampled is True,
          - or, in the range [sample_rate, 1) if parent_sampled is False.

      - If either parent_sampled or sample_rate is missing, we generate a random
        value in the range [0, 1).

    The sample_rand is deterministically generated from the trace_id, if present.

    This function does nothing if there is no dynamic_sampling_context. It also
    leaves the dynamic_sampling_context untouched (and logs at debug level) if
    parent_sampled and sample_rate contradict each other, since no sensible
    value can be generated in that case.
    """
    if self.dynamic_sampling_context is None:
        return

    sample_rand = try_convert(
        Decimal, self.dynamic_sampling_context.get("sample_rand")
    )
    # The incoming value is untrusted. Strings such as "NaN" parse successfully
    # as a Decimal, but an ordering comparison against a NaN Decimal raises
    # decimal.InvalidOperation under the default context, so we must rule out
    # non-finite values *before* doing the range check.
    if (
        sample_rand is not None
        and sample_rand.is_finite()
        and 0 <= sample_rand < 1
    ):
        # sample_rand is present and valid, so don't overwrite it
        return

    # Get the sample rate and compute the transformation that will map the random value
    # to the desired range: [0, 1), [0, sample_rate), or [sample_rate, 1).
    sample_rate = try_convert(
        float, self.dynamic_sampling_context.get("sample_rate")
    )
    lower, upper = _sample_rand_range(self.parent_sampled, sample_rate)

    try:
        sample_rand = _generate_sample_rand(self.trace_id, interval=(lower, upper))
    except ValueError:
        # ValueError is raised if the interval is invalid, i.e. lower >= upper.
        # lower >= upper might happen if the incoming trace's sampled flag
        # and sample_rate are inconsistent, e.g. sample_rate=0.0 but sampled=True.
        # We cannot generate a sensible sample_rand value in this case.
        logger.debug(
            f"Could not backfill sample_rand, since parent_sampled={self.parent_sampled} "
            f"and sample_rate={sample_rate}."
        )
        return

    # Stored as a string with exactly six decimal places.
    self.dynamic_sampling_context["sample_rand"] = (
        f"{sample_rand:.6f}"  # noqa: E231
    )
533+
534+
def _sample_rand(self):
535+
# type: () -> Optional[str]
536+
"""Convenience method to get the sample_rand value from the dynamic_sampling_context."""
537+
if self.dynamic_sampling_context is None:
538+
return None
539+
540+
return self.dynamic_sampling_context.get("sample_rand")
541+
472542

473543
class Baggage:
474544
"""
@@ -491,8 +561,13 @@ def __init__(
491561
self.mutable = mutable
492562

493563
@classmethod
494-
def from_incoming_header(cls, header):
495-
# type: (Optional[str]) -> Baggage
564+
def from_incoming_header(
565+
cls,
566+
header, # type: Optional[str]
567+
*,
568+
_sample_rand=None, # type: Optional[str]
569+
):
570+
# type: (...) -> Baggage
496571
"""
497572
freeze if incoming header already has sentry baggage
498573
"""
@@ -515,6 +590,10 @@ def from_incoming_header(cls, header):
515590
else:
516591
third_party_items += ("," if third_party_items else "") + item
517592

593+
if _sample_rand is not None:
594+
sentry_items["sample_rand"] = str(_sample_rand)
595+
mutable = False
596+
518597
return Baggage(sentry_items, third_party_items, mutable)
519598

520599
@classmethod
@@ -566,6 +645,7 @@ def populate_from_transaction(cls, transaction):
566645
options = client.options or {}
567646

568647
sentry_items["trace_id"] = transaction.trace_id
648+
sentry_items["sample_rand"] = str(transaction._sample_rand)
569649

570650
if options.get("environment"):
571651
sentry_items["environment"] = options["environment"]
@@ -638,6 +718,20 @@ def strip_sentry_baggage(header):
638718
)
639719
)
640720

721+
def _sample_rand(self):
    # type: () -> Optional[Decimal]
    """Convenience method to get the sample_rand value from the sentry_items.

    We validate the value and parse it as a Decimal before returning it. The value is considered
    valid if it is a finite Decimal in the range [0, 1). None is returned when the value is
    missing, cannot be parsed, or is out of range.
    """
    sample_rand = try_convert(Decimal, self.sentry_items.get("sample_rand"))

    if sample_rand is None:
        return None

    # sentry_items may originate from an incoming (untrusted) baggage header.
    # "NaN" parses as a Decimal, but ordering comparisons involving a NaN
    # Decimal raise decimal.InvalidOperation under the default context, so
    # non-finite values have to be rejected before the range check below.
    if not sample_rand.is_finite():
        return None

    if Decimal(0) <= sample_rand < Decimal(1):
        return sample_rand

    return None
734+
641735
def __repr__(self):
642736
# type: () -> str
643737
return f'<Baggage "{self.serialize(include_third_party=True)}", mutable={self.mutable}>'
@@ -748,6 +842,49 @@ def get_current_span(scope=None):
748842
return current_span
749843

750844

845+
def _generate_sample_rand(
846+
trace_id, # type: Optional[str]
847+
*,
848+
interval=(0.0, 1.0), # type: tuple[float, float]
849+
):
850+
# type: (...) -> Decimal
851+
"""Generate a sample_rand value from a trace ID.
852+
853+
The generated value will be pseudorandomly chosen from the provided
854+
interval. Specifically, given (lower, upper) = interval, the generated
855+
value will be in the range [lower, upper). The value has 6-digit precision,
856+
so when printing with .6f, the value will never be rounded up.
857+
858+
The pseudorandom number generator is seeded with the trace ID.
859+
"""
860+
lower, upper = interval
861+
if not lower < upper: # using `if lower >= upper` would handle NaNs incorrectly
862+
raise ValueError("Invalid interval: lower must be less than upper")
863+
864+
rng = Random(trace_id)
865+
sample_rand = upper
866+
while sample_rand >= upper:
867+
sample_rand = rng.uniform(lower, upper)
868+
869+
# Round down to exactly six decimal-digit precision.
870+
return Decimal(sample_rand).quantize(Decimal("0.000001"), rounding=ROUND_DOWN)
871+
872+
873+
def _sample_rand_range(parent_sampled, sample_rate):
874+
# type: (Optional[bool], Optional[float]) -> tuple[float, float]
875+
"""
876+
Compute the lower (inclusive) and upper (exclusive) bounds of the range of values
877+
that a generated sample_rand value must fall into, given the parent_sampled and
878+
sample_rate values.
879+
"""
880+
if parent_sampled is None or sample_rate is None:
881+
return 0.0, 1.0
882+
elif parent_sampled is True:
883+
return 0.0, sample_rate
884+
else: # parent_sampled is False
885+
return sample_rate, 1.0
886+
887+
751888
# Circular imports
752889
from sentry_sdk.tracing import (
753890
BAGGAGE_HEADER_NAME,

Diff for: sentry_sdk/utils.py

+17
Original file line numberDiff line numberDiff line change
@@ -1888,3 +1888,20 @@ def should_be_treated_as_error(ty, value):
18881888
return False
18891889

18901890
return True
1891+
1892+
1893+
if TYPE_CHECKING:
1894+
T = TypeVar("T")
1895+
1896+
1897+
def try_convert(convert_func, value):
    # type: (Callable[[Any], T], Any) -> Optional[T]
    """
    Run ``convert_func(value)`` and hand back whatever it produces.

    This is meant for converting a value of unknown type to a specific type.
    Any exception raised by the conversion function is treated as a failed
    conversion, in which case None is returned instead.
    """
    try:
        converted = convert_func(value)
    except Exception:
        converted = None

    return converted

Diff for: tests/integrations/aiohttp/test_aiohttp.py

+13-12
Original file line numberDiff line numberDiff line change
@@ -626,18 +626,19 @@ async def handler(request):
626626

627627
raw_server = await aiohttp_raw_server(handler)
628628

629-
with start_transaction(
630-
name="/interactions/other-dogs/new-dog",
631-
op="greeting.sniff",
632-
trace_id="0123456789012345678901234567890",
633-
):
634-
client = await aiohttp_client(raw_server)
635-
resp = await client.get("/", headers={"bagGage": "custom=value"})
636-
637-
assert (
638-
resp.request_info.headers["baggage"]
639-
== "custom=value,sentry-trace_id=0123456789012345678901234567890,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true"
640-
)
629+
with mock.patch("sentry_sdk.tracing_utils.Random.uniform", return_value=0.5):
630+
with start_transaction(
631+
name="/interactions/other-dogs/new-dog",
632+
op="greeting.sniff",
633+
trace_id="0123456789012345678901234567890",
634+
):
635+
client = await aiohttp_client(raw_server)
636+
resp = await client.get("/", headers={"bagGage": "custom=value"})
637+
638+
assert (
639+
resp.request_info.headers["baggage"]
640+
== "custom=value,sentry-trace_id=0123456789012345678901234567890,sentry-sample_rand=0.500000,sentry-environment=production,sentry-release=d08ebdb9309e1b004c6f52202de58a09c2268e42,sentry-transaction=/interactions/other-dogs/new-dog,sentry-sample_rate=1.0,sentry-sampled=true"
641+
)
641642

642643

643644
@pytest.mark.asyncio

0 commit comments

Comments
 (0)