Skip to content

Commit 48d7767

Browse files
authored
feat(profiling): Add thread data to spans (#2843)
As per getsentry/rfc#75, this adds the thread data to the spans. This will be needed for the continuous profiling mode in #2830.
1 parent 500e087 commit 48d7767

26 files changed

+599
-367
lines changed

sentry_sdk/consts.py

+12
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,18 @@ class SPANDATA:
191191
Example: "http.handler"
192192
"""
193193

194+
THREAD_ID = "thread.id"
195+
"""
196+
Identifier of a thread from where the span originated. This should be a string.
197+
Example: "7972576320"
198+
"""
199+
200+
THREAD_NAME = "thread.name"
201+
"""
202+
Label identifying a thread from where the span originated. This should be a string.
203+
Example: "MainThread"
204+
"""
205+
194206

195207
class OP:
196208
CACHE_GET_ITEM = "cache.get_item"

sentry_sdk/profiler.py

+5-65
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@
4242
from sentry_sdk.utils import (
4343
capture_internal_exception,
4444
filename_for_module,
45+
get_current_thread_meta,
46+
is_gevent,
4547
is_valid_sample_rate,
4648
logger,
4749
nanosecond_time,
@@ -126,32 +128,16 @@
126128

127129

128130
try:
129-
from gevent import get_hub as get_gevent_hub # type: ignore
130-
from gevent.monkey import get_original, is_module_patched # type: ignore
131+
from gevent.monkey import get_original # type: ignore
131132
from gevent.threadpool import ThreadPool # type: ignore
132133

133134
thread_sleep = get_original("time", "sleep")
134135
except ImportError:
135-
136-
def get_gevent_hub():
137-
# type: () -> Any
138-
return None
139-
140136
thread_sleep = time.sleep
141137

142-
def is_module_patched(*args, **kwargs):
143-
# type: (*Any, **Any) -> bool
144-
# unable to import from gevent means no modules have been patched
145-
return False
146-
147138
ThreadPool = None
148139

149140

150-
def is_gevent():
151-
# type: () -> bool
152-
return is_module_patched("threading") or is_module_patched("_thread")
153-
154-
155141
_scheduler = None # type: Optional[Scheduler]
156142

157143
# The default sampling frequency to use. This is set at 101 in order to
@@ -389,52 +375,6 @@ def get_frame_name(frame):
389375
MAX_PROFILE_DURATION_NS = int(3e10) # 30 seconds
390376

391377

392-
def get_current_thread_id(thread=None):
393-
# type: (Optional[threading.Thread]) -> Optional[int]
394-
"""
395-
Try to get the id of the current thread, with various fall backs.
396-
"""
397-
398-
# if a thread is specified, that takes priority
399-
if thread is not None:
400-
try:
401-
thread_id = thread.ident
402-
if thread_id is not None:
403-
return thread_id
404-
except AttributeError:
405-
pass
406-
407-
# if the app is using gevent, we should look at the gevent hub first
408-
# as the id there differs from what the threading module reports
409-
if is_gevent():
410-
gevent_hub = get_gevent_hub()
411-
if gevent_hub is not None:
412-
try:
413-
# this is undocumented, so wrap it in try except to be safe
414-
return gevent_hub.thread_ident
415-
except AttributeError:
416-
pass
417-
418-
# use the current thread's id if possible
419-
try:
420-
current_thread_id = threading.current_thread().ident
421-
if current_thread_id is not None:
422-
return current_thread_id
423-
except AttributeError:
424-
pass
425-
426-
# if we can't get the current thread id, fall back to the main thread id
427-
try:
428-
main_thread_id = threading.main_thread().ident
429-
if main_thread_id is not None:
430-
return main_thread_id
431-
except AttributeError:
432-
pass
433-
434-
# we've tried everything, time to give up
435-
return None
436-
437-
438378
class Profile(object):
439379
def __init__(
440380
self,
@@ -456,7 +396,7 @@ def __init__(
456396

457397
# Various framework integrations are capable of overwriting the active thread id.
458398
# If it is set to `None` at the end of the profile, we fall back to the default.
459-
self._default_active_thread_id = get_current_thread_id() or 0 # type: int
399+
self._default_active_thread_id = get_current_thread_meta()[0] or 0 # type: int
460400
self.active_thread_id = None # type: Optional[int]
461401

462402
try:
@@ -479,7 +419,7 @@ def __init__(
479419

480420
def update_active_thread_id(self):
481421
# type: () -> None
482-
self.active_thread_id = get_current_thread_id()
422+
self.active_thread_id = get_current_thread_meta()[0]
483423
logger.debug(
484424
"[Profiling] updating active thread id to {tid}".format(
485425
tid=self.active_thread_id

sentry_sdk/tracing.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,12 @@
55

66
import sentry_sdk
77
from sentry_sdk.consts import INSTRUMENTER
8-
from sentry_sdk.utils import is_valid_sample_rate, logger, nanosecond_time
8+
from sentry_sdk.utils import (
9+
get_current_thread_meta,
10+
is_valid_sample_rate,
11+
logger,
12+
nanosecond_time,
13+
)
914
from sentry_sdk._compat import datetime_utcnow, utc_from_timestamp, PY2
1015
from sentry_sdk.consts import SPANDATA
1116
from sentry_sdk._types import TYPE_CHECKING
@@ -172,6 +177,9 @@ def __init__(
172177
self._span_recorder = None # type: Optional[_SpanRecorder]
173178
self._local_aggregator = None # type: Optional[LocalAggregator]
174179

180+
thread_id, thread_name = get_current_thread_meta()
181+
self.set_thread(thread_id, thread_name)
182+
175183
# TODO this should really live on the Transaction class rather than the Span
176184
# class
177185
def init_span_recorder(self, maxlen):
@@ -418,6 +426,15 @@ def set_status(self, value):
418426
# type: (str) -> None
419427
self.status = value
420428

429+
def set_thread(self, thread_id, thread_name):
    # type: (Optional[int], Optional[str]) -> None
    """
    Record the originating thread on this span's data.

    The id is stored under ``SPANDATA.THREAD_ID`` as a string and the name
    under ``SPANDATA.THREAD_NAME``. A value that is ``None`` is skipped, so
    the corresponding key is not written by this call.
    """
    thread_data = (
        # the id is always serialized as a string
        (SPANDATA.THREAD_ID, None if thread_id is None else str(thread_id)),
        (SPANDATA.THREAD_NAME, thread_name),
    )

    for data_key, data_value in thread_data:
        if data_value is not None:
            self.set_data(data_key, data_value)
437+
421438
def set_http_status(self, http_status):
422439
# type: (int) -> None
423440
self.set_tag(

sentry_sdk/utils.py

+56
Original file line numberDiff line numberDiff line change
@@ -1746,9 +1746,14 @@ def now():
17461746

17471747

17481748
try:
1749+
from gevent import get_hub as get_gevent_hub
17491750
from gevent.monkey import is_module_patched
17501751
except ImportError:
17511752

1753+
def get_gevent_hub():
1754+
# type: () -> Any
1755+
return None
1756+
17521757
def is_module_patched(*args, **kwargs):
17531758
# type: (*Any, **Any) -> bool
17541759
# unable to import from gevent means no modules have been patched
@@ -1758,3 +1763,54 @@ def is_module_patched(*args, **kwargs):
17581763
def is_gevent():
17591764
# type: () -> bool
17601765
return is_module_patched("threading") or is_module_patched("_thread")
1766+
1767+
1768+
def _read_thread_meta(thread):
    # type: (Any) -> Optional[Tuple[int, Optional[str]]]
    # Return (ident, name) for a thread-like object, or None when the object
    # lacks either attribute or its ident is None.
    try:
        thread_id = thread.ident
        thread_name = thread.name
    except AttributeError:
        return None

    if thread_id is None:
        return None

    return thread_id, thread_name


def get_current_thread_meta(thread=None):
    # type: (Optional[threading.Thread]) -> Tuple[Optional[int], Optional[str]]
    """
    Try to get the id and name of the current thread, with various fall backs.

    The sources are tried in this order, and the first one that yields a
    non-None id wins:

    1. the explicitly passed ``thread``, if any
    2. the gevent hub, when gevent has patched the threading modules
       (only the id is available there, so the name is ``None``)
    3. ``threading.current_thread()``
    4. ``threading.main_thread()``

    :param thread: optional thread to read the id and name from.
    :returns: a ``(thread_id, thread_name)`` tuple; ``(None, None)`` when
        every source failed.
    """

    # if a thread is specified, that takes priority
    if thread is not None:
        meta = _read_thread_meta(thread)
        if meta is not None:
            return meta

    # if the app is using gevent, we should look at the gevent hub first
    # as the id there differs from what the threading module reports
    if is_gevent():
        gevent_hub = get_gevent_hub()
        if gevent_hub is not None:
            try:
                # this is undocumented, so wrap it in try except to be safe
                return gevent_hub.thread_ident, None
            except AttributeError:
                pass

    # use the current thread if possible, otherwise fall back to the main
    # thread; a lookup function missing from `threading` is simply skipped
    for lookup_name in ("current_thread", "main_thread"):
        try:
            candidate = getattr(threading, lookup_name)()
        except AttributeError:
            continue

        meta = _read_thread_meta(candidate)
        if meta is not None:
            return meta

    # we've tried everything, time to give up
    return None, None

tests/conftest.py

+12
Original file line numberDiff line numberDiff line change
@@ -652,3 +652,15 @@ def patch_start_tracing_child(fake_transaction_is_none=False):
652652
return_value=fake_transaction,
653653
):
654654
yield fake_start_child
655+
656+
657+
class ApproxDict(dict):
    """
    A dict that compares equal to any dict containing at least its items.

    For an ApproxDict to equal another dict, the other dict just needs to
    contain all the keys from the ApproxDict with the same values. The other
    dict may contain additional keys with any value.

    Comparing against something that is not a dict is never a match: the
    comparison is handed back to Python (``NotImplemented``) instead of
    raising ``TypeError`` or, for an empty ApproxDict, matching everything.
    """

    def __eq__(self, other):
        if not isinstance(other, dict):
            # e.g. `None` when the expected key is missing altogether
            return NotImplemented

        return all(key in other and other[key] == value for key, value in self.items())

    def __ne__(self, other):
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            # negating NotImplemented would silently turn it into False
            return outcome

        return not outcome

tests/integrations/aiohttp/test_aiohttp.py

+12-9
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from sentry_sdk import capture_message, start_transaction
1111
from sentry_sdk.integrations.aiohttp import AioHttpIntegration
12+
from tests.conftest import ApproxDict
1213

1314
try:
1415
from unittest import mock # python 3.3 and above
@@ -495,15 +496,17 @@ async def handler(request):
495496
crumb = event["breadcrumbs"]["values"][0]
496497
assert crumb["type"] == "http"
497498
assert crumb["category"] == "httplib"
498-
assert crumb["data"] == {
499-
"url": "http://127.0.0.1:{}/".format(raw_server.port),
500-
"http.fragment": "",
501-
"http.method": "GET",
502-
"http.query": "",
503-
"http.response.status_code": 200,
504-
"reason": "OK",
505-
"extra": "foo",
506-
}
499+
assert crumb["data"] == ApproxDict(
500+
{
501+
"url": "http://127.0.0.1:{}/".format(raw_server.port),
502+
"http.fragment": "",
503+
"http.method": "GET",
504+
"http.query": "",
505+
"http.response.status_code": 200,
506+
"reason": "OK",
507+
"extra": "foo",
508+
}
509+
)
507510

508511

509512
@pytest.mark.asyncio

tests/integrations/asyncpg/test_asyncpg.py

+10-7
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
from sentry_sdk.consts import SPANDATA
3535
from sentry_sdk.tracing_utils import record_sql_queries
3636
from sentry_sdk._compat import contextmanager
37+
from tests.conftest import ApproxDict
3738

3839
try:
3940
from unittest import mock
@@ -46,13 +47,15 @@
4647
)
4748
CRUMBS_CONNECT = {
4849
"category": "query",
49-
"data": {
50-
"db.name": PG_NAME,
51-
"db.system": "postgresql",
52-
"db.user": PG_USER,
53-
"server.address": PG_HOST,
54-
"server.port": PG_PORT,
55-
},
50+
"data": ApproxDict(
51+
{
52+
"db.name": PG_NAME,
53+
"db.system": "postgresql",
54+
"db.user": PG_USER,
55+
"server.address": PG_HOST,
56+
"server.port": PG_PORT,
57+
}
58+
),
5659
"message": "connect",
5760
"type": "default",
5861
}

tests/integrations/boto3/test_s3.py

+19-10
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from sentry_sdk import Hub
66
from sentry_sdk.integrations.boto3 import Boto3Integration
7+
from tests.conftest import ApproxDict
78
from tests.integrations.boto3.aws_mock import MockResponse
89
from tests.integrations.boto3 import read_fixture
910

@@ -65,12 +66,14 @@ def test_streaming(sentry_init, capture_events):
6566
span1 = event["spans"][0]
6667
assert span1["op"] == "http.client"
6768
assert span1["description"] == "aws.s3.GetObject"
68-
assert span1["data"] == {
69-
"http.method": "GET",
70-
"aws.request.url": "https://bucket.s3.amazonaws.com/foo.pdf",
71-
"http.fragment": "",
72-
"http.query": "",
73-
}
69+
assert span1["data"] == ApproxDict(
70+
{
71+
"http.method": "GET",
72+
"aws.request.url": "https://bucket.s3.amazonaws.com/foo.pdf",
73+
"http.fragment": "",
74+
"http.query": "",
75+
}
76+
)
7477

7578
span2 = event["spans"][1]
7679
assert span2["op"] == "http.client.stream"
@@ -123,7 +126,13 @@ def test_omit_url_data_if_parsing_fails(sentry_init, capture_events):
123126
transaction.finish()
124127

125128
(event,) = events
126-
assert event["spans"][0]["data"] == {
127-
"http.method": "GET",
128-
# no url data
129-
}
129+
assert event["spans"][0]["data"] == ApproxDict(
130+
{
131+
"http.method": "GET",
132+
# no url data
133+
}
134+
)
135+
136+
assert "aws.request.url" not in event["spans"][0]["data"]
137+
assert "http.fragment" not in event["spans"][0]["data"]
138+
assert "http.query" not in event["spans"][0]["data"]

tests/integrations/celery/test_celery.py

+2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
)
1111

1212
from sentry_sdk._compat import text_type
13+
from tests.conftest import ApproxDict
1314

1415
from celery import Celery, VERSION
1516
from celery.bin import worker
@@ -218,6 +219,7 @@ def dummy_task(x, y):
218219
assert execution_event["spans"] == []
219220
assert submission_event["spans"] == [
220221
{
222+
"data": ApproxDict(),
221223
"description": "dummy_task",
222224
"op": "queue.submit.celery",
223225
"parent_span_id": submission_event["contexts"]["trace"]["span_id"],

0 commit comments

Comments
 (0)