Skip to content

Commit 2b067e9

Browse files
authored
feat(profiling): Continuous profiling lifecycle (#4017)
This introduces an auto lifecycle setting for continuous profiling that only profiles while there is an active transaction. It replaces the experimental auto start setting.
1 parent 7a1c010 commit 2b067e9

File tree

6 files changed

+347
-43
lines changed

6 files changed

+347
-43
lines changed

Diff for: sentry_sdk/consts.py

+2
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ class CompressionAlgo(Enum):
3838
from typing import Any
3939
from typing import Sequence
4040
from typing import Tuple
41+
from typing_extensions import Literal
4142
from typing_extensions import TypedDict
4243

4344
from sentry_sdk._types import (
@@ -528,6 +529,7 @@ def __init__(
528529
profiles_sample_rate=None, # type: Optional[float]
529530
profiles_sampler=None, # type: Optional[TracesSampler]
530531
profiler_mode=None, # type: Optional[ProfilerMode]
532+
profile_lifecycle="manual", # type: Literal["manual", "trace"]
531533
profile_session_sample_rate=None, # type: Optional[float]
532534
auto_enabling_integrations=True, # type: bool
533535
disabled_integrations=None, # type: Optional[Sequence[sentry_sdk.integrations.Integration]]

Diff for: sentry_sdk/profiler/continuous_profiler.py

+146-26
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import threading
66
import time
77
import uuid
8+
from collections import deque
89
from datetime import datetime, timezone
910

1011
from sentry_sdk.consts import VERSION
@@ -27,9 +28,11 @@
2728
if TYPE_CHECKING:
2829
from typing import Any
2930
from typing import Callable
31+
from typing import Deque
3032
from typing import Dict
3133
from typing import List
3234
from typing import Optional
35+
from typing import Set
3336
from typing import Type
3437
from typing import Union
3538
from typing_extensions import TypedDict
@@ -120,6 +123,9 @@ def setup_continuous_profiler(options, sdk_info, capture_func):
120123

121124
def try_autostart_continuous_profiler():
122125
# type: () -> None
126+
127+
# TODO: deprecate this as it'll be replaced by the auto lifecycle option
128+
123129
if _scheduler is None:
124130
return
125131

@@ -129,6 +135,14 @@ def try_autostart_continuous_profiler():
129135
_scheduler.manual_start()
130136

131137

138+
def try_profile_lifecycle_trace_start():
139+
# type: () -> Union[ContinuousProfile, None]
140+
if _scheduler is None:
141+
return None
142+
143+
return _scheduler.auto_start()
144+
145+
132146
def start_profiler():
133147
# type: () -> None
134148
if _scheduler is None:
@@ -170,6 +184,14 @@ def determine_profile_session_sampling_decision(sample_rate):
170184
return random.random() < float(sample_rate)
171185

172186

187+
class ContinuousProfile:
188+
active: bool = True
189+
190+
def stop(self):
191+
# type: () -> None
192+
self.active = False
193+
194+
173195
class ContinuousScheduler:
174196
mode = "unknown" # type: ContinuousProfilerMode
175197

@@ -179,16 +201,21 @@ def __init__(self, frequency, options, sdk_info, capture_func):
179201
self.options = options
180202
self.sdk_info = sdk_info
181203
self.capture_func = capture_func
204+
205+
self.lifecycle = self.options.get("profile_lifecycle")
206+
profile_session_sample_rate = self.options.get("profile_session_sample_rate")
207+
self.sampled = determine_profile_session_sampling_decision(
208+
profile_session_sample_rate
209+
)
210+
182211
self.sampler = self.make_sampler()
183212
self.buffer = None # type: Optional[ProfileBuffer]
184213
self.pid = None # type: Optional[int]
185214

186215
self.running = False
187216

188-
profile_session_sample_rate = self.options.get("profile_session_sample_rate")
189-
self.sampled = determine_profile_session_sampling_decision(
190-
profile_session_sample_rate
191-
)
217+
self.new_profiles = deque(maxlen=128) # type: Deque[ContinuousProfile]
218+
self.active_profiles = set() # type: Set[ContinuousProfile]
192219

193220
def is_auto_start_enabled(self):
194221
# type: () -> bool
@@ -207,15 +234,38 @@ def is_auto_start_enabled(self):
207234

208235
return experiments.get("continuous_profiling_auto_start")
209236

237+
def auto_start(self):
238+
# type: () -> Union[ContinuousProfile, None]
239+
if not self.sampled:
240+
return None
241+
242+
if self.lifecycle != "trace":
243+
return None
244+
245+
logger.debug("[Profiling] Auto starting profiler")
246+
247+
profile = ContinuousProfile()
248+
249+
self.new_profiles.append(profile)
250+
self.ensure_running()
251+
252+
return profile
253+
210254
def manual_start(self):
211255
# type: () -> None
212256
if not self.sampled:
213257
return
214258

259+
if self.lifecycle != "manual":
260+
return
261+
215262
self.ensure_running()
216263

217264
def manual_stop(self):
218265
# type: () -> None
266+
if self.lifecycle != "manual":
267+
return
268+
219269
self.teardown()
220270

221271
def ensure_running(self):
@@ -249,28 +299,97 @@ def make_sampler(self):
249299

250300
cache = LRUCache(max_size=256)
251301

252-
def _sample_stack(*args, **kwargs):
253-
# type: (*Any, **Any) -> None
254-
"""
255-
Take a sample of the stack on all the threads in the process.
256-
This should be called at a regular interval to collect samples.
257-
"""
258-
259-
ts = now()
260-
261-
try:
262-
sample = [
263-
(str(tid), extract_stack(frame, cache, cwd))
264-
for tid, frame in sys._current_frames().items()
265-
]
266-
except AttributeError:
267-
# For some reason, the frame we get doesn't have certain attributes.
268-
# When this happens, we abandon the current sample as it's bad.
269-
capture_internal_exception(sys.exc_info())
270-
return
271-
272-
if self.buffer is not None:
273-
self.buffer.write(ts, sample)
302+
if self.lifecycle == "trace":
303+
304+
def _sample_stack(*args, **kwargs):
305+
# type: (*Any, **Any) -> None
306+
"""
307+
Take a sample of the stack on all the threads in the process.
308+
This should be called at a regular interval to collect samples.
309+
"""
310+
311+
# no profiles taking place, so we can stop early
312+
if not self.new_profiles and not self.active_profiles:
313+
self.running = False
314+
return
315+
316+
# This is the number of profiles we want to pop off.
317+
# It's possible another thread adds a new profile to
318+
# the list and we spend longer than we want inside
319+
# the loop below.
320+
#
321+
# Also make sure to set this value before extracting
322+
# frames so we do not write to any new profiles that
323+
# were started after this point.
324+
new_profiles = len(self.new_profiles)
325+
326+
ts = now()
327+
328+
try:
329+
sample = [
330+
(str(tid), extract_stack(frame, cache, cwd))
331+
for tid, frame in sys._current_frames().items()
332+
]
333+
except AttributeError:
334+
# For some reason, the frame we get doesn't have certain attributes.
335+
# When this happens, we abandon the current sample as it's bad.
336+
capture_internal_exception(sys.exc_info())
337+
return
338+
339+
# Move the new profiles into the active_profiles set.
340+
#
341+
# We cannot directly add them to the active_profiles set
342+
# in `start_profiling` because it is called from other
343+
# threads, which can cause a RuntimeError when the
344+
# set size changes during iteration without a lock.
345+
#
346+
# We also want to avoid using a lock here so threads
347+
# that are starting profiles are not blocked until they
348+
# can acquire the lock.
349+
for _ in range(new_profiles):
350+
self.active_profiles.add(self.new_profiles.popleft())
351+
inactive_profiles = []
352+
353+
for profile in self.active_profiles:
354+
if profile.active:
355+
pass
356+
else:
357+
# If a profile is marked inactive, we buffer it
358+
# to `inactive_profiles` so it can be removed.
359+
# We cannot remove it here as it would result
360+
# in a RuntimeError.
361+
inactive_profiles.append(profile)
362+
363+
for profile in inactive_profiles:
364+
self.active_profiles.remove(profile)
365+
366+
if self.buffer is not None:
367+
self.buffer.write(ts, sample)
368+
369+
else:
370+
371+
def _sample_stack(*args, **kwargs):
372+
# type: (*Any, **Any) -> None
373+
"""
374+
Take a sample of the stack on all the threads in the process.
375+
This should be called at a regular interval to collect samples.
376+
"""
377+
378+
ts = now()
379+
380+
try:
381+
sample = [
382+
(str(tid), extract_stack(frame, cache, cwd))
383+
for tid, frame in sys._current_frames().items()
384+
]
385+
except AttributeError:
386+
# For some reason, the frame we get doesn't have certain attributes.
387+
# When this happens, we abandon the current sample as it's bad.
388+
capture_internal_exception(sys.exc_info())
389+
return
390+
391+
if self.buffer is not None:
392+
self.buffer.write(ts, sample)
274393

275394
return _sample_stack
276395

@@ -294,6 +413,7 @@ def run(self):
294413

295414
if self.buffer is not None:
296415
self.buffer.flush()
416+
self.buffer = None
297417

298418

299419
class ThreadContinuousScheduler(ContinuousScheduler):

Diff for: sentry_sdk/profiler/transaction_profiler.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -644,7 +644,7 @@ def _sample_stack(*args, **kwargs):
644644
if profile.active:
645645
profile.write(now, sample)
646646
else:
647-
# If a thread is marked inactive, we buffer it
647+
# If a profile is marked inactive, we buffer it
648648
# to `inactive_profiles` so it can be removed.
649649
# We cannot remove it here as it would result
650650
# in a RuntimeError.

Diff for: sentry_sdk/scope.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,11 @@
1212
from sentry_sdk.attachments import Attachment
1313
from sentry_sdk.consts import DEFAULT_MAX_BREADCRUMBS, FALSE_VALUES, INSTRUMENTER
1414
from sentry_sdk.feature_flags import FlagBuffer, DEFAULT_FLAG_CAPACITY
15-
from sentry_sdk.profiler.continuous_profiler import try_autostart_continuous_profiler
15+
from sentry_sdk.profiler.continuous_profiler import (
16+
get_profiler_id,
17+
try_autostart_continuous_profiler,
18+
try_profile_lifecycle_trace_start,
19+
)
1620
from sentry_sdk.profiler.transaction_profiler import Profile
1721
from sentry_sdk.session import Session
1822
from sentry_sdk.tracing_utils import (
@@ -1063,6 +1067,14 @@ def start_transaction(
10631067

10641068
transaction._profile = profile
10651069

1070+
transaction._continuous_profile = try_profile_lifecycle_trace_start()
1071+
1072+
# Typically, the profiler id is set when the transaction is created. But when
1073+
# using the auto lifecycle, the profiler isn't running when the first
1074+
# transaction is started. So make sure we update the profiler id on it.
1075+
if transaction._continuous_profile is not None:
1076+
transaction.set_profiler_id(get_profiler_id())
1077+
10661078
# we don't bother to keep spans if we already know we're not going to
10671079
# send the transaction
10681080
max_spans = (client.options["_experiments"].get("max_spans")) or 1000

Diff for: sentry_sdk/tracing.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@
3434
P = ParamSpec("P")
3535
R = TypeVar("R")
3636

37-
import sentry_sdk.profiler
37+
from sentry_sdk.profiler.continuous_profiler import ContinuousProfile
38+
from sentry_sdk.profiler.transaction_profiler import Profile
3839
from sentry_sdk._types import (
3940
Event,
4041
MeasurementUnit,
@@ -767,6 +768,7 @@ class Transaction(Span):
767768
"_measurements",
768769
"_contexts",
769770
"_profile",
771+
"_continuous_profile",
770772
"_baggage",
771773
)
772774

@@ -788,9 +790,8 @@ def __init__( # type: ignore[misc]
788790
self.parent_sampled = parent_sampled
789791
self._measurements = {} # type: Dict[str, MeasurementValue]
790792
self._contexts = {} # type: Dict[str, Any]
791-
self._profile = (
792-
None
793-
) # type: Optional[sentry_sdk.profiler.transaction_profiler.Profile]
793+
self._profile = None # type: Optional[Profile]
794+
self._continuous_profile = None # type: Optional[ContinuousProfile]
794795
self._baggage = baggage
795796

796797
def __repr__(self):
@@ -843,6 +844,9 @@ def __exit__(self, ty, value, tb):
843844
if self._profile is not None:
844845
self._profile.__exit__(ty, value, tb)
845846

847+
if self._continuous_profile is not None:
848+
self._continuous_profile.stop()
849+
846850
super().__exit__(ty, value, tb)
847851

848852
@property

0 commit comments

Comments
 (0)