9
9
from typing import Optional
10
10
11
11
from sentry_sdk ._types import Event , Hint
12
+ from pyspark import SparkContext
12
13
13
14
14
15
class SparkIntegration (Integration ):
@@ -17,7 +18,7 @@ class SparkIntegration(Integration):
17
18
@staticmethod
18
19
def setup_once ():
19
20
# type: () -> None
20
- patch_spark_context_init ()
21
+ _setup_sentry_tracing ()
21
22
22
23
23
24
def _set_app_properties ():
@@ -37,7 +38,7 @@ def _set_app_properties():
37
38
38
39
39
40
def _start_sentry_listener (sc ):
40
- # type: (Any ) -> None
41
+ # type: (SparkContext ) -> None
41
42
"""
42
43
Start java gateway server to add custom `SparkListener`
43
44
"""
@@ -49,7 +50,51 @@ def _start_sentry_listener(sc):
49
50
sc ._jsc .sc ().addSparkListener (listener )
50
51
51
52
52
- def patch_spark_context_init ():
53
def _add_event_processor(sc):
    # type: (SparkContext) -> None
    """
    Register an event processor that enriches events with Spark metadata.

    The processor is attached to the current isolation scope and reads its
    values from ``sc``. Every value is written with ``setdefault``, so data
    already present on the event is never overwritten.
    """
    isolation_scope = sentry_sdk.get_isolation_scope()

    @isolation_scope.add_event_processor
    def process_event(event, hint):
        # type: (Event, Hint) -> Optional[Event]
        with capture_internal_exceptions():
            integration = sentry_sdk.get_client().get_integration(SparkIntegration)
            # Leave the event untouched when the integration is not enabled
            # on the current client or no Spark context is active.
            if integration is None or sc._active_spark_context is None:
                return event

            user = event.setdefault("user", {})
            user.setdefault("id", sc.sparkUser())

            tags = event.setdefault("tags", {})
            tags.setdefault("executor.id", sc._conf.get("spark.executor.id"))
            tags.setdefault(
                "spark-submit.deployMode",
                sc._conf.get("spark.submit.deployMode"),
            )
            tags.setdefault("driver.host", sc._conf.get("spark.driver.host"))
            tags.setdefault("driver.port", sc._conf.get("spark.driver.port"))
            tags.setdefault("spark_version", sc.version)
            tags.setdefault("app_name", sc.appName)
            tags.setdefault("application_id", sc.applicationId)
            tags.setdefault("master", sc.master)
            tags.setdefault("spark_home", sc.sparkHome)

            extra = event.setdefault("extra", {})
            extra.setdefault("web_url", sc.uiWebUrl)

        return event
87
+
88
+
89
def _activate_integration(sc):
    # type: (SparkContext) -> None
    """
    Hook Sentry into the given Spark context.

    Runs three steps in order: start the Sentry listener on ``sc``, set the
    app properties, and register the event processor for ``sc``.
    """
    _start_sentry_listener(sc)
    _set_app_properties()
    _add_event_processor(sc)
95
+
96
+
97
+ def _patch_spark_context_init ():
53
98
# type: () -> None
54
99
from pyspark import SparkContext
55
100
@@ -59,51 +104,22 @@ def patch_spark_context_init():
59
104
def _sentry_patched_spark_context_init (self , * args , ** kwargs ):
60
105
# type: (SparkContext, *Any, **Any) -> Optional[Any]
61
106
rv = spark_context_init (self , * args , ** kwargs )
62
- _start_sentry_listener (self )
63
- _set_app_properties ()
64
-
65
- scope = sentry_sdk .get_isolation_scope ()
66
-
67
- @scope .add_event_processor
68
- def process_event (event , hint ):
69
- # type: (Event, Hint) -> Optional[Event]
70
- with capture_internal_exceptions ():
71
- if sentry_sdk .get_client ().get_integration (SparkIntegration ) is None :
72
- return event
73
-
74
- if self ._active_spark_context is None :
75
- return event
76
-
77
- event .setdefault ("user" , {}).setdefault ("id" , self .sparkUser ())
78
-
79
- event .setdefault ("tags" , {}).setdefault (
80
- "executor.id" , self ._conf .get ("spark.executor.id" )
81
- )
82
- event ["tags" ].setdefault (
83
- "spark-submit.deployMode" ,
84
- self ._conf .get ("spark.submit.deployMode" ),
85
- )
86
- event ["tags" ].setdefault (
87
- "driver.host" , self ._conf .get ("spark.driver.host" )
88
- )
89
- event ["tags" ].setdefault (
90
- "driver.port" , self ._conf .get ("spark.driver.port" )
91
- )
92
- event ["tags" ].setdefault ("spark_version" , self .version )
93
- event ["tags" ].setdefault ("app_name" , self .appName )
94
- event ["tags" ].setdefault ("application_id" , self .applicationId )
95
- event ["tags" ].setdefault ("master" , self .master )
96
- event ["tags" ].setdefault ("spark_home" , self .sparkHome )
97
-
98
- event .setdefault ("extra" , {}).setdefault ("web_url" , self .uiWebUrl )
99
-
100
- return event
101
-
107
+ _activate_integration (self )
102
108
return rv
103
109
104
110
SparkContext ._do_init = _sentry_patched_spark_context_init
105
111
106
112
113
def _setup_sentry_tracing():
    # type: () -> None
    """
    Activate the integration now, or defer it until a context is created.

    If a ``SparkContext`` is already active it is instrumented immediately;
    otherwise ``SparkContext`` initialization is patched so the integration
    is activated when a context is initialized.
    """
    from pyspark import SparkContext

    active_context = SparkContext._active_spark_context
    if active_context is None:
        _patch_spark_context_init()
    else:
        _activate_integration(active_context)
121
+
122
+
107
123
class SparkListener :
108
124
def onApplicationEnd (self , applicationEnd ): # noqa: N802,N803
109
125
# type: (Any) -> None
@@ -208,10 +224,21 @@ class Java:
208
224
209
225
210
226
class SentryListener (SparkListener ):
227
def _add_breadcrumb(
    self,
    level,  # type: str
    message,  # type: str
    data=None,  # type: Optional[dict[str, Any]]
):
    # type: (...) -> None
    """
    Record a breadcrumb on the global Sentry scope.

    ``level``, ``message`` and ``data`` are forwarded unchanged as keyword
    arguments to the scope's ``add_breadcrumb``.
    """
    global_scope = sentry_sdk.get_global_scope()
    global_scope.add_breadcrumb(level=level, message=message, data=data)
237
+
211
238
def onJobStart (self , jobStart ): # noqa: N802,N803
212
239
# type: (Any) -> None
213
240
message = "Job {} Started" .format (jobStart .jobId ())
214
- sentry_sdk . add_breadcrumb (level = "info" , message = message )
241
+ self . _add_breadcrumb (level = "info" , message = message )
215
242
_set_app_properties ()
216
243
217
244
def onJobEnd (self , jobEnd ): # noqa: N802,N803
@@ -227,14 +254,14 @@ def onJobEnd(self, jobEnd): # noqa: N802,N803
227
254
level = "warning"
228
255
message = "Job {} Failed" .format (jobEnd .jobId ())
229
256
230
- sentry_sdk . add_breadcrumb (level = level , message = message , data = data )
257
+ self . _add_breadcrumb (level = level , message = message , data = data )
231
258
232
259
def onStageSubmitted (self , stageSubmitted ): # noqa: N802,N803
233
260
# type: (Any) -> None
234
261
stage_info = stageSubmitted .stageInfo ()
235
262
message = "Stage {} Submitted" .format (stage_info .stageId ())
236
263
data = {"attemptId" : stage_info .attemptId (), "name" : stage_info .name ()}
237
- sentry_sdk . add_breadcrumb (level = "info" , message = message , data = data )
264
+ self . _add_breadcrumb (level = "info" , message = message , data = data )
238
265
_set_app_properties ()
239
266
240
267
def onStageCompleted (self , stageCompleted ): # noqa: N802,N803
@@ -255,4 +282,4 @@ def onStageCompleted(self, stageCompleted): # noqa: N802,N803
255
282
message = "Stage {} Completed" .format (stage_info .stageId ())
256
283
level = "info"
257
284
258
- sentry_sdk . add_breadcrumb (level = level , message = message , data = data )
285
+ self . _add_breadcrumb (level = level , message = message , data = data )
0 commit comments