5
5
# We have to use the default JSON interface to enable pretty-printing on export. When loading JSON,
6
6
# we still use the one from `sentry.utils`, imported as `sentry_json` below.
7
7
import json as builtin_json # noqa: S003
8
+ from abc import ABC , abstractmethod
8
9
from typing import IO
9
10
10
11
import orjson
11
12
12
- from sentry .backup .crypto import Encryptor , create_encrypted_export_tarball
13
+ from sentry .backup .crypto import Encryptor , EncryptorDecryptorPair , create_encrypted_export_tarball
13
14
from sentry .backup .dependencies import (
15
+ ImportKind ,
16
+ NormalizedModelName ,
14
17
PrimaryKeyMap ,
15
18
dependencies ,
16
19
get_model_name ,
20
23
from sentry .backup .scopes import ExportScope
21
24
from sentry .backup .services .import_export .model import (
22
25
RpcExportError ,
26
+ RpcExportOk ,
23
27
RpcExportScope ,
24
28
RpcFilter ,
25
29
RpcPrimaryKeyMap ,
@@ -41,6 +45,69 @@ def __init__(self, context: RpcExportError) -> None:
41
45
self .context = context
42
46
43
47
48
class ExportCheckpointerError(Exception):
    """Raised when an export checkpoint cannot be used, e.g. a cached entry is malformed."""
50
+
51
+
52
class ExportCheckpointer(ABC):
    """
    For very large exports, the exporting environment may fall over half-way through the process:
    the thread running it may hit some timeout, or it may OOM, or fail for some other ephemeral
    reason. To help in such situations, we'd like an API for saving "checkpoints" during the export.

    This class provides per-model checkpointing support for exports. Since there is a topologically
    sorted order of models being exported, as we move through this list, we can save the exported
    JSON for each kind of model in order to some stable media (disk, GCP, etc). If there is a
    failure late in the export process, when it is retried, the exporter can check if that
    particular model already exists in the checkpointer's cache, thereby avoiding redoing the work
    of pulling the models from the database, processing them, etc. This ensures that in most retry
    situations, we can quickly "re-ingest" already-exported models in memory and pick up where we
    left off.
    """

    def _parse_cached_json(self, json_data: bytes) -> RpcExportOk | None:
        """
        Rebuild an `RpcExportOk` from the checkpointed JSON of a single model kind.

        The payload must be a JSON array of objects, each carrying a "model" and a "pk" entry.
        Every pk is recorded in a fresh `PrimaryKeyMap` as mapping to itself (with
        `ImportKind.Inserted`), and the largest pk seen is reported as `max_pk`. The original
        `json_data` is passed through unchanged.

        Raises `ExportCheckpointerError` if the payload is not an array, or if any entry is not an
        object or lacks "model"/"pk". Errors raised by `orjson.loads` itself for undecodable input
        are not intercepted here. As written, this helper never returns `None`.
        """
        max_pk = 0
        pk_map = PrimaryKeyMap()
        models = orjson.loads(json_data)

        # Anything other than an array would either crash with an unrelated error while iterating
        # (a string, number, or non-empty object), or be silently accepted (an empty object), so
        # reject it with the error type this helper already uses for malformed checkpoints.
        if not isinstance(models, list):
            raise ExportCheckpointerError("Improperly formatted checkpoint: expected a JSON array")

        for model in models:
            # Guard the `.get()` calls below: a non-object entry has no such method.
            if not isinstance(model, dict):
                raise ExportCheckpointerError("Improperly formatted entry")

            model_name = model.get("model", None)
            pk = model.get("pk", None)
            if model_name is None or pk is None:
                raise ExportCheckpointerError("Improperly formatted entry")

            pk_map.insert(model_name, pk, pk, ImportKind.Inserted)
            if pk > max_pk:
                max_pk = pk

        return RpcExportOk(
            mapped_pks=RpcPrimaryKeyMap.into_rpc(pk_map), max_pk=max_pk, json_data=json_data
        )

    @abstractmethod
    def get(self, model_name: NormalizedModelName) -> RpcExportOk | None:
        """
        Return the checkpointed export result for `model_name`, or `None` if there is none. The
        exporter uses a non-`None` result in place of re-exporting that model.
        """
        pass

    @abstractmethod
    def add(self, model_name: NormalizedModelName, json_data: str) -> None:
        """
        Record the exported JSON for `model_name` as a checkpoint.

        NOTE(review): `json_data` is annotated as `str`, but the call site visible in `_export`
        passes the already-parsed output of `orjson.loads` - confirm which type is intended.
        """
        pass
93
+
94
+
95
class NoopExportCheckpointer(ExportCheckpointer):
    """
    A checkpointer that does nothing: it stores no checkpoints and never finds one, so every
    lookup yields None and no checkpointing ever takes place.
    """

    def __init__(self, crypto: EncryptorDecryptorPair | None, printer: Printer):
        """Accept the standard constructor arguments; neither is used or retained."""

    def get(self, model_name: NormalizedModelName) -> RpcExportOk | None:
        """Always report that no checkpoint exists for `model_name`."""
        return None

    def add(self, model_name: NormalizedModelName, json_data: str) -> None:
        """Discard the supplied data without saving anything."""
109
+
110
+
44
111
def _export (
45
112
dest : IO [bytes ],
46
113
scope : ExportScope ,
@@ -49,6 +116,7 @@ def _export(
49
116
indent : int = 2 ,
50
117
filter_by : Filter | None = None ,
51
118
printer : Printer ,
119
+ checkpointer : ExportCheckpointer | None = None ,
52
120
):
53
121
"""
54
122
Exports core data for the Sentry installation.
@@ -68,6 +136,7 @@ def _export(
68
136
printer .echo (errText , err = True )
69
137
raise RuntimeError (errText )
70
138
139
+ cache = checkpointer if checkpointer is not None else NoopExportCheckpointer (None , printer )
71
140
json_export = []
72
141
pk_map = PrimaryKeyMap ()
73
142
allowed_relocation_scopes = scope .value
@@ -119,25 +188,33 @@ def _export(
119
188
120
189
dep_models = {get_model_name (d ) for d in model_relations .get_dependencies_for_relocation ()}
121
190
export_by_model = ImportExportService .get_exporter_for_model (model )
122
- result = export_by_model (
123
- export_model_name = str (model_name ),
124
- scope = RpcExportScope .into_rpc (scope ),
125
- from_pk = 0 ,
126
- filter_by = [RpcFilter .into_rpc (f ) for f in filters ],
127
- pk_map = RpcPrimaryKeyMap .into_rpc (pk_map .partition (dep_models )),
128
- indent = indent ,
191
+ cached_result = cache .get (model_name )
192
+ result = (
193
+ cached_result
194
+ if cached_result is not None
195
+ else export_by_model (
196
+ export_model_name = str (model_name ),
197
+ scope = RpcExportScope .into_rpc (scope ),
198
+ from_pk = 0 ,
199
+ filter_by = [RpcFilter .into_rpc (f ) for f in filters ],
200
+ pk_map = RpcPrimaryKeyMap .into_rpc (pk_map .partition (dep_models )),
201
+ indent = indent ,
202
+ )
129
203
)
130
204
131
205
if isinstance (result , RpcExportError ):
132
206
printer .echo (result .pretty (), err = True )
133
207
raise ExportingError (result )
134
208
135
209
pk_map .extend (result .mapped_pks .from_rpc ())
210
+ json_models = orjson .loads (result .json_data )
211
+ if cached_result is None :
212
+ cache .add (model_name , json_models )
136
213
137
214
# TODO(getsentry/team-ospo#190): Since the structure of this data is very predictable (an
138
215
# array of serialized model objects), we could probably avoid re-ingesting the JSON string
139
216
# as a future optimization.
140
- for json_model in orjson . loads ( result . json_data ) :
217
+ for json_model in json_models :
141
218
json_export .append (json_model )
142
219
143
220
# If no `encryptor` argument was passed in, this is an unencrypted export, so we can just dump
@@ -158,6 +235,7 @@ def export_in_user_scope(
158
235
user_filter : set [str ] | None = None ,
159
236
indent : int = 2 ,
160
237
printer : Printer ,
238
+ checkpointer : ExportCheckpointer | None = None ,
161
239
):
162
240
"""
163
241
Perform an export in the `User` scope, meaning that only models with `RelocationScope.User` will
@@ -174,6 +252,7 @@ def export_in_user_scope(
174
252
filter_by = Filter (User , "username" , user_filter ) if user_filter is not None else None ,
175
253
indent = indent ,
176
254
printer = printer ,
255
+ checkpointer = checkpointer ,
177
256
)
178
257
179
258
@@ -184,6 +263,7 @@ def export_in_organization_scope(
184
263
org_filter : set [str ] | None = None ,
185
264
indent : int = 2 ,
186
265
printer : Printer ,
266
+ checkpointer : ExportCheckpointer | None = None ,
187
267
):
188
268
"""
189
269
Perform an export in the `Organization` scope, meaning that only models with
@@ -201,6 +281,7 @@ def export_in_organization_scope(
201
281
filter_by = Filter (Organization , "slug" , org_filter ) if org_filter is not None else None ,
202
282
indent = indent ,
203
283
printer = printer ,
284
+ checkpointer = checkpointer ,
204
285
)
205
286
206
287
@@ -210,6 +291,7 @@ def export_in_config_scope(
210
291
encryptor : Encryptor | None = None ,
211
292
indent : int = 2 ,
212
293
printer : Printer ,
294
+ checkpointer : ExportCheckpointer | None = None ,
213
295
):
214
296
"""
215
297
Perform an export in the `Config` scope, meaning that only models directly related to the global
@@ -226,6 +308,7 @@ def export_in_config_scope(
226
308
filter_by = Filter (User , "pk" , import_export_service .get_all_globally_privileged_users ()),
227
309
indent = indent ,
228
310
printer = printer ,
311
+ checkpointer = checkpointer ,
229
312
)
230
313
231
314
@@ -235,6 +318,7 @@ def export_in_global_scope(
235
318
encryptor : Encryptor | None = None ,
236
319
indent : int = 2 ,
237
320
printer : Printer ,
321
+ checkpointer : ExportCheckpointer | None = None ,
238
322
):
239
323
"""
240
324
Perform an export in the `Global` scope, meaning that all models will be exported from the
@@ -246,4 +330,5 @@ def export_in_global_scope(
246
330
encryptor = encryptor ,
247
331
indent = indent ,
248
332
printer = printer ,
333
+ checkpointer = checkpointer ,
249
334
)
0 commit comments