Skip to content

Commit 60f2390

Browse files
authored
feat(backup): Add ForeignKeyComparator (#54558)
Given two instances of a model that are being validated for equality, we want the foreign keys to be correct relatively (ie, they point to the same model in the respective models' JSON blobs), not absolutely (ie, they are literally the same integer). By creating maps that store the relations between pks and ordinals, we can easily check that the models point to match their respective ordinals regardless of the actual pk numbers. Issue: getsentry/team-ospo#171
1 parent fcb4f4f commit 60f2390

File tree

6 files changed

+425
-1
lines changed

6 files changed

+425
-1
lines changed

src/sentry/backup/comparators.py

+79
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from dateutil import parser
88
from django.db import models
99

10+
from sentry.backup.dependencies import PrimaryKeyMap, dependencies
1011
from sentry.backup.findings import ComparatorFinding, ComparatorFindingKind, InstanceID
1112
from sentry.backup.helpers import Side, get_exportable_final_derivations_of
1213
from sentry.db.models import BaseModel
@@ -200,6 +201,73 @@ def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[Compa
200201
return findings
201202

202203

204+
class ForeignKeyComparator(JSONScrubbingComparator):
205+
"""Ensures that foreign keys match in a relative (they refer to the same other model in their
206+
respective JSON blobs) rather than absolute (they have literally the same integer value)
207+
sense."""
208+
209+
left_pk_map: PrimaryKeyMap | None = None
210+
right_pk_map: PrimaryKeyMap | None = None
211+
212+
def __init__(self, foreign_fields: dict[str, models.base.ModelBase]):
213+
super().__init__(*(foreign_fields.keys()))
214+
self.foreign_fields = foreign_fields
215+
216+
def set_primary_key_maps(self, left_pk_map: PrimaryKeyMap, right_pk_map: PrimaryKeyMap):
217+
"""Call this function before running the comparator, to ensure that it has access to the latest mapping information for both sides of the comparison."""
218+
219+
self.left_pk_map = left_pk_map
220+
self.right_pk_map = right_pk_map
221+
222+
def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[ComparatorFinding]:
223+
findings = []
224+
fields = sorted(self.fields)
225+
for f in fields:
226+
field_model_name = "sentry." + self.foreign_fields[f].__name__.lower()
227+
if left["fields"].get(f) is None and right["fields"].get(f) is None:
228+
continue
229+
230+
if self.left_pk_map is None or self.right_pk_map is None:
231+
raise RuntimeError("must call `set_primary_key_maps` before comparing")
232+
233+
left_fk_as_ordinal = self.left_pk_map.get(field_model_name, left["fields"][f])
234+
right_fk_as_ordinal = self.right_pk_map.get(field_model_name, right["fields"][f])
235+
if left_fk_as_ordinal is None or right_fk_as_ordinal is None:
236+
if left_fk_as_ordinal is None:
237+
findings.append(
238+
ComparatorFinding(
239+
kind=self.get_kind(),
240+
on=on,
241+
left_pk=left["pk"],
242+
right_pk=right["pk"],
243+
reason=f"""the left foreign key ordinal for `{f}` model with pk `{left["fields"][f]}` could not be found""",
244+
)
245+
)
246+
if right_fk_as_ordinal is None:
247+
findings.append(
248+
ComparatorFinding(
249+
kind=self.get_kind(),
250+
on=on,
251+
left_pk=left["pk"],
252+
right_pk=right["pk"],
253+
reason=f"""the right foreign key ordinal for `{f}` model with pk `{right["fields"][f]}` could not be found""",
254+
)
255+
)
256+
continue
257+
258+
if left_fk_as_ordinal != right_fk_as_ordinal:
259+
findings.append(
260+
ComparatorFinding(
261+
kind=self.get_kind(),
262+
on=on,
263+
left_pk=left["pk"],
264+
right_pk=right["pk"],
265+
reason=f"""the left foreign key ordinal ({left_fk_as_ordinal}) for `{f}` was not equal to the right foreign key ordinal ({right_fk_as_ordinal})""",
266+
)
267+
)
268+
return findings
269+
270+
203271
class ObfuscatingComparator(JSONScrubbingComparator, ABC):
204272
"""Comparator that compares private values, but then safely truncates them to ensure that they
205273
do not leak out in logs, stack traces, etc."""
@@ -338,6 +406,16 @@ def auto_assign_email_obfuscating_comparators(comps: ComparatorMap) -> None:
338406
comps[name].append(EmailObfuscatingComparator(*assign))
339407

340408

409+
def auto_assign_foreign_key_comparators(comps: ComparatorMap) -> None:
410+
"""Automatically assigns the ForeignKeyComparator or to all appropriate model fields (see
411+
dependencies.py for more on what "appropriate" means in this context)."""
412+
413+
for model_name, rels in dependencies().items():
414+
comps[model_name.lower()].append(
415+
ForeignKeyComparator({k: v.model for k, v in rels.foreign_keys.items()})
416+
)
417+
418+
341419
ComparatorList = List[JSONScrubbingComparator]
342420
ComparatorMap = Dict[str, ComparatorList]
343421

@@ -380,6 +458,7 @@ def build_default_comparators():
380458
# to the `DEFAULT_COMPARATORS` map.
381459
auto_assign_datetime_equality_comparators(comparators)
382460
auto_assign_email_obfuscating_comparators(comparators)
461+
auto_assign_foreign_key_comparators(comparators)
383462

384463
return comparators
385464

src/sentry/backup/dependencies.py

+32
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
from collections import defaultdict
34
from enum import Enum, auto, unique
45
from typing import NamedTuple
56

@@ -71,6 +72,37 @@ def default(self, obj):
7172
return super().default(obj)
7273

7374

75+
class PrimaryKeyMap:
76+
"""
77+
A map between a primary key in one primary key sequence (like a database) and another (like the
78+
ordinals in a backup JSON file). As models are moved between databases, their absolute contents
79+
may stay the same, but their relative identifiers may change. This class allows us to track how
80+
those relations have been transformed, thereby preserving the model references between one
81+
another.
82+
83+
Note that the map assumes that the primary keys in question are integers. In particular, natural
84+
keys are not supported!
85+
"""
86+
87+
mapping: dict[str, dict[int, int]]
88+
89+
def __init__(self):
90+
self.mapping = defaultdict(dict)
91+
92+
def get(self, model: str, old: int) -> int | None:
93+
"""Get the new, post-mapping primary key from an old primary key."""
94+
95+
pk_map = self.mapping.get(model)
96+
if pk_map is None:
97+
return None
98+
return pk_map.get(old)
99+
100+
def insert(self, model: str, old: int, new: int):
101+
"""Create a new OLD_PK -> NEW_PK mapping for the given model."""
102+
103+
self.mapping[model][old] = new
104+
105+
74106
def dependencies() -> dict[str, ModelRelations]:
75107
"""Produce a dictionary mapping model type definitions to a `ModelDeps` describing their dependencies."""
76108

src/sentry/backup/findings.py

+7
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,13 @@ class ComparatorFindingKind(IntEnum):
6262
# `None`.
6363
HashObfuscatingComparatorExistenceCheck = auto()
6464

65+
# Foreign key field comparison failed.
66+
ForeignKeyComparator = auto()
67+
68+
# Failed to compare foreign key fields because one of the fields being compared was not present
69+
# or `None`.
70+
ForeignKeyComparatorExistenceCheck = auto()
71+
6572
# Failed to compare an ignored field.
6673
IgnoredComparator = auto()
6774

src/sentry/backup/validate.py

+25-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
from difflib import unified_diff
66
from typing import Dict, Tuple
77

8-
from sentry.backup.comparators import DEFAULT_COMPARATORS, ComparatorMap
8+
from sentry.backup.comparators import DEFAULT_COMPARATORS, ComparatorMap, ForeignKeyComparator
9+
from sentry.backup.dependencies import PrimaryKeyMap
910
from sentry.backup.findings import (
1011
ComparatorFinding,
1112
ComparatorFindingKind,
@@ -115,10 +116,30 @@ def json_lines(obj: JSONData) -> list[str]:
115116
if not findings.empty():
116117
return findings
117118

119+
# As models are compared, we will add their pk mapping to separate `PrimaryKeyMaps`. Then, when
120+
# a foreign keyed field into the specific model is encountered, we will be able to ensure that
121+
# both sides reference the correct model.
122+
#
123+
# For instance, we encounter the first `sentry.User` model on both the left and right side, with
124+
# the left side having a `pk` of 123, and the right having `456`. This means that we want to map
125+
# `[sentry.User][123] = 1` on the left and `[sentry.User][456] = 1`. Later, when we encounter
126+
# foreign keys to a user model with `pk` 123 on the left and 456 on the right, we'll be able to
127+
# dereference the map to ensure that those both point to the same model on their respective
128+
# sides.
129+
left_pk_map = PrimaryKeyMap()
130+
right_pk_map = PrimaryKeyMap()
131+
118132
# We only perform custom comparisons and JSON diffs on non-duplicate entries that exist in both
119133
# outputs.
120134
for id, right in right_models.items():
135+
if id.ordinal is None:
136+
raise RuntimeError("all InstanceIDs used for comparisons must have their ordinal set")
137+
138+
# Save the pk -> ordinal mapping on both sides, so that we can decode foreign keys into this
139+
# model that we encounter later.
121140
left = left_models[id]
141+
left_pk_map.insert(id.model, left["pk"], id.ordinal)
142+
right_pk_map.insert(id.model, right["pk"], id.ordinal)
122143

123144
# Try comparators applicable for this specific model.
124145
if id.model in comparators:
@@ -134,6 +155,9 @@ def json_lines(obj: JSONData) -> list[str]:
134155
findings.extend(ex)
135156
continue
136157

158+
if isinstance(cmp, ForeignKeyComparator):
159+
cmp.set_primary_key_maps(left_pk_map, right_pk_map)
160+
137161
res = cmp.compare(id, left, right)
138162
if res:
139163
findings.extend(res)

0 commit comments

Comments
 (0)