Skip to content

feat(backup): Add ForeignKeyComparator #54558

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions src/sentry/backup/comparators.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from dateutil import parser
from django.db import models

from sentry.backup.dependencies import PrimaryKeyMap, dependencies
from sentry.backup.findings import ComparatorFinding, ComparatorFindingKind, InstanceID
from sentry.backup.helpers import Side, get_exportable_final_derivations_of
from sentry.db.models import BaseModel
Expand Down Expand Up @@ -200,6 +201,73 @@ def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[Compa
return findings


class ForeignKeyComparator(JSONScrubbingComparator):
"""Ensures that foreign keys match in a relative (they refer to the same other model in their
respective JSON blobs) rather than absolute (they have literally the same integer value)
sense."""

left_pk_map: PrimaryKeyMap | None = None
right_pk_map: PrimaryKeyMap | None = None

def __init__(self, foreign_fields: dict[str, models.base.ModelBase]):
super().__init__(*(foreign_fields.keys()))
self.foreign_fields = foreign_fields

def set_primary_key_maps(self, left_pk_map: PrimaryKeyMap, right_pk_map: PrimaryKeyMap):
"""Call this function before running the comparator, to ensure that it has access to the latest mapping information for both sides of the comparison."""

self.left_pk_map = left_pk_map
self.right_pk_map = right_pk_map

def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[ComparatorFinding]:
findings = []
fields = sorted(self.fields)
for f in fields:
field_model_name = "sentry." + self.foreign_fields[f].__name__.lower()
if left["fields"].get(f) is None and right["fields"].get(f) is None:
continue

if self.left_pk_map is None or self.right_pk_map is None:
raise RuntimeError("must call `set_primary_key_maps` before comparing")

left_fk_as_ordinal = self.left_pk_map.get(field_model_name, left["fields"][f])
right_fk_as_ordinal = self.right_pk_map.get(field_model_name, right["fields"][f])
if left_fk_as_ordinal is None or right_fk_as_ordinal is None:
if left_fk_as_ordinal is None:
findings.append(
ComparatorFinding(
kind=self.get_kind(),
on=on,
left_pk=left["pk"],
right_pk=right["pk"],
reason=f"""the left foreign key ordinal for `{f}` model with pk `{left["fields"][f]}` could not be found""",
)
)
if right_fk_as_ordinal is None:
findings.append(
ComparatorFinding(
kind=self.get_kind(),
on=on,
left_pk=left["pk"],
right_pk=right["pk"],
reason=f"""the right foreign key ordinal for `{f}` model with pk `{right["fields"][f]}` could not be found""",
)
)
continue

if left_fk_as_ordinal != right_fk_as_ordinal:
findings.append(
ComparatorFinding(
kind=self.get_kind(),
on=on,
left_pk=left["pk"],
right_pk=right["pk"],
reason=f"""the left foreign key ordinal ({left_fk_as_ordinal}) for `{f}` was not equal to the right foreign key ordinal ({right_fk_as_ordinal})""",
)
)
return findings


class ObfuscatingComparator(JSONScrubbingComparator, ABC):
"""Comparator that compares private values, but then safely truncates them to ensure that they
do not leak out in logs, stack traces, etc."""
Expand Down Expand Up @@ -338,6 +406,16 @@ def auto_assign_email_obfuscating_comparators(comps: ComparatorMap) -> None:
comps[name].append(EmailObfuscatingComparator(*assign))


def auto_assign_foreign_key_comparators(comps: ComparatorMap) -> None:
"""Automatically assigns the ForeignKeyComparator or to all appropriate model fields (see
dependencies.py for more on what "appropriate" means in this context)."""

for model_name, rels in dependencies().items():
comps[model_name.lower()].append(
ForeignKeyComparator({k: v.model for k, v in rels.foreign_keys.items()})
)


ComparatorList = List[JSONScrubbingComparator]
ComparatorMap = Dict[str, ComparatorList]

Expand Down Expand Up @@ -380,6 +458,7 @@ def build_default_comparators():
# to the `DEFAULT_COMPARATORS` map.
auto_assign_datetime_equality_comparators(comparators)
auto_assign_email_obfuscating_comparators(comparators)
auto_assign_foreign_key_comparators(comparators)

return comparators

Expand Down
32 changes: 32 additions & 0 deletions src/sentry/backup/dependencies.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

from collections import defaultdict
from enum import Enum, auto, unique
from typing import NamedTuple

Expand Down Expand Up @@ -71,6 +72,37 @@ def default(self, obj):
return super().default(obj)


class PrimaryKeyMap:
"""
A map between a primary key in one primary key sequence (like a database) and another (like the
ordinals in a backup JSON file). As models are moved between databases, their absolute contents
may stay the same, but their relative identifiers may change. This class allows us to track how
those relations have been transformed, thereby preserving the model references between one
another.

Note that the map assumes that the primary keys in question are integers. In particular, natural
keys are not supported!
"""

mapping: dict[str, dict[int, int]]

def __init__(self):
self.mapping = defaultdict(dict)

def get(self, model: str, old: int) -> int | None:
"""Get the new, post-mapping primary key from an old primary key."""

pk_map = self.mapping.get(model)
if pk_map is None:
return None
return pk_map.get(old)

def insert(self, model: str, old: int, new: int):
"""Create a new OLD_PK -> NEW_PK mapping for the given model."""

self.mapping[model][old] = new


def dependencies() -> dict[str, ModelRelations]:
"""Produce a dictionary mapping model type definitions to a `ModelDeps` describing their dependencies."""

Expand Down
7 changes: 7 additions & 0 deletions src/sentry/backup/findings.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,13 @@ class ComparatorFindingKind(IntEnum):
# `None`.
HashObfuscatingComparatorExistenceCheck = auto()

# Foreign key field comparison failed.
ForeignKeyComparator = auto()

# Failed to compare foreign key fields because one of the fields being compared was not present
# or `None`.
ForeignKeyComparatorExistenceCheck = auto()

# Failed to compare an ignored field.
IgnoredComparator = auto()

Expand Down
26 changes: 25 additions & 1 deletion src/sentry/backup/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
from difflib import unified_diff
from typing import Dict, Tuple

from sentry.backup.comparators import DEFAULT_COMPARATORS, ComparatorMap
from sentry.backup.comparators import DEFAULT_COMPARATORS, ComparatorMap, ForeignKeyComparator
from sentry.backup.dependencies import PrimaryKeyMap
from sentry.backup.findings import (
ComparatorFinding,
ComparatorFindingKind,
Expand Down Expand Up @@ -115,10 +116,30 @@ def json_lines(obj: JSONData) -> list[str]:
if not findings.empty():
return findings

# As models are compared, we will add their pk mapping to separate `PrimaryKeyMaps`. Then, when
# a foreign keyed field into the specific model is encountered, we will be able to ensure that
# both sides reference the correct model.
#
# For instance, we encounter the first `sentry.User` model on both the left and right side, with
# the left side having a `pk` of 123, and the right having `456`. This means that we want to map
# `[sentry.User][123] = 1` on the left and `[sentry.User][456] = 1`. Later, when we encounter
# foreign keys to a user model with `pk` 123 on the left and 456 on the right, we'll be able to
# dereference the map to ensure that those both point to the same model on their respective
# sides.
left_pk_map = PrimaryKeyMap()
right_pk_map = PrimaryKeyMap()

# We only perform custom comparisons and JSON diffs on non-duplicate entries that exist in both
# outputs.
for id, right in right_models.items():
if id.ordinal is None:
raise RuntimeError("all InstanceIDs used for comparisons must have their ordinal set")

# Save the pk -> ordinal mapping on both sides, so that we can decode foreign keys into this
# model that we encounter later.
left = left_models[id]
left_pk_map.insert(id.model, left["pk"], id.ordinal)
right_pk_map.insert(id.model, right["pk"], id.ordinal)

# Try comparators applicable for this specific model.
if id.model in comparators:
Expand All @@ -134,6 +155,9 @@ def json_lines(obj: JSONData) -> list[str]:
findings.extend(ex)
continue

if isinstance(cmp, ForeignKeyComparator):
cmp.set_primary_key_maps(left_pk_map, right_pk_map)

res = cmp.compare(id, left, right)
if res:
findings.extend(res)
Expand Down
Loading