From 258c205e99a12e26da2b540ef3288731438d67c8 Mon Sep 17 00:00:00 2001 From: Alex Zaslavsky Date: Wed, 9 Aug 2023 11:30:52 -0700 Subject: [PATCH] feat(backup): Add ForeignKeyComparator Given two instances of a model that are being validated for equality, we want the foreign keys to be correct relatively (ie, they point to the same model in the respective models' JSON blobs), not absolutely (ie, they are literally the same integer). By creating maps that store the relations between pks and ordinals, we can easily check that the models point to match their respective ordinals regardless of the actual pk numbers. Issue: getsentry/team-ospo#171 --- src/sentry/backup/comparators.py | 79 +++++++++ src/sentry/backup/dependencies.py | 32 ++++ src/sentry/backup/findings.py | 7 + src/sentry/backup/validate.py | 26 ++- tests/sentry/backup/test_comparators.py | 209 ++++++++++++++++++++++++ tests/sentry/backup/test_correctness.py | 73 +++++++++ 6 files changed, 425 insertions(+), 1 deletion(-) diff --git a/src/sentry/backup/comparators.py b/src/sentry/backup/comparators.py index a305904bd80ca3..d3e00668bfc432 100644 --- a/src/sentry/backup/comparators.py +++ b/src/sentry/backup/comparators.py @@ -7,6 +7,7 @@ from dateutil import parser from django.db import models +from sentry.backup.dependencies import PrimaryKeyMap, dependencies from sentry.backup.findings import ComparatorFinding, ComparatorFindingKind, InstanceID from sentry.backup.helpers import Side, get_exportable_final_derivations_of from sentry.db.models import BaseModel @@ -200,6 +201,73 @@ def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[Compa return findings +class ForeignKeyComparator(JSONScrubbingComparator): + """Ensures that foreign keys match in a relative (they refer to the same other model in their + respective JSON blobs) rather than absolute (they have literally the same integer value) + sense.""" + + left_pk_map: PrimaryKeyMap | None = None + right_pk_map: PrimaryKeyMap | None = None + + def __init__(self, foreign_fields: dict[str, models.base.ModelBase]): + super().__init__(*(foreign_fields.keys())) + self.foreign_fields = foreign_fields + + def set_primary_key_maps(self, left_pk_map: PrimaryKeyMap, right_pk_map: PrimaryKeyMap): + """Call this function before running the comparator, to ensure that it has access to the latest mapping information for both sides of the comparison.""" + + self.left_pk_map = left_pk_map + self.right_pk_map = right_pk_map + + def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[ComparatorFinding]: + findings = [] + fields = sorted(self.fields) + for f in fields: + field_model_name = "sentry." + self.foreign_fields[f].__name__.lower() + if left["fields"].get(f) is None and right["fields"].get(f) is None: + continue + + if self.left_pk_map is None or self.right_pk_map is None: + raise RuntimeError("must call `set_primary_key_maps` before comparing") + + left_fk_as_ordinal = self.left_pk_map.get(field_model_name, left["fields"][f]) + right_fk_as_ordinal = self.right_pk_map.get(field_model_name, right["fields"][f]) + if left_fk_as_ordinal is None or right_fk_as_ordinal is None: + if left_fk_as_ordinal is None: + findings.append( + ComparatorFinding( + kind=self.get_kind(), + on=on, + left_pk=left["pk"], + right_pk=right["pk"], + reason=f"""the left foreign key ordinal for `{f}` model with pk `{left["fields"][f]}` could not be found""", + ) + ) + if right_fk_as_ordinal is None: + findings.append( + ComparatorFinding( + kind=self.get_kind(), + on=on, + left_pk=left["pk"], + right_pk=right["pk"], + reason=f"""the right foreign key ordinal for `{f}` model with pk `{right["fields"][f]}` could not be found""", + ) + ) + continue + + if left_fk_as_ordinal != right_fk_as_ordinal: + findings.append( + ComparatorFinding( + kind=self.get_kind(), + on=on, + left_pk=left["pk"], + right_pk=right["pk"], + reason=f"""the left foreign key ordinal ({left_fk_as_ordinal}) for `{f}` was not equal to the right foreign key ordinal ({right_fk_as_ordinal})""", + ) + ) + return findings + + class ObfuscatingComparator(JSONScrubbingComparator, ABC): """Comparator that compares private values, but then safely truncates them to ensure that they do not leak out in logs, stack traces, etc.""" @@ -338,6 +406,16 @@ def auto_assign_email_obfuscating_comparators(comps: ComparatorMap) -> None: comps[name].append(EmailObfuscatingComparator(*assign)) +def auto_assign_foreign_key_comparators(comps: ComparatorMap) -> None: + """Automatically assigns the ForeignKeyComparator or to all appropriate model fields (see + dependencies.py for more on what "appropriate" means in this context).""" + + for model_name, rels in dependencies().items(): + comps[model_name.lower()].append( + ForeignKeyComparator({k: v.model for k, v in rels.foreign_keys.items()}) + ) + + ComparatorList = List[JSONScrubbingComparator] ComparatorMap = Dict[str, ComparatorList] @@ -380,6 +458,7 @@ def build_default_comparators(): # to the `DEFAULT_COMPARATORS` map. auto_assign_datetime_equality_comparators(comparators) auto_assign_email_obfuscating_comparators(comparators) + auto_assign_foreign_key_comparators(comparators) return comparators diff --git a/src/sentry/backup/dependencies.py b/src/sentry/backup/dependencies.py index 7421f4af8bb5fc..fe8e312ded627d 100644 --- a/src/sentry/backup/dependencies.py +++ b/src/sentry/backup/dependencies.py @@ -1,5 +1,6 @@ from __future__ import annotations +from collections import defaultdict from enum import Enum, auto, unique from typing import NamedTuple @@ -71,6 +72,37 @@ def default(self, obj): return super().default(obj) +class PrimaryKeyMap: + """ + A map between a primary key in one primary key sequence (like a database) and another (like the + ordinals in a backup JSON file). As models are moved between databases, their absolute contents + may stay the same, but their relative identifiers may change. This class allows us to track how + those relations have been transformed, thereby preserving the model references between one + another. + + Note that the map assumes that the primary keys in question are integers. In particular, natural + keys are not supported! + """ + + mapping: dict[str, dict[int, int]] + + def __init__(self): + self.mapping = defaultdict(dict) + + def get(self, model: str, old: int) -> int | None: + """Get the new, post-mapping primary key from an old primary key.""" + + pk_map = self.mapping.get(model) + if pk_map is None: + return None + return pk_map.get(old) + + def insert(self, model: str, old: int, new: int): + """Create a new OLD_PK -> NEW_PK mapping for the given model.""" + + self.mapping[model][old] = new + + def dependencies() -> dict[str, ModelRelations]: """Produce a dictionary mapping model type definitions to a `ModelDeps` describing their dependencies.""" diff --git a/src/sentry/backup/findings.py b/src/sentry/backup/findings.py index 0c818d2d7894ac..502453c4aaaa0a 100644 --- a/src/sentry/backup/findings.py +++ b/src/sentry/backup/findings.py @@ -62,6 +62,13 @@ class ComparatorFindingKind(IntEnum): # `None`. HashObfuscatingComparatorExistenceCheck = auto() + # Foreign key field comparison failed. + ForeignKeyComparator = auto() + + # Failed to compare foreign key fields because one of the fields being compared was not present + # or `None`. + ForeignKeyComparatorExistenceCheck = auto() + # Failed to compare an ignored field. IgnoredComparator = auto() diff --git a/src/sentry/backup/validate.py b/src/sentry/backup/validate.py index 93c1d86922fffd..0c202a7c9b0edc 100644 --- a/src/sentry/backup/validate.py +++ b/src/sentry/backup/validate.py @@ -5,7 +5,8 @@ from difflib import unified_diff from typing import Dict, Tuple -from sentry.backup.comparators import DEFAULT_COMPARATORS, ComparatorMap +from sentry.backup.comparators import DEFAULT_COMPARATORS, ComparatorMap, ForeignKeyComparator +from sentry.backup.dependencies import PrimaryKeyMap from sentry.backup.findings import ( ComparatorFinding, ComparatorFindingKind, @@ -115,10 +116,30 @@ def json_lines(obj: JSONData) -> list[str]: if not findings.empty(): return findings + # As models are compared, we will add their pk mapping to separate `PrimaryKeyMaps`. Then, when + # a foreign keyed field into the specific model is encountered, we will be able to ensure that + # both sides reference the correct model. + # + # For instance, we encounter the first `sentry.User` model on both the left and right side, with + # the left side having a `pk` of 123, and the right having `456`. This means that we want to map + # `[sentry.User][123] = 1` on the left and `[sentry.User][456] = 1`. Later, when we encounter + # foreign keys to a user model with `pk` 123 on the left and 456 on the right, we'll be able to + # dereference the map to ensure that those both point to the same model on their respective + # sides. + left_pk_map = PrimaryKeyMap() + right_pk_map = PrimaryKeyMap() + # We only perform custom comparisons and JSON diffs on non-duplicate entries that exist in both # outputs. for id, right in right_models.items(): + if id.ordinal is None: + raise RuntimeError("all InstanceIDs used for comparisons must have their ordinal set") + + # Save the pk -> ordinal mapping on both sides, so that we can decode foreign keys into this + # model that we encounter later. left = left_models[id] + left_pk_map.insert(id.model, left["pk"], id.ordinal) + right_pk_map.insert(id.model, right["pk"], id.ordinal) # Try comparators applicable for this specific model. if id.model in comparators: @@ -134,6 +155,9 @@ def json_lines(obj: JSONData) -> list[str]: findings.extend(ex) continue + if isinstance(cmp, ForeignKeyComparator): + cmp.set_primary_key_maps(left_pk_map, right_pk_map) + res = cmp.compare(id, left, right) if res: findings.extend(res) diff --git a/tests/sentry/backup/test_comparators.py b/tests/sentry/backup/test_comparators.py index 471c4b503939a8..ff84fed4282a3a 100644 --- a/tests/sentry/backup/test_comparators.py +++ b/tests/sentry/backup/test_comparators.py @@ -1,13 +1,17 @@ from copy import deepcopy +import pytest + from sentry.backup.comparators import ( DatetimeEqualityComparator, DateUpdatedComparator, EmailObfuscatingComparator, + ForeignKeyComparator, HashObfuscatingComparator, IgnoredComparator, ScrubbedData, ) +from sentry.backup.dependencies import PrimaryKeyMap, dependencies from sentry.backup.findings import ComparatorFindingKind, InstanceID from sentry.utils.json import JSONData @@ -396,6 +400,211 @@ def test_good_hash_obfuscating_comparator_scrubbed(): ] +DEPENDENCIES = dependencies() + + +def test_good_foreign_key_comparator(): + cmp = ForeignKeyComparator( + {k: v.model for k, v in DEPENDENCIES["sentry.UserEmail"].foreign_keys.items()} + ) + id = InstanceID("sentry.useremail", 0) + left_pk_map = PrimaryKeyMap() + left_pk_map.insert("sentry.user", 12, 1) + right_pk_map = PrimaryKeyMap() + right_pk_map.insert("sentry.user", 34, 1) + left: JSONData = { + "model": "test", + "ordinal": 1, + "pk": 1, + "fields": { + "user": 12, + "email": "testing@example.com", + "validation_hash": "ABC123", + "date_hash_added": "2023-06-23T00:00:00.000Z", + "is_verified": True, + }, + } + right: JSONData = { + "model": "test", + "ordinal": 1, + "pk": 1, + "fields": { + "user": 34, + "email": "testing@example.com", + "validation_hash": "ABC123", + "date_hash_added": "2023-06-23T00:00:00.000Z", + "is_verified": True, + }, + } + cmp.set_primary_key_maps(left_pk_map, right_pk_map) + + assert not cmp.compare(id, left, right) + + +def test_good_foreign_key_comparator_scrubbed(): + cmp = ForeignKeyComparator( + {k: v.model for k, v in DEPENDENCIES["sentry.UserEmail"].foreign_keys.items()} + ) + left: JSONData = { + "model": "test", + "ordinal": 1, + "pk": 1, + "fields": { + "user": 12, + "email": "testing@example.com", + "validation_hash": "ABC123", + "date_hash_added": "2023-06-23T00:00:00.000Z", + "is_verified": True, + }, + } + right = deepcopy(left) + cmp.scrub(left, right) + assert left["scrubbed"] + assert left["scrubbed"]["ForeignKeyComparator::user"] is ScrubbedData() + + assert right["scrubbed"] + assert right["scrubbed"]["ForeignKeyComparator::user"] is ScrubbedData() + + +def test_bad_foreign_key_comparator_set_primary_key_maps_not_called(): + cmp = ForeignKeyComparator( + {k: v.model for k, v in DEPENDENCIES["sentry.UserEmail"].foreign_keys.items()} + ) + id = InstanceID("sentry.useremail", 0) + left_pk_map = PrimaryKeyMap() + left_pk_map.insert("sentry.user", 12, 1) + right_pk_map = PrimaryKeyMap() + right_pk_map.insert("sentry.user", 34, 1) + left: JSONData = { + "model": "test", + "ordinal": 1, + "pk": 1, + "fields": { + "user": 12, + "email": "testing@example.com", + "validation_hash": "ABC123", + "date_hash_added": "2023-06-23T00:00:00.000Z", + "is_verified": True, + }, + } + right: JSONData = { + "model": "test", + "ordinal": 1, + "pk": 1, + "fields": { + "user": 34, + "email": "testing@example.com", + "validation_hash": "ABC123", + "date_hash_added": "2023-06-23T00:00:00.000Z", + "is_verified": True, + }, + } + + with pytest.raises(RuntimeError): + cmp.compare(id, left, right) + + +def test_bad_foreign_key_comparator_unequal_mapping(): + cmp = ForeignKeyComparator( + {k: v.model for k, v in DEPENDENCIES["sentry.UserEmail"].foreign_keys.items()} + ) + id = InstanceID("sentry.useremail", 0) + left_pk_map = PrimaryKeyMap() + left_pk_map.insert("sentry.user", 12, 1) + right_pk_map = PrimaryKeyMap() + right_pk_map.insert("sentry.user", 34, 2) + left: JSONData = { + "model": "test", + "ordinal": 1, + "pk": 1, + "fields": { + "user": 12, + "email": "testing@example.com", + "validation_hash": "ABC123", + "date_hash_added": "2023-06-23T00:00:00.000Z", + "is_verified": True, + }, + } + right: JSONData = { + "model": "test", + "ordinal": 1, + "pk": 1, + "fields": { + "user": 34, + "email": "testing@example.com", + "validation_hash": "ABC123", + "date_hash_added": "2023-06-23T00:00:00.000Z", + "is_verified": True, + }, + } + cmp.set_primary_key_maps(left_pk_map, right_pk_map) + + res = cmp.compare(id, left, right) + assert res + assert res[0] + assert res[0].kind == ComparatorFindingKind.ForeignKeyComparator + assert res[0].on == id + assert res[0].left_pk == 1 + assert res[0].right_pk == 1 + assert "`user`" in res[0].reason + assert "left foreign key ordinal (1)" in res[0].reason + assert "right foreign key ordinal (2)" in res[0].reason + + +def test_bad_foreign_key_comparator_missing_mapping(): + cmp = ForeignKeyComparator( + {k: v.model for k, v in DEPENDENCIES["sentry.UserEmail"].foreign_keys.items()} + ) + id = InstanceID("sentry.useremail", 0) + left_pk_map = PrimaryKeyMap() + right_pk_map = PrimaryKeyMap() + left: JSONData = { + "model": "test", + "ordinal": 1, + "pk": 1, + "fields": { + "user": 12, + "email": "testing@example.com", + "validation_hash": "ABC123", + "date_hash_added": "2023-06-23T00:00:00.000Z", + "is_verified": True, + }, + } + right: JSONData = { + "model": "test", + "ordinal": 1, + "pk": 1, + "fields": { + "user": 34, + "email": "testing@example.com", + "validation_hash": "ABC123", + "date_hash_added": "2023-06-23T00:00:00.000Z", + "is_verified": True, + }, + } + cmp.set_primary_key_maps(left_pk_map, right_pk_map) + + res = cmp.compare(id, left, right) + assert len(res) == 2 + assert res[0] + assert res[0].kind == ComparatorFindingKind.ForeignKeyComparator + assert res[0].on == id + assert res[0].left_pk == 1 + assert res[0].right_pk == 1 + assert "`user`" in res[0].reason + assert "left foreign key ordinal" in res[0].reason + assert "pk `12`" in res[0].reason + + assert res[1] + assert res[1].kind == ComparatorFindingKind.ForeignKeyComparator + assert res[1].on == id + assert res[1].left_pk == 1 + assert res[1].right_pk == 1 + assert "`user`" in res[1].reason + assert "right foreign key ordinal" in res[1].reason + assert "pk `34`" in res[1].reason + + def test_good_ignored_comparator(): cmp = IgnoredComparator("ignored_field") id = InstanceID("test", 0) diff --git a/tests/sentry/backup/test_correctness.py b/tests/sentry/backup/test_correctness.py index f0ce07e9418924..44973b53898b71 100644 --- a/tests/sentry/backup/test_correctness.py +++ b/tests/sentry/backup/test_correctness.py @@ -430,6 +430,79 @@ def test_auto_assign_date_updated_comparator(tmp_path): assert not findings +def test_auto_assign_foreign_key_comparator(tmp_path): + left = [ + json.loads( + """ + { + "model": "sentry.user", + "pk": 12, + "fields": { + "password": "abc123", + "last_login": null, + "username": "testing@example.com", + "name": "", + "email": "testing@example.com" + } + } + """ + ) + ] + right = [ + json.loads( + """ + { + "model": "sentry.user", + "pk": 34, + "fields": { + "password": "abc123", + "last_login": null, + "username": "testing@example.com", + "name": "", + "email": "testing@example.com" + } + } + """ + ) + ] + + userrole_left = json.loads( + """ + { + "model": "sentry.useremail", + "pk": 56, + "fields": { + "user": 12, + "email": "testing@example.com", + "validation_hash": "ABC123", + "date_hash_added": "2023-06-23T00:00:00.000Z", + "is_verified": true + } + } + """ + ) + userrole_right = json.loads( + """ + { + "model": "sentry.useremail", + "pk": 78, + "fields": { + "user": 34, + "email": "testing@example.com", + "validation_hash": "ABC123", + "date_hash_added": "2023-06-23T00:00:00.000Z", + "is_verified": true + } + } + """ + ) + left.append(userrole_left) + right.append(userrole_right) + out = validate(left, right) + findings = out.findings + assert not findings + + def test_auto_assign_ignored_comparator(tmp_path): left = [ json.loads(