|
| 1 | +---------------------------- MODULE wal_replication --------------------------- |
| 2 | +\* A formal specification of write-ahead log (WAL) replication algorithm. |
| 3 | +\* |
| 4 | +\* The algorithm assumes the presence of a write-ahead log (WAL), like the one |
| 5 | +\* used in SQLite, where transactions append modified pages to a WAL. Each |
| 6 | +\* modified page within the WAL is referred to as a frame and is assigned a |
| 7 | +\* monotonically increasing frame index. |
| 8 | +\* |
| 9 | +\* A write is not guaranateed durability until it is backed up. Therefore, |
| 10 | +\* when recovering, primary and replicas revert back to the latest durable |
| 11 | +\* index. |
| 12 | + |
| 13 | +EXTENDS Integers |
| 14 | + |
| 15 | +VARIABLE |
| 16 | + \* Index to the latest ephemeral frame. |
| 17 | + latestIndex, |
| 18 | + \* Index to the latest durable frame. |
| 19 | + durableIndex, |
| 20 | + \* Durable frames. |
| 21 | + durableFrames, |
| 22 | + \* The primary WAL. |
| 23 | + primaryWAL, |
| 24 | + \* The replica WAL. |
| 25 | + replicaWAL |
| 26 | + |
| 27 | +Maximum(s) == |
| 28 | + IF s = {} THEN |
| 29 | + 0 |
| 30 | + ELSE |
| 31 | + CHOOSE x \in s : \A y \in s : x >= y |
| 32 | + |
| 33 | +\* Recovery reverts to the durable index and all ephemeral writes are lost. |
| 34 | +Recover == |
| 35 | + /\ latestIndex' = durableIndex |
| 36 | + /\ primaryWAL' = {} |
| 37 | + /\ replicaWAL' = {} |
| 38 | + /\ UNCHANGED(<<durableFrames, durableIndex>>) |
| 39 | + |
| 40 | +\* Checkpoint the primary database and store frames to durable storage. |
| 41 | +Checkpoint == |
| 42 | + /\ durableFrames' = durableFrames \union primaryWAL |
| 43 | + /\ durableIndex' = Maximum(durableFrames') |
| 44 | + /\ primaryWAL' = {} |
| 45 | + /\ UNCHANGED(<<latestIndex, replicaWAL>>) |
| 46 | + |
| 47 | +\* Replication pulls frames from the primary WAL. All durable frames |
| 48 | +\* are also visible to the replica. |
| 49 | +Replicate == |
| 50 | + /\ replicaWAL' = primaryWAL |
| 51 | + /\ UNCHANGED(<<latestIndex, durableIndex, durableFrames>>) |
| 52 | + |
| 53 | +\* Append a frame to the primary WAL. |
| 54 | +AppendToPrimaryWAL(index) == |
| 55 | + /\ primaryWAL' = primaryWAL \union {index} |
| 56 | + |
| 57 | +\* Update the primary WAL. |
| 58 | +Update == |
| 59 | + /\ AppendToPrimaryWAL(latestIndex + 1) |
| 60 | + /\ latestIndex' = latestIndex + 1 |
| 61 | + /\ UNCHANGED(<<durableIndex, durableFrames, replicaWAL>>) |
| 62 | + |
| 63 | +Next == |
| 64 | + \/ Update |
| 65 | + \/ Checkpoint |
| 66 | + \/ Replicate |
| 67 | + \/ Recover |
| 68 | + |
| 69 | +Init == |
| 70 | + /\ latestIndex = 0 |
| 71 | + /\ durableIndex = 0 |
| 72 | + /\ durableFrames = {} |
| 73 | + /\ primaryWAL = {} |
| 74 | + /\ replicaWAL = {} |
| 75 | + |
| 76 | +\* Invariants |
| 77 | + |
| 78 | +\* INVARIANT: No durable frames are lost. |
| 79 | +NoDurableFramesLost == |
| 80 | + \A i \in 1..durableIndex : i \in durableFrames |
| 81 | + |
| 82 | +\* INVARIANT: Replica is not ahead of primary. |
| 83 | +ReplicaIsNotAhead == |
| 84 | + Maximum(replicaWAL) <= latestIndex |
| 85 | + |
| 86 | +\* TODO INVARIANT: Read your writes on replica |
| 87 | + |
| 88 | +==== |
0 commit comments