|
| 1 | +---------------------------- MODULE wal_replication --------------------------- |
| 2 | +\* A formal specification of write-ahead log (WAL) replication algorithm. |
| 3 | +\* |
| 4 | +\* The algorithm assumes the presence of a write-ahead log (WAL), like the one |
| 5 | +\* used in SQLite, where transactions append modified pages to a WAL. Each |
| 6 | +\* modified page within the WAL is referred to as a frame and is assigned a |
| 7 | +\* monotonically increasing frame index. |
| 8 | +\* |
| 9 | +\* A write is not considered durable when it is on the local server log, which |
| 10 | +\* represents fsync'd SQLite WAL, because we assume volumes can disappear in |
| 11 | +\* disaster recovery scenarios. A write is durable when it appears in the |
| 12 | +\* durable log, which represents AWS S3 or similar service. We write |
| 13 | +\* asynchronously to the durable storage, which is why on recovery we will lose |
| 14 | +\* non-durable writes and trim the local logs. |
| 15 | + |
| 16 | +EXTENDS Naturals, FiniteSetsExt, Sequences, SequencesExt, Bags, Functions, TLC |
| 17 | + |
| 18 | +VARIABLE |
| 19 | + txID, |
| 20 | + commitIndex, |
| 21 | + commitServer, |
| 22 | + messages, |
| 23 | + logs, |
| 24 | + durableLog |
| 25 | + |
| 26 | +---------------------------------------------------------------------------------- |
| 27 | +\* Cluster topology. |
| 28 | + |
| 29 | +CONSTANT |
| 30 | + Servers |
| 31 | + |
| 32 | +CONSTANTS |
| 33 | + Node1, |
| 34 | + Node2, |
| 35 | + Node3 |
| 36 | + |
| 37 | +IsPrimary(d) == |
| 38 | + d = Node1 |
| 39 | + |
| 40 | +Primary == Node1 |
| 41 | + |
| 42 | +---------------------------------------------------------------------------------- |
| 43 | +\* Message passing. We assume an ordered networking with no duplicates. |
| 44 | + |
| 45 | +InitMessageVar == |
| 46 | + messages = [ s \in Servers |-> <<>>] |
| 47 | + |
| 48 | +WithMessage(m, msgs) == |
| 49 | + IF \E i \in 1..Len(msgs[m.dest]) : msgs[m.dest][i] = m THEN |
| 50 | + msgs |
| 51 | + ELSE |
| 52 | + [ msgs EXCEPT ![m.dest] = Append(@, m) ] |
| 53 | + |
| 54 | +WithoutMessage(m, msgs) == |
| 55 | + IF \E i \in 1..Len(msgs[m.dest]) : msgs[m.dest][i] = m THEN |
| 56 | + [ msgs EXCEPT ![m.dest] = RemoveAt(@, SelectInSeq(@, LAMBDA e: e = m)) ] |
| 57 | + ELSE |
| 58 | + msgs |
| 59 | + |
| 60 | +Messages == |
| 61 | + UNION { Range(messages[s]) : s \in Servers } |
| 62 | + |
| 63 | +MessagesTo(dest, source) == |
| 64 | + IF \E i \in 1..Len(messages[dest]) : messages[dest][i].source = source THEN |
| 65 | + {messages[dest][SelectInSeq(messages[dest], LAMBDA e: e.source = source)]} |
| 66 | + ELSE |
| 67 | + {} |
| 68 | + |
| 69 | +Send(m) == |
| 70 | + /\ messages' = WithMessage(m, messages) |
| 71 | + |
| 72 | +Discard(m) == |
| 73 | + messages' = WithoutMessage(m, messages) |
| 74 | + |
| 75 | +---------------------------------------------------------------------------------- |
| 76 | +\* Protocol |
| 77 | + |
| 78 | +\* Message types: |
| 79 | +CONSTANTS |
| 80 | + GetFramesMsg, |
| 81 | + ExecuteMsg |
| 82 | + |
| 83 | +RecoverLog(s) == |
| 84 | + /\ logs' = [logs EXCEPT ![s] = durableLog] |
| 85 | + |
| 86 | +Recover == |
| 87 | + /\ \A s \in Servers: RecoverLog(s) |
| 88 | + /\ commitIndex = IF Len(durableLog) > 0 THEN Max(ToSet(durableLog)) ELSE 0 |
| 89 | + /\ UNCHANGED(<<txID, commitIndex, commitServer, messages, durableLog>>) |
| 90 | + |
| 91 | +SyncLog(s) == |
| 92 | + /\ logs' = [logs EXCEPT ![s] = logs[Primary]] |
| 93 | + |
| 94 | +SyncDurable(l) == |
| 95 | + /\ durableLog' = durableLog \o l |
| 96 | + |
| 97 | +AppendToLog(s, i) == |
| 98 | + /\ logs' = [logs EXCEPT ![s] = Append(logs[s], i)] |
| 99 | + |
| 100 | +HandleExecuteMsg(m) == |
| 101 | + /\ IF IsPrimary(m.dest) THEN |
| 102 | + \* Append the write to the local WAL. |
| 103 | + /\ AppendToLog(m.dest, commitIndex + 1) |
| 104 | + ELSE |
| 105 | + \* Append the write to the WAL on the primary... |
| 106 | + /\ AppendToLog(Primary, commitIndex + 1) |
| 107 | + \* ...but also sync local WAL for read your writes. |
| 108 | + /\ SyncLog(m.dest) |
| 109 | + /\ SyncDurable(logs'[Primary]) |
| 110 | + /\ commitIndex' = commitIndex + 1 |
| 111 | + /\ commitServer' = m.dest |
| 112 | + /\ Discard(m) |
| 113 | + /\ UNCHANGED(<<txID>>) |
| 114 | + |
| 115 | +RcvExecuteMsg(i, j) == |
| 116 | + \E m \in MessagesTo(i, j) : |
| 117 | + /\ m.type = ExecuteMsg |
| 118 | + /\ HandleExecuteMsg(m) |
| 119 | + |
| 120 | +HandleGetFramesMsg(m) == |
| 121 | + /\ IsPrimary(m.dest) |
| 122 | + /\ SyncLog(m.source) |
| 123 | + /\ Discard(m) |
| 124 | + /\ UNCHANGED(<<txID, commitServer, commitIndex, durableLog>>) |
| 125 | + |
| 126 | +RcvGetFramesMsg(i, j) == |
| 127 | + \E m \in MessagesTo(i, j) : |
| 128 | + /\ m.type = GetFramesMsg |
| 129 | + /\ HandleGetFramesMsg(m) |
| 130 | + |
| 131 | +Receive(i, j) == |
| 132 | + \/ RcvGetFramesMsg(i, j) |
| 133 | + \/ RcvExecuteMsg(i, j) |
| 134 | + |
| 135 | +SendGetFrames(s) == |
| 136 | + LET |
| 137 | + msg == [ |
| 138 | + type |-> GetFramesMsg, |
| 139 | + dest |-> Primary, |
| 140 | + source |-> s |
| 141 | + ] |
| 142 | + IN |
| 143 | + /\ Send(msg) |
| 144 | + /\ UNCHANGED(<<txID, commitIndex, commitServer, logs, durableLog>>) |
| 145 | + |
| 146 | +SendExecute(i, j) == |
| 147 | + LET |
| 148 | + msg == [ |
| 149 | + type |-> ExecuteMsg, |
| 150 | + txId |-> txID, |
| 151 | + dest |-> i, |
| 152 | + source |-> j |
| 153 | + ] |
| 154 | + IN |
| 155 | + /\ txID' = txID + 1 |
| 156 | + /\ Send(msg) |
| 157 | + /\ UNCHANGED(<<commitIndex, commitServer, logs, durableLog>>) |
| 158 | + |
| 159 | +Next == |
| 160 | + \/ Recover |
| 161 | + \/ \E i, j \in Servers: SendExecute(i, j) |
| 162 | + \/ \E s \in Servers: SendGetFrames(s) |
| 163 | + \/ \E i, j \in Servers: Receive(i, j) |
| 164 | + |
| 165 | +Init == |
| 166 | + /\ txID = 0 |
| 167 | + /\ commitIndex = 0 |
| 168 | + /\ commitServer = Primary |
| 169 | + /\ InitMessageVar |
| 170 | + /\ logs = [s \in Servers |-> <<>>] |
| 171 | + /\ durableLog = <<>> |
| 172 | + |
| 173 | +---------------------------------------------------------------------------------- |
| 174 | +\* Invariants |
| 175 | + |
| 176 | +ReadYourWritesInv == |
| 177 | + commitIndex = 0 \/ Contains(logs[commitServer], commitIndex) |
| 178 | + |
| 179 | +LogsAreContinuousInv == |
| 180 | + \A s \in Servers: Len(logs[s]) = 0 \/ \A i \in 1..Max(ToSet(logs[s])) : Contains(logs[s], i) |
| 181 | + |
| 182 | +NoServerIsAheadOfPrimaryInv == |
| 183 | + \A s \in Servers: Len(logs[s]) <= Len(logs[Primary]) |
| 184 | + |
| 185 | +NoDurableFramesLostInv == |
| 186 | + \A i \in 1..commitIndex : i \in ToSet(durableLog) |
| 187 | + |
| 188 | +---------------------------------------------------------------------------------- |
| 189 | +\* Temporal properties |
| 190 | + |
| 191 | +WriteLivenessProp == <>(commitIndex > 0) |
| 192 | + |
| 193 | +ReplicationProp == |
| 194 | + [] (commitIndex > 0 => \A s \in Servers : <> (Len(logs[s]) > 0)) |
| 195 | + |
| 196 | +==== |
0 commit comments