Drop the ChannelMonitorUpdateStatus::PermanentFailure variant #2562

Merged: 11 commits, Sep 21, 2023
Changes from 1 commit

fuzz/src/chanmon_consistency.rs: 4 changes (2 additions, 2 deletions)
@@ -138,7 +138,7 @@ impl TestChainMonitor {
}
}
impl chain::Watch<TestChannelSigner> for TestChainMonitor {
fn watch_channel(&self, funding_txo: OutPoint, monitor: channelmonitor::ChannelMonitor<TestChannelSigner>) -> chain::ChannelMonitorUpdateStatus {
fn watch_channel(&self, funding_txo: OutPoint, monitor: channelmonitor::ChannelMonitor<TestChannelSigner>) -> Result<chain::ChannelMonitorUpdateStatus, ()> {
let mut ser = VecWriter(Vec::new());
monitor.write(&mut ser).unwrap();
if let Some(_) = self.latest_monitors.lock().unwrap().insert(funding_txo, (monitor.get_latest_update_id(), ser.0)) {
@@ -500,7 +500,7 @@ pub fn do_test<Out: Output>(data: &[u8], underlying_out: Out) {
let res = (<(BlockHash, ChanMan)>::read(&mut Cursor::new(&$ser.0), read_args).expect("Failed to read manager").1, chain_monitor.clone());
for (funding_txo, mon) in monitors.drain() {
assert_eq!(chain_monitor.chain_monitor.watch_channel(funding_txo, mon),
ChannelMonitorUpdateStatus::Completed);
Ok(ChannelMonitorUpdateStatus::Completed));
}
res
} }

lightning-persister/src/fs_store.rs: 6 changes (3 additions, 3 deletions)
@@ -436,7 +436,7 @@ mod tests {
}

// Test that if the store's path to channel data is read-only, writing a
// monitor to it results in the store returning a PermanentFailure.
// monitor to it results in the store returning an InProgress.
Contributor: Should be UnrecoverableError now

// Windows ignores the read-only flag for folders, so this test is Unix-only.
#[cfg(not(target_os = "windows"))]
#[test]
@@ -470,7 +470,7 @@ mod tests {
index: 0
Contributor: 10 lines up is a reference to permanent failure, though I suppose it's not really incorrect per se. Also in the test name.

};
match store.persist_new_channel(test_txo, &added_monitors[0].1, update_id.2) {
ChannelMonitorUpdateStatus::PermanentFailure => {},
ChannelMonitorUpdateStatus::InProgress => {},
_ => panic!("unexpected result from persisting new channel")
}

@@ -507,7 +507,7 @@ mod tests {
index: 0
};
match store.persist_new_channel(test_txo, &added_monitors[0].1, update_id.2) {
ChannelMonitorUpdateStatus::PermanentFailure => {},
ChannelMonitorUpdateStatus::InProgress => {},
_ => panic!("unexpected result from persisting new channel")
}


lightning/src/chain/chainmonitor.rs: 74 changes (16 additions, 58 deletions)
@@ -78,7 +78,7 @@ impl MonitorUpdateId {
/// `Persist` defines behavior for persisting channel monitors: this could mean
/// writing once to disk, and/or uploading to one or more backup services.
///
/// Each method can return three possible values:
/// Each method can return two possible values:
/// * If persistence (including any relevant `fsync()` calls) happens immediately, the
/// implementation should return [`ChannelMonitorUpdateStatus::Completed`], indicating normal
/// channel operation should continue.
@@ -91,10 +91,9 @@ impl MonitorUpdateId {
/// Note that unlike the direct [`chain::Watch`] interface,
/// [`ChainMonitor::channel_monitor_updated`] must be called once for *each* update which occurs.
///
/// * If persistence fails for some reason, implementations should return
/// [`ChannelMonitorUpdateStatus::PermanentFailure`], in which case the channel will likely be
/// closed without broadcasting the latest state. See
/// [`ChannelMonitorUpdateStatus::PermanentFailure`] for more details.
/// If persistence fails for some reason, implementations should still return
/// [`ChannelMonitorUpdateStatus::InProgress`] and attempt to shut down or otherwise resolve the
/// situation ASAP.
///
/// Third-party watchtowers may be built as a part of an implementation of this trait, with the
/// advantage that you can control whether to resume channel operation depending on if an update
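For reference, a minimal sketch (not part of this diff) of how a `Persist` implementation might translate a storage result now that `PermanentFailure` is gone. Only the `ChannelMonitorUpdateStatus` variants touched by this commit are assumed; the helper name and the use of `std::io::Result` are illustrative.

use lightning::chain::ChannelMonitorUpdateStatus;

// Map the outcome of a hypothetical synchronous monitor write onto an update status.
fn status_from_write_result(res: std::io::Result<()>) -> ChannelMonitorUpdateStatus {
    match res {
        // The write (including any fsync) completed before returning: normal channel
        // operation continues.
        Ok(()) => ChannelMonitorUpdateStatus::Completed,
        // The write failed or has not yet completed: report the update as in-flight and
        // retry or shut down out of band, rather than taking the removed force-close path.
        Err(_) => ChannelMonitorUpdateStatus::InProgress,
    }
}

A real implementation would also arrange for ChainMonitor::channel_monitor_updated to be called once the write eventually lands, as the doc above requires for each in-progress update.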
@@ -335,11 +334,6 @@ where C::Target: chain::Filter,
match self.persister.update_persisted_channel(*funding_outpoint, None, monitor, update_id) {
ChannelMonitorUpdateStatus::Completed =>
log_trace!(self.logger, "Finished syncing Channel Monitor for channel {}", log_funding_info!(monitor)),
ChannelMonitorUpdateStatus::PermanentFailure => {
monitor_state.channel_perm_failed.store(true, Ordering::Release);
self.pending_monitor_events.lock().unwrap().push((*funding_outpoint, vec![MonitorEvent::UpdateFailed(*funding_outpoint)], monitor.get_counterparty_node_id()));
self.event_notifier.notify();
}
ChannelMonitorUpdateStatus::InProgress => {
log_debug!(self.logger, "Channel Monitor sync for channel {} in progress, holding events until completion!", log_funding_info!(monitor));
pending_monitor_updates.push(update_id);
@@ -673,12 +667,12 @@ where C::Target: chain::Filter,
///
/// Note that we persist the given `ChannelMonitor` while holding the `ChainMonitor`
/// monitors lock.
fn watch_channel(&self, funding_outpoint: OutPoint, monitor: ChannelMonitor<ChannelSigner>) -> ChannelMonitorUpdateStatus {
fn watch_channel(&self, funding_outpoint: OutPoint, monitor: ChannelMonitor<ChannelSigner>) -> Result<ChannelMonitorUpdateStatus, ()> {
let mut monitors = self.monitors.write().unwrap();
let entry = match monitors.entry(funding_outpoint) {
hash_map::Entry::Occupied(_) => {
log_error!(self.logger, "Failed to add new channel data: channel monitor for given outpoint is already present");
return ChannelMonitorUpdateStatus::PermanentFailure
return Err(());
},
hash_map::Entry::Vacant(e) => e,
};
@@ -691,10 +685,6 @@ where C::Target: chain::Filter,
log_info!(self.logger, "Persistence of new ChannelMonitor for channel {} in progress", log_funding_info!(monitor));
pending_monitor_updates.push(update_id);
},
ChannelMonitorUpdateStatus::PermanentFailure => {
log_error!(self.logger, "Persistence of new ChannelMonitor for channel {} failed", log_funding_info!(monitor));
return persist_res;
},
ChannelMonitorUpdateStatus::Completed => {
log_info!(self.logger, "Persistence of new ChannelMonitor for channel {} completed", log_funding_info!(monitor));
}
@@ -708,7 +698,7 @@ where C::Target: chain::Filter,
channel_perm_failed: AtomicBool::new(false),
last_chain_persist_height: AtomicUsize::new(self.highest_chain_height.load(Ordering::Acquire)),
});
persist_res
Ok(persist_res)
}
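Purely illustrative (not from this commit): a sketch of how a caller might interpret the new fallible return type of `watch_channel`. The variant set is the one left after this PR; the catch-all arm keeps the sketch valid if further variants exist.

use lightning::chain::ChannelMonitorUpdateStatus;

// Returns true if channel setup can proceed immediately after `chain::Watch::watch_channel`.
fn watch_channel_ok(res: Result<ChannelMonitorUpdateStatus, ()>) -> bool {
    match res {
        Ok(ChannelMonitorUpdateStatus::Completed) => true,
        // Persistence is pending; the ChainMonitor holds monitor events until
        // `channel_monitor_updated` is called for this update.
        Ok(ChannelMonitorUpdateStatus::InProgress) => false,
        // A monitor for this funding outpoint already exists: the new channel must be abandoned.
        Err(()) => false,
        // Future-proofing for any additional status variants.
        Ok(_) => false,
    }
}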

/// Note that we persist the given `ChannelMonitor` update while holding the
@@ -723,10 +713,10 @@
// We should never ever trigger this from within ChannelManager. Technically a
// user could use this object with some proxying in between which makes this
// possible, but in tests and fuzzing, this should be a panic.
#[cfg(any(test, fuzzing))]
#[cfg(debug_assertions)]
panic!("ChannelManager generated a channel update for a channel that was not yet registered!");
#[cfg(not(any(test, fuzzing)))]
ChannelMonitorUpdateStatus::PermanentFailure
#[cfg(not(debug_assertions))]
ChannelMonitorUpdateStatus::InProgress
},
Some(monitor_state) => {
let monitor = &monitor_state.monitor;
@@ -745,18 +735,14 @@
pending_monitor_updates.push(update_id);
log_debug!(self.logger, "Persistence of ChannelMonitorUpdate for channel {} in progress", log_funding_info!(monitor));
},
ChannelMonitorUpdateStatus::PermanentFailure => {
monitor_state.channel_perm_failed.store(true, Ordering::Release);
log_error!(self.logger, "Persistence of ChannelMonitorUpdate for channel {} failed", log_funding_info!(monitor));
},
ChannelMonitorUpdateStatus::Completed => {
log_debug!(self.logger, "Persistence of ChannelMonitorUpdate for channel {} completed", log_funding_info!(monitor));
},
}
if update_res.is_err() {
ChannelMonitorUpdateStatus::PermanentFailure
ChannelMonitorUpdateStatus::InProgress
} else if monitor_state.channel_perm_failed.load(Ordering::Acquire) {
ChannelMonitorUpdateStatus::PermanentFailure
ChannelMonitorUpdateStatus::InProgress
} else {
persist_res
}
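Since a failed or slow persistence now surfaces as `InProgress`, the persister must eventually report completion so that held monitor events are released. A hedged snippet (identifiers assumed to be in scope, error handling elided) using the `ChainMonitor::channel_monitor_updated` call referenced in the docs above:

// Once the update identified by `update_id` for `funding_txo` has been durably written:
chain_monitor.channel_monitor_updated(funding_txo, update_id)
    .expect("the ChainMonitor should still be tracking this channel");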
@@ -831,12 +817,12 @@ impl<ChannelSigner: WriteableEcdsaChannelSigner, C: Deref, T: Deref, F: Deref, L

#[cfg(test)]
mod tests {
use crate::{check_added_monitors, check_closed_broadcast, check_closed_event};
use crate::check_added_monitors;
use crate::{expect_payment_claimed, expect_payment_path_successful, get_event_msg};
use crate::{get_htlc_update_msgs, get_local_commitment_txn, get_revoke_commit_msgs, get_route_and_payment_hash, unwrap_send_err};
use crate::chain::{ChannelMonitorUpdateStatus, Confirm, Watch};
use crate::chain::channelmonitor::LATENCY_GRACE_PERIOD_BLOCKS;
use crate::events::{Event, ClosureReason, MessageSendEvent, MessageSendEventsProvider};
use crate::events::{Event, MessageSendEvent, MessageSendEventsProvider};
use crate::ln::channelmanager::{PaymentSendFailure, PaymentId, RecipientOnionFields};
use crate::ln::functional_test_utils::*;
use crate::ln::msgs::ChannelMessageHandler;
@@ -988,12 +974,8 @@ mod tests {
chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::Completed);
unwrap_send_err!(nodes[0].node.send_payment_with_route(&route, second_payment_hash,
RecipientOnionFields::secret_only(second_payment_secret), PaymentId(second_payment_hash.0)
), true, APIError::ChannelUnavailable { ref err },
assert!(err.contains("ChannelMonitor storage failure")));
check_added_monitors!(nodes[0], 2); // After the failure we generate a close-channel monitor update
check_closed_broadcast!(nodes[0], true);
check_closed_event!(nodes[0], 1, ClosureReason::ProcessingError { err: "ChannelMonitor storage failure".to_string() },
[nodes[1].node.get_our_node_id()], 100000);
), false, APIError::MonitorUpdateInProgress, {});
check_added_monitors!(nodes[0], 1);

// However, as the ChainMonitor is still waiting for the original persistence to complete,
// it won't yet release the MonitorEvents.
@@ -1020,28 +1002,4 @@ mod tests {
do_chainsync_pauses_events(false);
do_chainsync_pauses_events(true);
}

#[test]
fn update_during_chainsync_fails_channel() {
let chanmon_cfgs = create_chanmon_cfgs(2);
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
create_announced_chan_between_nodes(&nodes, 0, 1);

chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().clear();
chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::PermanentFailure);

connect_blocks(&nodes[0], 1);
// Before processing events, the ChannelManager will still think the Channel is open and
// there won't be any ChannelMonitorUpdates
assert_eq!(nodes[0].node.list_channels().len(), 1);
check_added_monitors!(nodes[0], 0);
// ... however once we get events once, the channel will close, creating a channel-closed
// ChannelMonitorUpdate.
check_closed_broadcast!(nodes[0], true);
check_closed_event!(nodes[0], 1, ClosureReason::ProcessingError { err: "Failed to persist ChannelMonitor update during chain sync".to_string() },
[nodes[1].node.get_our_node_id()], 100000);
check_added_monitors!(nodes[0], 1);
}
}

lightning/src/chain/channelmonitor.rs: 32 changes (12 additions, 20 deletions)
@@ -151,10 +151,7 @@ pub enum MonitorEvent {
monitor_update_id: u64,
},

/// Indicates a [`ChannelMonitor`] update has failed. See
/// [`ChannelMonitorUpdateStatus::PermanentFailure`] for more information on how this is used.
///
/// [`ChannelMonitorUpdateStatus::PermanentFailure`]: super::ChannelMonitorUpdateStatus::PermanentFailure
/// Indicates a [`ChannelMonitor`] update has failed.
UpdateFailed(OutPoint),
}
impl_writeable_tlv_based_enum_upgradable!(MonitorEvent,
@@ -1488,21 +1485,20 @@ impl<Signer: WriteableEcdsaChannelSigner> ChannelMonitor<Signer> {
self.inner.lock().unwrap().counterparty_node_id
}

/// Used by ChannelManager deserialization to broadcast the latest holder state if its copy of
/// the Channel was out-of-date.
/// Used by [`ChannelManager`] deserialization to broadcast the latest holder state if its copy
/// of the channel state was out-of-date.
///
/// You may also use this to broadcast the latest local commitment transaction, either because
/// a monitor update failed with [`ChannelMonitorUpdateStatus::PermanentFailure`] or because we've
/// fallen behind (i.e. we've received proof that our counterparty side knows a revocation
/// secret we gave them that they shouldn't know).
/// a monitor update failed or because we've fallen behind (i.e. we've received proof that our
/// counterparty side knows a revocation secret we gave them that they shouldn't know).

Contributor: Mh, remind me how exactly we would know that a monitor update failed for good at this point?

Collaborator (author): We know when we've fallen behind thanks to the your_last_per_commitment_secret field in ChannelReestablish.

Contributor: Mh, but assuming we now might get InProgress it still is kinda unclear for a user how/when to act exactly on what. The updated version reads a bit like "you may use this if X and Y happens, but we don't really tell you if they happened. Ah, btw., this might be unsafe. Good luck.". Do we at least need to mention UnrecoverableError here?

Replying to "We know when we've fallen behind thanks to the your_last_per_commitment_secret field in ChannelReestablish": maybe I'm overlooking something, but access to that isn't exposed anywhere in the API really?
///
/// Broadcasting these transactions in the second case is UNSAFE, as they allow counterparty
/// side to punish you. Nevertheless you may want to broadcast them if counterparty doesn't
/// close channel with their commitment transaction after a substantial amount of time. Best
/// may be to contact the other node operator out-of-band to coordinate other options available
/// to you. In any-case, the choice is up to you.
/// to you.
///
/// [`ChannelMonitorUpdateStatus::PermanentFailure`]: super::ChannelMonitorUpdateStatus::PermanentFailure
/// [`ChannelManager`]: crate::ln::channelmanager::ChannelManager
pub fn get_latest_holder_commitment_txn<L: Deref>(&self, logger: &L) -> Vec<Transaction>
where L::Target: Logger {
self.inner.lock().unwrap().get_latest_holder_commitment_txn(logger)
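A hedged usage sketch for the method above (not part of the diff). It assumes `monitor`, a `logger` implementing `Logger`, and a `broadcaster` implementing `BroadcasterInterface` are in scope, that the crate's multi-transaction `broadcast_transactions` API is available, and that the caller has already decided it is safe to publish this state:

// Fetch the monitor's view of the latest holder commitment transaction(s)...
let txn = monitor.get_latest_holder_commitment_txn(&logger);
// ...and hand them to the chain backend for broadcast.
let tx_refs: Vec<&bitcoin::Transaction> = txn.iter().collect();
broadcaster.broadcast_transactions(&tx_refs);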
@@ -2599,6 +2595,7 @@ impl<Signer: WriteableEcdsaChannelSigner> ChannelMonitorImpl<Signer> {
ChannelMonitorUpdateStep::CommitmentSecret { idx, secret } => {
log_trace!(logger, "Updating ChannelMonitor with commitment secret");
if let Err(e) = self.provide_secret(*idx, *secret) {
debug_assert!(false, "Latest counterparty commitment secret was invalid");
log_error!(logger, "Providing latest counterparty commitment secret failed/was refused:");
log_error!(logger, " {}", e);
ret = Err(());
@@ -4413,13 +4410,12 @@ mod tests {
use crate::chain::chaininterface::LowerBoundedFeeEstimator;

use super::ChannelMonitorUpdateStep;
use crate::{check_added_monitors, check_closed_broadcast, check_closed_event, check_spends, get_local_commitment_txn, get_monitor, get_route_and_payment_hash, unwrap_send_err};
use crate::{check_added_monitors, check_spends, get_local_commitment_txn, get_monitor, get_route_and_payment_hash, unwrap_send_err};
use crate::chain::{BestBlock, Confirm};
use crate::chain::channelmonitor::ChannelMonitor;
use crate::chain::package::{weight_offered_htlc, weight_received_htlc, weight_revoked_offered_htlc, weight_revoked_received_htlc, WEIGHT_REVOKED_OUTPUT};
use crate::chain::transaction::OutPoint;
use crate::sign::InMemorySigner;
use crate::events::ClosureReason;
use crate::ln::{PaymentPreimage, PaymentHash};
use crate::ln::chan_utils;
use crate::ln::chan_utils::{HTLCOutputInCommitment, ChannelPublicKeys, ChannelTransactionParameters, HolderCommitmentTransaction, CounterpartyChannelTransactionParameters};
@@ -4485,18 +4481,14 @@ mod tests {
let (route, payment_hash, _, payment_secret) = get_route_and_payment_hash!(nodes[1], nodes[0], 100_000);
unwrap_send_err!(nodes[1].node.send_payment_with_route(&route, payment_hash,
RecipientOnionFields::secret_only(payment_secret), PaymentId(payment_hash.0)
), true, APIError::ChannelUnavailable { ref err },
assert!(err.contains("ChannelMonitor storage failure")));
check_added_monitors!(nodes[1], 2); // After the failure we generate a close-channel monitor update
check_closed_broadcast!(nodes[1], true);
check_closed_event!(nodes[1], 1, ClosureReason::ProcessingError { err: "ChannelMonitor storage failure".to_string() },
[nodes[0].node.get_our_node_id()], 100000);
), false, APIError::MonitorUpdateInProgress, {});
check_added_monitors!(nodes[1], 1);

// Build a new ChannelMonitorUpdate which contains both the failing commitment tx update
// and provides the claim preimages for the two pending HTLCs. The first update generates
// an error, but the point of this test is to ensure the later updates are still applied.
let monitor_updates = nodes[1].chain_monitor.monitor_updates.lock().unwrap();
let mut replay_update = monitor_updates.get(&channel.2).unwrap().iter().rev().skip(1).next().unwrap().clone();
let mut replay_update = monitor_updates.get(&channel.2).unwrap().iter().rev().next().unwrap().clone();
assert_eq!(replay_update.updates.len(), 1);
if let ChannelMonitorUpdateStep::LatestCounterpartyCommitmentTXInfo { .. } = replay_update.updates[0] {
} else { panic!(); }