Drop the ChannelMonitorUpdateStatus::PermanentFailure variant #2562

Merged: 11 commits, Sep 21, 2023
Changes from 1 commit

fuzz/src/chanmon_consistency.rs: 4 changes (2 additions, 2 deletions)
@@ -138,7 +138,7 @@ impl TestChainMonitor {
}
}
impl chain::Watch<TestChannelSigner> for TestChainMonitor {
fn watch_channel(&self, funding_txo: OutPoint, monitor: channelmonitor::ChannelMonitor<TestChannelSigner>) -> chain::ChannelMonitorUpdateStatus {
fn watch_channel(&self, funding_txo: OutPoint, monitor: channelmonitor::ChannelMonitor<TestChannelSigner>) -> Result<chain::ChannelMonitorUpdateStatus, ()> {
let mut ser = VecWriter(Vec::new());
monitor.write(&mut ser).unwrap();
if let Some(_) = self.latest_monitors.lock().unwrap().insert(funding_txo, (monitor.get_latest_update_id(), ser.0)) {
@@ -500,7 +500,7 @@ pub fn do_test<Out: Output>(data: &[u8], underlying_out: Out) {
let res = (<(BlockHash, ChanMan)>::read(&mut Cursor::new(&$ser.0), read_args).expect("Failed to read manager").1, chain_monitor.clone());
for (funding_txo, mon) in monitors.drain() {
assert_eq!(chain_monitor.chain_monitor.watch_channel(funding_txo, mon),
ChannelMonitorUpdateStatus::Completed);
Ok(ChannelMonitorUpdateStatus::Completed));
}
res
} }

lightning-persister/src/fs_store.rs: 6 changes (3 additions, 3 deletions)
@@ -436,7 +436,7 @@ mod tests {
}

// Test that if the store's path to channel data is read-only, writing a
// monitor to it results in the store returning a PermanentFailure.
// monitor to it results in the store returning an InProgress.
Contributor: Should be UnrecoverableError now

// Windows ignores the read-only flag for folders, so this test is Unix-only.
#[cfg(not(target_os = "windows"))]
#[test]
@@ -470,7 +470,7 @@ mod tests {
index: 0
Contributor: 10 lines up is a reference to permanent failure, though I suppose it's not really incorrect per se. Also in the test name.

};
match store.persist_new_channel(test_txo, &added_monitors[0].1, update_id.2) {
ChannelMonitorUpdateStatus::PermanentFailure => {},
ChannelMonitorUpdateStatus::InProgress => {},
_ => panic!("unexpected result from persisting new channel")
}

@@ -507,7 +507,7 @@ mod tests {
index: 0
};
match store.persist_new_channel(test_txo, &added_monitors[0].1, update_id.2) {
ChannelMonitorUpdateStatus::PermanentFailure => {},
ChannelMonitorUpdateStatus::InProgress => {},
_ => panic!("unexpected result from persisting new channel")
}


lightning/src/chain/chainmonitor.rs: 74 changes (16 additions, 58 deletions)
@@ -78,7 +78,7 @@ impl MonitorUpdateId {
/// `Persist` defines behavior for persisting channel monitors: this could mean
/// writing once to disk, and/or uploading to one or more backup services.
///
/// Each method can return three possible values:
/// Each method can return two possible values:
/// * If persistence (including any relevant `fsync()` calls) happens immediately, the
/// implementation should return [`ChannelMonitorUpdateStatus::Completed`], indicating normal
/// channel operation should continue.
@@ -91,10 +91,9 @@ impl MonitorUpdateId {
/// Note that unlike the direct [`chain::Watch`] interface,
/// [`ChainMonitor::channel_monitor_updated`] must be called once for *each* update which occurs.
///
/// * If persistence fails for some reason, implementations should return
/// [`ChannelMonitorUpdateStatus::PermanentFailure`], in which case the channel will likely be
/// closed without broadcasting the latest state. See
/// [`ChannelMonitorUpdateStatus::PermanentFailure`] for more details.
/// If persistence fails for some reason, implementations should still return
/// [`ChannelMonitorUpdateStatus::InProgress`] and attempt to shut down or otherwise resolve the
/// situation ASAP.
///
/// Third-party watchtowers may be built as a part of an implementation of this trait, with the
/// advantage that you can control whether to resume channel operation depending on if an update
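For reference, a minimal sketch (not part of this diff) of how a `Persist` implementation might translate a storage result now that `PermanentFailure` is gone. Only the `ChannelMonitorUpdateStatus` variants touched by this commit are assumed; the helper name and the use of `std::io::Result` are illustrative.

use lightning::chain::ChannelMonitorUpdateStatus;

// Map the outcome of a hypothetical synchronous monitor write onto an update status.
fn status_from_write_result(res: std::io::Result<()>) -> ChannelMonitorUpdateStatus {
    match res {
        // The write (including any fsync) completed before returning: normal channel
        // operation continues.
        Ok(()) => ChannelMonitorUpdateStatus::Completed,
        // The write failed or has not yet completed: report the update as in-flight and
        // retry or shut down out of band, rather than taking the removed force-close path.
        Err(_) => ChannelMonitorUpdateStatus::InProgress,
    }
}

A real implementation would also arrange for ChainMonitor::channel_monitor_updated to be called once the write eventually lands, as the doc above requires for each in-progress update.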
@@ -335,11 +334,6 @@ where C::Target: chain::Filter,
match self.persister.update_persisted_channel(*funding_outpoint, None, monitor, update_id) {
ChannelMonitorUpdateStatus::Completed =>
log_trace!(self.logger, "Finished syncing Channel Monitor for channel {}", log_funding_info!(monitor)),
ChannelMonitorUpdateStatus::PermanentFailure => {
monitor_state.channel_perm_failed.store(true, Ordering::Release);
self.pending_monitor_events.lock().unwrap().push((*funding_outpoint, vec![MonitorEvent::UpdateFailed(*funding_outpoint)], monitor.get_counterparty_node_id()));
self.event_notifier.notify();
}
ChannelMonitorUpdateStatus::InProgress => {
log_debug!(self.logger, "Channel Monitor sync for channel {} in progress, holding events until completion!", log_funding_info!(monitor));
pending_monitor_updates.push(update_id);
@@ -673,12 +667,12 @@ where C::Target: chain::Filter,
///
/// Note that we persist the given `ChannelMonitor` while holding the `ChainMonitor`
/// monitors lock.
fn watch_channel(&self, funding_outpoint: OutPoint, monitor: ChannelMonitor<ChannelSigner>) -> ChannelMonitorUpdateStatus {
fn watch_channel(&self, funding_outpoint: OutPoint, monitor: ChannelMonitor<ChannelSigner>) -> Result<ChannelMonitorUpdateStatus, ()> {
let mut monitors = self.monitors.write().unwrap();
let entry = match monitors.entry(funding_outpoint) {
hash_map::Entry::Occupied(_) => {
log_error!(self.logger, "Failed to add new channel data: channel monitor for given outpoint is already present");
return ChannelMonitorUpdateStatus::PermanentFailure
return Err(());
},
hash_map::Entry::Vacant(e) => e,
};
@@ -691,10 +685,6 @@ where C::Target: chain::Filter,
log_info!(self.logger, "Persistence of new ChannelMonitor for channel {} in progress", log_funding_info!(monitor));
pending_monitor_updates.push(update_id);
},
ChannelMonitorUpdateStatus::PermanentFailure => {
log_error!(self.logger, "Persistence of new ChannelMonitor for channel {} failed", log_funding_info!(monitor));
return persist_res;
},
ChannelMonitorUpdateStatus::Completed => {
log_info!(self.logger, "Persistence of new ChannelMonitor for channel {} completed", log_funding_info!(monitor));
}
@@ -708,7 +698,7 @@ where C::Target: chain::Filter,
channel_perm_failed: AtomicBool::new(false),
last_chain_persist_height: AtomicUsize::new(self.highest_chain_height.load(Ordering::Acquire)),
});
persist_res
Ok(persist_res)
}
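Purely illustrative (not from this commit): a sketch of how a caller might interpret the new fallible return type of `watch_channel`. The variant set is the one left after this PR; the catch-all arm keeps the sketch valid if further variants exist.

use lightning::chain::ChannelMonitorUpdateStatus;

// Returns true if channel setup can proceed immediately after `chain::Watch::watch_channel`.
fn watch_channel_ok(res: Result<ChannelMonitorUpdateStatus, ()>) -> bool {
    match res {
        Ok(ChannelMonitorUpdateStatus::Completed) => true,
        // Persistence is pending; the ChainMonitor holds monitor events until
        // `channel_monitor_updated` is called for this update.
        Ok(ChannelMonitorUpdateStatus::InProgress) => false,
        // A monitor for this funding outpoint already exists: the new channel must be abandoned.
        Err(()) => false,
        // Future-proofing for any additional status variants.
        Ok(_) => false,
    }
}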

/// Note that we persist the given `ChannelMonitor` update while holding the
@@ -723,10 +713,10 @@
// We should never ever trigger this from within ChannelManager. Technically a
// user could use this object with some proxying in between which makes this
// possible, but in tests and fuzzing, this should be a panic.
#[cfg(any(test, fuzzing))]
#[cfg(debug_assertions)]
panic!("ChannelManager generated a channel update for a channel that was not yet registered!");
#[cfg(not(any(test, fuzzing)))]
ChannelMonitorUpdateStatus::PermanentFailure
#[cfg(not(debug_assertions))]
ChannelMonitorUpdateStatus::InProgress
},
Some(monitor_state) => {
let monitor = &monitor_state.monitor;
@@ -745,18 +735,14 @@
pending_monitor_updates.push(update_id);
log_debug!(self.logger, "Persistence of ChannelMonitorUpdate for channel {} in progress", log_funding_info!(monitor));
},
ChannelMonitorUpdateStatus::PermanentFailure => {
monitor_state.channel_perm_failed.store(true, Ordering::Release);
log_error!(self.logger, "Persistence of ChannelMonitorUpdate for channel {} failed", log_funding_info!(monitor));
},
ChannelMonitorUpdateStatus::Completed => {
log_debug!(self.logger, "Persistence of ChannelMonitorUpdate for channel {} completed", log_funding_info!(monitor));
},
}
if update_res.is_err() {
ChannelMonitorUpdateStatus::PermanentFailure
ChannelMonitorUpdateStatus::InProgress
} else if monitor_state.channel_perm_failed.load(Ordering::Acquire) {
ChannelMonitorUpdateStatus::PermanentFailure
ChannelMonitorUpdateStatus::InProgress
} else {
persist_res
}
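Since a failed or slow persistence now surfaces as `InProgress`, the persister must eventually report completion so that held monitor events are released. A hedged snippet (identifiers assumed to be in scope, error handling elided) using the `ChainMonitor::channel_monitor_updated` call referenced in the docs above:

// Once the update identified by `update_id` for `funding_txo` has been durably written:
chain_monitor.channel_monitor_updated(funding_txo, update_id)
    .expect("the ChainMonitor should still be tracking this channel");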
@@ -831,12 +817,12 @@ impl<ChannelSigner: WriteableEcdsaChannelSigner, C: Deref, T: Deref, F: Deref, L

#[cfg(test)]
mod tests {
use crate::{check_added_monitors, check_closed_broadcast, check_closed_event};
use crate::check_added_monitors;
use crate::{expect_payment_claimed, expect_payment_path_successful, get_event_msg};
use crate::{get_htlc_update_msgs, get_local_commitment_txn, get_revoke_commit_msgs, get_route_and_payment_hash, unwrap_send_err};
use crate::chain::{ChannelMonitorUpdateStatus, Confirm, Watch};
use crate::chain::channelmonitor::LATENCY_GRACE_PERIOD_BLOCKS;
use crate::events::{Event, ClosureReason, MessageSendEvent, MessageSendEventsProvider};
use crate::events::{Event, MessageSendEvent, MessageSendEventsProvider};
use crate::ln::channelmanager::{PaymentSendFailure, PaymentId, RecipientOnionFields};
use crate::ln::functional_test_utils::*;
use crate::ln::msgs::ChannelMessageHandler;
@@ -988,12 +974,8 @@ mod tests {
chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::Completed);
unwrap_send_err!(nodes[0].node.send_payment_with_route(&route, second_payment_hash,
RecipientOnionFields::secret_only(second_payment_secret), PaymentId(second_payment_hash.0)
), true, APIError::ChannelUnavailable { ref err },
assert!(err.contains("ChannelMonitor storage failure")));
check_added_monitors!(nodes[0], 2); // After the failure we generate a close-channel monitor update
check_closed_broadcast!(nodes[0], true);
check_closed_event!(nodes[0], 1, ClosureReason::ProcessingError { err: "ChannelMonitor storage failure".to_string() },
[nodes[1].node.get_our_node_id()], 100000);
), false, APIError::MonitorUpdateInProgress, {});
check_added_monitors!(nodes[0], 1);

// However, as the ChainMonitor is still waiting for the original persistence to complete,
// it won't yet release the MonitorEvents.
@@ -1020,28 +1002,4 @@ mod tests {
do_chainsync_pauses_events(false);
do_chainsync_pauses_events(true);
}

#[test]
fn update_during_chainsync_fails_channel() {
let chanmon_cfgs = create_chanmon_cfgs(2);
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
create_announced_chan_between_nodes(&nodes, 0, 1);

chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().clear();
chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::PermanentFailure);

connect_blocks(&nodes[0], 1);
// Before processing events, the ChannelManager will still think the Channel is open and
// there won't be any ChannelMonitorUpdates
assert_eq!(nodes[0].node.list_channels().len(), 1);
check_added_monitors!(nodes[0], 0);
// ... however once we get events once, the channel will close, creating a channel-closed
// ChannelMonitorUpdate.
check_closed_broadcast!(nodes[0], true);
check_closed_event!(nodes[0], 1, ClosureReason::ProcessingError { err: "Failed to persist ChannelMonitor update during chain sync".to_string() },
[nodes[1].node.get_our_node_id()], 100000);
check_added_monitors!(nodes[0], 1);
}
}

lightning/src/chain/channelmonitor.rs: 32 changes (12 additions, 20 deletions)
@@ -151,10 +151,7 @@ pub enum MonitorEvent {
monitor_update_id: u64,
},

/// Indicates a [`ChannelMonitor`] update has failed. See
/// [`ChannelMonitorUpdateStatus::PermanentFailure`] for more information on how this is used.
///
/// [`ChannelMonitorUpdateStatus::PermanentFailure`]: super::ChannelMonitorUpdateStatus::PermanentFailure
/// Indicates a [`ChannelMonitor`] update has failed.
UpdateFailed(OutPoint),
}
impl_writeable_tlv_based_enum_upgradable!(MonitorEvent,
@@ -1488,21 +1485,20 @@ impl<Signer: WriteableEcdsaChannelSigner> ChannelMonitor<Signer> {
self.inner.lock().unwrap().counterparty_node_id
}

/// Used by ChannelManager deserialization to broadcast the latest holder state if its copy of
/// the Channel was out-of-date.
/// Used by [`ChannelManager`] deserialization to broadcast the latest holder state if its copy
/// of the channel state was out-of-date.
///
/// You may also use this to broadcast the latest local commitment transaction, either because
/// a monitor update failed with [`ChannelMonitorUpdateStatus::PermanentFailure`] or because we've
/// fallen behind (i.e. we've received proof that our counterparty side knows a revocation
/// secret we gave them that they shouldn't know).
/// a monitor update failed or because we've fallen behind (i.e. we've received proof that our
/// counterparty side knows a revocation secret we gave them that they shouldn't know).

Contributor: Mh, remind me how exactly we would know that a monitor update failed for good at this point?

Collaborator (author): We know when we've fallen behind thanks to the your_last_per_commitment_secret field in ChannelReestablish.

Contributor: Mh, but assuming we now might get InProgress it still is kinda unclear for a user how/when to act exactly on what. The updated version reads a bit like "you may use this if X and Y happens, but we don't really tell you if they happened. Ah, btw., this might be unsafe. Good luck.". Do we at least need to mention UnrecoverableError here?

Replying to "We know when we've fallen behind thanks to the your_last_per_commitment_secret field in ChannelReestablish": maybe I'm overlooking something, but access to that isn't exposed anywhere in the API really?
///
/// Broadcasting these transactions in the second case is UNSAFE, as they allow counterparty
/// side to punish you. Nevertheless you may want to broadcast them if counterparty doesn't
/// close channel with their commitment transaction after a substantial amount of time. Best
/// may be to contact the other node operator out-of-band to coordinate other options available
/// to you. In any-case, the choice is up to you.
/// to you.
///
/// [`ChannelMonitorUpdateStatus::PermanentFailure`]: super::ChannelMonitorUpdateStatus::PermanentFailure
/// [`ChannelManager`]: crate::ln::channelmanager::ChannelManager
pub fn get_latest_holder_commitment_txn<L: Deref>(&self, logger: &L) -> Vec<Transaction>
where L::Target: Logger {
self.inner.lock().unwrap().get_latest_holder_commitment_txn(logger)
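A hedged usage sketch for the method above (not part of the diff). It assumes `monitor`, a `logger` implementing `Logger`, and a `broadcaster` implementing `BroadcasterInterface` are in scope, that the crate's multi-transaction `broadcast_transactions` API is available, and that the caller has already decided it is safe to publish this state:

// Fetch the monitor's view of the latest holder commitment transaction(s)...
let txn = monitor.get_latest_holder_commitment_txn(&logger);
// ...and hand them to the chain backend for broadcast.
let tx_refs: Vec<&bitcoin::Transaction> = txn.iter().collect();
broadcaster.broadcast_transactions(&tx_refs);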
@@ -2599,6 +2595,7 @@ impl<Signer: WriteableEcdsaChannelSigner> ChannelMonitorImpl<Signer> {
ChannelMonitorUpdateStep::CommitmentSecret { idx, secret } => {
log_trace!(logger, "Updating ChannelMonitor with commitment secret");
if let Err(e) = self.provide_secret(*idx, *secret) {
debug_assert!(false, "Latest counterparty commitment secret was invalid");
log_error!(logger, "Providing latest counterparty commitment secret failed/was refused:");
log_error!(logger, " {}", e);
ret = Err(());
@@ -4413,13 +4410,12 @@ mod tests {
use crate::chain::chaininterface::LowerBoundedFeeEstimator;

use super::ChannelMonitorUpdateStep;
use crate::{check_added_monitors, check_closed_broadcast, check_closed_event, check_spends, get_local_commitment_txn, get_monitor, get_route_and_payment_hash, unwrap_send_err};
use crate::{check_added_monitors, check_spends, get_local_commitment_txn, get_monitor, get_route_and_payment_hash, unwrap_send_err};
use crate::chain::{BestBlock, Confirm};
use crate::chain::channelmonitor::ChannelMonitor;
use crate::chain::package::{weight_offered_htlc, weight_received_htlc, weight_revoked_offered_htlc, weight_revoked_received_htlc, WEIGHT_REVOKED_OUTPUT};
use crate::chain::transaction::OutPoint;
use crate::sign::InMemorySigner;
use crate::events::ClosureReason;
use crate::ln::{PaymentPreimage, PaymentHash};
use crate::ln::chan_utils;
use crate::ln::chan_utils::{HTLCOutputInCommitment, ChannelPublicKeys, ChannelTransactionParameters, HolderCommitmentTransaction, CounterpartyChannelTransactionParameters};
@@ -4485,18 +4481,14 @@ mod tests {
let (route, payment_hash, _, payment_secret) = get_route_and_payment_hash!(nodes[1], nodes[0], 100_000);
unwrap_send_err!(nodes[1].node.send_payment_with_route(&route, payment_hash,
RecipientOnionFields::secret_only(payment_secret), PaymentId(payment_hash.0)
), true, APIError::ChannelUnavailable { ref err },
assert!(err.contains("ChannelMonitor storage failure")));
check_added_monitors!(nodes[1], 2); // After the failure we generate a close-channel monitor update
check_closed_broadcast!(nodes[1], true);
check_closed_event!(nodes[1], 1, ClosureReason::ProcessingError { err: "ChannelMonitor storage failure".to_string() },
[nodes[0].node.get_our_node_id()], 100000);
), false, APIError::MonitorUpdateInProgress, {});
check_added_monitors!(nodes[1], 1);

// Build a new ChannelMonitorUpdate which contains both the failing commitment tx update
// and provides the claim preimages for the two pending HTLCs. The first update generates
// an error, but the point of this test is to ensure the later updates are still applied.
let monitor_updates = nodes[1].chain_monitor.monitor_updates.lock().unwrap();
let mut replay_update = monitor_updates.get(&channel.2).unwrap().iter().rev().skip(1).next().unwrap().clone();
let mut replay_update = monitor_updates.get(&channel.2).unwrap().iter().rev().next().unwrap().clone();
assert_eq!(replay_update.updates.len(), 1);
if let ChannelMonitorUpdateStep::LatestCounterpartyCommitmentTXInfo { .. } = replay_update.updates[0] {
} else { panic!(); }