Skip to content

Commit 019d47c

Browse files
committed
Block the mon update removing a preimage until upstream mon writes
When we forward a payment and receive an `update_fulfill_htlc` message from the downstream channel, we immediately claim the HTLC on the upstream channel, before even doing a `commitment_signed` dance on the downstream channel. This implies that our `ChannelMonitorUpdate`s "go out" in the right order - first we ensure we'll get our money by writing the preimage down, then we write the update that resolves giving money on the downstream node. This is safe as long as `ChannelMonitorUpdate`s complete in the order in which they are generated, but of course looking forward we want to support asynchronous updates, which may complete in any order. Thus, here, we enforce the correct ordering by blocking the downstream `ChannelMonitorUpdate` until the upstream one completes. Like the `PaymentSent` event handling we do so only for the `revoke_and_ack` `ChannelMonitorUpdate`, ensuring the preimage-containing upstream update has a full RTT to complete before we actually manage to slow anything down.
1 parent ba1207a commit 019d47c

File tree

3 files changed

+216
-40
lines changed

3 files changed

+216
-40
lines changed

lightning/src/ln/chanmon_update_fail_tests.rs

Lines changed: 133 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3054,18 +3054,27 @@ fn test_blocked_chan_preimage_release() {
30543054
check_added_monitors(&nodes[1], 1); // We generate only a preimage monitor update
30553055
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
30563056

3057-
// Finish the CS dance between nodes[0] and nodes[1].
3058-
commitment_signed_dance!(nodes[1], nodes[0], as_htlc_fulfill_updates.commitment_signed, false);
3057+
// Finish the CS dance between nodes[0] and nodes[1]. Note that until the final RAA CS is held
3058+
// until the full set of `ChannelMonitorUpdate`s on the nodes[1] <-> nodes[2] channel are
3059+
// complete, while the preimage that we care about ensuring is on disk did make it there above,
3060+
// the holding logic doesn't care about the type of update, it just cares that there is one.
3061+
nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &as_htlc_fulfill_updates.commitment_signed);
3062+
check_added_monitors(&nodes[1], 1);
3063+
let (a, raa) = do_main_commitment_signed_dance(&nodes[1], &nodes[0], false);
3064+
assert!(a.is_none());
3065+
3066+
nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), &raa);
30593067
check_added_monitors(&nodes[1], 0);
3068+
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
30603069

30613070
let events = nodes[1].node.get_and_clear_pending_events();
30623071
assert_eq!(events.len(), 3);
30633072
if let Event::PaymentSent { .. } = events[0] {} else { panic!(); }
30643073
if let Event::PaymentPathSuccessful { .. } = events[2] {} else { panic!(); }
30653074
if let Event::PaymentForwarded { .. } = events[1] {} else { panic!(); }
30663075

3067-
// The event processing should release the last RAA update.
3068-
check_added_monitors(&nodes[1], 1);
3076+
// The event processing should release the last RAA updates on both channels.
3077+
check_added_monitors(&nodes[1], 2);
30693078

30703079
// When we fetch the next update the message getter will generate the next update for nodes[2],
30713080
// generating a further monitor update.
@@ -3076,3 +3085,123 @@ fn test_blocked_chan_preimage_release() {
30763085
commitment_signed_dance!(nodes[2], nodes[1], bs_htlc_fulfill_updates.commitment_signed, false);
30773086
expect_payment_sent(&nodes[2], payment_preimage_2, None, true, true);
30783087
}
3088+
3089+
fn do_test_inverted_mon_completion_order(complete_bc_commitment_dance: bool) {
3090+
// When we forward a payment and receive an `update_fulfill_htlc` message from the downstream
3091+
// channel, we immediately claim the HTLC on the upstream channel, before even doing a
3092+
// `commitment_signed` dance on the downstream channel. This implies that our
3093+
// `ChannelMonitorUpdate`s "go out" in the right order - first we ensure we'll get our money,
3094+
// then we write the update that resolves giving money on the downstream node. This is safe as
3095+
// long as `ChannelMonitorUpdate`s complete in the order in which they are generated, but of
3096+
// course this may not be the case. For asynchronous update writes, we have to ensure monitor
3097+
// updates can block each other, preventing the inversion all together.
3098+
let chanmon_cfgs = create_chanmon_cfgs(3);
3099+
let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
3100+
3101+
let persister;
3102+
let new_chain_monitor;
3103+
let nodes_1_deserialized;
3104+
3105+
let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[None, None, None]);
3106+
let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs);
3107+
3108+
let chan_id_ab = create_announced_chan_between_nodes(&nodes, 0, 1).2;
3109+
let chan_id_bc = create_announced_chan_between_nodes(&nodes, 1, 2).2;
3110+
3111+
// Route a payment from A, through B, to C, then claim it on C. Once we pass B the
3112+
// `update_fulfill_htlc` we have a monitor update for both of B's channels. We complete the one
3113+
// on the B<->C channel but leave the A<->B monitor update pending, then reload B.
3114+
let (payment_preimage, payment_hash, _) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 100_000);
3115+
3116+
let mon_ab = get_monitor!(nodes[1], chan_id_ab).encode();
3117+
3118+
nodes[2].node.claim_funds(payment_preimage);
3119+
check_added_monitors(&nodes[2], 1);
3120+
expect_payment_claimed!(nodes[2], payment_hash, 100_000);
3121+
3122+
chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
3123+
let cs_updates = get_htlc_update_msgs(&nodes[2], &nodes[1].node.get_our_node_id());
3124+
nodes[1].node.handle_update_fulfill_htlc(&nodes[2].node.get_our_node_id(), &cs_updates.update_fulfill_htlcs[0]);
3125+
3126+
// B generates a new monitor update for the A <-> B channel, but doesn't send the new messages
3127+
// for it since the monitor update is marked in-progress.
3128+
check_added_monitors(&nodes[1], 1);
3129+
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
3130+
3131+
// Now step the Commitment Signed Dance between B and C forward a bit (or fully), ensuring we
3132+
// won't get the preimage when the nodes reconnect, at which point we have to ensure we get it
3133+
// from the ChannelMonitor.
3134+
nodes[1].node.handle_commitment_signed(&nodes[2].node.get_our_node_id(), &cs_updates.commitment_signed);
3135+
check_added_monitors(&nodes[1], 1);
3136+
if complete_bc_commitment_dance {
3137+
let (bs_revoke_and_ack, bs_commitment_signed) = get_revoke_commit_msgs!(nodes[1], nodes[2].node.get_our_node_id());
3138+
nodes[2].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_revoke_and_ack);
3139+
check_added_monitors(&nodes[2], 1);
3140+
nodes[2].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &bs_commitment_signed);
3141+
check_added_monitors(&nodes[2], 1);
3142+
let cs_raa = get_event_msg!(nodes[2], MessageSendEvent::SendRevokeAndACK, nodes[1].node.get_our_node_id());
3143+
3144+
// At this point node B still hasn't persisted the `ChannelMonitorUpdate` with the
3145+
// preimage in the A <-> B channel, which will prevent it from persisting the
3146+
// `ChannelMonitorUpdate` here to avoid "losing" the preimage.
3147+
nodes[1].node.handle_revoke_and_ack(&nodes[2].node.get_our_node_id(), &cs_raa);
3148+
check_added_monitors(&nodes[1], 0);
3149+
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
3150+
}
3151+
3152+
// Now reload node B
3153+
let manager_b = nodes[1].node.encode();
3154+
3155+
let mon_bc = get_monitor!(nodes[1], chan_id_bc).encode();
3156+
reload_node!(nodes[1], &manager_b, &[&mon_ab, &mon_bc], persister, new_chain_monitor, nodes_1_deserialized);
3157+
3158+
nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id());
3159+
nodes[2].node.peer_disconnected(&nodes[1].node.get_our_node_id());
3160+
3161+
// If we used the latest ChannelManager to reload from, we should have both channels still
3162+
// live. The B <-> C channel's final RAA ChannelMonitorUpdate must still be blocked as
3163+
// before - the ChannelMonitorUpdate for the A <-> B channel hasn't completed.
3164+
// When we call `timer_tick_occurred` we will get that monitor update back, which we'll
3165+
// complete after reconnecting to our peers.
3166+
persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
3167+
nodes[1].node.timer_tick_occurred();
3168+
check_added_monitors(&nodes[1], 1);
3169+
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
3170+
3171+
// Now reconnect B to both A and C. If the B <-> C commitment signed dance wasn't run to
3172+
// the end go ahead and do that, though the -2 in `reconnect_nodes` indicates that we
3173+
// expect to *not* receive the final RAA ChannelMonitorUpdate.
3174+
if complete_bc_commitment_dance {
3175+
reconnect_nodes(&nodes[1], &nodes[2], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false));
3176+
} else {
3177+
reconnect_nodes(&nodes[1], &nodes[2], (false, false), (0, -2), (0, 0), (0, 0), (0, 0), (0, 0), (false, true));
3178+
}
3179+
3180+
reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false));
3181+
3182+
// (Finally) complete the A <-> B ChannelMonitorUpdate, ensuring the preimage is durably on
3183+
// disk in the proper ChannelMonitor, unblocking the B <-> C ChannelMonitor updating
3184+
// process.
3185+
let (outpoint, _, ab_update_id) = nodes[1].chain_monitor.latest_monitor_update_id.lock().unwrap().get(&chan_id_ab).unwrap().clone();
3186+
nodes[1].chain_monitor.chain_monitor.channel_monitor_updated(outpoint, ab_update_id).unwrap();
3187+
3188+
// When we fetch B's HTLC update messages here (now that the ChannelMonitorUpdate has
3189+
// completed), it will also release the final RAA ChannelMonitorUpdate on the B <-> C
3190+
// channel.
3191+
let bs_updates = get_htlc_update_msgs(&nodes[1], &nodes[0].node.get_our_node_id());
3192+
check_added_monitors(&nodes[1], 1);
3193+
3194+
nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &bs_updates.update_fulfill_htlcs[0]);
3195+
do_commitment_signed_dance(&nodes[0], &nodes[1], &bs_updates.commitment_signed, false, false);
3196+
3197+
expect_payment_forwarded!(nodes[1], &nodes[0], &nodes[2], Some(1_000), false, false);
3198+
3199+
// Finally, check that the payment was, ultimately, seen as sent by node A.
3200+
expect_payment_sent(&nodes[0], payment_preimage, None, true, true);
3201+
}
3202+
3203+
#[test]
3204+
fn test_inverted_mon_completion_order() {
3205+
do_test_inverted_mon_completion_order(true);
3206+
do_test_inverted_mon_completion_order(false);
3207+
}

lightning/src/ln/channelmanager.rs

Lines changed: 55 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -603,7 +603,6 @@ pub(crate) enum RAAMonitorUpdateBlockingAction {
603603
}
604604

605605
impl RAAMonitorUpdateBlockingAction {
606-
#[allow(unused)]
607606
fn from_prev_hop_data(prev_hop: &HTLCPreviousHopData) -> Self {
608607
Self::ForwardedPaymentInboundClaim {
609608
channel_id: prev_hop.outpoint.to_channel_id(),
@@ -4940,11 +4939,14 @@ where
49404939
self.pending_outbound_payments.finalize_claims(sources, &self.pending_events);
49414940
}
49424941

4943-
fn claim_funds_internal(&self, source: HTLCSource, payment_preimage: PaymentPreimage, forwarded_htlc_value_msat: Option<u64>, from_onchain: bool, next_channel_outpoint: OutPoint) {
4942+
fn claim_funds_internal(&self, source: HTLCSource, payment_preimage: PaymentPreimage, forwarded_htlc_value_msat: Option<u64>, from_onchain: bool, next_channel_counterparty_node_id: Option<PublicKey>, next_channel_outpoint: OutPoint) {
49444943
match source {
49454944
HTLCSource::OutboundRoute { session_priv, payment_id, path, .. } => {
49464945
debug_assert!(self.background_events_processed_since_startup.load(Ordering::Acquire),
49474946
"We don't support claim_htlc claims during startup - monitors may not be available yet");
4947+
if let Some(pubkey) = next_channel_counterparty_node_id {
4948+
debug_assert_eq!(pubkey, path.hops[0].pubkey);
4949+
}
49484950
let ev_completion_action = EventCompletionAction::ReleaseRAAChannelMonitorUpdate {
49494951
channel_funding_outpoint: next_channel_outpoint,
49504952
counterparty_node_id: path.hops[0].pubkey,
@@ -4955,6 +4957,7 @@ where
49554957
},
49564958
HTLCSource::PreviousHopData(hop_data) => {
49574959
let prev_outpoint = hop_data.outpoint;
4960+
let completed_blocker = RAAMonitorUpdateBlockingAction::from_prev_hop_data(&hop_data);
49584961
let res = self.claim_funds_from_hop(hop_data, payment_preimage,
49594962
|htlc_claim_value_msat| {
49604963
if let Some(forwarded_htlc_value) = forwarded_htlc_value_msat {
@@ -4970,7 +4973,17 @@ where
49704973
next_channel_id: Some(next_channel_outpoint.to_channel_id()),
49714974
outbound_amount_forwarded_msat: forwarded_htlc_value_msat,
49724975
},
4973-
downstream_counterparty_and_funding_outpoint: None,
4976+
downstream_counterparty_and_funding_outpoint:
4977+
if let Some(node_id) = next_channel_counterparty_node_id {
4978+
Some((node_id, next_channel_outpoint, completed_blocker))
4979+
} else {
4980+
// We can only get `None` here if we are processing a
4981+
// `ChannelMonitor`-originated event, in which case we
4982+
// don't care about ensuring we wake the downstream
4983+
// channel's monitor updating - the channel is already
4984+
// closed.
4985+
None
4986+
},
49744987
})
49754988
} else { None }
49764989
});
@@ -5735,13 +5748,27 @@ where
57355748
match peer_state.channel_by_id.entry(msg.channel_id) {
57365749
hash_map::Entry::Occupied(mut chan) => {
57375750
let res = try_chan_entry!(self, chan.get_mut().update_fulfill_htlc(&msg), chan);
5751+
if let HTLCSource::PreviousHopData(prev_hop) = &res.0 {
5752+
peer_state.actions_blocking_raa_monitor_updates.entry(msg.channel_id)
5753+
.or_insert_with(Vec::new)
5754+
.push(RAAMonitorUpdateBlockingAction::from_prev_hop_data(&prev_hop));
5755+
}
5756+
// Note that we do not need to push an `actions_blocking_raa_monitor_updates`
5757+
// entry here, even though we *do* need to block the next RAA coming in from
5758+
// generating a monitor update which we let fly. We do this instead in the
5759+
// `claim_funds_internal` by attaching a `ReleaseRAAChannelMonitorUpdate`
5760+
// action to the event generated when we "claim" the sent payment. This is
5761+
// guaranteed to all complete before we process the RAA even though there is no
5762+
// lock held through that point as we aren't allowed to see another P2P message
5763+
// from the counterparty until we return, but `claim_funds_internal` runs
5764+
// first.
57385765
funding_txo = chan.get().context.get_funding_txo().expect("We won't accept a fulfill until funded");
57395766
res
57405767
},
57415768
hash_map::Entry::Vacant(_) => return Err(MsgHandleErrInternal::send_err_msg_no_close(format!("Got a message for a channel from the wrong node! No such channel for the passed counterparty_node_id {}", counterparty_node_id), msg.channel_id))
57425769
}
57435770
};
5744-
self.claim_funds_internal(htlc_source, msg.payment_preimage.clone(), Some(forwarded_htlc_value), false, funding_txo);
5771+
self.claim_funds_internal(htlc_source, msg.payment_preimage.clone(), Some(forwarded_htlc_value), false, Some(*counterparty_node_id), funding_txo);
57455772
Ok(())
57465773
}
57475774

@@ -5927,6 +5954,23 @@ where
59275954
})
59285955
}
59295956

5957+
#[cfg(any(test, feature = "_test_utils"))]
5958+
pub(crate) fn test_raa_monitor_updates_held(&self, counterparty_node_id: PublicKey,
5959+
channel_id: [u8; 32])
5960+
-> bool {
5961+
let per_peer_state = self.per_peer_state.read().unwrap();
5962+
if let Some(peer_state_mtx) = per_peer_state.get(&counterparty_node_id) {
5963+
let mut peer_state_lck = peer_state_mtx.lock().unwrap();
5964+
let peer_state = &mut *peer_state_lck;
5965+
5966+
if let Some(chan) = peer_state.channel_by_id.get(&channel_id) {
5967+
return self.raa_monitor_updates_held(&peer_state.actions_blocking_raa_monitor_updates,
5968+
chan.context.get_funding_txo().unwrap(), counterparty_node_id);
5969+
}
5970+
}
5971+
false
5972+
}
5973+
59305974
fn internal_revoke_and_ack(&self, counterparty_node_id: &PublicKey, msg: &msgs::RevokeAndACK) -> Result<(), MsgHandleErrInternal> {
59315975
let (htlcs_to_fail, res) = {
59325976
let per_peer_state = self.per_peer_state.read().unwrap();
@@ -5940,12 +5984,9 @@ where
59405984
hash_map::Entry::Occupied(mut chan) => {
59415985
let funding_txo_opt = chan.get().context.get_funding_txo();
59425986
let mon_update_blocked = if let Some(funding_txo) = funding_txo_opt {
5943-
self.pending_events.lock().unwrap().iter().any(|(_, action)| {
5944-
action == &Some(EventCompletionAction::ReleaseRAAChannelMonitorUpdate {
5945-
channel_funding_outpoint: funding_txo,
5946-
counterparty_node_id: *counterparty_node_id,
5947-
})
5948-
})
5987+
self.raa_monitor_updates_held(
5988+
&peer_state.actions_blocking_raa_monitor_updates, funding_txo,
5989+
*counterparty_node_id)
59495990
} else { false };
59505991
let (htlcs_to_fail, monitor_update_opt) = try_chan_entry!(self,
59515992
chan.get_mut().revoke_and_ack(&msg, &self.fee_estimator, &self.logger, mon_update_blocked), chan);
@@ -6125,7 +6166,7 @@ where
61256166
MonitorEvent::HTLCEvent(htlc_update) => {
61266167
if let Some(preimage) = htlc_update.payment_preimage {
61276168
log_trace!(self.logger, "Claiming HTLC with preimage {} from our monitor", log_bytes!(preimage.0));
6128-
self.claim_funds_internal(htlc_update.source, preimage, htlc_update.htlc_value_satoshis.map(|v| v * 1000), true, funding_outpoint);
6169+
self.claim_funds_internal(htlc_update.source, preimage, htlc_update.htlc_value_satoshis.map(|v| v * 1000), true, counterparty_node_id, funding_outpoint);
61296170
} else {
61306171
log_trace!(self.logger, "Failing HTLC with hash {} from our monitor", log_bytes!(htlc_update.payment_hash.0));
61316172
let receiver = HTLCDestination::NextHopChannel { node_id: counterparty_node_id, channel_id: funding_outpoint.to_channel_id() };
@@ -8843,6 +8884,7 @@ where
88438884
// downstream chan is closed (because we don't have a
88448885
// channel_id -> peer map entry).
88458886
counterparty_opt.is_none(),
8887+
counterparty_opt.cloned().or(monitor.get_counterparty_node_id()),
88468888
monitor.get_funding_txo().0))
88478889
} else { None }
88488890
} else {
@@ -9112,12 +9154,12 @@ where
91129154
channel_manager.fail_htlc_backwards_internal(&source, &payment_hash, &reason, receiver);
91139155
}
91149156

9115-
for (source, preimage, downstream_value, downstream_closed, downstream_funding) in pending_claims_to_replay {
9157+
for (source, preimage, downstream_value, downstream_closed, downstream_node_id, downstream_funding) in pending_claims_to_replay {
91169158
// We use `downstream_closed` in place of `from_onchain` here just as a guess - we
91179159
// don't remember in the `ChannelMonitor` where we got a preimage from, but if the
91189160
// channel is closed we just assume that it probably came from an on-chain claim.
91199161
channel_manager.claim_funds_internal(source, preimage, Some(downstream_value),
9120-
downstream_closed, downstream_funding);
9162+
downstream_closed, downstream_node_id, downstream_funding);
91219163
}
91229164

91239165
//TODO: Broadcast channel update for closed channels, but only after we've made a

0 commit comments

Comments
 (0)