Skip to content

Commit 9fecfed

Browse files
committed
discovery: fix race access to syncer's state
This commit fixes the following race:
1. syncer(state=syncingChans) sends QueryChannelRange.
2. The remote peer replies with ReplyChannelRange.
3. ProcessQueryMsg fails to process the remote peer's msg because the syncer's state is neither waitingQueryChanReply nor waitingQueryRangeReply.
4. syncer marks its new state as waitingQueryRangeReply, but too late.

The historical sync will now fail, and the syncer will be stuck in this state. What's worse, it can no longer forward channel announcements to other connected peers, because broadcasting is skipped during the initial graph sync.

This is now fixed by making sure the following two steps are atomic:
1. syncer(state=syncingChans) sends QueryChannelRange.
2. syncer marks its new state as waitingQueryRangeReply.
1 parent 4b30b09 commit 9fecfed

File tree

2 files changed

+20
-4
lines changed

2 files changed

+20
-4
lines changed

discovery/gossiper.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -832,9 +832,13 @@ func (d *AuthenticatedGossiper) ProcessRemoteAnnouncement(msg lnwire.Message,
832832

833833
// If we've found the message target, then we'll dispatch the
834834
// message directly to it.
835-
syncer.ProcessQueryMsg(m, peer.QuitSignal())
835+
err := syncer.ProcessQueryMsg(m, peer.QuitSignal())
836+
if err != nil {
837+
log.Errorf("Process query msg from peer %x got %v",
838+
peer.PubKey(), err)
839+
}
836840

837-
errChan <- nil
841+
errChan <- err
838842
return errChan
839843

840844
// If a peer is updating its current update horizon, then we'll dispatch

discovery/syncer.go

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,15 @@ func (g *GossipSyncer) handleSyncingChans() {
486486
return
487487
}
488488

489+
// Acquire a lock so the following state transition is atomic.
490+
//
491+
// NOTE: We must lock the following steps as it's possible we get an
492+
// immediate response (ReplyChannelRange) after sending the query msg.
493+
// The response is handled in ProcessQueryMsg, which requires the
494+
// current state to be waitingQueryRangeReply.
495+
g.Lock()
496+
defer g.Unlock()
497+
489498
err = g.cfg.sendToPeer(queryRangeMsg)
490499
if err != nil {
491500
log.Errorf("Unable to send chan range query: %v", err)
@@ -1517,12 +1526,15 @@ func (g *GossipSyncer) ProcessQueryMsg(msg lnwire.Message, peerQuit <-chan struc
15171526
// Reply messages should only be expected in states where we're waiting
15181527
// for a reply.
15191528
case *lnwire.ReplyChannelRange, *lnwire.ReplyShortChanIDsEnd:
1529+
g.Lock()
15201530
syncState := g.syncState()
1531+
g.Unlock()
1532+
15211533
if syncState != waitingQueryRangeReply &&
15221534
syncState != waitingQueryChanReply {
15231535

1524-
return fmt.Errorf("received unexpected query reply "+
1525-
"message %T", msg)
1536+
return fmt.Errorf("unexpected msg %T received in "+
1537+
"state %v", msg, syncState)
15261538
}
15271539
msgChan = g.gossipMsgs
15281540

0 commit comments

Comments
 (0)