Skip to content

Commit ffd944e

Browse files
authored
Merge pull request #10012 from ziggie1984/fix-goroutine-leak
multi: prevent goroutine leak in brontide
2 parents 8a03414 + e6aff21 commit ffd944e

File tree

3 files changed

+25
-25
lines changed

3 files changed

+25
-25
lines changed

discovery/gossiper.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,10 @@ type processedNetworkMsg struct {
411411

412412
// cachedNetworkMsg is a wrapper around a network message that can be used with
413413
// *lru.Cache.
414+
//
415+
// NOTE: This struct is not thread safe which means you need to assure no
416+
// concurrent read write access to it and all its contents which are pointers
417+
// as well.
414418
type cachedNetworkMsg struct {
415419
msgs []*processedNetworkMsg
416420
}
@@ -3131,7 +3135,9 @@ func (d *AuthenticatedGossiper) handleChanUpdate(ctx context.Context,
31313135

31323136
// NOTE: We don't return anything on the error channel for this
31333137
// message, as we expect that will be done when this
3134-
// ChannelUpdate is later reprocessed.
3138+
// ChannelUpdate is later reprocessed. This might never happen
3139+
// if the corresponding ChannelAnnouncement is never received
3140+
// or the LRU cache is filled up and the entry is evicted.
31353141
return nil, false
31363142

31373143
default:

docs/release-notes/release-notes-0.19.2.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@
3232
- [Fixed](https://github.com/lightningnetwork/lnd/pull/9978) a deadlock which
3333
can happen when the peer start-up has not yet completed but a another p2p
3434
connection attempt tries to disconnect the peer.
35+
36+
- [Fixed](https://github.com/lightningnetwork/lnd/pull/10012) a case which
37+
could lead to a memory issues due to a goroutine leak in the peer/gossiper
38+
code.
3539

3640
# New Features
3741

peer/brontide.go

Lines changed: 14 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1990,32 +1990,22 @@ func newDiscMsgStream(p *Brontide) *msgStream {
19901990
// so that a parent context can be passed in here.
19911991
ctx := context.TODO()
19921992

1993+
// Processing here means we send it to the gossiper which then
1994+
// decides whether this message is processed immediately or
1995+
// waits for dependent messages to be processed. It can also
1996+
// happen that the message is not processed at all if it is
1997+
// premature and the LRU cache fills up and the message is
1998+
// deleted.
19931999
p.log.Debugf("Processing remote msg %T", msg)
19942000

1995-
errChan := p.cfg.AuthGossiper.ProcessRemoteAnnouncement(
1996-
ctx, msg, p,
1997-
)
1998-
1999-
// Start a goroutine to process the error channel for logging
2000-
// purposes.
2001-
//
2002-
// TODO(ziggie): Maybe use the error to potentially punish the
2003-
// peer depending on the error ?
2004-
go func() {
2005-
select {
2006-
case <-p.cg.Done():
2007-
return
2008-
2009-
case err := <-errChan:
2010-
if err != nil {
2011-
p.log.Warnf("Error processing remote "+
2012-
"msg %T: %v", msg,
2013-
err)
2014-
}
2015-
}
2016-
2017-
p.log.Debugf("Processed remote msg %T", msg)
2018-
}()
2001+
// TODO(ziggie): ProcessRemoteAnnouncement returns an error
2002+
// channel, but we cannot rely on it being written to.
2003+
// Because some messages might never be processed (e.g.
2004+
// premature channel updates). We should change the design here
2005+
// and use the actor model pattern as soon as it is available.
2006+
// So for now we should NOT use the error channel.
2007+
// See https://github.com/lightningnetwork/lnd/pull/9820.
2008+
p.cfg.AuthGossiper.ProcessRemoteAnnouncement(ctx, msg, p)
20192009
}
20202010

20212011
return newMsgStream(

0 commit comments

Comments
 (0)