Skip to content

Commit 09b6745

Browse files
authored
Merge pull request #9600 from lightningnetwork/yy-more-flakes
lntest+itest: document and fix more flakes
2 parents b031002 + 49382a8 commit 09b6745

10 files changed

+105
-32
lines changed

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ require (
4343
github.com/lightningnetwork/lnd/sqldb v1.0.7
4444
github.com/lightningnetwork/lnd/ticker v1.1.1
4545
github.com/lightningnetwork/lnd/tlv v1.3.0
46-
github.com/lightningnetwork/lnd/tor v1.1.5
46+
github.com/lightningnetwork/lnd/tor v1.1.6
4747
github.com/ltcsuite/ltcd v0.0.0-20190101042124-f37f8bf35796
4848
github.com/miekg/dns v1.1.43
4949
github.com/pkg/errors v0.9.1

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -470,8 +470,8 @@ github.com/lightningnetwork/lnd/ticker v1.1.1 h1:J/b6N2hibFtC7JLV77ULQp++QLtCwT6
470470
github.com/lightningnetwork/lnd/ticker v1.1.1/go.mod h1:waPTRAAcwtu7Ji3+3k+u/xH5GHovTsCoSVpho0KDvdA=
471471
github.com/lightningnetwork/lnd/tlv v1.3.0 h1:exS/KCPEgpOgviIttfiXAPaUqw2rHQrnUOpP7HPBPiY=
472472
github.com/lightningnetwork/lnd/tlv v1.3.0/go.mod h1:pJuiBj1ecr1WWLOtcZ+2+hu9Ey25aJWFIsjmAoPPnmc=
473-
github.com/lightningnetwork/lnd/tor v1.1.5 h1:kZ1Ab4EcPFCny3a179rZGKeDUlgYilEcfKx3tD5+L+k=
474-
github.com/lightningnetwork/lnd/tor v1.1.5/go.mod h1:qSRB8llhAK+a6kaTPWOLLXSZc6Hg8ZC0mq1sUQ/8JfI=
473+
github.com/lightningnetwork/lnd/tor v1.1.6 h1:WHUumk7WgU6BUFsqHuqszI9P6nfhMeIG+rjJBlVE6OE=
474+
github.com/lightningnetwork/lnd/tor v1.1.6/go.mod h1:qSRB8llhAK+a6kaTPWOLLXSZc6Hg8ZC0mq1sUQ/8JfI=
475475
github.com/ltcsuite/ltcd v0.0.0-20190101042124-f37f8bf35796 h1:sjOGyegMIhvgfq5oaue6Td+hxZuf3tDC8lAPrFldqFw=
476476
github.com/ltcsuite/ltcd v0.0.0-20190101042124-f37f8bf35796/go.mod h1:3p7ZTf9V1sNPI5H8P3NkTFF4LuwMdPl2DodF60qAKqY=
477477
github.com/ltcsuite/ltcutil v0.0.0-20181217130922-17f3b04680b6/go.mod h1:8Vg/LTOO0KYa/vlHWJ6XZAevPQThGH5sufO0Hrou/lA=

itest/flakes.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,3 +104,60 @@ func flakeSkipPendingSweepsCheckDarwin(ht *lntest.HarnessTest,
104104
func isDarwin() bool {
105105
return runtime.GOOS == "darwin"
106106
}
107+
108+
// flakeInconsistentHTLCView documents a flake found that the `ListChannels` RPC
109+
// can give inaccurate HTLC states, which is found when we call
110+
// `AssertHTLCNotActive` after a commitment dance is finished. Suppose Carol is
111+
// settling an invoice with Bob, from Bob's PoV, a typical healthy settlement
112+
// flow goes like this:
113+
//
114+
// [DBG] PEER brontide.go:2412: Peer([Carol]): Received UpdateFulfillHTLC
115+
// [DBG] HSWC switch.go:1315: Closed completed SETTLE circuit for ...
116+
// [INF] HSWC switch.go:3044: Forwarded HTLC...
117+
// [DBG] PEER brontide.go:2412: Peer([Carol]): Received CommitSig
118+
// [DBG] PEER brontide.go:2412: Peer([Carol]): Sending RevokeAndAck
119+
// [DBG] PEER brontide.go:2412: Peer([Carol]): Sending CommitSig
120+
// [DBG] PEER brontide.go:2412: Peer([Carol]): Received RevokeAndAck
121+
// [DBG] HSWC link.go:3617: ChannelLink([ChanPoint: Bob=>Carol]): settle-fail-filter: count=1, filter=[0]
122+
// [DBG] HSWC switch.go:3001: Circuit is closing for packet...
123+
//
124+
// Bob receives the preimage, closes the circuit, and exchanges commit sig and
125+
// revoke msgs with Carol. Once Bob receives the `CommitSig` from Carol, the
126+
// HTLC should be removed from his `LocalCommitment` via
127+
// `RevokeCurrentCommitment`.
128+
//
129+
// However, in the test where `AssertHTLCNotActive` is called, although the
130+
// above process is finished, the `ListChannels“ still finds the HTLC. Also note
131+
// that the RPC makes direct call to the channeldb without any locks, which
132+
// should be fine as the struct `OpenChannel.LocalCommitment` is passed by
133+
// value, although we need to double check.
134+
//
135+
// TODO(yy): In order to fix it, we should make the RPC share the same view of
136+
// our channel state machine. Instead of making DB queries, it should instead
137+
// use `lnwallet.LightningChannel` instead to stay consistent.
138+
//
139+
//nolint:ll
140+
func flakeInconsistentHTLCView() {
141+
// Perform a sleep so the commiment dance can be finished before we call
142+
// the ListChannels.
143+
time.Sleep(2 * time.Second)
144+
}
145+
146+
// flakePaymentStreamReturnEarly documents a flake found in the test which
147+
// relies on a given payment to be settled before testing other state changes.
148+
// The issue comes from the payment stream created from the RPC `SendPaymentV2`
149+
// gives premature settled event for a given payment, which is found in,
150+
// - if we force close the channel immediately, we may get an error because
151+
// the commitment dance is not finished.
152+
// - if we subscribe HTLC events immediately, we may get extra events, which
153+
// is also related to the above unfinished commitment dance.
154+
//
155+
// TODO(yy): Make sure we only mark the payment being settled once the
156+
// commitment dance is finished. In addition, we should also fix the exit hop
157+
// logic in the invoice settlement flow to make sure the invoice is only marked
158+
// as settled after the commitment dance is finished.
159+
func flakePaymentStreamReturnEarly() {
160+
// Sleep 2 seconds so the pending HTLCs will be removed from the
161+
// commitment.
162+
time.Sleep(2 * time.Second)
163+
}

itest/lnd_channel_backup_test.go

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1224,13 +1224,7 @@ func testDataLossProtection(ht *lntest.HarnessTest) {
12241224
// payment hashes.
12251225
ht.CompletePaymentRequests(carol, payReqs[numInvoices/2:])
12261226

1227-
// TODO(yy): remove the sleep once the following bug is fixed.
1228-
//
1229-
// While the payment is reported as settled, the commitment
1230-
// dance may not be finished, which leaves several HTLCs in the
1231-
// commitment. Later on, when Carol force closes this channel,
1232-
// she would have HTLCs there and the test won't pass.
1233-
time.Sleep(2 * time.Second)
1227+
flakePaymentStreamReturnEarly()
12341228

12351229
// Now we shutdown Dave, copying over the its temporary
12361230
// database state which has the *prior* channel state over his

itest/lnd_coop_close_with_htlcs_test.go

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -222,11 +222,14 @@ func coopCloseWithHTLCsWithRestart(ht *lntest.HarnessTest, alice,
222222
ht.AssertChannelInactive(bob, chanPoint)
223223
ht.AssertChannelInactive(alice, chanPoint)
224224

225-
// Now restart Alice and Bob.
226-
ht.RestartNode(alice)
227-
ht.RestartNode(bob)
228-
229-
ht.AssertConnected(alice, bob)
225+
// Shutdown both Alice and Bob.
226+
restartAlice := ht.SuspendNode(alice)
227+
restartBob := ht.SuspendNode(bob)
228+
229+
// Once shutdown, restart and connect them.
230+
require.NoError(ht, restartAlice())
231+
require.NoError(ht, restartBob())
232+
ht.EnsureConnected(alice, bob)
230233

231234
// Show that both nodes still see the channel as waiting for close after
232235
// the restart.

itest/lnd_open_channel_test.go

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -837,6 +837,24 @@ func testFundingExpiryBlocksOnPending(ht *lntest.HarnessTest) {
837837
param := lntest.OpenChannelParams{Amt: 100000}
838838
ht.OpenChannelAssertPending(alice, bob, param)
839839

840+
// assertExpiry is a helper closure to assert the FundingExpiryBlocks
841+
// has been updated to the expected value.
842+
assertExpiry := func(hn *node.HarnessNode, expected int32) {
843+
err := wait.NoError(func() error {
844+
pending := ht.AssertNumPendingOpenChannels(hn, 1)
845+
expiry := pending[0].FundingExpiryBlocks
846+
847+
// Exit early if matched.
848+
if expected == expiry {
849+
return nil
850+
}
851+
852+
return fmt.Errorf("want %v, got %v", expected, expiry)
853+
}, wait.DefaultTimeout)
854+
require.NoErrorf(ht, err, "%s: assert FundingExpiryBlocks "+
855+
"timeout", hn.Name())
856+
}
857+
840858
// At this point, the channel's funding transaction will have been
841859
// broadcast, but not confirmed. Alice and Bob's nodes should reflect
842860
// this when queried via RPC. FundingExpiryBlock should decrease
@@ -846,15 +864,16 @@ func testFundingExpiryBlocksOnPending(ht *lntest.HarnessTest) {
846864
const numEmptyBlocks = 3
847865
for i := int32(0); i < numEmptyBlocks; i++ {
848866
expectedVal := lncfg.DefaultMaxWaitNumBlocksFundingConf - i
849-
pending := ht.AssertNumPendingOpenChannels(alice, 1)
850-
require.Equal(ht, expectedVal, pending[0].FundingExpiryBlocks)
851-
pending = ht.AssertNumPendingOpenChannels(bob, 1)
852-
require.Equal(ht, expectedVal, pending[0].FundingExpiryBlocks)
867+
868+
// Assert Alice and Bob have updated the FundingExpiryBlocks.
869+
assertExpiry(alice, expectedVal)
870+
assertExpiry(bob, expectedVal)
871+
853872
ht.MineEmptyBlocks(1)
854873
}
855874

856-
// Mine 1 block to confirm the funding transaction, and then close the
857-
// channel.
875+
// Mine 1 block to confirm the funding transaction so clean up the
876+
// mempool.
858877
ht.MineBlocksAndAssertNumTxes(1, 1)
859878
}
860879

itest/lnd_route_blinding_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -667,6 +667,10 @@ func testRelayingBlindedError(ht *lntest.HarnessTest) {
667667
// so that she can't forward the payment to Dave.
668668
testCase.drainCarolLiquidity(false)
669669

670+
// NOTE: The above draining requires Carol to send a payment, which may
671+
// create extra events, causing the `AssertHtlcEvents` to fail below.
672+
flakePaymentStreamReturnEarly()
673+
670674
// Subscribe to Carol's HTLC events so that we can observe the payment
671675
// coming in.
672676
carolEvents := testCase.carol.RPC.SubscribeHtlcEvents()
@@ -815,6 +819,7 @@ func testErrorHandlingOnChainFailure(ht *lntest.HarnessTest) {
815819
ht.MineBlocksAndAssertNumTxes(1, 1)
816820

817821
// Assert that the HTLC has cleared.
822+
flakeInconsistentHTLCView()
818823
ht.AssertHTLCNotActive(bob, testCase.channels[0], hash[:])
819824
ht.AssertHTLCNotActive(alice, testCase.channels[0], hash[:])
820825

itest/lnd_sweep_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,7 @@ func testSweepCPFPAnchorIncomingTimeout(ht *lntest.HarnessTest) {
500500
carol.RPC.SettleInvoice(preimage[:])
501501

502502
// Bob should have settled his outgoing HTLC with Carol.
503+
flakeInconsistentHTLCView()
503504
ht.AssertHTLCNotActive(bob, bcChanPoint, payHash[:])
504505

505506
// We'll now mine enough blocks to trigger Bob to force close channel
@@ -851,6 +852,7 @@ func testSweepHTLCs(ht *lntest.HarnessTest) {
851852
carol.RPC.SettleInvoice(preimageSettled[:])
852853

853854
// Bob should have settled his outgoing HTLC with Carol.
855+
flakeInconsistentHTLCView()
854856
ht.AssertHTLCNotActive(bob, bcChanPoint, payHashSettled[:])
855857

856858
// Let Carol go offline so we can focus on testing Bob's sweeping

itest/lnd_wipe_fwdpkgs_test.go

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
package itest
22

33
import (
4-
"time"
5-
64
"github.com/lightningnetwork/lnd/chainreg"
75
"github.com/lightningnetwork/lnd/lnrpc"
86
"github.com/lightningnetwork/lnd/lntest"
@@ -49,13 +47,7 @@ func testWipeForwardingPackages(ht *lntest.HarnessTest) {
4947
ht.CompletePaymentRequests(alice, []string{resp.PaymentRequest})
5048
}
5149

52-
// TODO(yy): remove the sleep once the following bug is fixed.
53-
// When the invoice is reported settled, the commitment dance is not
54-
// yet finished, which can cause an error when closing the channel,
55-
// saying there's active HTLCs. We need to investigate this issue and
56-
// reverse the order to, first finish the commitment dance, then report
57-
// the invoice as settled.
58-
time.Sleep(2 * time.Second)
50+
flakePaymentStreamReturnEarly()
5951

6052
// Firstly, Bob force closes the channel.
6153
ht.CloseChannelAssertPending(bob, chanPointAB, true)

lntest/harness_assertion.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ func (h *HarnessTest) assertChannelStatus(hn *node.HarnessNode,
401401
}, DefaultTimeout)
402402

403403
require.NoErrorf(h, err, "%s: timeout checking for channel point: %v",
404-
hn.Name(), cp)
404+
hn.Name(), h.OutPointFromChannelPoint(cp))
405405

406406
return channel
407407
}
@@ -2329,7 +2329,8 @@ func (h *HarnessTest) AssertHtlcEvents(client rpc.HtlcEventsClient,
23292329
event := h.ReceiveHtlcEvent(client)
23302330

23312331
require.Containsf(h, eventTypes, event.EventType,
2332-
"wrong event type, got %v", userType, event.EventType)
2332+
"wrong event type, want %v, got %v", userType,
2333+
event.EventType)
23332334

23342335
events = append(events, event)
23352336

0 commit comments

Comments
 (0)