Skip to content

Commit 65dccea

Browse files
committed
pytest: fix flake in test_reconnect_signed
We can fail the fundchannel because of a reconnect race: funder tells us to reconnect so fast that we are still cleaning up from last time. We deliberately defer clean up, to give the subds a chance to process any final messages. However, on reconnection we force them to exit immediately: but this causes new `connect` commands to see the exit and fail. The workaround is to do this cleanup when a `connect` command is issued (as well as the current case, which covers an automatic reconnection or an incoming reconnection) ``` 2025-05-09T00:40:37.1769508Z def test_reconnect_signed(node_factory): 2025-05-09T00:40:37.1770273Z # This will fail *after* both sides consider channel opening. 2025-05-09T00:40:37.1770850Z disconnects = ['<WIRE_FUNDING_SIGNED'] 2025-05-09T00:40:37.1771298Z if EXPERIMENTAL_DUAL_FUND: 2025-05-09T00:40:37.1771735Z disconnects = ['<WIRE_COMMITMENT_SIGNED'] 2025-05-09T00:40:37.1772155Z 2025-05-09T00:40:37.1772598Z l1 = node_factory.get_node(may_reconnect=True, disconnect=disconnects) 2025-05-09T00:40:37.1773210Z l2 = node_factory.get_node(may_reconnect=True) 2025-05-09T00:40:37.1773632Z 2025-05-09T00:40:37.1773917Z l1.fundwallet(2000000) 2025-05-09T00:40:37.1774268Z 2025-05-09T00:40:37.1774611Z l1.rpc.connect(l2.info['id'], 'localhost', l2.port) 2025-05-09T00:40:37.1775151Z > l1.rpc.fundchannel(l2.info['id'], CHANNEL_SIZE) 2025-05-09T00:40:37.1775388Z 2025-05-09T00:40:37.1775485Z tests/test_connection.py:667: ... 2025-05-09T00:40:37.1799527Z > raise RpcError(method, payload, resp['error']) 2025-05-09T00:40:37.1800993Z E pyln.client.lightning.RpcError: RPC call failed: method: fundchannel, payload: {'id': '022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59', 'amount': 50000, 'announce': True}, error: {'code': -1, 'message': 'Disconnected', 'data': {'id': '022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59', 'method': 'openchannel_update'}} ``` Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
1 parent 9ce3f5d commit 65dccea

File tree

3 files changed

+19
-12
lines changed

3 files changed

+19
-12
lines changed

lightningd/connect_control.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,12 @@ static struct command_result *json_connect(struct command *cmd,
236236
&peer->addr);
237237
}
238238

239+
/* When a peer disconnects, we give subds time to clean themselves up
240+
* (this lets connectd ensure they've seen the final messages). But
241+
* now it's going to try to reconnect, we've gotta force them out. */
242+
if (peer)
243+
peer_channels_cleanup(peer);
244+
239245
subd_send_msg(cmd->ld->connectd,
240246
take(towire_connectd_connect_to_peer(NULL, &id_addr.id, addr, true)));
241247

lightningd/peer_control.c

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -177,16 +177,10 @@ void maybe_delete_peer(struct peer *peer)
177177
delete_peer(peer);
178178
}
179179

180-
static void peer_channels_cleanup(struct lightningd *ld,
181-
const struct node_id *id)
180+
void peer_channels_cleanup(struct peer *peer)
182181
{
183-
struct peer *peer;
184182
struct channel *c, **channels;
185183

186-
peer = peer_by_id(ld, id);
187-
if (!peer)
188-
return;
189-
190184
/* Freeing channels can free peer, so gather first. */
191185
channels = tal_arr(tmpctx, struct channel *, 0);
192186
list_for_each(&peer->channels, c, list)
@@ -1726,11 +1720,6 @@ void peer_connected(struct lightningd *ld, const u8 *msg)
17261720
fatal("Connectd gave bad CONNECT_PEER_CONNECTED message %s",
17271721
tal_hex(msg, msg));
17281722

1729-
/* When a peer disconnects, we give subds time to clean themselves up
1730-
* (this lets connectd ensure they've seen the final messages). But
1731-
* now it's reconnected, we've gotta force them out. */
1732-
peer_channels_cleanup(ld, &id);
1733-
17341723
/* If we connected, and it's a normal address */
17351724
if (!hook_payload->incoming
17361725
&& hook_payload->addr.itype == ADDR_INTERNAL_WIREADDR
@@ -1743,6 +1732,15 @@ void peer_connected(struct lightningd *ld, const u8 *msg)
17431732
/* If we're already dealing with this peer, hand off to correct
17441733
* subdaemon. Otherwise, we'll hand to openingd to wait there. */
17451734
peer = peer_by_id(ld, &id);
1735+
if (peer) {
1736+
/* When a peer disconnects, we give subds time to clean themselves up
1737+
* (this lets connectd ensure they've seen the final messages). But
1738+
* now it's reconnected, we've gotta force them out. This might free
1739+
* the peer! */
1740+
peer_channels_cleanup(peer);
1741+
peer = peer_by_id(ld, &id);
1742+
}
1743+
17461744
if (!peer) {
17471745
/* If we connected to them, we know this is a good address. */
17481746
peer = new_peer(ld, 0, &id, &hook_payload->addr,

lightningd/peer_control.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,9 @@ command_find_channel(struct command *cmd,
165165
const char *buffer, const jsmntok_t *tok,
166166
struct channel **channel);
167167

168+
/* We do this lazily, when reconnecting */
169+
void peer_channels_cleanup(struct peer *peer);
170+
168171
/* Ancient (0.7.0 and before) releases could create invalid commitment txs! */
169172
bool invalid_last_tx(const struct bitcoin_tx *tx);
170173

0 commit comments

Comments
 (0)