2
2
3
3
use anyhow:: { anyhow, Context } ;
4
4
use bytes:: { Bytes , BytesMut } ;
5
- use futures_lite:: stream:: Stream ;
5
+ use futures_lite:: { stream:: Stream , StreamExt } ;
6
+ use futures_util:: future:: FutureExt ;
6
7
use genawaiter:: sync:: { Co , Gen } ;
7
8
use iroh_net:: {
8
- dialer :: Dialer ,
9
- endpoint:: { get_remote_node_id , Connection } ,
9
+ conn_manager :: { ConnDirection , ConnInfo , ConnManager } ,
10
+ endpoint:: Connection ,
10
11
key:: PublicKey ,
11
12
AddrInfo , Endpoint , NodeAddr ,
12
13
} ;
@@ -15,7 +16,7 @@ use rand_core::SeedableRng;
15
16
use std:: { collections:: HashMap , future:: Future , pin:: Pin , sync:: Arc , task:: Poll , time:: Instant } ;
16
17
use tokio:: {
17
18
sync:: { broadcast, mpsc, oneshot} ,
18
- task:: JoinHandle ,
19
+ task:: { JoinHandle , JoinSet } ,
19
20
} ;
20
21
use tracing:: { debug, error_span, trace, warn, Instrument } ;
21
22
@@ -82,7 +83,7 @@ impl Gossip {
82
83
/// Spawn a gossip actor and get a handle for it
83
84
pub fn from_endpoint ( endpoint : Endpoint , config : proto:: Config , my_addr : & AddrInfo ) -> Self {
84
85
let peer_id = endpoint. node_id ( ) ;
85
- let dialer = Dialer :: new ( endpoint. clone ( ) ) ;
86
+ let conn_manager = ConnManager :: new ( endpoint. clone ( ) , GOSSIP_ALPN ) ;
86
87
let state = proto:: State :: new (
87
88
peer_id,
88
89
encode_peer_data ( my_addr) . unwrap ( ) ,
@@ -97,12 +98,12 @@ impl Gossip {
97
98
let actor = Actor {
98
99
endpoint,
99
100
state,
100
- dialer,
101
+ conn_manager,
102
+ conn_tasks : Default :: default ( ) ,
101
103
to_actor_rx,
102
104
in_event_rx,
103
105
in_event_tx,
104
106
on_endpoints_rx,
105
- conns : Default :: default ( ) ,
106
107
conn_send_tx : Default :: default ( ) ,
107
108
pending_sends : Default :: default ( ) ,
108
109
timers : Timers :: new ( ) ,
@@ -231,9 +232,7 @@ impl Gossip {
231
232
///
232
233
/// Make sure to check the ALPN protocol yourself before passing the connection.
233
234
pub async fn handle_connection ( & self , conn : Connection ) -> anyhow:: Result < ( ) > {
234
- let peer_id = get_remote_node_id ( & conn) ?;
235
- self . send ( ToActor :: ConnIncoming ( peer_id, ConnOrigin :: Accept , conn) )
236
- . await ?;
235
+ self . send ( ToActor :: ConnIncoming ( conn) ) . await ?;
237
236
Ok ( ( ) )
238
237
}
239
238
@@ -283,19 +282,11 @@ impl Future for JoinTopicFut {
283
282
}
284
283
}
285
284
286
- /// Whether a connection is initiated by us (Dial) or by the remote peer (Accept)
287
- #[ derive( Debug ) ]
288
- enum ConnOrigin {
289
- Accept ,
290
- Dial ,
291
- }
292
-
293
285
/// Input messages for the gossip [`Actor`].
294
286
#[ derive( derive_more:: Debug ) ]
295
287
enum ToActor {
296
- /// Handle a new QUIC connection, either from accept (external to the actor) or from connect
297
- /// (happens internally in the actor).
298
- ConnIncoming ( PublicKey , ConnOrigin , #[ debug( skip) ] Connection ) ,
288
+ /// Handle a new incoming QUIC connection.
289
+ ConnIncoming ( iroh_net:: endpoint:: Connection ) ,
299
290
/// Join a topic with a list of peers. Reply with oneshot once at least one peer joined.
300
291
Join (
301
292
TopicId ,
@@ -329,8 +320,8 @@ struct Actor {
329
320
/// Protocol state
330
321
state : proto:: State < PublicKey , StdRng > ,
331
322
endpoint : Endpoint ,
332
- /// Dial machine to connect to peers
333
- dialer : Dialer ,
323
+ /// Connection manager to dial and accept connections.
324
+ conn_manager : ConnManager ,
334
325
/// Input messages to the actor
335
326
to_actor_rx : mpsc:: Receiver < ToActor > ,
336
327
/// Sender for the state input (cloned into the connection loops)
@@ -341,10 +332,10 @@ struct Actor {
341
332
on_endpoints_rx : mpsc:: Receiver < Vec < iroh_net:: config:: Endpoint > > ,
342
333
/// Queued timers
343
334
timers : Timers < Timer > ,
344
- /// Currently opened quinn connections to peers
345
- conns : HashMap < PublicKey , Connection > ,
346
335
/// Channels to send outbound messages into the connection loops
347
336
conn_send_tx : HashMap < PublicKey , mpsc:: Sender < ProtoMessage > > ,
337
+ /// Connection loop tasks
338
+ conn_tasks : JoinSet < ( PublicKey , anyhow:: Result < ( ) > ) > ,
348
339
/// Queued messages that were to be sent before a dial completed
349
340
pending_sends : HashMap < PublicKey , Vec < ProtoMessage > > ,
350
341
/// Broadcast senders for active topic subscriptions from the application
@@ -353,6 +344,12 @@ struct Actor {
353
344
subscribers_all : Option < broadcast:: Sender < ( TopicId , Event ) > > ,
354
345
}
355
346
347
+ impl Drop for Actor {
348
+ fn drop ( & mut self ) {
349
+ self . conn_tasks . abort_all ( ) ;
350
+ }
351
+ }
352
+
356
353
impl Actor {
357
354
pub async fn run ( mut self ) -> anyhow:: Result < ( ) > {
358
355
let mut i = 0 ;
@@ -384,15 +381,27 @@ impl Actor {
384
381
}
385
382
}
386
383
}
387
- ( peer_id , res) = self . dialer . next_conn ( ) => {
388
- trace!( ?i, "tick: dialer " ) ;
384
+ Some ( res) = self . conn_manager . next ( ) => {
385
+ trace!( ?i, "tick: conn_manager " ) ;
389
386
match res {
390
- Ok ( conn) => {
391
- debug!( peer = ?peer_id, "dial successful" ) ;
392
- self . handle_to_actor_msg( ToActor :: ConnIncoming ( peer_id, ConnOrigin :: Dial , conn) , Instant :: now( ) ) . await . context( "dialer.next -> conn -> handle_to_actor_msg" ) ?;
393
- }
387
+ Ok ( conn) => self . handle_new_connection( conn) . await ,
394
388
Err ( err) => {
395
- warn!( peer = ?peer_id, "dial failed: {err}" ) ;
389
+ self . handle_in_event( InEvent :: PeerDisconnected ( err. node_id) , Instant :: now( ) ) . await ?;
390
+ }
391
+ }
392
+ }
393
+ Some ( res) = self . conn_tasks. join_next( ) , if !self . conn_tasks. is_empty( ) => {
394
+ match res {
395
+ Err ( err) if !err. is_cancelled( ) => warn!( ?err, "connection loop panicked" ) ,
396
+ Err ( _err) => { } ,
397
+ Ok ( ( node_id, result) ) => {
398
+ self . conn_manager. remove( & node_id) ;
399
+ self . conn_send_tx. remove( & node_id) ;
400
+ self . handle_in_event( InEvent :: PeerDisconnected ( node_id) , Instant :: now( ) ) . await ?;
401
+ match result {
402
+ Ok ( ( ) ) => debug!( peer=%node_id. fmt_short( ) , "connection closed without error" ) ,
403
+ Err ( err) => debug!( peer=%node_id. fmt_short( ) , "connection closed with error {err:?}" ) ,
404
+ }
396
405
}
397
406
}
398
407
}
@@ -421,38 +430,9 @@ impl Actor {
421
430
async fn handle_to_actor_msg ( & mut self , msg : ToActor , now : Instant ) -> anyhow:: Result < ( ) > {
422
431
trace ! ( "handle to_actor {msg:?}" ) ;
423
432
match msg {
424
- ToActor :: ConnIncoming ( peer_id, origin, conn) => {
425
- self . conns . insert ( peer_id, conn. clone ( ) ) ;
426
- self . dialer . abort_dial ( & peer_id) ;
427
- let ( send_tx, send_rx) = mpsc:: channel ( SEND_QUEUE_CAP ) ;
428
- self . conn_send_tx . insert ( peer_id, send_tx. clone ( ) ) ;
429
-
430
- // Spawn a task for this connection
431
- let in_event_tx = self . in_event_tx . clone ( ) ;
432
- tokio:: spawn (
433
- async move {
434
- debug ! ( "connection established" ) ;
435
- match connection_loop ( peer_id, conn, origin, send_rx, & in_event_tx) . await {
436
- Ok ( ( ) ) => {
437
- debug ! ( "connection closed without error" )
438
- }
439
- Err ( err) => {
440
- debug ! ( "connection closed with error {err:?}" )
441
- }
442
- }
443
- in_event_tx
444
- . send ( InEvent :: PeerDisconnected ( peer_id) )
445
- . await
446
- . ok ( ) ;
447
- }
448
- . instrument ( error_span ! ( "gossip_conn" , peer = %peer_id. fmt_short( ) ) ) ,
449
- ) ;
450
-
451
- // Forward queued pending sends
452
- if let Some ( send_queue) = self . pending_sends . remove ( & peer_id) {
453
- for msg in send_queue {
454
- send_tx. send ( msg) . await ?;
455
- }
433
+ ToActor :: ConnIncoming ( conn) => {
434
+ if let Err ( err) = self . conn_manager . accept ( conn) {
435
+ warn ! ( ?err, "failed to accept connection" ) ;
456
436
}
457
437
}
458
438
ToActor :: Join ( topic_id, peers, reply) => {
@@ -502,9 +482,6 @@ impl Actor {
502
482
} else {
503
483
debug ! ( "handle in_event {event:?}" ) ;
504
484
} ;
505
- if let InEvent :: PeerDisconnected ( peer) = & event {
506
- self . conn_send_tx . remove ( peer) ;
507
- }
508
485
let out = self . state . handle ( event, now) ;
509
486
for event in out {
510
487
if matches ! ( event, OutEvent :: ScheduleTimer ( _, _) ) {
@@ -518,10 +495,13 @@ impl Actor {
518
495
if let Err ( _err) = send. send ( message) . await {
519
496
warn ! ( "conn receiver for {peer_id:?} dropped" ) ;
520
497
self . conn_send_tx . remove ( & peer_id) ;
498
+ self . conn_manager . remove ( & peer_id) ;
521
499
}
522
500
} else {
523
- debug ! ( peer = ?peer_id, "dial" ) ;
524
- self . dialer . queue_dial ( peer_id, GOSSIP_ALPN ) ;
501
+ if !self . conn_manager . is_pending ( & peer_id) {
502
+ debug ! ( peer = ?peer_id, "dial" ) ;
503
+ self . conn_manager . dial ( peer_id) ;
504
+ }
525
505
// TODO: Enforce max length
526
506
self . pending_sends . entry ( peer_id) . or_default ( ) . push ( message) ;
527
507
}
@@ -544,12 +524,11 @@ impl Actor {
544
524
self . timers . insert ( now + delay, timer) ;
545
525
}
546
526
OutEvent :: DisconnectPeer ( peer) => {
547
- if let Some ( conn) = self . conns . remove ( & peer) {
548
- conn. close ( 0u8 . into ( ) , b"close from disconnect" ) ;
549
- }
550
527
self . conn_send_tx . remove ( & peer) ;
551
528
self . pending_sends . remove ( & peer) ;
552
- self . dialer . abort_dial ( & peer) ;
529
+ if let Some ( conn) = self . conn_manager . remove ( & peer) {
530
+ conn. close ( 0u8 . into ( ) , b"close from disconnect" ) ;
531
+ }
553
532
}
554
533
OutEvent :: PeerData ( node_id, data) => match decode_peer_data ( & data) {
555
534
Err ( err) => warn ! ( "Failed to decode {data:?} from {node_id}: {err}" ) ,
@@ -566,6 +545,33 @@ impl Actor {
566
545
Ok ( ( ) )
567
546
}
568
547
548
+ async fn handle_new_connection ( & mut self , new_conn : ConnInfo ) {
549
+ let ConnInfo {
550
+ conn,
551
+ node_id,
552
+ direction,
553
+ } = new_conn;
554
+ let ( send_tx, send_rx) = mpsc:: channel ( SEND_QUEUE_CAP ) ;
555
+ self . conn_send_tx . insert ( node_id, send_tx. clone ( ) ) ;
556
+
557
+ // Spawn a task for this connection
558
+ let pending_sends = self . pending_sends . remove ( & node_id) ;
559
+ let in_event_tx = self . in_event_tx . clone ( ) ;
560
+ debug ! ( peer=%node_id. fmt_short( ) , ?direction, "connection established" ) ;
561
+ self . conn_tasks . spawn (
562
+ connection_loop (
563
+ node_id,
564
+ conn,
565
+ direction,
566
+ send_rx,
567
+ in_event_tx,
568
+ pending_sends,
569
+ )
570
+ . map ( move |r| ( node_id, r) )
571
+ . instrument ( error_span ! ( "gossip_conn" , peer = %node_id. fmt_short( ) ) ) ,
572
+ ) ;
573
+ }
574
+
569
575
fn subscribe_all ( & mut self ) -> broadcast:: Receiver < ( TopicId , Event ) > {
570
576
if let Some ( tx) = self . subscribers_all . as_mut ( ) {
571
577
tx. subscribe ( )
@@ -602,16 +608,26 @@ async fn wait_for_neighbor_up(mut sub: broadcast::Receiver<Event>) -> anyhow::Re
602
608
async fn connection_loop (
603
609
from : PublicKey ,
604
610
conn : Connection ,
605
- origin : ConnOrigin ,
611
+ direction : ConnDirection ,
606
612
mut send_rx : mpsc:: Receiver < ProtoMessage > ,
607
- in_event_tx : & mpsc:: Sender < InEvent > ,
613
+ in_event_tx : mpsc:: Sender < InEvent > ,
614
+ mut pending_sends : Option < Vec < ProtoMessage > > ,
608
615
) -> anyhow:: Result < ( ) > {
609
- let ( mut send, mut recv) = match origin {
610
- ConnOrigin :: Accept => conn. accept_bi ( ) . await ?,
611
- ConnOrigin :: Dial => conn. open_bi ( ) . await ?,
616
+ let ( mut send, mut recv) = match direction {
617
+ ConnDirection :: Accept => conn. accept_bi ( ) . await ?,
618
+ ConnDirection :: Dial => conn. open_bi ( ) . await ?,
612
619
} ;
613
620
let mut send_buf = BytesMut :: new ( ) ;
614
621
let mut recv_buf = BytesMut :: new ( ) ;
622
+
623
+ // Forward queued pending sends
624
+ if let Some ( mut send_queue) = pending_sends. take ( ) {
625
+ for msg in send_queue. drain ( ..) {
626
+ write_message ( & mut send, & mut send_buf, & msg) . await ?;
627
+ }
628
+ }
629
+
630
+ // loop over sending and receiving messages
615
631
loop {
616
632
tokio:: select! {
617
633
biased;
0 commit comments