Skip to content

Commit 361acc4

Browse files
authored
Fixed throughput issues by fixing lack of guarentee on SCTP -> DTLS packet ordering (#513)
* Fixed lack of guarentee on SCTP -> DTLS packet ordering by removing tokio::spawn. The ordering mixup triggered SCTPs congestion control, severely limitting throughput in practice. * Formatted * Fixed typo
1 parent d751e94 commit 361acc4

File tree

1 file changed

+25
-29
lines changed

1 file changed

+25
-29
lines changed

sctp/src/association/mod.rs

Lines changed: 25 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use association_internal::*;
1414
use association_stats::*;
1515
use bytes::{Bytes, BytesMut};
1616
use rand::random;
17-
use tokio::sync::{broadcast, mpsc, Mutex, Semaphore};
17+
use tokio::sync::{broadcast, mpsc, Mutex};
1818
use util::Conn;
1919

2020
use crate::chunk::chunk_abort::ChunkAbort;
@@ -487,18 +487,6 @@ impl Association {
487487
let done = Arc::new(AtomicBool::new(false));
488488
let name = Arc::new(name);
489489

490-
let limit = {
491-
#[cfg(test)]
492-
{
493-
1
494-
}
495-
#[cfg(not(test))]
496-
{
497-
8
498-
}
499-
};
500-
501-
let sem = Arc::new(Semaphore::new(limit));
502490
while !done.load(Ordering::Relaxed) {
503491
//log::debug!("[{}] gather_outbound begin", name);
504492
let (packets, continue_loop) = {
@@ -507,35 +495,43 @@ impl Association {
507495
};
508496
//log::debug!("[{}] gather_outbound done with {}", name, packets.len());
509497

510-
// We schedule a new task here for a reason:
511-
// If we don't tokio tends to run the write_loop and read_loop of one connection on the same OS thread
512-
// This means that even though we release the lock above, the read_loop isn't able to take it, simply because it is not being scheduled by tokio
513-
// Doing it this way, tokio schedules this to a new thread, this future is suspended, and the read_loop can make progress
514498
let net_conn = Arc::clone(&net_conn);
515499
let bytes_sent = Arc::clone(&bytes_sent);
516500
let name2 = Arc::clone(&name);
517501
let done2 = Arc::clone(&done);
518-
let sem = Arc::clone(&sem);
519-
sem.acquire().await.unwrap().forget();
520-
tokio::task::spawn(async move {
521-
let mut buf = BytesMut::with_capacity(16 * 1024);
522-
for raw in packets {
523-
buf.clear();
524-
if let Err(err) = raw.marshal_to(&mut buf) {
525-
log::warn!("[{}] failed to serialize a packet: {:?}", name2, err);
526-
} else {
502+
let mut buffer = None;
503+
for raw in packets {
504+
let mut buf = buffer
505+
.take()
506+
.unwrap_or_else(|| BytesMut::with_capacity(16 * 1024));
507+
508+
// We do the marshalling work in a blocking task here for a reason:
509+
// If we don't tokio tends to run the write_loop and read_loop of one connection on the same OS thread
510+
// This means that even though we release the lock above, the read_loop isn't able to take it, simply because it is not being scheduled by tokio
511+
// Doing it this way, tokio schedules this work on a dedicated blocking thread, this future is suspended, and the read_loop can make progress
512+
match tokio::task::spawn_blocking(move || raw.marshal_to(&mut buf).map(|_| buf))
513+
.await
514+
.unwrap()
515+
{
516+
Ok(mut buf) => {
527517
let raw = buf.as_ref();
528518
if let Err(err) = net_conn.send(raw.as_ref()).await {
529519
log::warn!("[{}] failed to write packets on net_conn: {}", name2, err);
530520
done2.store(true, Ordering::Relaxed)
531521
} else {
532522
bytes_sent.fetch_add(raw.len(), Ordering::SeqCst);
533523
}
524+
525+
// Reuse allocation. Have to use options, since spawn blocking can't borrow, has to take ownership.
526+
buf.clear();
527+
buffer = Some(buf);
528+
}
529+
Err(err) => {
530+
log::warn!("[{}] failed to serialize a packet: {:?}", name2, err);
534531
}
535-
//log::debug!("[{}] sending {} bytes done", name, raw.len());
536532
}
537-
sem.add_permits(1);
538-
});
533+
//log::debug!("[{}] sending {} bytes done", name, raw.len());
534+
}
539535

540536
if !continue_loop {
541537
break;

0 commit comments

Comments
 (0)