Skip to content

Commit 53af6fa

Browse files
authored
perf(jetsocat,dgw): HashSet to track channels waiting for adjustment (#980)
In the JMUX proxy, a HashSet is used to track channels waiting for window adjustment instead of iterating through the whole list. Throughput and CPU usage are slightly improved. 1. Benchmark results before this patch a. With 50ms delay on loopback 1 connection: [ 1] 0.0000-600.4660 sec 16.0 GBytes 229 Mbits/sec 2 connections: [ 2] 0.0000-605.0918 sec 8.18 GBytes 116 Mbits/sec [ 1] 0.0000-605.1930 sec 8.18 GBytes 116 Mbits/sec [SUM] 0.0000-605.1930 sec 16.4 GBytes 232 Mbits/sec 10 connections: [ 9] 0.0000-626.1216 sec 1.69 GBytes 23.1 Mbits/sec [ 10] 0.0000-626.2644 sec 1.69 GBytes 23.1 Mbits/sec [ 1] 0.0000-626.2643 sec 1.69 GBytes 23.1 Mbits/sec [ 3] 0.0000-626.2633 sec 1.69 GBytes 23.1 Mbits/sec [ 8] 0.0000-626.2619 sec 1.69 GBytes 23.2 Mbits/sec [ 6] 0.0000-626.3628 sec 1.69 GBytes 23.1 Mbits/sec [ 7] 0.0000-626.3644 sec 1.69 GBytes 23.1 Mbits/sec [ 5] 0.0000-626.3640 sec 1.69 GBytes 23.2 Mbits/sec [ 4] 0.0000-626.3627 sec 1.69 GBytes 23.2 Mbits/sec [ 2] 0.0000-626.4901 sec 1.69 GBytes 23.1 Mbits/sec [SUM] 0.0000-626.4902 sec 16.9 GBytes 231 Mbits/sec b. Without delay 1 connection: [ 1] 0.0000-600.0847 sec 1.28 TBytes 18.8 Gbits/sec 2 connections: [ 1] 0.0000-600.0795 sec 656 GBytes 9.39 Gbits/sec [ 2] 0.0000-600.0958 sec 656 GBytes 9.38 Gbits/sec [SUM] 0.0000-600.0958 sec 1.28 TBytes 18.8 Gbits/sec 10 connections: [ 8] 0.0000-600.3803 sec 108 GBytes 1.54 Gbits/sec [ 4] 0.0000-600.3804 sec 108 GBytes 1.54 Gbits/sec [ 2] 0.0000-600.3789 sec 108 GBytes 1.54 Gbits/sec [ 6] 0.0000-600.3795 sec 108 GBytes 1.54 Gbits/sec [ 3] 0.0000-600.3793 sec 108 GBytes 1.54 Gbits/sec [ 9] 0.0000-600.3789 sec 108 GBytes 1.54 Gbits/sec [ 10] 0.0000-600.3802 sec 108 GBytes 1.54 Gbits/sec [ 5] 0.0000-600.3791 sec 108 GBytes 1.54 Gbits/sec [ 1] 0.0000-600.3794 sec 108 GBytes 1.54 Gbits/sec [ 7] 0.0000-600.3803 sec 108 GBytes 1.54 Gbits/sec [SUM] 0.0000-600.3803 sec 1.05 TBytes 15.4 Gbits/sec 2. Benchmark results after this patch a. 
With 50ms delay on loopback 1 connection: [ 1] 0.0000-600.4197 sec 16.1 GBytes 230 Mbits/sec 2 connections: [ 1] 0.0000-605.0387 sec 8.19 GBytes 116 Mbits/sec [ 2] 0.0000-605.1395 sec 8.19 GBytes 116 Mbits/sec [SUM] 0.0000-605.1395 sec 16.4 GBytes 233 Mbits/sec 10 connections: [ 3] 0.0000-625.7966 sec 1.69 GBytes 23.2 Mbits/sec [ 8] 0.0000-625.9956 sec 1.69 GBytes 23.2 Mbits/sec [ 1] 0.0000-626.0966 sec 1.69 GBytes 23.2 Mbits/sec [ 5] 0.0000-626.0964 sec 1.69 GBytes 23.2 Mbits/sec [ 2] 0.0000-626.1983 sec 1.69 GBytes 23.2 Mbits/sec [ 7] 0.0000-626.1964 sec 1.69 GBytes 23.2 Mbits/sec [ 6] 0.0000-626.1964 sec 1.69 GBytes 23.2 Mbits/sec [ 9] 0.0000-626.1981 sec 1.69 GBytes 23.2 Mbits/sec [ 10] 0.0000-626.2973 sec 1.69 GBytes 23.2 Mbits/sec [ 4] 0.0000-626.3984 sec 1.69 GBytes 23.2 Mbits/sec [SUM] 0.0000-626.3986 sec 16.9 GBytes 232 Mbits/sec b. Without delay 1 connection: [ 1] 0.0000-600.0518 sec 1.33 TBytes 19.4 Gbits/sec 2 connections: [ 2] 0.0000-600.0706 sec 681 GBytes 9.75 Gbits/sec [ 1] 0.0000-600.0705 sec 681 GBytes 9.75 Gbits/sec [SUM] 0.0000-600.0705 sec 1.33 TBytes 19.5 Gbits/sec 10 connections: [ 3] 0.0000-600.3608 sec 112 GBytes 1.60 Gbits/sec [ 5] 0.0000-600.3606 sec 112 GBytes 1.60 Gbits/sec [ 6] 0.0000-600.3605 sec 112 GBytes 1.60 Gbits/sec [ 8] 0.0000-600.3598 sec 112 GBytes 1.60 Gbits/sec [ 7] 0.0000-600.3594 sec 112 GBytes 1.60 Gbits/sec [ 1] 0.0000-600.3606 sec 112 GBytes 1.60 Gbits/sec [ 9] 0.0000-600.3597 sec 112 GBytes 1.60 Gbits/sec [ 10] 0.0000-600.3606 sec 112 GBytes 1.60 Gbits/sec [ 2] 0.0000-600.3602 sec 112 GBytes 1.60 Gbits/sec [ 4] 0.0000-600.3719 sec 112 GBytes 1.60 Gbits/sec [SUM] 0.0000-600.3721 sec 1.09 TBytes 16.0 Gbits/sec
1 parent ac60d0e commit 53af6fa

File tree

1 file changed

+11
-24
lines changed

1 file changed

+11
-24
lines changed

crates/jmux-proxy/src/lib.rs

Lines changed: 11 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ use self::id_allocator::IdAllocator;
1717
use anyhow::Context as _;
1818
use bytes::Bytes;
1919
use jmux_proto::{ChannelData, DistantChannelId, Header, LocalChannelId, Message, ReasonCode};
20-
use std::collections::HashMap;
20+
use std::collections::{HashMap, HashSet};
2121
use std::convert::TryFrom;
2222
use std::io;
2323
use std::sync::atomic::{AtomicUsize, Ordering};
@@ -30,16 +30,6 @@ use tokio::task::JoinHandle;
3030
use tokio_util::codec::FramedRead;
3131
use tracing::{Instrument as _, Span};
3232

33-
// PERF/FIXME: changing this parameter to 16 * 1024 greatly improves the throughput,
34-
// but we need to wait until 2025 before making this change.
35-
//
36-
// iperf result for 4 * 1024:
37-
// > 0.0000-10.0490 sec 23.0 GBytes 19.7 Gbits/sec
38-
//
39-
// iperf result for 16 * 1024:
40-
// > 0.0000-10.0393 sec 30.6 GBytes 26.2 Gbits/sec
41-
//
42-
// This is an improvement of ~32.9%.
4333
const MAXIMUM_PACKET_SIZE_IN_BYTES: u16 = 4 * 1024; // 4 kiB
4434
const WINDOW_ADJUSTMENT_THRESHOLD: u32 = 4 * 1024; // 4 kiB
4535

@@ -323,14 +313,13 @@ async fn scheduler_task_impl<T: AsyncRead + Unpin + Send + 'static>(task: JmuxSc
323313
let mut jmux_ctx = JmuxCtx::new();
324314
let mut data_senders: HashMap<LocalChannelId, DataSender> = HashMap::new();
325315
let mut pending_channels: HashMap<LocalChannelId, (DestinationUrl, ApiResponseSender)> = HashMap::new();
316+
let mut needs_window_adjustment: HashSet<LocalChannelId> = HashSet::new();
326317
let (internal_msg_tx, mut internal_msg_rx) = mpsc::unbounded_channel::<InternalMessage>();
327318

328319
// Safety net against poor AsyncRead trait implementations.
329320
const MAX_CONSECUTIVE_PIPE_FAILURES: u8 = 5;
330321
let mut nb_consecutive_pipe_failures = 0;
331322

332-
let mut needs_window_adjustment = false;
333-
334323
loop {
335324
// NOTE: Current task is the "jmux scheduler" or "jmux orchestrator".
336325
// It handles the JMUX context and communicates with other tasks.
@@ -368,7 +357,7 @@ async fn scheduler_task_impl<T: AsyncRead + Unpin + Send + 'static>(task: JmuxSc
368357
if let Some(leftover) = leftover {
369358
if let Err(error) = msg_to_send_tx.send(Message::data(channel.distant_id, leftover)) {
370359
error!(%error, "Couldn't send leftover bytes");
371-
} ;
360+
}
372361
}
373362

374363
let (reader, writer) = stream.into_split();
@@ -646,7 +635,7 @@ async fn scheduler_task_impl<T: AsyncRead + Unpin + Send + 'static>(task: JmuxSc
646635

647636
let _ = data_tx.send(msg.transfer_data);
648637

649-
needs_window_adjustment = true;
638+
needs_window_adjustment.insert(id);
650639
}
651640
Message::Eof(msg) => {
652641
// Per the spec:
@@ -722,24 +711,22 @@ async fn scheduler_task_impl<T: AsyncRead + Unpin + Send + 'static>(task: JmuxSc
722711
}
723712
}
724713
}
725-
_ = core::future::ready(()), if needs_window_adjustment => {
726-
for channel in jmux_ctx.channels.values_mut() {
714+
_ = core::future::ready(()), if !needs_window_adjustment.is_empty() => {
715+
for channel_id in needs_window_adjustment.drain() {
716+
let Some(channel) = jmux_ctx.get_channel_mut(channel_id) else {
717+
continue;
718+
};
719+
727720
let window_adjustment = channel.initial_window_size - channel.remote_window_size;
728721

729722
if window_adjustment > WINDOW_ADJUSTMENT_THRESHOLD {
730-
channel.span.in_scope(|| {
731-
trace!(%channel.distant_id, "Send WindowAdjust message");
732-
});
733-
734723
msg_to_send_tx
735724
.send(Message::window_adjust(channel.distant_id, window_adjustment))
736725
.context("couldn’t send WINDOW ADJUST message")?;
737726

738727
channel.remote_window_size = channel.initial_window_size;
739728
}
740729
}
741-
742-
needs_window_adjustment = false;
743730
}
744731
}
745732
}
@@ -820,7 +807,7 @@ impl DataReaderTask {
820807
trace!(
821808
window_size_now,
822809
chunk_length = chunk.len(),
823-
"Window size insufficient to send full chunk. Truncate and wait."
810+
"Window size insufficient to send full chunk; truncate and wait"
824811
);
825812

826813
if window_size_now > 0 {

0 commit comments

Comments
 (0)