Commit ae4ceb7

Create a simple FairRwLock to avoid readers starving writers
Because we handle messages (which can take some time, persisting things to disk or validating cryptographic signatures) with the top-level read lock, but require the top-level write lock to connect new peers or handle disconnection, we are particularly sensitive to writer starvation issues. Rust's libstd RwLock does not provide any fairness guarantees, using whatever the OS provides as-is. On Linux, pthreads defaults to starving writers, which Rust's RwLock exposes to us (without any configurability). Here we work around that issue by blocking readers if there are pending writers, optimizing for readable code over perfectly-optimized blocking.
1 parent f909831 commit ae4ceb7
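
To make the failure mode concrete, here is a minimal, self-contained sketch (illustrative only, not part of this commit): a few threads hold a std::sync::RwLock read lock almost continuously, the way message-processing threads do, while a writer standing in for a new peer connection tries to get in. On a reader-preferring implementation such as pthreads on Linux, the write() call below can be delayed indefinitely.

    use std::sync::{Arc, RwLock};
    use std::thread;
    use std::time::Duration;

    fn main() {
        let lock = Arc::new(RwLock::new(0u64));

        // Reader threads: hold the read lock for a while, release it briefly,
        // and immediately re-acquire it, so the lock is rarely reader-free.
        for _ in 0..4 {
            let lock = Arc::clone(&lock);
            thread::spawn(move || loop {
                let _guard = lock.read().unwrap();
                thread::sleep(Duration::from_millis(10)); // simulate message handling
            });
        }

        thread::sleep(Duration::from_millis(50));
        // Writer: with a reader-preferring RwLock this acquisition can be
        // starved; the FairRwLock introduced below makes new readers wait
        // behind it instead.
        *lock.write().unwrap() += 1;
        println!("writer finally got the lock");
    }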

6 files changed: +61 -3 lines changed

lightning/src/debug_sync.rs

Lines changed: 2 additions & 0 deletions
@@ -362,3 +362,5 @@ fn read_write_lockorder_fail() {
 		let _a = a.write().unwrap();
 	}
 }
+
+pub type FairRwLock<T> = RwLock<T>;

lightning/src/lib.rs

Lines changed: 2 additions & 0 deletions
@@ -159,6 +159,8 @@ mod sync {
 	pub use debug_sync::*;
 	#[cfg(not(test))]
 	pub use ::std::sync::{Arc, Mutex, Condvar, MutexGuard, RwLock, RwLockReadGuard};
+	#[cfg(not(test))]
+	pub use crate::util::fairrwlock::FairRwLock;
 }
 
 #[cfg(not(feature = "std"))]

lightning/src/ln/peer_handler.rs

Lines changed: 3 additions & 3 deletions
@@ -33,7 +33,7 @@ use routing::network_graph::{NetworkGraph, NetGraphMsgHandler};
 use prelude::*;
 use io;
 use alloc::collections::LinkedList;
-use sync::{Arc, Mutex, MutexGuard, RwLock};
+use sync::{Arc, Mutex, MutexGuard, FairRwLock};
 use core::sync::atomic::{AtomicBool, Ordering};
 use core::{cmp, hash, fmt, mem};
 use core::ops::Deref;
@@ -428,7 +428,7 @@ pub struct PeerManager<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: De
 		L::Target: Logger,
 		CMH::Target: CustomMessageHandler {
 	message_handler: MessageHandler<CM, RM>,
-	peers: RwLock<PeerHolder<Descriptor>>,
+	peers: FairRwLock<PeerHolder<Descriptor>>,
 	/// Only add to this set when noise completes.
 	/// Locked *after* peers. When an item is removed, it must be removed with the `peers` write
 	/// lock held. Entries may be added with only the `peers` read lock held (though the
@@ -570,7 +570,7 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> P
 
 		PeerManager {
 			message_handler,
-			peers: RwLock::new(PeerHolder {
+			peers: FairRwLock::new(PeerHolder {
 				peers: HashMap::new(),
 			}),
 			node_id_to_descriptor: Mutex::new(HashMap::new()),

lightning/src/sync.rs

Lines changed: 2 additions & 0 deletions
@@ -113,3 +113,5 @@ impl<T> RwLock<T> {
 		Err(())
 	}
 }
+
+pub type FairRwLock<T> = RwLock<T>;

lightning/src/util/fairrwlock.rs

Lines changed: 50 additions & 0 deletions
@@ -0,0 +1,50 @@
+use std::sync::{TryLockResult, LockResult, RwLock, RwLockReadGuard, RwLockWriteGuard};
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+/// Rust libstd's RwLock does not provide any fairness guarantees (and, in fact, when used on
+/// Linux with pthreads under the hood, readers trivially and completely starve writers).
+/// Because we often hold read locks while doing message processing in multiple threads which
+/// can use significant CPU time, with write locks being time-sensitive but relatively small in
+/// CPU time, we can end up with starvation completely blocking incoming connections or pings,
+/// especially during initial graph sync.
+///
+/// Thus, we need to block readers when a writer is pending, which we do with a trivial RwLock
+/// wrapper here. It's not particularly optimized, but provides some reasonable fairness by
+/// blocking readers (by taking the write lock) if there are writers pending when we go to take
+/// a read lock.
+pub struct FairRwLock<T> {
+	lock: RwLock<T>,
+	waiting_writers: AtomicUsize,
+}
+
+impl<T> FairRwLock<T> {
+	pub fn new(t: T) -> Self {
+		Self { lock: RwLock::new(t), waiting_writers: AtomicUsize::new(0) }
+	}
+
+	// Note that all atomic accesses are relaxed, as we do not rely on the atomics here for any
+	// ordering at all, instead relying on the underlying RwLock to provide ordering of unrelated
+	// memory.
+	pub fn write(&self) -> LockResult<RwLockWriteGuard<T>> {
+		self.waiting_writers.fetch_add(1, Ordering::Relaxed);
+		let res = self.lock.write();
+		self.waiting_writers.fetch_sub(1, Ordering::Relaxed);
+		res
+	}
+
+	pub fn try_write(&self) -> TryLockResult<RwLockWriteGuard<T>> {
+		self.lock.try_write()
+	}
+
+	pub fn read(&self) -> LockResult<RwLockReadGuard<T>> {
+		if self.waiting_writers.load(Ordering::Relaxed) != 0 {
+			let _write_queue_lock = self.lock.write();
+		}
+		// Note that we don't consider ensuring that an underlying RwLock allowing writers to
+		// starve readers doesn't exhibit the same behavior here. I'm not aware of any
+		// libstd-backing RwLock which exhibits this behavior, and as documented in the
+		// struct-level documentation, it shouldn't pose a significant issue for our current
+		// codebase.
+		self.lock.read()
+	}
+}
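
As a usage sketch (illustrative, not from the commit; it assumes the FairRwLock from the diff above is in scope, with a Vec standing in for PeerHolder), the wrapper is a drop-in replacement at existing read()/write() call sites:

    use std::sync::Arc;
    use std::thread;
    use std::time::Duration;

    fn main() {
        let peers = Arc::new(FairRwLock::new(Vec::<u32>::new()));

        // Message-processing-style readers that re-acquire the lock in a loop.
        let mut handles = Vec::new();
        for _ in 0..4 {
            let peers = Arc::clone(&peers);
            handles.push(thread::spawn(move || {
                for _ in 0..100 {
                    let _len = peers.read().unwrap().len();
                    thread::sleep(Duration::from_millis(1));
                }
            }));
        }

        // Connect-style writer: because read() defers to pending writers by
        // briefly taking the write lock itself, this completes promptly rather
        // than waiting for a moment with zero readers.
        thread::sleep(Duration::from_millis(5));
        peers.write().unwrap().push(1);

        for h in handles { h.join().unwrap(); }
    }

Note the trade-off visible in read(): a reader arriving while a writer is queued briefly takes and drops the write lock, serializing those readers for a moment; as the doc comment says, this favors readable code over perfectly-optimized blocking.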

lightning/src/util/mod.rs

Lines changed: 2 additions & 0 deletions
@@ -25,6 +25,8 @@ pub mod persist;
 pub(crate) mod atomic_counter;
 pub(crate) mod byte_utils;
 pub(crate) mod chacha20;
+#[cfg(feature = "std")]
+pub(crate) mod fairrwlock;
 #[cfg(fuzzing)]
 pub mod zbase32;
 #[cfg(not(fuzzing))]
