Skip to content

Commit 05fe93b

Browse files
Back off from running job on high load
1 parent 11335c1 commit 05fe93b

File tree

2 files changed

+27
-12
lines changed

2 files changed

+27
-12
lines changed

src/runner/mod.rs

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -112,17 +112,15 @@ pub fn run_ex<DB: WriteResults + Sync>(
112112
let mut threads = Vec::new();
113113

114114
for worker in &workers {
115-
let join =
116-
scope
117-
.builder()
118-
.name(worker.name().into())
119-
.spawn(move || match worker.run() {
120-
Ok(()) => Ok(()),
121-
Err(r) => {
122-
log::warn!("worker {} failed: {:?}", worker.name(), r);
123-
Err(r)
124-
}
125-
})?;
115+
let join = scope.builder().name(worker.name().into()).spawn(move || {
116+
match worker.run(threads_count) {
117+
Ok(()) => Ok(()),
118+
Err(r) => {
119+
log::warn!("worker {} failed: {:?}", worker.name(), r);
120+
Err(r)
121+
}
122+
}
123+
})?;
126124
threads.push(join);
127125
}
128126
let disk_watcher_thread =

src/runner/worker.rs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use std::sync::{
1212
Mutex,
1313
};
1414
use std::time::Duration;
15+
use systemstat::{Platform, System};
1516

1617
pub(super) struct Worker<'a, DB: WriteResults + Sync> {
1718
name: String,
@@ -55,16 +56,32 @@ impl<'a, DB: WriteResults + Sync> Worker<'a, DB> {
5556
&self.name
5657
}
5758

58-
pub(super) fn run(&self) -> Fallible<()> {
59+
pub(super) fn run(&self, threads_count: usize) -> Fallible<()> {
5960
// This uses a `loop` instead of a `while let` to avoid locking the graph too much
6061
let mut guard = self.graph.lock().unwrap();
62+
let system = System::new();
6163
loop {
6264
self.maybe_cleanup_target_dir()?;
6365
let walk_result = guard.next_task(self.ex, self.db, &self.name);
6466
match walk_result {
6567
WalkResult::Task(id, task) => {
6668
drop(guard);
6769
info!("running task: {:?}", task);
70+
71+
// Wait for 15 seconds before running if the 1 minute load
72+
// average exceeds the thread count. This tries to back off
73+
// from spawning too many jobs on the server, hopefully
74+
// improving performance.
75+
loop {
76+
let avg = system.load_average()?;
77+
78+
if avg.one > threads_count as f32 {
79+
std::thread::sleep(std::time::Duration::new(15, 0));
80+
} else {
81+
break;
82+
}
83+
}
84+
6885
let res = task.run(
6986
self.config,
7087
self.workspace,

0 commit comments

Comments
 (0)